vendor: remove unused dependencies

This commit is contained in:
Fabian Reinartz 2016-12-31 09:32:55 +01:00
parent e631a1260d
commit 89b6b3be9f
55 changed files with 0 additions and 14241 deletions

View file

@ -1,24 +0,0 @@
Copyright 2012 Suryandaru Triandana <syndtr@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,349 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"encoding/binary"
"fmt"
"io"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/storage"
)
// ErrBatchCorrupted records reason of batch corruption. This error will be
// wrapped with errors.ErrCorrupted.
type ErrBatchCorrupted struct {
Reason string
}
func (e *ErrBatchCorrupted) Error() string {
return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason)
}
func newErrBatchCorrupted(reason string) error {
return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason})
}
const (
batchHeaderLen = 8 + 4
batchGrowRec = 3000
batchBufioSize = 16
)
// BatchReplay wraps basic batch operations.
type BatchReplay interface {
Put(key, value []byte)
Delete(key []byte)
}
type batchIndex struct {
keyType keyType
keyPos, keyLen int
valuePos, valueLen int
}
func (index batchIndex) k(data []byte) []byte {
return data[index.keyPos : index.keyPos+index.keyLen]
}
func (index batchIndex) v(data []byte) []byte {
if index.valueLen != 0 {
return data[index.valuePos : index.valuePos+index.valueLen]
}
return nil
}
func (index batchIndex) kv(data []byte) (key, value []byte) {
return index.k(data), index.v(data)
}
// Batch is a write batch.
type Batch struct {
data []byte
index []batchIndex
// internalLen is sums of key/value pair length plus 8-bytes internal key.
internalLen int
}
func (b *Batch) grow(n int) {
o := len(b.data)
if cap(b.data)-o < n {
div := 1
if len(b.index) > batchGrowRec {
div = len(b.index) / batchGrowRec
}
ndata := make([]byte, o, o+n+o/div)
copy(ndata, b.data)
b.data = ndata
}
}
func (b *Batch) appendRec(kt keyType, key, value []byte) {
n := 1 + binary.MaxVarintLen32 + len(key)
if kt == keyTypeVal {
n += binary.MaxVarintLen32 + len(value)
}
b.grow(n)
index := batchIndex{keyType: kt}
o := len(b.data)
data := b.data[:o+n]
data[o] = byte(kt)
o++
o += binary.PutUvarint(data[o:], uint64(len(key)))
index.keyPos = o
index.keyLen = len(key)
o += copy(data[o:], key)
if kt == keyTypeVal {
o += binary.PutUvarint(data[o:], uint64(len(value)))
index.valuePos = o
index.valueLen = len(value)
o += copy(data[o:], value)
}
b.data = data[:o]
b.index = append(b.index, index)
b.internalLen += index.keyLen + index.valueLen + 8
}
// Put appends 'put operation' of the given key/value pair to the batch.
// It is safe to modify the contents of the argument after Put returns but not
// before.
func (b *Batch) Put(key, value []byte) {
b.appendRec(keyTypeVal, key, value)
}
// Delete appends 'delete operation' of the given key to the batch.
// It is safe to modify the contents of the argument after Delete returns but
// not before.
func (b *Batch) Delete(key []byte) {
b.appendRec(keyTypeDel, key, nil)
}
// Dump dumps batch contents. The returned slice can be loaded into the
// batch using Load method.
// The returned slice is not its own copy, so the contents should not be
// modified.
func (b *Batch) Dump() []byte {
return b.data
}
// Load loads given slice into the batch. Previous contents of the batch
// will be discarded.
// The given slice will not be copied and will be used as batch buffer, so
// it is not safe to modify the contents of the slice.
func (b *Batch) Load(data []byte) error {
return b.decode(data, -1)
}
// Replay replays batch contents.
func (b *Batch) Replay(r BatchReplay) error {
for _, index := range b.index {
switch index.keyType {
case keyTypeVal:
r.Put(index.k(b.data), index.v(b.data))
case keyTypeDel:
r.Delete(index.k(b.data))
}
}
return nil
}
// Len returns number of records in the batch.
func (b *Batch) Len() int {
return len(b.index)
}
// Reset resets the batch.
func (b *Batch) Reset() {
b.data = b.data[:0]
b.index = b.index[:0]
b.internalLen = 0
}
func (b *Batch) replayInternal(fn func(i int, kt keyType, k, v []byte) error) error {
for i, index := range b.index {
if err := fn(i, index.keyType, index.k(b.data), index.v(b.data)); err != nil {
return err
}
}
return nil
}
func (b *Batch) append(p *Batch) {
ob := len(b.data)
oi := len(b.index)
b.data = append(b.data, p.data...)
b.index = append(b.index, p.index...)
b.internalLen += p.internalLen
// Updating index offset.
if ob != 0 {
for ; oi < len(b.index); oi++ {
index := &b.index[oi]
index.keyPos += ob
if index.valueLen != 0 {
index.valuePos += ob
}
}
}
}
func (b *Batch) decode(data []byte, expectedLen int) error {
b.data = data
b.index = b.index[:0]
b.internalLen = 0
err := decodeBatch(data, func(i int, index batchIndex) error {
b.index = append(b.index, index)
b.internalLen += index.keyLen + index.valueLen + 8
return nil
})
if err != nil {
return err
}
if expectedLen >= 0 && len(b.index) != expectedLen {
return newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", expectedLen, len(b.index)))
}
return nil
}
func (b *Batch) putMem(seq uint64, mdb *memdb.DB) error {
var ik []byte
for i, index := range b.index {
ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
if err := mdb.Put(ik, index.v(b.data)); err != nil {
return err
}
}
return nil
}
func (b *Batch) revertMem(seq uint64, mdb *memdb.DB) error {
var ik []byte
for i, index := range b.index {
ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType)
if err := mdb.Delete(ik); err != nil {
return err
}
}
return nil
}
func newBatch() interface{} {
return &Batch{}
}
func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error {
var index batchIndex
for i, o := 0, 0; o < len(data); i++ {
// Key type.
index.keyType = keyType(data[o])
if index.keyType > keyTypeVal {
return newErrBatchCorrupted(fmt.Sprintf("bad record: invalid type %#x", uint(index.keyType)))
}
o++
// Key.
x, n := binary.Uvarint(data[o:])
o += n
if n <= 0 || o+int(x) > len(data) {
return newErrBatchCorrupted("bad record: invalid key length")
}
index.keyPos = o
index.keyLen = int(x)
o += index.keyLen
// Value.
if index.keyType == keyTypeVal {
x, n = binary.Uvarint(data[o:])
o += n
if n <= 0 || o+int(x) > len(data) {
return newErrBatchCorrupted("bad record: invalid value length")
}
index.valuePos = o
index.valueLen = int(x)
o += index.valueLen
} else {
index.valuePos = 0
index.valueLen = 0
}
if err := fn(i, index); err != nil {
return err
}
}
return nil
}
func decodeBatchToMem(data []byte, expectSeq uint64, mdb *memdb.DB) (seq uint64, batchLen int, err error) {
seq, batchLen, err = decodeBatchHeader(data)
if err != nil {
return 0, 0, err
}
if seq < expectSeq {
return 0, 0, newErrBatchCorrupted("invalid sequence number")
}
data = data[batchHeaderLen:]
var ik []byte
var decodedLen int
err = decodeBatch(data, func(i int, index batchIndex) error {
if i >= batchLen {
return newErrBatchCorrupted("invalid records length")
}
ik = makeInternalKey(ik, index.k(data), seq+uint64(i), index.keyType)
if err := mdb.Put(ik, index.v(data)); err != nil {
return err
}
decodedLen++
return nil
})
if err == nil && decodedLen != batchLen {
err = newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", batchLen, decodedLen))
}
return
}
func encodeBatchHeader(dst []byte, seq uint64, batchLen int) []byte {
dst = ensureBuffer(dst, batchHeaderLen)
binary.LittleEndian.PutUint64(dst, seq)
binary.LittleEndian.PutUint32(dst[8:], uint32(batchLen))
return dst
}
func decodeBatchHeader(data []byte) (seq uint64, batchLen int, err error) {
if len(data) < batchHeaderLen {
return 0, 0, newErrBatchCorrupted("too short")
}
seq = binary.LittleEndian.Uint64(data)
batchLen = int(binary.LittleEndian.Uint32(data[8:]))
if batchLen < 0 {
return 0, 0, newErrBatchCorrupted("invalid records length")
}
return
}
func batchesLen(batches []*Batch) int {
batchLen := 0
for _, batch := range batches {
batchLen += batch.Len()
}
return batchLen
}
func writeBatchesWithHeader(wr io.Writer, batches []*Batch, seq uint64) error {
if _, err := wr.Write(encodeBatchHeader(nil, seq, batchesLen(batches))); err != nil {
return err
}
for _, batch := range batches {
if _, err := wr.Write(batch.data); err != nil {
return err
}
}
return nil
}

View file

@ -1,705 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package cache provides interface and implementation of a cache algorithms.
package cache
import (
"sync"
"sync/atomic"
"unsafe"
"github.com/syndtr/goleveldb/leveldb/util"
)
// Cacher provides interface to implements a caching functionality.
// An implementation must be safe for concurrent use.
type Cacher interface {
// Capacity returns cache capacity.
Capacity() int
// SetCapacity sets cache capacity.
SetCapacity(capacity int)
// Promote promotes the 'cache node'.
Promote(n *Node)
// Ban evicts the 'cache node' and prevent subsequent 'promote'.
Ban(n *Node)
// Evict evicts the 'cache node'.
Evict(n *Node)
// EvictNS evicts 'cache node' with the given namespace.
EvictNS(ns uint64)
// EvictAll evicts all 'cache node'.
EvictAll()
// Close closes the 'cache tree'
Close() error
}
// Value is a 'cacheable object'. It may implements util.Releaser, if
// so the the Release method will be called once object is released.
type Value interface{}
// NamespaceGetter provides convenient wrapper for namespace.
type NamespaceGetter struct {
Cache *Cache
NS uint64
}
// Get simply calls Cache.Get() method.
func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
return g.Cache.Get(g.NS, key, setFunc)
}
// The hash tables implementation is based on:
// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu,
// Kunlong Zhang, and Michael Spear.
// ACM Symposium on Principles of Distributed Computing, Jul 2014.
const (
mInitialSize = 1 << 4
mOverflowThreshold = 1 << 5
mOverflowGrowThreshold = 1 << 7
)
type mBucket struct {
mu sync.Mutex
node []*Node
frozen bool
}
func (b *mBucket) freeze() []*Node {
b.mu.Lock()
defer b.mu.Unlock()
if !b.frozen {
b.frozen = true
}
return b.node
}
func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) {
b.mu.Lock()
if b.frozen {
b.mu.Unlock()
return
}
// Scan the node.
for _, n := range b.node {
if n.hash == hash && n.ns == ns && n.key == key {
atomic.AddInt32(&n.ref, 1)
b.mu.Unlock()
return true, false, n
}
}
// Get only.
if noset {
b.mu.Unlock()
return true, false, nil
}
// Create node.
n = &Node{
r: r,
hash: hash,
ns: ns,
key: key,
ref: 1,
}
// Add node to bucket.
b.node = append(b.node, n)
bLen := len(b.node)
b.mu.Unlock()
// Update counter.
grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold
if bLen > mOverflowThreshold {
grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold
}
// Grow.
if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
nhLen := len(h.buckets) << 1
nh := &mNode{
buckets: make([]unsafe.Pointer, nhLen),
mask: uint32(nhLen) - 1,
pred: unsafe.Pointer(h),
growThreshold: int32(nhLen * mOverflowThreshold),
shrinkThreshold: int32(nhLen >> 1),
}
ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
if !ok {
panic("BUG: failed swapping head")
}
go nh.initBuckets()
}
return true, true, n
}
func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) {
b.mu.Lock()
if b.frozen {
b.mu.Unlock()
return
}
// Scan the node.
var (
n *Node
bLen int
)
for i := range b.node {
n = b.node[i]
if n.ns == ns && n.key == key {
if atomic.LoadInt32(&n.ref) == 0 {
deleted = true
// Call releaser.
if n.value != nil {
if r, ok := n.value.(util.Releaser); ok {
r.Release()
}
n.value = nil
}
// Remove node from bucket.
b.node = append(b.node[:i], b.node[i+1:]...)
bLen = len(b.node)
}
break
}
}
b.mu.Unlock()
if deleted {
// Call OnDel.
for _, f := range n.onDel {
f()
}
// Update counter.
atomic.AddInt32(&r.size, int32(n.size)*-1)
shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold
if bLen >= mOverflowThreshold {
atomic.AddInt32(&h.overflow, -1)
}
// Shrink.
if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) {
nhLen := len(h.buckets) >> 1
nh := &mNode{
buckets: make([]unsafe.Pointer, nhLen),
mask: uint32(nhLen) - 1,
pred: unsafe.Pointer(h),
growThreshold: int32(nhLen * mOverflowThreshold),
shrinkThreshold: int32(nhLen >> 1),
}
ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh))
if !ok {
panic("BUG: failed swapping head")
}
go nh.initBuckets()
}
}
return true, deleted
}
type mNode struct {
buckets []unsafe.Pointer // []*mBucket
mask uint32
pred unsafe.Pointer // *mNode
resizeInProgess int32
overflow int32
growThreshold int32
shrinkThreshold int32
}
func (n *mNode) initBucket(i uint32) *mBucket {
if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil {
return b
}
p := (*mNode)(atomic.LoadPointer(&n.pred))
if p != nil {
var node []*Node
if n.mask > p.mask {
// Grow.
pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask]))
if pb == nil {
pb = p.initBucket(i & p.mask)
}
m := pb.freeze()
// Split nodes.
for _, x := range m {
if x.hash&n.mask == i {
node = append(node, x)
}
}
} else {
// Shrink.
pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i]))
if pb0 == nil {
pb0 = p.initBucket(i)
}
pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))]))
if pb1 == nil {
pb1 = p.initBucket(i + uint32(len(n.buckets)))
}
m0 := pb0.freeze()
m1 := pb1.freeze()
// Merge nodes.
node = make([]*Node, 0, len(m0)+len(m1))
node = append(node, m0...)
node = append(node, m1...)
}
b := &mBucket{node: node}
if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) {
if len(node) > mOverflowThreshold {
atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold))
}
return b
}
}
return (*mBucket)(atomic.LoadPointer(&n.buckets[i]))
}
func (n *mNode) initBuckets() {
for i := range n.buckets {
n.initBucket(uint32(i))
}
atomic.StorePointer(&n.pred, nil)
}
// Cache is a 'cache map'.
type Cache struct {
mu sync.RWMutex
mHead unsafe.Pointer // *mNode
nodes int32
size int32
cacher Cacher
closed bool
}
// NewCache creates a new 'cache map'. The cacher is optional and
// may be nil.
func NewCache(cacher Cacher) *Cache {
h := &mNode{
buckets: make([]unsafe.Pointer, mInitialSize),
mask: mInitialSize - 1,
growThreshold: int32(mInitialSize * mOverflowThreshold),
shrinkThreshold: 0,
}
for i := range h.buckets {
h.buckets[i] = unsafe.Pointer(&mBucket{})
}
r := &Cache{
mHead: unsafe.Pointer(h),
cacher: cacher,
}
return r
}
func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) {
h := (*mNode)(atomic.LoadPointer(&r.mHead))
i := hash & h.mask
b := (*mBucket)(atomic.LoadPointer(&h.buckets[i]))
if b == nil {
b = h.initBucket(i)
}
return h, b
}
func (r *Cache) delete(n *Node) bool {
for {
h, b := r.getBucket(n.hash)
done, deleted := b.delete(r, h, n.hash, n.ns, n.key)
if done {
return deleted
}
}
return false
}
// Nodes returns number of 'cache node' in the map.
func (r *Cache) Nodes() int {
return int(atomic.LoadInt32(&r.nodes))
}
// Size returns sums of 'cache node' size in the map.
func (r *Cache) Size() int {
return int(atomic.LoadInt32(&r.size))
}
// Capacity returns cache capacity.
func (r *Cache) Capacity() int {
if r.cacher == nil {
return 0
}
return r.cacher.Capacity()
}
// SetCapacity sets cache capacity.
func (r *Cache) SetCapacity(capacity int) {
if r.cacher != nil {
r.cacher.SetCapacity(capacity)
}
}
// Get gets 'cache node' with the given namespace and key.
// If cache node is not found and setFunc is not nil, Get will atomically creates
// the 'cache node' by calling setFunc. Otherwise Get will returns nil.
//
// The returned 'cache handle' should be released after use by calling Release
// method.
func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle {
r.mu.RLock()
defer r.mu.RUnlock()
if r.closed {
return nil
}
hash := murmur32(ns, key, 0xf00)
for {
h, b := r.getBucket(hash)
done, _, n := b.get(r, h, hash, ns, key, setFunc == nil)
if done {
if n != nil {
n.mu.Lock()
if n.value == nil {
if setFunc == nil {
n.mu.Unlock()
n.unref()
return nil
}
n.size, n.value = setFunc()
if n.value == nil {
n.size = 0
n.mu.Unlock()
n.unref()
return nil
}
atomic.AddInt32(&r.size, int32(n.size))
}
n.mu.Unlock()
if r.cacher != nil {
r.cacher.Promote(n)
}
return &Handle{unsafe.Pointer(n)}
}
break
}
}
return nil
}
// Delete removes and ban 'cache node' with the given namespace and key.
// A banned 'cache node' will never inserted into the 'cache tree'. Ban
// only attributed to the particular 'cache node', so when a 'cache node'
// is recreated it will not be banned.
//
// If onDel is not nil, then it will be executed if such 'cache node'
// doesn't exist or once the 'cache node' is released.
//
// Delete return true is such 'cache node' exist.
func (r *Cache) Delete(ns, key uint64, onDel func()) bool {
r.mu.RLock()
defer r.mu.RUnlock()
if r.closed {
return false
}
hash := murmur32(ns, key, 0xf00)
for {
h, b := r.getBucket(hash)
done, _, n := b.get(r, h, hash, ns, key, true)
if done {
if n != nil {
if onDel != nil {
n.mu.Lock()
n.onDel = append(n.onDel, onDel)
n.mu.Unlock()
}
if r.cacher != nil {
r.cacher.Ban(n)
}
n.unref()
return true
}
break
}
}
if onDel != nil {
onDel()
}
return false
}
// Evict evicts 'cache node' with the given namespace and key. This will
// simply call Cacher.Evict.
//
// Evict return true is such 'cache node' exist.
func (r *Cache) Evict(ns, key uint64) bool {
r.mu.RLock()
defer r.mu.RUnlock()
if r.closed {
return false
}
hash := murmur32(ns, key, 0xf00)
for {
h, b := r.getBucket(hash)
done, _, n := b.get(r, h, hash, ns, key, true)
if done {
if n != nil {
if r.cacher != nil {
r.cacher.Evict(n)
}
n.unref()
return true
}
break
}
}
return false
}
// EvictNS evicts 'cache node' with the given namespace. This will
// simply call Cacher.EvictNS.
func (r *Cache) EvictNS(ns uint64) {
r.mu.RLock()
defer r.mu.RUnlock()
if r.closed {
return
}
if r.cacher != nil {
r.cacher.EvictNS(ns)
}
}
// EvictAll evicts all 'cache node'. This will simply call Cacher.EvictAll.
func (r *Cache) EvictAll() {
r.mu.RLock()
defer r.mu.RUnlock()
if r.closed {
return
}
if r.cacher != nil {
r.cacher.EvictAll()
}
}
// Close closes the 'cache map' and forcefully releases all 'cache node'.
func (r *Cache) Close() error {
r.mu.Lock()
if !r.closed {
r.closed = true
h := (*mNode)(r.mHead)
h.initBuckets()
for i := range h.buckets {
b := (*mBucket)(h.buckets[i])
for _, n := range b.node {
// Call releaser.
if n.value != nil {
if r, ok := n.value.(util.Releaser); ok {
r.Release()
}
n.value = nil
}
// Call OnDel.
for _, f := range n.onDel {
f()
}
n.onDel = nil
}
}
}
r.mu.Unlock()
// Avoid deadlock.
if r.cacher != nil {
if err := r.cacher.Close(); err != nil {
return err
}
}
return nil
}
// CloseWeak closes the 'cache map' and evict all 'cache node' from cacher, but
// unlike Close it doesn't forcefully releases 'cache node'.
func (r *Cache) CloseWeak() error {
r.mu.Lock()
if !r.closed {
r.closed = true
}
r.mu.Unlock()
// Avoid deadlock.
if r.cacher != nil {
r.cacher.EvictAll()
if err := r.cacher.Close(); err != nil {
return err
}
}
return nil
}
// Node is a 'cache node'.
type Node struct {
r *Cache
hash uint32
ns, key uint64
mu sync.Mutex
size int
value Value
ref int32
onDel []func()
CacheData unsafe.Pointer
}
// NS returns this 'cache node' namespace.
func (n *Node) NS() uint64 {
return n.ns
}
// Key returns this 'cache node' key.
func (n *Node) Key() uint64 {
return n.key
}
// Size returns this 'cache node' size.
func (n *Node) Size() int {
return n.size
}
// Value returns this 'cache node' value.
func (n *Node) Value() Value {
return n.value
}
// Ref returns this 'cache node' ref counter.
func (n *Node) Ref() int32 {
return atomic.LoadInt32(&n.ref)
}
// GetHandle returns an handle for this 'cache node'.
func (n *Node) GetHandle() *Handle {
if atomic.AddInt32(&n.ref, 1) <= 1 {
panic("BUG: Node.GetHandle on zero ref")
}
return &Handle{unsafe.Pointer(n)}
}
func (n *Node) unref() {
if atomic.AddInt32(&n.ref, -1) == 0 {
n.r.delete(n)
}
}
func (n *Node) unrefLocked() {
if atomic.AddInt32(&n.ref, -1) == 0 {
n.r.mu.RLock()
if !n.r.closed {
n.r.delete(n)
}
n.r.mu.RUnlock()
}
}
// Handle is a 'cache handle' of a 'cache node'.
type Handle struct {
n unsafe.Pointer // *Node
}
// Value returns the value of the 'cache node'.
func (h *Handle) Value() Value {
n := (*Node)(atomic.LoadPointer(&h.n))
if n != nil {
return n.value
}
return nil
}
// Release releases this 'cache handle'.
// It is safe to call release multiple times.
func (h *Handle) Release() {
nPtr := atomic.LoadPointer(&h.n)
if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) {
n := (*Node)(nPtr)
n.unrefLocked()
}
}
func murmur32(ns, key uint64, seed uint32) uint32 {
const (
m = uint32(0x5bd1e995)
r = 24
)
k1 := uint32(ns >> 32)
k2 := uint32(ns)
k3 := uint32(key >> 32)
k4 := uint32(key)
k1 *= m
k1 ^= k1 >> r
k1 *= m
k2 *= m
k2 ^= k2 >> r
k2 *= m
k3 *= m
k3 ^= k3 >> r
k3 *= m
k4 *= m
k4 ^= k4 >> r
k4 *= m
h := seed
h *= m
h ^= k1
h *= m
h ^= k2
h *= m
h ^= k3
h *= m
h ^= k4
h ^= h >> 13
h *= m
h ^= h >> 15
return h
}

View file

@ -1,195 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package cache
import (
"sync"
"unsafe"
)
type lruNode struct {
n *Node
h *Handle
ban bool
next, prev *lruNode
}
func (n *lruNode) insert(at *lruNode) {
x := at.next
at.next = n
n.prev = at
n.next = x
x.prev = n
}
func (n *lruNode) remove() {
if n.prev != nil {
n.prev.next = n.next
n.next.prev = n.prev
n.prev = nil
n.next = nil
} else {
panic("BUG: removing removed node")
}
}
type lru struct {
mu sync.Mutex
capacity int
used int
recent lruNode
}
func (r *lru) reset() {
r.recent.next = &r.recent
r.recent.prev = &r.recent
r.used = 0
}
func (r *lru) Capacity() int {
r.mu.Lock()
defer r.mu.Unlock()
return r.capacity
}
func (r *lru) SetCapacity(capacity int) {
var evicted []*lruNode
r.mu.Lock()
r.capacity = capacity
for r.used > r.capacity {
rn := r.recent.prev
if rn == nil {
panic("BUG: invalid LRU used or capacity counter")
}
rn.remove()
rn.n.CacheData = nil
r.used -= rn.n.Size()
evicted = append(evicted, rn)
}
r.mu.Unlock()
for _, rn := range evicted {
rn.h.Release()
}
}
func (r *lru) Promote(n *Node) {
var evicted []*lruNode
r.mu.Lock()
if n.CacheData == nil {
if n.Size() <= r.capacity {
rn := &lruNode{n: n, h: n.GetHandle()}
rn.insert(&r.recent)
n.CacheData = unsafe.Pointer(rn)
r.used += n.Size()
for r.used > r.capacity {
rn := r.recent.prev
if rn == nil {
panic("BUG: invalid LRU used or capacity counter")
}
rn.remove()
rn.n.CacheData = nil
r.used -= rn.n.Size()
evicted = append(evicted, rn)
}
}
} else {
rn := (*lruNode)(n.CacheData)
if !rn.ban {
rn.remove()
rn.insert(&r.recent)
}
}
r.mu.Unlock()
for _, rn := range evicted {
rn.h.Release()
}
}
func (r *lru) Ban(n *Node) {
r.mu.Lock()
if n.CacheData == nil {
n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true})
} else {
rn := (*lruNode)(n.CacheData)
if !rn.ban {
rn.remove()
rn.ban = true
r.used -= rn.n.Size()
r.mu.Unlock()
rn.h.Release()
rn.h = nil
return
}
}
r.mu.Unlock()
}
func (r *lru) Evict(n *Node) {
r.mu.Lock()
rn := (*lruNode)(n.CacheData)
if rn == nil || rn.ban {
r.mu.Unlock()
return
}
n.CacheData = nil
r.mu.Unlock()
rn.h.Release()
}
func (r *lru) EvictNS(ns uint64) {
var evicted []*lruNode
r.mu.Lock()
for e := r.recent.prev; e != &r.recent; {
rn := e
e = e.prev
if rn.n.NS() == ns {
rn.remove()
rn.n.CacheData = nil
r.used -= rn.n.Size()
evicted = append(evicted, rn)
}
}
r.mu.Unlock()
for _, rn := range evicted {
rn.h.Release()
}
}
func (r *lru) EvictAll() {
r.mu.Lock()
back := r.recent.prev
for rn := back; rn != &r.recent; rn = rn.prev {
rn.n.CacheData = nil
}
r.reset()
r.mu.Unlock()
for rn := back; rn != &r.recent; rn = rn.prev {
rn.h.Release()
}
}
func (r *lru) Close() error {
return nil
}
// NewLRU create a new LRU-cache.
func NewLRU(capacity int) Cacher {
r := &lru{capacity: capacity}
r.reset()
return r
}

View file

@ -1,67 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"github.com/syndtr/goleveldb/leveldb/comparer"
)
type iComparer struct {
ucmp comparer.Comparer
}
func (icmp *iComparer) uName() string {
return icmp.ucmp.Name()
}
func (icmp *iComparer) uCompare(a, b []byte) int {
return icmp.ucmp.Compare(a, b)
}
func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte {
return icmp.ucmp.Separator(dst, a, b)
}
func (icmp *iComparer) uSuccessor(dst, b []byte) []byte {
return icmp.ucmp.Successor(dst, b)
}
func (icmp *iComparer) Name() string {
return icmp.uName()
}
func (icmp *iComparer) Compare(a, b []byte) int {
x := icmp.uCompare(internalKey(a).ukey(), internalKey(b).ukey())
if x == 0 {
if m, n := internalKey(a).num(), internalKey(b).num(); m > n {
return -1
} else if m < n {
return 1
}
}
return x
}
func (icmp *iComparer) Separator(dst, a, b []byte) []byte {
ua, ub := internalKey(a).ukey(), internalKey(b).ukey()
dst = icmp.uSeparator(dst, ua, ub)
if dst != nil && len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 {
// Append earliest possible number.
return append(dst, keyMaxNumBytes...)
}
return nil
}
func (icmp *iComparer) Successor(dst, b []byte) []byte {
ub := internalKey(b).ukey()
dst = icmp.uSuccessor(dst, ub)
if dst != nil && len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 {
// Append earliest possible number.
return append(dst, keyMaxNumBytes...)
}
return nil
}

View file

@ -1,51 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package comparer
import "bytes"
type bytesComparer struct{}
func (bytesComparer) Compare(a, b []byte) int {
return bytes.Compare(a, b)
}
func (bytesComparer) Name() string {
return "leveldb.BytewiseComparator"
}
func (bytesComparer) Separator(dst, a, b []byte) []byte {
i, n := 0, len(a)
if n > len(b) {
n = len(b)
}
for ; i < n && a[i] == b[i]; i++ {
}
if i >= n {
// Do not shorten if one string is a prefix of the other
} else if c := a[i]; c < 0xff && c+1 < b[i] {
dst = append(dst, a[:i+1]...)
dst[i]++
return dst
}
return nil
}
func (bytesComparer) Successor(dst, b []byte) []byte {
for i, c := range b {
if c != 0xff {
dst = append(dst, b[:i+1]...)
dst[i]++
return dst
}
}
return nil
}
// DefaultComparer are default implementation of the Comparer interface.
// It uses the natural ordering, consistent with bytes.Compare.
var DefaultComparer = bytesComparer{}

View file

@ -1,57 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package comparer provides interface and implementation for ordering
// sets of data.
package comparer
// BasicComparer is the interface that wraps the basic Compare method.
type BasicComparer interface {
// Compare returns -1, 0, or +1 depending on whether a is 'less than',
// 'equal to' or 'greater than' b. The two arguments can only be 'equal'
// if their contents are exactly equal. Furthermore, the empty slice
// must be 'less than' any non-empty slice.
Compare(a, b []byte) int
}
// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship.
type Comparer interface {
BasicComparer
// Name returns name of the comparer.
//
// The Level-DB on-disk format stores the comparer name, and opening a
// database with a different comparer from the one it was created with
// will result in an error.
//
// An implementation to a new name whenever the comparer implementation
// changes in a way that will cause the relative ordering of any two keys
// to change.
//
// Names starting with "leveldb." are reserved and should not be used
// by any users of this package.
Name() string
// Bellow are advanced functions used used to reduce the space requirements
// for internal data structures such as index blocks.
// Separator appends a sequence of bytes x to dst such that a <= x && x < b,
// where 'less than' is consistent with Compare. An implementation should
// return nil if x equal to a.
//
// Either contents of a or b should not by any means modified. Doing so
// may cause corruption on the internal state.
Separator(dst, a, b []byte) []byte
// Successor appends a sequence of bytes x to dst such that x >= b, where
// 'less than' is consistent with Compare. An implementation should return
// nil if x equal to b.
//
// Contents of b should not by any means modified. Doing so may cause
// corruption on the internal state.
Successor(dst, b []byte) []byte
}

File diff suppressed because it is too large Load diff

View file

@ -1,826 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"sync"
"time"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
)
var (
errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting")
)
type cStat struct {
duration time.Duration
read int64
write int64
}
func (p *cStat) add(n *cStatStaging) {
p.duration += n.duration
p.read += n.read
p.write += n.write
}
func (p *cStat) get() (duration time.Duration, read, write int64) {
return p.duration, p.read, p.write
}
type cStatStaging struct {
start time.Time
duration time.Duration
on bool
read int64
write int64
}
func (p *cStatStaging) startTimer() {
if !p.on {
p.start = time.Now()
p.on = true
}
}
func (p *cStatStaging) stopTimer() {
if p.on {
p.duration += time.Since(p.start)
p.on = false
}
}
type cStats struct {
lk sync.Mutex
stats []cStat
}
func (p *cStats) addStat(level int, n *cStatStaging) {
p.lk.Lock()
if level >= len(p.stats) {
newStats := make([]cStat, level+1)
copy(newStats, p.stats)
p.stats = newStats
}
p.stats[level].add(n)
p.lk.Unlock()
}
func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) {
p.lk.Lock()
defer p.lk.Unlock()
if level < len(p.stats) {
return p.stats[level].get()
}
return
}
func (db *DB) compactionError() {
var err error
noerr:
// No error.
for {
select {
case err = <-db.compErrSetC:
switch {
case err == nil:
case err == ErrReadOnly, errors.IsCorrupted(err):
goto hasperr
default:
goto haserr
}
case <-db.closeC:
return
}
}
haserr:
// Transient error.
for {
select {
case db.compErrC <- err:
case err = <-db.compErrSetC:
switch {
case err == nil:
goto noerr
case err == ErrReadOnly, errors.IsCorrupted(err):
goto hasperr
default:
}
case <-db.closeC:
return
}
}
hasperr:
// Persistent error.
for {
select {
case db.compErrC <- err:
case db.compPerErrC <- err:
case db.writeLockC <- struct{}{}:
// Hold write lock, so that write won't pass-through.
db.compWriteLocking = true
case <-db.closeC:
if db.compWriteLocking {
// We should release the lock or Close will hang.
<-db.writeLockC
}
return
}
}
}
type compactionTransactCounter int
func (cnt *compactionTransactCounter) incr() {
*cnt++
}
type compactionTransactInterface interface {
run(cnt *compactionTransactCounter) error
revert() error
}
func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
defer func() {
if x := recover(); x != nil {
if x == errCompactionTransactExiting {
if err := t.revert(); err != nil {
db.logf("%s revert error %q", name, err)
}
}
panic(x)
}
}()
const (
backoffMin = 1 * time.Second
backoffMax = 8 * time.Second
backoffMul = 2 * time.Second
)
var (
backoff = backoffMin
backoffT = time.NewTimer(backoff)
lastCnt = compactionTransactCounter(0)
disableBackoff = db.s.o.GetDisableCompactionBackoff()
)
for n := 0; ; n++ {
// Check whether the DB is closed.
if db.isClosed() {
db.logf("%s exiting", name)
db.compactionExitTransact()
} else if n > 0 {
db.logf("%s retrying N·%d", name, n)
}
// Execute.
cnt := compactionTransactCounter(0)
err := t.run(&cnt)
if err != nil {
db.logf("%s error I·%d %q", name, cnt, err)
}
// Set compaction error status.
select {
case db.compErrSetC <- err:
case perr := <-db.compPerErrC:
if err != nil {
db.logf("%s exiting (persistent error %q)", name, perr)
db.compactionExitTransact()
}
case <-db.closeC:
db.logf("%s exiting", name)
db.compactionExitTransact()
}
if err == nil {
return
}
if errors.IsCorrupted(err) {
db.logf("%s exiting (corruption detected)", name)
db.compactionExitTransact()
}
if !disableBackoff {
// Reset backoff duration if counter is advancing.
if cnt > lastCnt {
backoff = backoffMin
lastCnt = cnt
}
// Backoff.
backoffT.Reset(backoff)
if backoff < backoffMax {
backoff *= backoffMul
if backoff > backoffMax {
backoff = backoffMax
}
}
select {
case <-backoffT.C:
case <-db.closeC:
db.logf("%s exiting", name)
db.compactionExitTransact()
}
}
}
}
type compactionTransactFunc struct {
runFunc func(cnt *compactionTransactCounter) error
revertFunc func() error
}
func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error {
return t.runFunc(cnt)
}
func (t *compactionTransactFunc) revert() error {
if t.revertFunc != nil {
return t.revertFunc()
}
return nil
}
func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) {
db.compactionTransact(name, &compactionTransactFunc{run, revert})
}
func (db *DB) compactionExitTransact() {
panic(errCompactionTransactExiting)
}
func (db *DB) compactionCommit(name string, rec *sessionRecord) {
db.compCommitLk.Lock()
defer db.compCommitLk.Unlock() // Defer is necessary.
db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error {
return db.s.commit(rec)
}, nil)
}
func (db *DB) memCompaction() {
mdb := db.getFrozenMem()
if mdb == nil {
return
}
defer mdb.decref()
db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size()))
// Don't compact empty memdb.
if mdb.Len() == 0 {
db.logf("memdb@flush skipping")
// drop frozen memdb
db.dropFrozenMem()
return
}
// Pause table compaction.
resumeC := make(chan struct{})
select {
case db.tcompPauseC <- (chan<- struct{})(resumeC):
case <-db.compPerErrC:
close(resumeC)
resumeC = nil
case <-db.closeC:
return
}
var (
rec = &sessionRecord{}
stats = &cStatStaging{}
flushLevel int
)
// Generate tables.
db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel)
stats.stopTimer()
return
}, func() error {
for _, r := range rec.addedTables {
db.logf("memdb@flush revert @%d", r.num)
if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil {
return err
}
}
return nil
})
rec.setJournalNum(db.journalFd.Num)
rec.setSeqNum(db.frozenSeq)
// Commit.
stats.startTimer()
db.compactionCommit("memdb", rec)
stats.stopTimer()
db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
for _, r := range rec.addedTables {
stats.write += r.size
}
db.compStats.addStat(flushLevel, stats)
// Drop frozen memdb.
db.dropFrozenMem()
// Resume table compaction.
if resumeC != nil {
select {
case <-resumeC:
close(resumeC)
case <-db.closeC:
return
}
}
// Trigger table compaction.
db.compTrigger(db.tcompCmdC)
}
type tableCompactionBuilder struct {
db *DB
s *session
c *compaction
rec *sessionRecord
stat0, stat1 *cStatStaging
snapHasLastUkey bool
snapLastUkey []byte
snapLastSeq uint64
snapIter int
snapKerrCnt int
snapDropCnt int
kerrCnt int
dropCnt int
minSeq uint64
strict bool
tableSize int
tw *tWriter
}
func (b *tableCompactionBuilder) appendKV(key, value []byte) error {
// Create new table if not already.
if b.tw == nil {
// Check for pause event.
if b.db != nil {
select {
case ch := <-b.db.tcompPauseC:
b.db.pauseCompaction(ch)
case <-b.db.closeC:
b.db.compactionExitTransact()
default:
}
}
// Create new table.
var err error
b.tw, err = b.s.tops.create()
if err != nil {
return err
}
}
// Write key/value into table.
return b.tw.append(key, value)
}
func (b *tableCompactionBuilder) needFlush() bool {
return b.tw.tw.BytesLen() >= b.tableSize
}
func (b *tableCompactionBuilder) flush() error {
t, err := b.tw.finish()
if err != nil {
return err
}
b.rec.addTableFile(b.c.sourceLevel+1, t)
b.stat1.write += t.size
b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
b.tw = nil
return nil
}
func (b *tableCompactionBuilder) cleanup() {
if b.tw != nil {
b.tw.drop()
b.tw = nil
}
}
func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
snapResumed := b.snapIter > 0
hasLastUkey := b.snapHasLastUkey // The key might has zero length, so this is necessary.
lastUkey := append([]byte{}, b.snapLastUkey...)
lastSeq := b.snapLastSeq
b.kerrCnt = b.snapKerrCnt
b.dropCnt = b.snapDropCnt
// Restore compaction state.
b.c.restore()
defer b.cleanup()
b.stat1.startTimer()
defer b.stat1.stopTimer()
iter := b.c.newIterator()
defer iter.Release()
for i := 0; iter.Next(); i++ {
// Incr transact counter.
cnt.incr()
// Skip until last state.
if i < b.snapIter {
continue
}
resumed := false
if snapResumed {
resumed = true
snapResumed = false
}
ikey := iter.Key()
ukey, seq, kt, kerr := parseInternalKey(ikey)
if kerr == nil {
shouldStop := !resumed && b.c.shouldStopBefore(ikey)
if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 {
// First occurrence of this user key.
// Only rotate tables if ukey doesn't hop across.
if b.tw != nil && (shouldStop || b.needFlush()) {
if err := b.flush(); err != nil {
return err
}
// Creates snapshot of the state.
b.c.save()
b.snapHasLastUkey = hasLastUkey
b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...)
b.snapLastSeq = lastSeq
b.snapIter = i
b.snapKerrCnt = b.kerrCnt
b.snapDropCnt = b.dropCnt
}
hasLastUkey = true
lastUkey = append(lastUkey[:0], ukey...)
lastSeq = keyMaxSeq
}
switch {
case lastSeq <= b.minSeq:
// Dropped because newer entry for same user key exist
fallthrough // (A)
case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
// For this user key:
// (1) there is no data in higher levels
// (2) data in lower levels will have larger seq numbers
// (3) data in layers that are being compacted here and have
// smaller seq numbers will be dropped in the next
// few iterations of this loop (by rule (A) above).
// Therefore this deletion marker is obsolete and can be dropped.
lastSeq = seq
b.dropCnt++
continue
default:
lastSeq = seq
}
} else {
if b.strict {
return kerr
}
// Don't drop corrupted keys.
hasLastUkey = false
lastUkey = lastUkey[:0]
lastSeq = keyMaxSeq
b.kerrCnt++
}
if err := b.appendKV(ikey, iter.Value()); err != nil {
return err
}
}
if err := iter.Error(); err != nil {
return err
}
// Finish last table.
if b.tw != nil && !b.tw.empty() {
return b.flush()
}
return nil
}
func (b *tableCompactionBuilder) revert() error {
for _, at := range b.rec.addedTables {
b.s.logf("table@build revert @%d", at.num)
if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil {
return err
}
}
return nil
}
func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
defer c.release()
rec := &sessionRecord{}
rec.addCompPtr(c.sourceLevel, c.imax)
if !noTrivial && c.trivial() {
t := c.levels[0][0]
db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1)
rec.delTable(c.sourceLevel, t.fd.Num)
rec.addTableFile(c.sourceLevel+1, t)
db.compactionCommit("table-move", rec)
return
}
var stats [2]cStatStaging
for i, tables := range c.levels {
for _, t := range tables {
stats[i].read += t.size
// Insert deleted tables into record
rec.delTable(c.sourceLevel+i, t.fd.Num)
}
}
sourceSize := int(stats[0].read + stats[1].read)
minSeq := db.minSeq()
db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq)
b := &tableCompactionBuilder{
db: db,
s: db.s,
c: c,
rec: rec,
stat1: &stats[1],
minSeq: minSeq,
strict: db.s.o.GetStrict(opt.StrictCompaction),
tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1),
}
db.compactionTransact("table@build", b)
// Commit.
stats[1].startTimer()
db.compactionCommit("table", rec)
stats[1].stopTimer()
resultSize := int(stats[1].write)
db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
// Save compaction stats
for i := range stats {
db.compStats.addStat(c.sourceLevel+1, &stats[i])
}
}
func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error {
db.logf("table@compaction range L%d %q:%q", level, umin, umax)
if level >= 0 {
if c := db.s.getCompactionRange(level, umin, umax, true); c != nil {
db.tableCompaction(c, true)
}
} else {
// Retry until nothing to compact.
for {
compacted := false
// Scan for maximum level with overlapped tables.
v := db.s.version()
m := 1
for i := m; i < len(v.levels); i++ {
tables := v.levels[i]
if tables.overlaps(db.s.icmp, umin, umax, false) {
m = i
}
}
v.release()
for level := 0; level < m; level++ {
if c := db.s.getCompactionRange(level, umin, umax, false); c != nil {
db.tableCompaction(c, true)
compacted = true
}
}
if !compacted {
break
}
}
}
return nil
}
func (db *DB) tableAutoCompaction() {
if c := db.s.pickCompaction(); c != nil {
db.tableCompaction(c, false)
}
}
func (db *DB) tableNeedCompaction() bool {
v := db.s.version()
defer v.release()
return v.needCompaction()
}
func (db *DB) pauseCompaction(ch chan<- struct{}) {
select {
case ch <- struct{}{}:
case <-db.closeC:
db.compactionExitTransact()
}
}
type cCmd interface {
ack(err error)
}
type cAuto struct {
ackC chan<- error
}
func (r cAuto) ack(err error) {
if r.ackC != nil {
defer func() {
recover()
}()
r.ackC <- err
}
}
type cRange struct {
level int
min, max []byte
ackC chan<- error
}
func (r cRange) ack(err error) {
if r.ackC != nil {
defer func() {
recover()
}()
r.ackC <- err
}
}
// This will trigger auto compaction but will not wait for it.
func (db *DB) compTrigger(compC chan<- cCmd) {
select {
case compC <- cAuto{}:
default:
}
}
// This will trigger auto compaction and/or wait for all compaction to be done.
func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
select {
case compC <- cAuto{ch}:
case err = <-db.compErrC:
return
case <-db.closeC:
return ErrClosed
}
// Wait cmd.
select {
case err = <-ch:
case err = <-db.compErrC:
case <-db.closeC:
return ErrClosed
}
return err
}
// Send range compaction request.
func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
select {
case compC <- cRange{level, min, max, ch}:
case err := <-db.compErrC:
return err
case <-db.closeC:
return ErrClosed
}
// Wait cmd.
select {
case err = <-ch:
case err = <-db.compErrC:
case <-db.closeC:
return ErrClosed
}
return err
}
func (db *DB) mCompaction() {
var x cCmd
defer func() {
if x := recover(); x != nil {
if x != errCompactionTransactExiting {
panic(x)
}
}
if x != nil {
x.ack(ErrClosed)
}
db.closeW.Done()
}()
for {
select {
case x = <-db.mcompCmdC:
switch x.(type) {
case cAuto:
db.memCompaction()
x.ack(nil)
x = nil
default:
panic("leveldb: unknown command")
}
case <-db.closeC:
return
}
}
}
func (db *DB) tCompaction() {
var x cCmd
var ackQ []cCmd
defer func() {
if x := recover(); x != nil {
if x != errCompactionTransactExiting {
panic(x)
}
}
for i := range ackQ {
ackQ[i].ack(ErrClosed)
ackQ[i] = nil
}
if x != nil {
x.ack(ErrClosed)
}
db.closeW.Done()
}()
for {
if db.tableNeedCompaction() {
select {
case x = <-db.tcompCmdC:
case ch := <-db.tcompPauseC:
db.pauseCompaction(ch)
continue
case <-db.closeC:
return
default:
}
} else {
for i := range ackQ {
ackQ[i].ack(nil)
ackQ[i] = nil
}
ackQ = ackQ[:0]
select {
case x = <-db.tcompCmdC:
case ch := <-db.tcompPauseC:
db.pauseCompaction(ch)
continue
case <-db.closeC:
return
}
}
if x != nil {
switch cmd := x.(type) {
case cAuto:
ackQ = append(ackQ, x)
case cRange:
x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max))
default:
panic("leveldb: unknown command")
}
x = nil
}
db.tableAutoCompaction()
}
}

View file

@ -1,360 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"errors"
"math/rand"
"runtime"
"sync"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
var (
errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key")
)
type memdbReleaser struct {
once sync.Once
m *memDB
}
func (mr *memdbReleaser) Release() {
mr.once.Do(func() {
mr.m.decref()
})
}
func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
em, fm := db.getMems()
v := db.s.version()
tableIts := v.getIterators(slice, ro)
n := len(tableIts) + len(auxt) + 3
its := make([]iterator.Iterator, 0, n)
if auxm != nil {
ami := auxm.NewIterator(slice)
ami.SetReleaser(&memdbReleaser{m: auxm})
its = append(its, ami)
}
for _, t := range auxt {
its = append(its, v.s.tops.newIterator(t, slice, ro))
}
emi := em.NewIterator(slice)
emi.SetReleaser(&memdbReleaser{m: em})
its = append(its, emi)
if fm != nil {
fmi := fm.NewIterator(slice)
fmi.SetReleaser(&memdbReleaser{m: fm})
its = append(its, fmi)
}
its = append(its, tableIts...)
mi := iterator.NewMergedIterator(its, db.s.icmp, strict)
mi.SetReleaser(&versionReleaser{v: v})
return mi
}
func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
var islice *util.Range
if slice != nil {
islice = &util.Range{}
if slice.Start != nil {
islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek)
}
if slice.Limit != nil {
islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek)
}
}
rawIter := db.newRawIterator(auxm, auxt, islice, ro)
iter := &dbIter{
db: db,
icmp: db.s.icmp,
iter: rawIter,
seq: seq,
strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader),
key: make([]byte, 0),
value: make([]byte, 0),
}
atomic.AddInt32(&db.aliveIters, 1)
runtime.SetFinalizer(iter, (*dbIter).Release)
return iter
}
func (db *DB) iterSamplingRate() int {
return rand.Intn(2 * db.s.o.GetIteratorSamplingRate())
}
type dir int
const (
dirReleased dir = iota - 1
dirSOI
dirEOI
dirBackward
dirForward
)
// dbIter represent an interator states over a database session.
type dbIter struct {
db *DB
icmp *iComparer
iter iterator.Iterator
seq uint64
strict bool
smaplingGap int
dir dir
key []byte
value []byte
err error
releaser util.Releaser
}
func (i *dbIter) sampleSeek() {
ikey := i.iter.Key()
i.smaplingGap -= len(ikey) + len(i.iter.Value())
for i.smaplingGap < 0 {
i.smaplingGap += i.db.iterSamplingRate()
i.db.sampleSeek(ikey)
}
}
func (i *dbIter) setErr(err error) {
i.err = err
i.key = nil
i.value = nil
}
func (i *dbIter) iterErr() {
if err := i.iter.Error(); err != nil {
i.setErr(err)
}
}
func (i *dbIter) Valid() bool {
return i.err == nil && i.dir > dirEOI
}
func (i *dbIter) First() bool {
if i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
if i.iter.First() {
i.dir = dirSOI
return i.next()
}
i.dir = dirEOI
i.iterErr()
return false
}
func (i *dbIter) Last() bool {
if i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
if i.iter.Last() {
return i.prev()
}
i.dir = dirSOI
i.iterErr()
return false
}
func (i *dbIter) Seek(key []byte) bool {
if i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek)
if i.iter.Seek(ikey) {
i.dir = dirSOI
return i.next()
}
i.dir = dirEOI
i.iterErr()
return false
}
func (i *dbIter) next() bool {
for {
if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
i.sampleSeek()
if seq <= i.seq {
switch kt {
case keyTypeDel:
// Skip deleted key.
i.key = append(i.key[:0], ukey...)
i.dir = dirForward
case keyTypeVal:
if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
i.dir = dirForward
return true
}
}
}
} else if i.strict {
i.setErr(kerr)
break
}
if !i.iter.Next() {
i.dir = dirEOI
i.iterErr()
break
}
}
return false
}
func (i *dbIter) Next() bool {
if i.dir == dirEOI || i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
if !i.iter.Next() || (i.dir == dirBackward && !i.iter.Next()) {
i.dir = dirEOI
i.iterErr()
return false
}
return i.next()
}
func (i *dbIter) prev() bool {
i.dir = dirBackward
del := true
if i.iter.Valid() {
for {
if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
i.sampleSeek()
if seq <= i.seq {
if !del && i.icmp.uCompare(ukey, i.key) < 0 {
return true
}
del = (kt == keyTypeDel)
if !del {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
}
}
} else if i.strict {
i.setErr(kerr)
return false
}
if !i.iter.Prev() {
break
}
}
}
if del {
i.dir = dirSOI
i.iterErr()
return false
}
return true
}
func (i *dbIter) Prev() bool {
if i.dir == dirSOI || i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
switch i.dir {
case dirEOI:
return i.Last()
case dirForward:
for i.iter.Prev() {
if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
i.sampleSeek()
if i.icmp.uCompare(ukey, i.key) < 0 {
goto cont
}
} else if i.strict {
i.setErr(kerr)
return false
}
}
i.dir = dirSOI
i.iterErr()
return false
}
cont:
return i.prev()
}
func (i *dbIter) Key() []byte {
if i.err != nil || i.dir <= dirEOI {
return nil
}
return i.key
}
func (i *dbIter) Value() []byte {
if i.err != nil || i.dir <= dirEOI {
return nil
}
return i.value
}
func (i *dbIter) Release() {
if i.dir != dirReleased {
// Clear the finalizer.
runtime.SetFinalizer(i, nil)
if i.releaser != nil {
i.releaser.Release()
i.releaser = nil
}
i.dir = dirReleased
i.key = nil
i.value = nil
i.iter.Release()
i.iter = nil
atomic.AddInt32(&i.db.aliveIters, -1)
i.db = nil
}
}
func (i *dbIter) SetReleaser(releaser util.Releaser) {
if i.dir == dirReleased {
panic(util.ErrReleased)
}
if i.releaser != nil && releaser != nil {
panic(util.ErrHasReleaser)
}
i.releaser = releaser
}
func (i *dbIter) Error() error {
return i.err
}

View file

@ -1,183 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"container/list"
"fmt"
"runtime"
"sync"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
type snapshotElement struct {
seq uint64
ref int
e *list.Element
}
// Acquires a snapshot, based on latest sequence.
func (db *DB) acquireSnapshot() *snapshotElement {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
seq := db.getSeq()
if e := db.snapsList.Back(); e != nil {
se := e.Value.(*snapshotElement)
if se.seq == seq {
se.ref++
return se
} else if seq < se.seq {
panic("leveldb: sequence number is not increasing")
}
}
se := &snapshotElement{seq: seq, ref: 1}
se.e = db.snapsList.PushBack(se)
return se
}
// Releases given snapshot element.
func (db *DB) releaseSnapshot(se *snapshotElement) {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
se.ref--
if se.ref == 0 {
db.snapsList.Remove(se.e)
se.e = nil
} else if se.ref < 0 {
panic("leveldb: Snapshot: negative element reference")
}
}
// Gets minimum sequence that not being snapshotted.
func (db *DB) minSeq() uint64 {
db.snapsMu.Lock()
defer db.snapsMu.Unlock()
if e := db.snapsList.Front(); e != nil {
return e.Value.(*snapshotElement).seq
}
return db.getSeq()
}
// Snapshot is a DB snapshot.
type Snapshot struct {
db *DB
elem *snapshotElement
mu sync.RWMutex
released bool
}
// Creates new snapshot object.
func (db *DB) newSnapshot() *Snapshot {
snap := &Snapshot{
db: db,
elem: db.acquireSnapshot(),
}
atomic.AddInt32(&db.aliveSnaps, 1)
runtime.SetFinalizer(snap, (*Snapshot).Release)
return snap
}
func (snap *Snapshot) String() string {
return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq)
}
// Get gets the value for the given key. It returns ErrNotFound if
// the DB does not contains the key.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Get returns.
func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
err = snap.db.ok()
if err != nil {
return
}
snap.mu.RLock()
defer snap.mu.RUnlock()
if snap.released {
err = ErrSnapshotReleased
return
}
return snap.db.get(nil, nil, key, snap.elem.seq, ro)
}
// Has returns true if the DB does contains the given key.
//
// It is safe to modify the contents of the argument after Get returns.
func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
err = snap.db.ok()
if err != nil {
return
}
snap.mu.RLock()
defer snap.mu.RUnlock()
if snap.released {
err = ErrSnapshotReleased
return
}
return snap.db.has(nil, nil, key, snap.elem.seq, ro)
}
// NewIterator returns an iterator for the snapshot of the underlying DB.
// The returned iterator is not safe for concurrent use, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
// underlying DB. The resultant key/value pairs are guaranteed to be
// consistent.
//
// Slice allows slicing the iterator to only contains keys in the given
// range. A nil Range.Start is treated as a key before all keys in the
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// The iterator must be released after use, by calling Release method.
// Releasing the snapshot doesn't mean releasing the iterator too, the
// iterator would be still valid until released.
//
// Also read Iterator documentation of the leveldb/iterator package.
func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
if err := snap.db.ok(); err != nil {
return iterator.NewEmptyIterator(err)
}
snap.mu.Lock()
defer snap.mu.Unlock()
if snap.released {
return iterator.NewEmptyIterator(ErrSnapshotReleased)
}
// Since iterator already hold version ref, it doesn't need to
// hold snapshot ref.
return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro)
}
// Release releases the snapshot. This will not release any returned
// iterators, the iterators would still be valid until released or the
// underlying DB is closed.
//
// Other methods should not be called after the snapshot has been released.
func (snap *Snapshot) Release() {
snap.mu.Lock()
defer snap.mu.Unlock()
if !snap.released {
// Clear the finalizer.
runtime.SetFinalizer(snap, nil)
snap.released = true
snap.db.releaseSnapshot(snap.elem)
atomic.AddInt32(&snap.db.aliveSnaps, -1)
snap.db = nil
snap.elem = nil
}
}

View file

@ -1,234 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"sync/atomic"
"time"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/storage"
)
type memDB struct {
db *DB
*memdb.DB
ref int32
}
func (m *memDB) getref() int32 {
return atomic.LoadInt32(&m.ref)
}
func (m *memDB) incref() {
atomic.AddInt32(&m.ref, 1)
}
func (m *memDB) decref() {
if ref := atomic.AddInt32(&m.ref, -1); ref == 0 {
// Only put back memdb with std capacity.
if m.Capacity() == m.db.s.o.GetWriteBuffer() {
m.Reset()
m.db.mpoolPut(m.DB)
}
m.db = nil
m.DB = nil
} else if ref < 0 {
panic("negative memdb ref")
}
}
// Get latest sequence number.
func (db *DB) getSeq() uint64 {
return atomic.LoadUint64(&db.seq)
}
// Atomically adds delta to seq.
func (db *DB) addSeq(delta uint64) {
atomic.AddUint64(&db.seq, delta)
}
func (db *DB) setSeq(seq uint64) {
atomic.StoreUint64(&db.seq, seq)
}
func (db *DB) sampleSeek(ikey internalKey) {
v := db.s.version()
if v.sampleSeek(ikey) {
// Trigger table compaction.
db.compTrigger(db.tcompCmdC)
}
v.release()
}
func (db *DB) mpoolPut(mem *memdb.DB) {
if !db.isClosed() {
select {
case db.memPool <- mem:
default:
}
}
}
func (db *DB) mpoolGet(n int) *memDB {
var mdb *memdb.DB
select {
case mdb = <-db.memPool:
default:
}
if mdb == nil || mdb.Capacity() < n {
mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n))
}
return &memDB{
db: db,
DB: mdb,
}
}
func (db *DB) mpoolDrain() {
ticker := time.NewTicker(30 * time.Second)
for {
select {
case <-ticker.C:
select {
case <-db.memPool:
default:
}
case <-db.closeC:
ticker.Stop()
// Make sure the pool is drained.
select {
case <-db.memPool:
case <-time.After(time.Second):
}
close(db.memPool)
return
}
}
}
// Create new memdb and froze the old one; need external synchronization.
// newMem only called synchronously by the writer.
func (db *DB) newMem(n int) (mem *memDB, err error) {
fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()}
w, err := db.s.stor.Create(fd)
if err != nil {
db.s.reuseFileNum(fd.Num)
return
}
db.memMu.Lock()
defer db.memMu.Unlock()
if db.frozenMem != nil {
panic("still has frozen mem")
}
if db.journal == nil {
db.journal = journal.NewWriter(w)
} else {
db.journal.Reset(w)
db.journalWriter.Close()
db.frozenJournalFd = db.journalFd
}
db.journalWriter = w
db.journalFd = fd
db.frozenMem = db.mem
mem = db.mpoolGet(n)
mem.incref() // for self
mem.incref() // for caller
db.mem = mem
// The seq only incremented by the writer. And whoever called newMem
// should hold write lock, so no need additional synchronization here.
db.frozenSeq = db.seq
return
}
// Get all memdbs.
func (db *DB) getMems() (e, f *memDB) {
db.memMu.RLock()
defer db.memMu.RUnlock()
if db.mem != nil {
db.mem.incref()
} else if !db.isClosed() {
panic("nil effective mem")
}
if db.frozenMem != nil {
db.frozenMem.incref()
}
return db.mem, db.frozenMem
}
// Get effective memdb.
func (db *DB) getEffectiveMem() *memDB {
db.memMu.RLock()
defer db.memMu.RUnlock()
if db.mem != nil {
db.mem.incref()
} else if !db.isClosed() {
panic("nil effective mem")
}
return db.mem
}
// Check whether we has frozen memdb.
func (db *DB) hasFrozenMem() bool {
db.memMu.RLock()
defer db.memMu.RUnlock()
return db.frozenMem != nil
}
// Get frozen memdb.
func (db *DB) getFrozenMem() *memDB {
db.memMu.RLock()
defer db.memMu.RUnlock()
if db.frozenMem != nil {
db.frozenMem.incref()
}
return db.frozenMem
}
// Drop frozen memdb; assume that frozen memdb isn't nil.
func (db *DB) dropFrozenMem() {
db.memMu.Lock()
if err := db.s.stor.Remove(db.frozenJournalFd); err != nil {
db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err)
} else {
db.logf("journal@remove removed @%d", db.frozenJournalFd.Num)
}
db.frozenJournalFd = storage.FileDesc{}
db.frozenMem.decref()
db.frozenMem = nil
db.memMu.Unlock()
}
// Clear mems ptr; used by DB.Close().
func (db *DB) clearMems() {
db.memMu.Lock()
db.mem = nil
db.frozenMem = nil
db.memMu.Unlock()
}
// Set closed flag; return true if not already closed.
func (db *DB) setClosed() bool {
return atomic.CompareAndSwapUint32(&db.closed, 0, 1)
}
// Check whether DB was closed.
func (db *DB) isClosed() bool {
return atomic.LoadUint32(&db.closed) != 0
}
// Check read ok status.
func (db *DB) ok() error {
if db.isClosed() {
return ErrClosed
}
return nil
}

View file

@ -1,325 +0,0 @@
// Copyright (c) 2016, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"errors"
"sync"
"time"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
var errTransactionDone = errors.New("leveldb: transaction already closed")
// Transaction is the transaction handle.
type Transaction struct {
db *DB
lk sync.RWMutex
seq uint64
mem *memDB
tables tFiles
ikScratch []byte
rec sessionRecord
stats cStatStaging
closed bool
}
// Get gets the value for the given key. It returns ErrNotFound if the
// DB does not contains the key.
//
// The returned slice is its own copy, it is safe to modify the contents
// of the returned slice.
// It is safe to modify the contents of the argument after Get returns.
func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) {
tr.lk.RLock()
defer tr.lk.RUnlock()
if tr.closed {
return nil, errTransactionDone
}
return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro)
}
// Has returns true if the DB does contains the given key.
//
// It is safe to modify the contents of the argument after Has returns.
func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) {
tr.lk.RLock()
defer tr.lk.RUnlock()
if tr.closed {
return false, errTransactionDone
}
return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro)
}
// NewIterator returns an iterator for the latest snapshot of the transaction.
// The returned iterator is not safe for concurrent use, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently while writes to the
// transaction. The resultant key/value pairs are guaranteed to be consistent.
//
// Slice allows slicing the iterator to only contains keys in the given
// range. A nil Range.Start is treated as a key before all keys in the
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// The iterator must be released after use, by calling Release method.
//
// Also read Iterator documentation of the leveldb/iterator package.
func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
tr.lk.RLock()
defer tr.lk.RUnlock()
if tr.closed {
return iterator.NewEmptyIterator(errTransactionDone)
}
tr.mem.incref()
return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro)
}
func (tr *Transaction) flush() error {
// Flush memdb.
if tr.mem.Len() != 0 {
tr.stats.startTimer()
iter := tr.mem.NewIterator(nil)
t, n, err := tr.db.s.tops.createFrom(iter)
iter.Release()
tr.stats.stopTimer()
if err != nil {
return err
}
if tr.mem.getref() == 1 {
tr.mem.Reset()
} else {
tr.mem.decref()
tr.mem = tr.db.mpoolGet(0)
tr.mem.incref()
}
tr.tables = append(tr.tables, t)
tr.rec.addTableFile(0, t)
tr.stats.write += t.size
tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
}
return nil
}
func (tr *Transaction) put(kt keyType, key, value []byte) error {
tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt)
if tr.mem.Free() < len(tr.ikScratch)+len(value) {
if err := tr.flush(); err != nil {
return err
}
}
if err := tr.mem.Put(tr.ikScratch, value); err != nil {
return err
}
tr.seq++
return nil
}
// Put sets the value for the given key. It overwrites any previous value
// for that key; a DB is not a multi-map.
// Please note that the transaction is not compacted until committed, so if you
// writes 10 same keys, then those 10 same keys are in the transaction.
//
// It is safe to modify the contents of the arguments after Put returns.
func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error {
tr.lk.Lock()
defer tr.lk.Unlock()
if tr.closed {
return errTransactionDone
}
return tr.put(keyTypeVal, key, value)
}
// Delete deletes the value for the given key.
// Please note that the transaction is not compacted until committed, so if you
// writes 10 same keys, then those 10 same keys are in the transaction.
//
// It is safe to modify the contents of the arguments after Delete returns.
func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error {
tr.lk.Lock()
defer tr.lk.Unlock()
if tr.closed {
return errTransactionDone
}
return tr.put(keyTypeDel, key, nil)
}
// Write apply the given batch to the transaction. The batch will be applied
// sequentially.
// Please note that the transaction is not compacted until committed, so if you
// writes 10 same keys, then those 10 same keys are in the transaction.
//
// It is safe to modify the contents of the arguments after Write returns.
func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error {
if b == nil || b.Len() == 0 {
return nil
}
tr.lk.Lock()
defer tr.lk.Unlock()
if tr.closed {
return errTransactionDone
}
return b.replayInternal(func(i int, kt keyType, k, v []byte) error {
return tr.put(kt, k, v)
})
}
func (tr *Transaction) setDone() {
tr.closed = true
tr.db.tr = nil
tr.mem.decref()
<-tr.db.writeLockC
}
// Commit commits the transaction. If error is not nil, then the transaction is
// not committed, it can then either be retried or discarded.
//
// Other methods should not be called after transaction has been committed.
func (tr *Transaction) Commit() error {
if err := tr.db.ok(); err != nil {
return err
}
tr.lk.Lock()
defer tr.lk.Unlock()
if tr.closed {
return errTransactionDone
}
if err := tr.flush(); err != nil {
// Return error, lets user decide either to retry or discard
// transaction.
return err
}
if len(tr.tables) != 0 {
// Committing transaction.
tr.rec.setSeqNum(tr.seq)
tr.db.compCommitLk.Lock()
tr.stats.startTimer()
var cerr error
for retry := 0; retry < 3; retry++ {
cerr = tr.db.s.commit(&tr.rec)
if cerr != nil {
tr.db.logf("transaction@commit error R·%d %q", retry, cerr)
select {
case <-time.After(time.Second):
case <-tr.db.closeC:
tr.db.logf("transaction@commit exiting")
tr.db.compCommitLk.Unlock()
return cerr
}
} else {
// Success. Set db.seq.
tr.db.setSeq(tr.seq)
break
}
}
tr.stats.stopTimer()
if cerr != nil {
// Return error, lets user decide either to retry or discard
// transaction.
return cerr
}
// Update compaction stats. This is safe as long as we hold compCommitLk.
tr.db.compStats.addStat(0, &tr.stats)
// Trigger table auto-compaction.
tr.db.compTrigger(tr.db.tcompCmdC)
tr.db.compCommitLk.Unlock()
// Additionally, wait compaction when certain threshold reached.
// Ignore error, returns error only if transaction can't be committed.
tr.db.waitCompaction()
}
// Only mark as done if transaction committed successfully.
tr.setDone()
return nil
}
func (tr *Transaction) discard() {
// Discard transaction.
for _, t := range tr.tables {
tr.db.logf("transaction@discard @%d", t.fd.Num)
if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil {
tr.db.s.reuseFileNum(t.fd.Num)
}
}
}
// Discard discards the transaction.
//
// Other methods should not be called after transaction has been discarded.
func (tr *Transaction) Discard() {
tr.lk.Lock()
if !tr.closed {
tr.discard()
tr.setDone()
}
tr.lk.Unlock()
}
func (db *DB) waitCompaction() error {
if db.s.tLen(0) >= db.s.o.GetWriteL0PauseTrigger() {
return db.compTriggerWait(db.tcompCmdC)
}
return nil
}
// OpenTransaction opens an atomic DB transaction. Only one transaction can be
// opened at a time. Subsequent call to Write and OpenTransaction will be blocked
// until in-flight transaction is committed or discarded.
// The returned transaction handle is safe for concurrent use.
//
// Transaction is expensive and can overwhelm compaction, especially if
// transaction size is small. Use with caution.
//
// The transaction must be closed once done, either by committing or discarding
// the transaction.
// Closing the DB will discard open transaction.
func (db *DB) OpenTransaction() (*Transaction, error) {
if err := db.ok(); err != nil {
return nil, err
}
// The write happen synchronously.
select {
case db.writeLockC <- struct{}{}:
case err := <-db.compPerErrC:
return nil, err
case <-db.closeC:
return nil, ErrClosed
}
if db.tr != nil {
panic("leveldb: has open transaction")
}
// Flush current memdb.
if db.mem != nil && db.mem.Len() != 0 {
if _, err := db.rotateMem(0, true); err != nil {
return nil, err
}
}
// Wait compaction when certain threshold reached.
if err := db.waitCompaction(); err != nil {
return nil, err
}
tr := &Transaction{
db: db,
seq: db.seq,
mem: db.mpoolGet(0),
}
tr.mem.incref()
db.tr = tr
return tr, nil
}

View file

@ -1,102 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
// Reader is the interface that wraps basic Get and NewIterator methods.
// This interface implemented by both DB and Snapshot.
type Reader interface {
Get(key []byte, ro *opt.ReadOptions) (value []byte, err error)
NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator
}
// Sizes is list of size.
type Sizes []int64
// Sum returns sum of the sizes.
func (sizes Sizes) Sum() int64 {
var sum int64
for _, size := range sizes {
sum += size
}
return sum
}
// Logging.
func (db *DB) log(v ...interface{}) { db.s.log(v...) }
func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) }
// Check and clean files.
func (db *DB) checkAndCleanFiles() error {
v := db.s.version()
defer v.release()
tmap := make(map[int64]bool)
for _, tables := range v.levels {
for _, t := range tables {
tmap[t.fd.Num] = false
}
}
fds, err := db.s.stor.List(storage.TypeAll)
if err != nil {
return err
}
var nt int
var rem []storage.FileDesc
for _, fd := range fds {
keep := true
switch fd.Type {
case storage.TypeManifest:
keep = fd.Num >= db.s.manifestFd.Num
case storage.TypeJournal:
if !db.frozenJournalFd.Zero() {
keep = fd.Num >= db.frozenJournalFd.Num
} else {
keep = fd.Num >= db.journalFd.Num
}
case storage.TypeTable:
_, keep = tmap[fd.Num]
if keep {
tmap[fd.Num] = true
nt++
}
}
if !keep {
rem = append(rem, fd)
}
}
if nt != len(tmap) {
var mfds []storage.FileDesc
for num, present := range tmap {
if !present {
mfds = append(mfds, storage.FileDesc{storage.TypeTable, num})
db.logf("db@janitor table missing @%d", num)
}
}
return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds})
}
db.logf("db@janitor F·%d G·%d", len(fds), len(rem))
for _, fd := range rem {
db.logf("db@janitor removing %s-%d", fd.Type, fd.Num)
if err := db.s.stor.Remove(fd); err != nil {
return err
}
}
return nil
}

View file

@ -1,443 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"time"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
func (db *DB) writeJournal(batches []*Batch, seq uint64, sync bool) error {
wr, err := db.journal.Next()
if err != nil {
return err
}
if err := writeBatchesWithHeader(wr, batches, seq); err != nil {
return err
}
if err := db.journal.Flush(); err != nil {
return err
}
if sync {
return db.journalWriter.Sync()
}
return nil
}
func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) {
// Wait for pending memdb compaction.
err = db.compTriggerWait(db.mcompCmdC)
if err != nil {
return
}
// Create new memdb and journal.
mem, err = db.newMem(n)
if err != nil {
return
}
// Schedule memdb compaction.
if wait {
err = db.compTriggerWait(db.mcompCmdC)
} else {
db.compTrigger(db.mcompCmdC)
}
return
}
func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
delayed := false
slowdownTrigger := db.s.o.GetWriteL0SlowdownTrigger()
pauseTrigger := db.s.o.GetWriteL0PauseTrigger()
flush := func() (retry bool) {
mdb = db.getEffectiveMem()
if mdb == nil {
err = ErrClosed
return false
}
defer func() {
if retry {
mdb.decref()
mdb = nil
}
}()
tLen := db.s.tLen(0)
mdbFree = mdb.Free()
switch {
case tLen >= slowdownTrigger && !delayed:
delayed = true
time.Sleep(time.Millisecond)
case mdbFree >= n:
return false
case tLen >= pauseTrigger:
delayed = true
err = db.compTriggerWait(db.tcompCmdC)
if err != nil {
return false
}
default:
// Allow memdb to grow if it has no entry.
if mdb.Len() == 0 {
mdbFree = n
} else {
mdb.decref()
mdb, err = db.rotateMem(n, false)
if err == nil {
mdbFree = mdb.Free()
} else {
mdbFree = 0
}
}
return false
}
return true
}
start := time.Now()
for flush() {
}
if delayed {
db.writeDelay += time.Since(start)
db.writeDelayN++
} else if db.writeDelayN > 0 {
db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
db.writeDelay = 0
db.writeDelayN = 0
}
return
}
type writeMerge struct {
sync bool
batch *Batch
keyType keyType
key, value []byte
}
func (db *DB) unlockWrite(overflow bool, merged int, err error) {
for i := 0; i < merged; i++ {
db.writeAckC <- err
}
if overflow {
// Pass lock to the next write (that failed to merge).
db.writeMergedC <- false
} else {
// Release lock.
<-db.writeLockC
}
}
// ourBatch if defined should equal with batch.
func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error {
// Try to flush memdb. This method would also trying to throttle writes
// if it is too fast and compaction cannot catch-up.
mdb, mdbFree, err := db.flush(batch.internalLen)
if err != nil {
db.unlockWrite(false, 0, err)
return err
}
defer mdb.decref()
var (
overflow bool
merged int
batches = []*Batch{batch}
)
if merge {
// Merge limit.
var mergeLimit int
if batch.internalLen > 128<<10 {
mergeLimit = (1 << 20) - batch.internalLen
} else {
mergeLimit = 128 << 10
}
mergeCap := mdbFree - batch.internalLen
if mergeLimit > mergeCap {
mergeLimit = mergeCap
}
merge:
for mergeLimit > 0 {
select {
case incoming := <-db.writeMergeC:
if incoming.batch != nil {
// Merge batch.
if incoming.batch.internalLen > mergeLimit {
overflow = true
break merge
}
batches = append(batches, incoming.batch)
mergeLimit -= incoming.batch.internalLen
} else {
// Merge put.
internalLen := len(incoming.key) + len(incoming.value) + 8
if internalLen > mergeLimit {
overflow = true
break merge
}
if ourBatch == nil {
ourBatch = db.batchPool.Get().(*Batch)
ourBatch.Reset()
batches = append(batches, ourBatch)
}
// We can use same batch since concurrent write doesn't
// guarantee write order.
ourBatch.appendRec(incoming.keyType, incoming.key, incoming.value)
mergeLimit -= internalLen
}
sync = sync || incoming.sync
merged++
db.writeMergedC <- true
default:
break merge
}
}
}
// Seq number.
seq := db.seq + 1
// Write journal.
if err := db.writeJournal(batches, seq, sync); err != nil {
db.unlockWrite(overflow, merged, err)
return err
}
// Put batches.
for _, batch := range batches {
if err := batch.putMem(seq, mdb.DB); err != nil {
panic(err)
}
seq += uint64(batch.Len())
}
// Incr seq number.
db.addSeq(uint64(batchesLen(batches)))
// Rotate memdb if it's reach the threshold.
if batch.internalLen >= mdbFree {
db.rotateMem(0, false)
}
db.unlockWrite(overflow, merged, nil)
return nil
}
// Write apply the given batch to the DB. The batch records will be applied
// sequentially. Write might be used concurrently, when used concurrently and
// batch is small enough, write will try to merge the batches. Set NoWriteMerge
// option to true to disable write merge.
//
// It is safe to modify the contents of the arguments after Write returns but
// not before. Write will not modify content of the batch.
func (db *DB) Write(batch *Batch, wo *opt.WriteOptions) error {
if err := db.ok(); err != nil || batch == nil || batch.Len() == 0 {
return err
}
// If the batch size is larger than write buffer, it may justified to write
// using transaction instead. Using transaction the batch will be written
// into tables directly, skipping the journaling.
if batch.internalLen > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() {
tr, err := db.OpenTransaction()
if err != nil {
return err
}
if err := tr.Write(batch, wo); err != nil {
tr.Discard()
return err
}
return tr.Commit()
}
merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
sync := wo.GetSync() && !db.s.o.GetNoSync()
// Acquire write lock.
if merge {
select {
case db.writeMergeC <- writeMerge{sync: sync, batch: batch}:
if <-db.writeMergedC {
// Write is merged.
return <-db.writeAckC
}
// Write is not merged, the write lock is handed to us. Continue.
case db.writeLockC <- struct{}{}:
// Write lock acquired.
case err := <-db.compPerErrC:
// Compaction error.
return err
case <-db.closeC:
// Closed
return ErrClosed
}
} else {
select {
case db.writeLockC <- struct{}{}:
// Write lock acquired.
case err := <-db.compPerErrC:
// Compaction error.
return err
case <-db.closeC:
// Closed
return ErrClosed
}
}
return db.writeLocked(batch, nil, merge, sync)
}
func (db *DB) putRec(kt keyType, key, value []byte, wo *opt.WriteOptions) error {
if err := db.ok(); err != nil {
return err
}
merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge()
sync := wo.GetSync() && !db.s.o.GetNoSync()
// Acquire write lock.
if merge {
select {
case db.writeMergeC <- writeMerge{sync: sync, keyType: kt, key: key, value: value}:
if <-db.writeMergedC {
// Write is merged.
return <-db.writeAckC
}
// Write is not merged, the write lock is handed to us. Continue.
case db.writeLockC <- struct{}{}:
// Write lock acquired.
case err := <-db.compPerErrC:
// Compaction error.
return err
case <-db.closeC:
// Closed
return ErrClosed
}
} else {
select {
case db.writeLockC <- struct{}{}:
// Write lock acquired.
case err := <-db.compPerErrC:
// Compaction error.
return err
case <-db.closeC:
// Closed
return ErrClosed
}
}
batch := db.batchPool.Get().(*Batch)
batch.Reset()
batch.appendRec(kt, key, value)
return db.writeLocked(batch, batch, merge, sync)
}
// Put sets the value for the given key. It overwrites any previous value
// for that key; a DB is not a multi-map. Write merge also applies for Put, see
// Write.
//
// It is safe to modify the contents of the arguments after Put returns but not
// before.
func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error {
return db.putRec(keyTypeVal, key, value, wo)
}
// Delete deletes the value for the given key. Delete will not returns error if
// key doesn't exist. Write merge also applies for Delete, see Write.
//
// It is safe to modify the contents of the arguments after Delete returns but
// not before.
func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
return db.putRec(keyTypeDel, key, nil, wo)
}
func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
iter := mem.NewIterator(nil)
defer iter.Release()
return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) &&
(min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0))
}
// CompactRange compacts the underlying DB for the given key range.
// In particular, deleted and overwritten versions are discarded,
// and the data is rearranged to reduce the cost of operations
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// A nil Range.Start is treated as a key before all keys in the DB.
// And a nil Range.Limit is treated as a key after all keys in the DB.
// Therefore if both is nil then it will compact entire DB.
func (db *DB) CompactRange(r util.Range) error {
if err := db.ok(); err != nil {
return err
}
// Lock writer.
select {
case db.writeLockC <- struct{}{}:
case err := <-db.compPerErrC:
return err
case <-db.closeC:
return ErrClosed
}
// Check for overlaps in memdb.
mdb := db.getEffectiveMem()
if mdb == nil {
return ErrClosed
}
defer mdb.decref()
if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
// Memdb compaction.
if _, err := db.rotateMem(0, false); err != nil {
<-db.writeLockC
return err
}
<-db.writeLockC
if err := db.compTriggerWait(db.mcompCmdC); err != nil {
return err
}
} else {
<-db.writeLockC
}
// Table compaction.
return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit)
}
// SetReadOnly makes DB read-only. It will stay read-only until reopened.
func (db *DB) SetReadOnly() error {
if err := db.ok(); err != nil {
return err
}
// Lock writer.
select {
case db.writeLockC <- struct{}{}:
db.compWriteLocking = true
case err := <-db.compPerErrC:
return err
case <-db.closeC:
return ErrClosed
}
// Set compaction read-only.
select {
case db.compErrSetC <- ErrReadOnly:
case perr := <-db.compPerErrC:
return perr
case <-db.closeC:
return ErrClosed
}
return nil
}

View file

@ -1,90 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package leveldb provides implementation of LevelDB key/value database.
//
// Create or open a database:
//
// db, err := leveldb.OpenFile("path/to/db", nil)
// ...
// defer db.Close()
// ...
//
// Read or modify the database content:
//
// // Remember that the contents of the returned slice should not be modified.
// data, err := db.Get([]byte("key"), nil)
// ...
// err = db.Put([]byte("key"), []byte("value"), nil)
// ...
// err = db.Delete([]byte("key"), nil)
// ...
//
// Iterate over database content:
//
// iter := db.NewIterator(nil, nil)
// for iter.Next() {
// // Remember that the contents of the returned slice should not be modified, and
// // only valid until the next call to Next.
// key := iter.Key()
// value := iter.Value()
// ...
// }
// iter.Release()
// err = iter.Error()
// ...
//
// Iterate over subset of database content with a particular prefix:
// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil)
// for iter.Next() {
// // Use key/value.
// ...
// }
// iter.Release()
// err = iter.Error()
// ...
//
// Seek-then-Iterate:
//
// iter := db.NewIterator(nil, nil)
// for ok := iter.Seek(key); ok; ok = iter.Next() {
// // Use key/value.
// ...
// }
// iter.Release()
// err = iter.Error()
// ...
//
// Iterate over subset of database content:
//
// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil)
// for iter.Next() {
// // Use key/value.
// ...
// }
// iter.Release()
// err = iter.Error()
// ...
//
// Batch writes:
//
// batch := new(leveldb.Batch)
// batch.Put([]byte("foo"), []byte("value"))
// batch.Put([]byte("bar"), []byte("another value"))
// batch.Delete([]byte("baz"))
// err = db.Write(batch, nil)
// ...
//
// Use bloom filter:
//
// o := &opt.Options{
// Filter: filter.NewBloomFilter(10),
// }
// db, err := leveldb.OpenFile("path/to/db", o)
// ...
// defer db.Close()
// ...
package leveldb

View file

@ -1,20 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"github.com/syndtr/goleveldb/leveldb/errors"
)
// Common errors.
var (
ErrNotFound = errors.ErrNotFound
ErrReadOnly = errors.New("leveldb: read-only mode")
ErrSnapshotReleased = errors.New("leveldb: snapshot released")
ErrIterReleased = errors.New("leveldb: iterator released")
ErrClosed = errors.New("leveldb: closed")
)

View file

@ -1,78 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package errors provides common error types used throughout leveldb.
package errors
import (
"errors"
"fmt"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
// Common errors.
var (
ErrNotFound = New("leveldb: not found")
ErrReleased = util.ErrReleased
ErrHasReleaser = util.ErrHasReleaser
)
// New returns an error that formats as the given text.
func New(text string) error {
return errors.New(text)
}
// ErrCorrupted is the type that wraps errors that indicate corruption in
// the database.
type ErrCorrupted struct {
Fd storage.FileDesc
Err error
}
func (e *ErrCorrupted) Error() string {
if !e.Fd.Zero() {
return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
}
return e.Err.Error()
}
// NewErrCorrupted creates new ErrCorrupted error.
func NewErrCorrupted(fd storage.FileDesc, err error) error {
return &ErrCorrupted{fd, err}
}
// IsCorrupted returns a boolean indicating whether the error is indicating
// a corruption.
func IsCorrupted(err error) bool {
switch err.(type) {
case *ErrCorrupted:
return true
case *storage.ErrCorrupted:
return true
}
return false
}
// ErrMissingFiles is the type that indicating a corruption due to missing
// files. ErrMissingFiles always wrapped with ErrCorrupted.
type ErrMissingFiles struct {
Fds []storage.FileDesc
}
func (e *ErrMissingFiles) Error() string { return "file missing" }
// SetFd sets 'file info' of the given error with the given file.
// Currently only ErrCorrupted is supported, otherwise will do nothing.
func SetFd(err error, fd storage.FileDesc) error {
switch x := err.(type) {
case *ErrCorrupted:
x.Fd = fd
return x
}
return err
}

View file

@ -1,31 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"github.com/syndtr/goleveldb/leveldb/filter"
)
type iFilter struct {
filter.Filter
}
func (f iFilter) Contains(filter, key []byte) bool {
return f.Filter.Contains(filter, internalKey(key).ukey())
}
func (f iFilter) NewGenerator() filter.FilterGenerator {
return iFilterGenerator{f.Filter.NewGenerator()}
}
type iFilterGenerator struct {
filter.FilterGenerator
}
func (g iFilterGenerator) Add(key []byte) {
g.FilterGenerator.Add(internalKey(key).ukey())
}

View file

@ -1,116 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package filter
import (
"github.com/syndtr/goleveldb/leveldb/util"
)
func bloomHash(key []byte) uint32 {
return util.Hash(key, 0xbc9f1d34)
}
type bloomFilter int
// The bloom filter serializes its parameters and is backward compatible
// with respect to them. Therefor, its parameters are not added to its
// name.
func (bloomFilter) Name() string {
return "leveldb.BuiltinBloomFilter"
}
func (f bloomFilter) Contains(filter, key []byte) bool {
nBytes := len(filter) - 1
if nBytes < 1 {
return false
}
nBits := uint32(nBytes * 8)
// Use the encoded k so that we can read filters generated by
// bloom filters created using different parameters.
k := filter[nBytes]
if k > 30 {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true
}
kh := bloomHash(key)
delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
for j := uint8(0); j < k; j++ {
bitpos := kh % nBits
if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
return false
}
kh += delta
}
return true
}
func (f bloomFilter) NewGenerator() FilterGenerator {
// Round down to reduce probing cost a little bit.
k := uint8(f * 69 / 100) // 0.69 =~ ln(2)
if k < 1 {
k = 1
} else if k > 30 {
k = 30
}
return &bloomFilterGenerator{
n: int(f),
k: k,
}
}
type bloomFilterGenerator struct {
n int
k uint8
keyHashes []uint32
}
func (g *bloomFilterGenerator) Add(key []byte) {
// Use double-hashing to generate a sequence of hash values.
// See analysis in [Kirsch,Mitzenmacher 2006].
g.keyHashes = append(g.keyHashes, bloomHash(key))
}
func (g *bloomFilterGenerator) Generate(b Buffer) {
// Compute bloom filter size (in both bits and bytes)
nBits := uint32(len(g.keyHashes) * g.n)
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if nBits < 64 {
nBits = 64
}
nBytes := (nBits + 7) / 8
nBits = nBytes * 8
dest := b.Alloc(int(nBytes) + 1)
dest[nBytes] = g.k
for _, kh := range g.keyHashes {
delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
for j := uint8(0); j < g.k; j++ {
bitpos := kh % nBits
dest[bitpos/8] |= (1 << (bitpos % 8))
kh += delta
}
}
g.keyHashes = g.keyHashes[:0]
}
// NewBloomFilter creates a new initialized bloom filter for given
// bitsPerKey.
//
// Since bitsPerKey is persisted individually for each bloom filter
// serialization, bloom filters are backwards compatible with respect to
// changing bitsPerKey. This means that no big performance penalty will
// be experienced when changing the parameter. See documentation for
// opt.Options.Filter for more information.
func NewBloomFilter(bitsPerKey int) Filter {
return bloomFilter(bitsPerKey)
}

View file

@ -1,60 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package filter provides interface and implementation of probabilistic
// data structure.
//
// The filter is resposible for creating small filter from a set of keys.
// These filter will then used to test whether a key is a member of the set.
// In many cases, a filter can cut down the number of disk seeks from a
// handful to a single disk seek per DB.Get call.
package filter
// Buffer is the interface that wraps basic Alloc, Write and WriteByte methods.
type Buffer interface {
// Alloc allocs n bytes of slice from the buffer. This also advancing
// write offset.
Alloc(n int) []byte
// Write appends the contents of p to the buffer.
Write(p []byte) (n int, err error)
// WriteByte appends the byte c to the buffer.
WriteByte(c byte) error
}
// Filter is the filter.
type Filter interface {
// Name returns the name of this policy.
//
// Note that if the filter encoding changes in an incompatible way,
// the name returned by this method must be changed. Otherwise, old
// incompatible filters may be passed to methods of this type.
Name() string
// NewGenerator creates a new filter generator.
NewGenerator() FilterGenerator
// Contains returns true if the filter contains the given key.
//
// The filter are filters generated by the filter generator.
Contains(filter, key []byte) bool
}
// FilterGenerator is the filter generator.
type FilterGenerator interface {
// Add adds a key to the filter generator.
//
// The key may become invalid after call to this method end, therefor
// key must be copied if implementation require keeping key for later
// use. The key should not modified directly, doing so may cause
// undefined results.
Add(key []byte)
// Generate generates filters based on keys passed so far. After call
// to Generate the filter generator maybe resetted, depends on implementation.
Generate(b Buffer)
}

View file

@ -1,184 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package iterator
import (
"github.com/syndtr/goleveldb/leveldb/util"
)
// BasicArray is the interface that wraps basic Len and Search method.
type BasicArray interface {
// Len returns length of the array.
Len() int
// Search finds smallest index that point to a key that is greater
// than or equal to the given key.
Search(key []byte) int
}
// Array is the interface that wraps BasicArray and basic Index method.
type Array interface {
BasicArray
// Index returns key/value pair with index of i.
Index(i int) (key, value []byte)
}
// Array is the interface that wraps BasicArray and basic Get method.
type ArrayIndexer interface {
BasicArray
// Get returns a new data iterator with index of i.
Get(i int) Iterator
}
type basicArrayIterator struct {
util.BasicReleaser
array BasicArray
pos int
err error
}
func (i *basicArrayIterator) Valid() bool {
return i.pos >= 0 && i.pos < i.array.Len() && !i.Released()
}
func (i *basicArrayIterator) First() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
if i.array.Len() == 0 {
i.pos = -1
return false
}
i.pos = 0
return true
}
func (i *basicArrayIterator) Last() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
n := i.array.Len()
if n == 0 {
i.pos = 0
return false
}
i.pos = n - 1
return true
}
func (i *basicArrayIterator) Seek(key []byte) bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
n := i.array.Len()
if n == 0 {
i.pos = 0
return false
}
i.pos = i.array.Search(key)
if i.pos >= n {
return false
}
return true
}
func (i *basicArrayIterator) Next() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.pos++
if n := i.array.Len(); i.pos >= n {
i.pos = n
return false
}
return true
}
func (i *basicArrayIterator) Prev() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.pos--
if i.pos < 0 {
i.pos = -1
return false
}
return true
}
func (i *basicArrayIterator) Error() error { return i.err }
type arrayIterator struct {
basicArrayIterator
array Array
pos int
key, value []byte
}
func (i *arrayIterator) updateKV() {
if i.pos == i.basicArrayIterator.pos {
return
}
i.pos = i.basicArrayIterator.pos
if i.Valid() {
i.key, i.value = i.array.Index(i.pos)
} else {
i.key = nil
i.value = nil
}
}
func (i *arrayIterator) Key() []byte {
i.updateKV()
return i.key
}
func (i *arrayIterator) Value() []byte {
i.updateKV()
return i.value
}
type arrayIteratorIndexer struct {
basicArrayIterator
array ArrayIndexer
}
func (i *arrayIteratorIndexer) Get() Iterator {
if i.Valid() {
return i.array.Get(i.basicArrayIterator.pos)
}
return nil
}
// NewArrayIterator returns an iterator from the given array.
func NewArrayIterator(array Array) Iterator {
return &arrayIterator{
basicArrayIterator: basicArrayIterator{array: array, pos: -1},
array: array,
pos: -1,
}
}
// NewArrayIndexer returns an index iterator from the given array.
func NewArrayIndexer(array ArrayIndexer) IteratorIndexer {
return &arrayIteratorIndexer{
basicArrayIterator: basicArrayIterator{array: array, pos: -1},
array: array,
}
}

View file

@ -1,242 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package iterator
import (
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
// IteratorIndexer is the interface that wraps CommonIterator and basic Get
// method. IteratorIndexer provides index for indexed iterator.
type IteratorIndexer interface {
CommonIterator
// Get returns a new data iterator for the current position, or nil if
// done.
Get() Iterator
}
type indexedIterator struct {
util.BasicReleaser
index IteratorIndexer
strict bool
data Iterator
err error
errf func(err error)
closed bool
}
func (i *indexedIterator) setData() {
if i.data != nil {
i.data.Release()
}
i.data = i.index.Get()
}
func (i *indexedIterator) clearData() {
if i.data != nil {
i.data.Release()
}
i.data = nil
}
func (i *indexedIterator) indexErr() {
if err := i.index.Error(); err != nil {
if i.errf != nil {
i.errf(err)
}
i.err = err
}
}
func (i *indexedIterator) dataErr() bool {
if err := i.data.Error(); err != nil {
if i.errf != nil {
i.errf(err)
}
if i.strict || !errors.IsCorrupted(err) {
i.err = err
return true
}
}
return false
}
func (i *indexedIterator) Valid() bool {
return i.data != nil && i.data.Valid()
}
func (i *indexedIterator) First() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
if !i.index.First() {
i.indexErr()
i.clearData()
return false
}
i.setData()
return i.Next()
}
func (i *indexedIterator) Last() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
if !i.index.Last() {
i.indexErr()
i.clearData()
return false
}
i.setData()
if !i.data.Last() {
if i.dataErr() {
return false
}
i.clearData()
return i.Prev()
}
return true
}
func (i *indexedIterator) Seek(key []byte) bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
if !i.index.Seek(key) {
i.indexErr()
i.clearData()
return false
}
i.setData()
if !i.data.Seek(key) {
if i.dataErr() {
return false
}
i.clearData()
return i.Next()
}
return true
}
func (i *indexedIterator) Next() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
switch {
case i.data != nil && !i.data.Next():
if i.dataErr() {
return false
}
i.clearData()
fallthrough
case i.data == nil:
if !i.index.Next() {
i.indexErr()
return false
}
i.setData()
return i.Next()
}
return true
}
func (i *indexedIterator) Prev() bool {
if i.err != nil {
return false
} else if i.Released() {
i.err = ErrIterReleased
return false
}
switch {
case i.data != nil && !i.data.Prev():
if i.dataErr() {
return false
}
i.clearData()
fallthrough
case i.data == nil:
if !i.index.Prev() {
i.indexErr()
return false
}
i.setData()
if !i.data.Last() {
if i.dataErr() {
return false
}
i.clearData()
return i.Prev()
}
}
return true
}
func (i *indexedIterator) Key() []byte {
if i.data == nil {
return nil
}
return i.data.Key()
}
func (i *indexedIterator) Value() []byte {
if i.data == nil {
return nil
}
return i.data.Value()
}
func (i *indexedIterator) Release() {
i.clearData()
i.index.Release()
i.BasicReleaser.Release()
}
func (i *indexedIterator) Error() error {
if i.err != nil {
return i.err
}
if err := i.index.Error(); err != nil {
return err
}
return nil
}
func (i *indexedIterator) SetErrorCallback(f func(err error)) {
i.errf = f
}
// NewIndexedIterator returns an 'indexed iterator'. An index is iterator
// that returns another iterator, a 'data iterator'. A 'data iterator' is the
// iterator that contains actual key/value pairs.
//
// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true)
// won't be ignored and will halt 'indexed iterator', otherwise the iterator will
// continue to the next 'data iterator'. Corruption on 'index iterator' will not be
// ignored and will halt the iterator.
func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator {
return &indexedIterator{index: index, strict: strict}
}

View file

@ -1,132 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package iterator provides interface and implementation to traverse over
// contents of a database.
package iterator
import (
"errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
var (
ErrIterReleased = errors.New("leveldb/iterator: iterator released")
)
// IteratorSeeker is the interface that wraps the 'seeks method'.
type IteratorSeeker interface {
// First moves the iterator to the first key/value pair. If the iterator
// only contains one key/value pair then First and Last would moves
// to the same key/value pair.
// It returns whether such pair exist.
First() bool
// Last moves the iterator to the last key/value pair. If the iterator
// only contains one key/value pair then First and Last would moves
// to the same key/value pair.
// It returns whether such pair exist.
Last() bool
// Seek moves the iterator to the first key/value pair whose key is greater
// than or equal to the given key.
// It returns whether such pair exist.
//
// It is safe to modify the contents of the argument after Seek returns.
Seek(key []byte) bool
// Next moves the iterator to the next key/value pair.
// It returns whether the iterator is exhausted.
Next() bool
// Prev moves the iterator to the previous key/value pair.
// It returns whether the iterator is exhausted.
Prev() bool
}
// CommonIterator is the interface that wraps common iterator methods.
type CommonIterator interface {
IteratorSeeker
// util.Releaser is the interface that wraps basic Release method.
// When called Release will releases any resources associated with the
// iterator.
util.Releaser
// util.ReleaseSetter is the interface that wraps the basic SetReleaser
// method.
util.ReleaseSetter
// TODO: Remove this when ready.
Valid() bool
// Error returns any accumulated error. Exhausting all the key/value pairs
// is not considered to be an error.
Error() error
}
// Iterator iterates over a DB's key/value pairs in key order.
//
// When encounter an error any 'seeks method' will return false and will
// yield no key/value pairs. The error can be queried by calling the Error
// method. Calling Release is still necessary.
//
// An iterator must be released after use, but it is not necessary to read
// an iterator until exhaustion.
// Also, an iterator is not necessarily safe for concurrent use, but it is
// safe to use multiple iterators concurrently, with each in a dedicated
// goroutine.
type Iterator interface {
CommonIterator
// Key returns the key of the current key/value pair, or nil if done.
// The caller should not modify the contents of the returned slice, and
// its contents may change on the next call to any 'seeks method'.
Key() []byte
// Value returns the key of the current key/value pair, or nil if done.
// The caller should not modify the contents of the returned slice, and
// its contents may change on the next call to any 'seeks method'.
Value() []byte
}
// ErrorCallbackSetter is the interface that wraps basic SetErrorCallback
// method.
//
// ErrorCallbackSetter implemented by indexed and merged iterator.
type ErrorCallbackSetter interface {
// SetErrorCallback allows set an error callback of the corresponding
// iterator. Use nil to clear the callback.
SetErrorCallback(f func(err error))
}
type emptyIterator struct {
util.BasicReleaser
err error
}
func (i *emptyIterator) rErr() {
if i.err == nil && i.Released() {
i.err = ErrIterReleased
}
}
func (*emptyIterator) Valid() bool { return false }
func (i *emptyIterator) First() bool { i.rErr(); return false }
func (i *emptyIterator) Last() bool { i.rErr(); return false }
func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false }
func (i *emptyIterator) Next() bool { i.rErr(); return false }
func (i *emptyIterator) Prev() bool { i.rErr(); return false }
func (*emptyIterator) Key() []byte { return nil }
func (*emptyIterator) Value() []byte { return nil }
func (i *emptyIterator) Error() error { return i.err }
// NewEmptyIterator creates an empty iterator. The err parameter can be
// nil, but if not nil the given err will be returned by Error method.
func NewEmptyIterator(err error) Iterator {
return &emptyIterator{err: err}
}

View file

@ -1,304 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package iterator
import (
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/util"
)
type dir int
const (
dirReleased dir = iota - 1
dirSOI
dirEOI
dirBackward
dirForward
)
type mergedIterator struct {
cmp comparer.Comparer
iters []Iterator
strict bool
keys [][]byte
index int
dir dir
err error
errf func(err error)
releaser util.Releaser
}
func assertKey(key []byte) []byte {
if key == nil {
panic("leveldb/iterator: nil key")
}
return key
}
func (i *mergedIterator) iterErr(iter Iterator) bool {
if err := iter.Error(); err != nil {
if i.errf != nil {
i.errf(err)
}
if i.strict || !errors.IsCorrupted(err) {
i.err = err
return true
}
}
return false
}
func (i *mergedIterator) Valid() bool {
return i.err == nil && i.dir > dirEOI
}
func (i *mergedIterator) First() bool {
if i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
for x, iter := range i.iters {
switch {
case iter.First():
i.keys[x] = assertKey(iter.Key())
case i.iterErr(iter):
return false
default:
i.keys[x] = nil
}
}
i.dir = dirSOI
return i.next()
}
func (i *mergedIterator) Last() bool {
if i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
for x, iter := range i.iters {
switch {
case iter.Last():
i.keys[x] = assertKey(iter.Key())
case i.iterErr(iter):
return false
default:
i.keys[x] = nil
}
}
i.dir = dirEOI
return i.prev()
}
func (i *mergedIterator) Seek(key []byte) bool {
if i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
for x, iter := range i.iters {
switch {
case iter.Seek(key):
i.keys[x] = assertKey(iter.Key())
case i.iterErr(iter):
return false
default:
i.keys[x] = nil
}
}
i.dir = dirSOI
return i.next()
}
func (i *mergedIterator) next() bool {
var key []byte
if i.dir == dirForward {
key = i.keys[i.index]
}
for x, tkey := range i.keys {
if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) {
key = tkey
i.index = x
}
}
if key == nil {
i.dir = dirEOI
return false
}
i.dir = dirForward
return true
}
func (i *mergedIterator) Next() bool {
if i.dir == dirEOI || i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
switch i.dir {
case dirSOI:
return i.First()
case dirBackward:
key := append([]byte{}, i.keys[i.index]...)
if !i.Seek(key) {
return false
}
return i.Next()
}
x := i.index
iter := i.iters[x]
switch {
case iter.Next():
i.keys[x] = assertKey(iter.Key())
case i.iterErr(iter):
return false
default:
i.keys[x] = nil
}
return i.next()
}
func (i *mergedIterator) prev() bool {
var key []byte
if i.dir == dirBackward {
key = i.keys[i.index]
}
for x, tkey := range i.keys {
if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) {
key = tkey
i.index = x
}
}
if key == nil {
i.dir = dirSOI
return false
}
i.dir = dirBackward
return true
}
func (i *mergedIterator) Prev() bool {
if i.dir == dirSOI || i.err != nil {
return false
} else if i.dir == dirReleased {
i.err = ErrIterReleased
return false
}
switch i.dir {
case dirEOI:
return i.Last()
case dirForward:
key := append([]byte{}, i.keys[i.index]...)
for x, iter := range i.iters {
if x == i.index {
continue
}
seek := iter.Seek(key)
switch {
case seek && iter.Prev(), !seek && iter.Last():
i.keys[x] = assertKey(iter.Key())
case i.iterErr(iter):
return false
default:
i.keys[x] = nil
}
}
}
x := i.index
iter := i.iters[x]
switch {
case iter.Prev():
i.keys[x] = assertKey(iter.Key())
case i.iterErr(iter):
return false
default:
i.keys[x] = nil
}
return i.prev()
}
func (i *mergedIterator) Key() []byte {
if i.err != nil || i.dir <= dirEOI {
return nil
}
return i.keys[i.index]
}
func (i *mergedIterator) Value() []byte {
if i.err != nil || i.dir <= dirEOI {
return nil
}
return i.iters[i.index].Value()
}
func (i *mergedIterator) Release() {
if i.dir != dirReleased {
i.dir = dirReleased
for _, iter := range i.iters {
iter.Release()
}
i.iters = nil
i.keys = nil
if i.releaser != nil {
i.releaser.Release()
i.releaser = nil
}
}
}
func (i *mergedIterator) SetReleaser(releaser util.Releaser) {
if i.dir == dirReleased {
panic(util.ErrReleased)
}
if i.releaser != nil && releaser != nil {
panic(util.ErrHasReleaser)
}
i.releaser = releaser
}
func (i *mergedIterator) Error() error {
return i.err
}
func (i *mergedIterator) SetErrorCallback(f func(err error)) {
i.errf = f
}
// NewMergedIterator returns an iterator that merges its input. Walking the
// resultant iterator will return all key/value pairs of all input iterators
// in strictly increasing key order, as defined by cmp.
// The input's key ranges may overlap, but there are assumed to be no duplicate
// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
// None of the iters may be nil.
//
// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true)
// won't be ignored and will halt 'merged iterator', otherwise the iterator will
// continue to the next 'input iterator'.
func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator {
return &mergedIterator{
iters: iters,
cmp: cmp,
strict: strict,
keys: make([][]byte, len(iters)),
}
}

View file

@ -1,524 +0,0 @@
// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0
// License, authors and contributors informations can be found at bellow URLs respectively:
// https://code.google.com/p/leveldb-go/source/browse/LICENSE
// https://code.google.com/p/leveldb-go/source/browse/AUTHORS
// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS
// Package journal reads and writes sequences of journals. Each journal is a stream
// of bytes that completes before the next journal starts.
//
// When reading, call Next to obtain an io.Reader for the next journal. Next will
// return io.EOF when there are no more journals. It is valid to call Next
// without reading the current journal to exhaustion.
//
// When writing, call Next to obtain an io.Writer for the next journal. Calling
// Next finishes the current journal. Call Close to finish the final journal.
//
// Optionally, call Flush to finish the current journal and flush the underlying
// writer without starting a new journal. To start a new journal after flushing,
// call Next.
//
// Neither Readers or Writers are safe to use concurrently.
//
// Example code:
// func read(r io.Reader) ([]string, error) {
// var ss []string
// journals := journal.NewReader(r, nil, true, true)
// for {
// j, err := journals.Next()
// if err == io.EOF {
// break
// }
// if err != nil {
// return nil, err
// }
// s, err := ioutil.ReadAll(j)
// if err != nil {
// return nil, err
// }
// ss = append(ss, string(s))
// }
// return ss, nil
// }
//
// func write(w io.Writer, ss []string) error {
// journals := journal.NewWriter(w)
// for _, s := range ss {
// j, err := journals.Next()
// if err != nil {
// return err
// }
// if _, err := j.Write([]byte(s)), err != nil {
// return err
// }
// }
// return journals.Close()
// }
//
// The wire format is that the stream is divided into 32KiB blocks, and each
// block contains a number of tightly packed chunks. Chunks cannot cross block
// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a
// block must be zero.
//
// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4
// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type)
// followed by a payload. The checksum is over the chunk type and the payload.
//
// There are four chunk types: whether the chunk is the full journal, or the
// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal
// has one first chunk, zero or more middle chunks, and one last chunk.
//
// The wire format allows for limited recovery in the face of data corruption:
// on a format error (such as a checksum mismatch), the reader moves to the
// next block and looks for the next full or first chunk.
package journal
import (
"encoding/binary"
"fmt"
"io"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
// These constants are part of the wire format and should not be changed.
const (
fullChunkType = 1
firstChunkType = 2
middleChunkType = 3
lastChunkType = 4
)
const (
blockSize = 32 * 1024
headerSize = 7
)
type flusher interface {
Flush() error
}
// ErrCorrupted is the error type that generated by corrupted block or chunk.
type ErrCorrupted struct {
Size int
Reason string
}
func (e *ErrCorrupted) Error() string {
return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size)
}
// Dropper is the interface that wrap simple Drop method. The Drop
// method will be called when the journal reader dropping a block or chunk.
type Dropper interface {
Drop(err error)
}
// Reader reads journals from an underlying io.Reader.
type Reader struct {
// r is the underlying reader.
r io.Reader
// the dropper.
dropper Dropper
// strict flag.
strict bool
// checksum flag.
checksum bool
// seq is the sequence number of the current journal.
seq int
// buf[i:j] is the unread portion of the current chunk's payload.
// The low bound, i, excludes the chunk header.
i, j int
// n is the number of bytes of buf that are valid. Once reading has started,
// only the final block can have n < blockSize.
n int
// last is whether the current chunk is the last chunk of the journal.
last bool
// err is any accumulated error.
err error
// buf is the buffer.
buf [blockSize]byte
}
// NewReader returns a new reader. The dropper may be nil, and if
// strict is true then corrupted or invalid chunk will halt the journal
// reader entirely.
func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader {
return &Reader{
r: r,
dropper: dropper,
strict: strict,
checksum: checksum,
last: true,
}
}
var errSkip = errors.New("leveldb/journal: skipped")
func (r *Reader) corrupt(n int, reason string, skip bool) error {
if r.dropper != nil {
r.dropper.Drop(&ErrCorrupted{n, reason})
}
if r.strict && !skip {
r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason})
return r.err
}
return errSkip
}
// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the
// next block into the buffer if necessary.
func (r *Reader) nextChunk(first bool) error {
for {
if r.j+headerSize <= r.n {
checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4])
length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6])
chunkType := r.buf[r.j+6]
unprocBlock := r.n - r.j
if checksum == 0 && length == 0 && chunkType == 0 {
// Drop entire block.
r.i = r.n
r.j = r.n
return r.corrupt(unprocBlock, "zero header", false)
}
if chunkType < fullChunkType || chunkType > lastChunkType {
// Drop entire block.
r.i = r.n
r.j = r.n
return r.corrupt(unprocBlock, fmt.Sprintf("invalid chunk type %#x", chunkType), false)
}
r.i = r.j + headerSize
r.j = r.j + headerSize + int(length)
if r.j > r.n {
// Drop entire block.
r.i = r.n
r.j = r.n
return r.corrupt(unprocBlock, "chunk length overflows block", false)
} else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() {
// Drop entire block.
r.i = r.n
r.j = r.n
return r.corrupt(unprocBlock, "checksum mismatch", false)
}
if first && chunkType != fullChunkType && chunkType != firstChunkType {
chunkLength := (r.j - r.i) + headerSize
r.i = r.j
// Report the error, but skip it.
return r.corrupt(chunkLength, "orphan chunk", true)
}
r.last = chunkType == fullChunkType || chunkType == lastChunkType
return nil
}
// The last block.
if r.n < blockSize && r.n > 0 {
if !first {
return r.corrupt(0, "missing chunk part", false)
}
r.err = io.EOF
return r.err
}
// Read block.
n, err := io.ReadFull(r.r, r.buf[:])
if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
return err
}
if n == 0 {
if !first {
return r.corrupt(0, "missing chunk part", false)
}
r.err = io.EOF
return r.err
}
r.i, r.j, r.n = 0, 0, n
}
}
// Next returns a reader for the next journal. It returns io.EOF if there are no
// more journals. The reader returned becomes stale after the next Next call,
// and should no longer be used. If strict is false, the reader will returns
// io.ErrUnexpectedEOF error when found corrupted journal.
func (r *Reader) Next() (io.Reader, error) {
r.seq++
if r.err != nil {
return nil, r.err
}
r.i = r.j
for {
if err := r.nextChunk(true); err == nil {
break
} else if err != errSkip {
return nil, err
}
}
return &singleReader{r, r.seq, nil}, nil
}
// Reset resets the journal reader, allows reuse of the journal reader. Reset returns
// last accumulated error.
func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error {
r.seq++
err := r.err
r.r = reader
r.dropper = dropper
r.strict = strict
r.checksum = checksum
r.i = 0
r.j = 0
r.n = 0
r.last = true
r.err = nil
return err
}
type singleReader struct {
r *Reader
seq int
err error
}
func (x *singleReader) Read(p []byte) (int, error) {
r := x.r
if r.seq != x.seq {
return 0, errors.New("leveldb/journal: stale reader")
}
if x.err != nil {
return 0, x.err
}
if r.err != nil {
return 0, r.err
}
for r.i == r.j {
if r.last {
return 0, io.EOF
}
x.err = r.nextChunk(false)
if x.err != nil {
if x.err == errSkip {
x.err = io.ErrUnexpectedEOF
}
return 0, x.err
}
}
n := copy(p, r.buf[r.i:r.j])
r.i += n
return n, nil
}
func (x *singleReader) ReadByte() (byte, error) {
r := x.r
if r.seq != x.seq {
return 0, errors.New("leveldb/journal: stale reader")
}
if x.err != nil {
return 0, x.err
}
if r.err != nil {
return 0, r.err
}
for r.i == r.j {
if r.last {
return 0, io.EOF
}
x.err = r.nextChunk(false)
if x.err != nil {
if x.err == errSkip {
x.err = io.ErrUnexpectedEOF
}
return 0, x.err
}
}
c := r.buf[r.i]
r.i++
return c, nil
}
// Writer writes journals to an underlying io.Writer.
type Writer struct {
// w is the underlying writer.
w io.Writer
// seq is the sequence number of the current journal.
seq int
// f is w as a flusher.
f flusher
// buf[i:j] is the bytes that will become the current chunk.
// The low bound, i, includes the chunk header.
i, j int
// buf[:written] has already been written to w.
// written is zero unless Flush has been called.
written int
// first is whether the current chunk is the first chunk of the journal.
first bool
// pending is whether a chunk is buffered but not yet written.
pending bool
// err is any accumulated error.
err error
// buf is the buffer.
buf [blockSize]byte
}
// NewWriter returns a new Writer.
func NewWriter(w io.Writer) *Writer {
f, _ := w.(flusher)
return &Writer{
w: w,
f: f,
}
}
// fillHeader fills in the header for the pending chunk.
func (w *Writer) fillHeader(last bool) {
if w.i+headerSize > w.j || w.j > blockSize {
panic("leveldb/journal: bad writer state")
}
if last {
if w.first {
w.buf[w.i+6] = fullChunkType
} else {
w.buf[w.i+6] = lastChunkType
}
} else {
if w.first {
w.buf[w.i+6] = firstChunkType
} else {
w.buf[w.i+6] = middleChunkType
}
}
binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value())
binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize))
}
// writeBlock writes the buffered block to the underlying writer, and reserves
// space for the next chunk's header.
func (w *Writer) writeBlock() {
_, w.err = w.w.Write(w.buf[w.written:])
w.i = 0
w.j = headerSize
w.written = 0
}
// writePending finishes the current journal and writes the buffer to the
// underlying writer.
func (w *Writer) writePending() {
if w.err != nil {
return
}
if w.pending {
w.fillHeader(true)
w.pending = false
}
_, w.err = w.w.Write(w.buf[w.written:w.j])
w.written = w.j
}
// Close finishes the current journal and closes the writer.
func (w *Writer) Close() error {
w.seq++
w.writePending()
if w.err != nil {
return w.err
}
w.err = errors.New("leveldb/journal: closed Writer")
return nil
}
// Flush finishes the current journal, writes to the underlying writer, and
// flushes it if that writer implements interface{ Flush() error }.
func (w *Writer) Flush() error {
w.seq++
w.writePending()
if w.err != nil {
return w.err
}
if w.f != nil {
w.err = w.f.Flush()
return w.err
}
return nil
}
// Reset resets the journal writer, allows reuse of the journal writer. Reset
// will also closes the journal writer if not already.
func (w *Writer) Reset(writer io.Writer) (err error) {
w.seq++
if w.err == nil {
w.writePending()
err = w.err
}
w.w = writer
w.f, _ = writer.(flusher)
w.i = 0
w.j = 0
w.written = 0
w.first = false
w.pending = false
w.err = nil
return
}
// Next returns a writer for the next journal. The writer returned becomes stale
// after the next Close, Flush or Next call, and should no longer be used.
func (w *Writer) Next() (io.Writer, error) {
w.seq++
if w.err != nil {
return nil, w.err
}
if w.pending {
w.fillHeader(true)
}
w.i = w.j
w.j = w.j + headerSize
// Check if there is room in the block for the header.
if w.j > blockSize {
// Fill in the rest of the block with zeroes.
for k := w.i; k < blockSize; k++ {
w.buf[k] = 0
}
w.writeBlock()
if w.err != nil {
return nil, w.err
}
}
w.first = true
w.pending = true
return singleWriter{w, w.seq}, nil
}
type singleWriter struct {
w *Writer
seq int
}
func (x singleWriter) Write(p []byte) (int, error) {
w := x.w
if w.seq != x.seq {
return 0, errors.New("leveldb/journal: stale writer")
}
if w.err != nil {
return 0, w.err
}
n0 := len(p)
for len(p) > 0 {
// Write a block, if it is full.
if w.j == blockSize {
w.fillHeader(false)
w.writeBlock()
if w.err != nil {
return 0, w.err
}
w.first = false
}
// Copy bytes into the buffer.
n := copy(w.buf[w.j:], p)
w.j += n
p = p[n:]
}
return n0, nil
}

View file

@ -1,143 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"encoding/binary"
"fmt"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/storage"
)
// ErrInternalKeyCorrupted records internal key corruption.
type ErrInternalKeyCorrupted struct {
Ikey []byte
Reason string
}
func (e *ErrInternalKeyCorrupted) Error() string {
return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason)
}
func newErrInternalKeyCorrupted(ikey []byte, reason string) error {
return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason})
}
type keyType uint
func (kt keyType) String() string {
switch kt {
case keyTypeDel:
return "d"
case keyTypeVal:
return "v"
}
return fmt.Sprintf("<invalid:%#x>", uint(kt))
}
// Value types encoded as the last component of internal keys.
// Don't modify; this value are saved to disk.
const (
keyTypeDel = keyType(0)
keyTypeVal = keyType(1)
)
// keyTypeSeek defines the keyType that should be passed when constructing an
// internal key for seeking to a particular sequence number (since we
// sort sequence numbers in decreasing order and the value type is
// embedded as the low 8 bits in the sequence number in internal keys,
// we need to use the highest-numbered ValueType, not the lowest).
const keyTypeSeek = keyTypeVal
const (
// Maximum value possible for sequence number; the 8-bits are
// used by value type, so its can packed together in single
// 64-bit integer.
keyMaxSeq = (uint64(1) << 56) - 1
// Maximum value possible for packed sequence number and type.
keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek)
)
// Maximum number encoded in bytes.
var keyMaxNumBytes = make([]byte, 8)
func init() {
binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum)
}
type internalKey []byte
func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey {
if seq > keyMaxSeq {
panic("leveldb: invalid sequence number")
} else if kt > keyTypeVal {
panic("leveldb: invalid type")
}
dst = ensureBuffer(dst, len(ukey)+8)
copy(dst, ukey)
binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt))
return internalKey(dst)
}
func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) {
if len(ik) < 8 {
return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length")
}
num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
seq, kt = uint64(num>>8), keyType(num&0xff)
if kt > keyTypeVal {
return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type")
}
ukey = ik[:len(ik)-8]
return
}
func validInternalKey(ik []byte) bool {
_, _, _, err := parseInternalKey(ik)
return err == nil
}
func (ik internalKey) assert() {
if ik == nil {
panic("leveldb: nil internalKey")
}
if len(ik) < 8 {
panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik)))
}
}
func (ik internalKey) ukey() []byte {
ik.assert()
return ik[:len(ik)-8]
}
func (ik internalKey) num() uint64 {
ik.assert()
return binary.LittleEndian.Uint64(ik[len(ik)-8:])
}
func (ik internalKey) parseNum() (seq uint64, kt keyType) {
num := ik.num()
seq, kt = uint64(num>>8), keyType(num&0xff)
if kt > keyTypeVal {
panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
}
return
}
func (ik internalKey) String() string {
if ik == nil {
return "<nil>"
}
if ukey, seq, kt, err := parseInternalKey(ik); err == nil {
return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
}
return fmt.Sprintf("<invalid:%#x>", []byte(ik))
}

View file

@ -1,475 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package memdb provides in-memory key/value database implementation.
package memdb
import (
"math/rand"
"sync"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/util"
)
// Common errors.
var (
ErrNotFound = errors.ErrNotFound
ErrIterReleased = errors.New("leveldb/memdb: iterator released")
)
const tMaxHeight = 12
type dbIter struct {
util.BasicReleaser
p *DB
slice *util.Range
node int
forward bool
key, value []byte
err error
}
func (i *dbIter) fill(checkStart, checkLimit bool) bool {
if i.node != 0 {
n := i.p.nodeData[i.node]
m := n + i.p.nodeData[i.node+nKey]
i.key = i.p.kvData[n:m]
if i.slice != nil {
switch {
case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0:
fallthrough
case checkStart && i.slice.Start != nil && i.p.cmp.Compare(i.key, i.slice.Start) < 0:
i.node = 0
goto bail
}
}
i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]]
return true
}
bail:
i.key = nil
i.value = nil
return false
}
func (i *dbIter) Valid() bool {
return i.node != 0
}
func (i *dbIter) First() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
if i.slice != nil && i.slice.Start != nil {
i.node, _ = i.p.findGE(i.slice.Start, false)
} else {
i.node = i.p.nodeData[nNext]
}
return i.fill(false, true)
}
func (i *dbIter) Last() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.forward = false
i.p.mu.RLock()
defer i.p.mu.RUnlock()
if i.slice != nil && i.slice.Limit != nil {
i.node = i.p.findLT(i.slice.Limit)
} else {
i.node = i.p.findLast()
}
return i.fill(true, false)
}
func (i *dbIter) Seek(key []byte) bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
if i.slice != nil && i.slice.Start != nil && i.p.cmp.Compare(key, i.slice.Start) < 0 {
key = i.slice.Start
}
i.node, _ = i.p.findGE(key, false)
return i.fill(false, true)
}
func (i *dbIter) Next() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
if i.node == 0 {
if !i.forward {
return i.First()
}
return false
}
i.forward = true
i.p.mu.RLock()
defer i.p.mu.RUnlock()
i.node = i.p.nodeData[i.node+nNext]
return i.fill(false, true)
}
func (i *dbIter) Prev() bool {
if i.Released() {
i.err = ErrIterReleased
return false
}
if i.node == 0 {
if i.forward {
return i.Last()
}
return false
}
i.forward = false
i.p.mu.RLock()
defer i.p.mu.RUnlock()
i.node = i.p.findLT(i.key)
return i.fill(true, false)
}
func (i *dbIter) Key() []byte {
return i.key
}
func (i *dbIter) Value() []byte {
return i.value
}
func (i *dbIter) Error() error { return i.err }
func (i *dbIter) Release() {
if !i.Released() {
i.p = nil
i.node = 0
i.key = nil
i.value = nil
i.BasicReleaser.Release()
}
}
const (
nKV = iota
nKey
nVal
nHeight
nNext
)
// DB is an in-memory key/value database.
type DB struct {
cmp comparer.BasicComparer
rnd *rand.Rand
mu sync.RWMutex
kvData []byte
// Node data:
// [0] : KV offset
// [1] : Key length
// [2] : Value length
// [3] : Height
// [3..height] : Next nodes
nodeData []int
prevNode [tMaxHeight]int
maxHeight int
n int
kvSize int
}
func (p *DB) randHeight() (h int) {
const branching = 4
h = 1
for h < tMaxHeight && p.rnd.Int()%branching == 0 {
h++
}
return
}
// Must hold RW-lock if prev == true, as it use shared prevNode slice.
func (p *DB) findGE(key []byte, prev bool) (int, bool) {
node := 0
h := p.maxHeight - 1
for {
next := p.nodeData[node+nNext+h]
cmp := 1
if next != 0 {
o := p.nodeData[next]
cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key)
}
if cmp < 0 {
// Keep searching in this list
node = next
} else {
if prev {
p.prevNode[h] = node
} else if cmp == 0 {
return next, true
}
if h == 0 {
return next, cmp == 0
}
h--
}
}
}
func (p *DB) findLT(key []byte) int {
node := 0
h := p.maxHeight - 1
for {
next := p.nodeData[node+nNext+h]
o := p.nodeData[next]
if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 {
if h == 0 {
break
}
h--
} else {
node = next
}
}
return node
}
func (p *DB) findLast() int {
node := 0
h := p.maxHeight - 1
for {
next := p.nodeData[node+nNext+h]
if next == 0 {
if h == 0 {
break
}
h--
} else {
node = next
}
}
return node
}
// Put sets the value for the given key. It overwrites any previous value
// for that key; a DB is not a multi-map.
//
// It is safe to modify the contents of the arguments after Put returns.
func (p *DB) Put(key []byte, value []byte) error {
p.mu.Lock()
defer p.mu.Unlock()
if node, exact := p.findGE(key, true); exact {
kvOffset := len(p.kvData)
p.kvData = append(p.kvData, key...)
p.kvData = append(p.kvData, value...)
p.nodeData[node] = kvOffset
m := p.nodeData[node+nVal]
p.nodeData[node+nVal] = len(value)
p.kvSize += len(value) - m
return nil
}
h := p.randHeight()
if h > p.maxHeight {
for i := p.maxHeight; i < h; i++ {
p.prevNode[i] = 0
}
p.maxHeight = h
}
kvOffset := len(p.kvData)
p.kvData = append(p.kvData, key...)
p.kvData = append(p.kvData, value...)
// Node
node := len(p.nodeData)
p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h)
for i, n := range p.prevNode[:h] {
m := n + nNext + i
p.nodeData = append(p.nodeData, p.nodeData[m])
p.nodeData[m] = node
}
p.kvSize += len(key) + len(value)
p.n++
return nil
}
// Delete deletes the value for the given key. It returns ErrNotFound if
// the DB does not contain the key.
//
// It is safe to modify the contents of the arguments after Delete returns.
func (p *DB) Delete(key []byte) error {
p.mu.Lock()
defer p.mu.Unlock()
node, exact := p.findGE(key, true)
if !exact {
return ErrNotFound
}
h := p.nodeData[node+nHeight]
for i, n := range p.prevNode[:h] {
m := n + 4 + i
p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i]
}
p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal]
p.n--
return nil
}
// Contains returns true if the given key are in the DB.
//
// It is safe to modify the contents of the arguments after Contains returns.
func (p *DB) Contains(key []byte) bool {
p.mu.RLock()
_, exact := p.findGE(key, false)
p.mu.RUnlock()
return exact
}
// Get gets the value for the given key. It returns error.ErrNotFound if the
// DB does not contain the key.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Get returns.
func (p *DB) Get(key []byte) (value []byte, err error) {
p.mu.RLock()
if node, exact := p.findGE(key, false); exact {
o := p.nodeData[node] + p.nodeData[node+nKey]
value = p.kvData[o : o+p.nodeData[node+nVal]]
} else {
err = ErrNotFound
}
p.mu.RUnlock()
return
}
// Find finds key/value pair whose key is greater than or equal to the
// given key. It returns ErrNotFound if the table doesn't contain
// such pair.
//
// The caller should not modify the contents of the returned slice, but
// it is safe to modify the contents of the argument after Find returns.
func (p *DB) Find(key []byte) (rkey, value []byte, err error) {
p.mu.RLock()
if node, _ := p.findGE(key, false); node != 0 {
n := p.nodeData[node]
m := n + p.nodeData[node+nKey]
rkey = p.kvData[n:m]
value = p.kvData[m : m+p.nodeData[node+nVal]]
} else {
err = ErrNotFound
}
p.mu.RUnlock()
return
}
// NewIterator returns an iterator of the DB.
// The returned iterator is not safe for concurrent use, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
// underlying DB. However, the resultant key/value pairs are not guaranteed
// to be a consistent snapshot of the DB at a particular point in time.
//
// Slice allows slicing the iterator to only contains keys in the given
// range. A nil Range.Start is treated as a key before all keys in the
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// The iterator must be released after use, by calling Release method.
//
// Also read Iterator documentation of the leveldb/iterator package.
func (p *DB) NewIterator(slice *util.Range) iterator.Iterator {
return &dbIter{p: p, slice: slice}
}
// Capacity returns keys/values buffer capacity.
func (p *DB) Capacity() int {
p.mu.RLock()
defer p.mu.RUnlock()
return cap(p.kvData)
}
// Size returns sum of keys and values length. Note that deleted
// key/value will not be accounted for, but it will still consume
// the buffer, since the buffer is append only.
func (p *DB) Size() int {
p.mu.RLock()
defer p.mu.RUnlock()
return p.kvSize
}
// Free returns keys/values free buffer before need to grow.
func (p *DB) Free() int {
p.mu.RLock()
defer p.mu.RUnlock()
return cap(p.kvData) - len(p.kvData)
}
// Len returns the number of entries in the DB.
func (p *DB) Len() int {
p.mu.RLock()
defer p.mu.RUnlock()
return p.n
}
// Reset resets the DB to initial empty state. Allows reuse the buffer.
func (p *DB) Reset() {
p.mu.Lock()
p.rnd = rand.New(rand.NewSource(0xdeadbeef))
p.maxHeight = 1
p.n = 0
p.kvSize = 0
p.kvData = p.kvData[:0]
p.nodeData = p.nodeData[:nNext+tMaxHeight]
p.nodeData[nKV] = 0
p.nodeData[nKey] = 0
p.nodeData[nVal] = 0
p.nodeData[nHeight] = tMaxHeight
for n := 0; n < tMaxHeight; n++ {
p.nodeData[nNext+n] = 0
p.prevNode[n] = 0
}
p.mu.Unlock()
}
// New creates a new initialized in-memory key/value DB. The capacity
// is the initial key/value buffer capacity. The capacity is advisory,
// not enforced.
//
// This DB is append-only, deleting an entry would remove entry node but not
// reclaim KV buffer.
//
// The returned DB instance is safe for concurrent use.
func New(cmp comparer.BasicComparer, capacity int) *DB {
p := &DB{
cmp: cmp,
rnd: rand.New(rand.NewSource(0xdeadbeef)),
maxHeight: 1,
kvData: make([]byte, 0, capacity),
nodeData: make([]int, 4+tMaxHeight),
}
p.nodeData[nHeight] = tMaxHeight
return p
}

View file

@ -1,684 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package opt provides sets of options used by LevelDB.
package opt
import (
"math"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"
)
const (
KiB = 1024
MiB = KiB * 1024
GiB = MiB * 1024
)
var (
DefaultBlockCacher = LRUCacher
DefaultBlockCacheCapacity = 8 * MiB
DefaultBlockRestartInterval = 16
DefaultBlockSize = 4 * KiB
DefaultCompactionExpandLimitFactor = 25
DefaultCompactionGPOverlapsFactor = 10
DefaultCompactionL0Trigger = 4
DefaultCompactionSourceLimitFactor = 1
DefaultCompactionTableSize = 2 * MiB
DefaultCompactionTableSizeMultiplier = 1.0
DefaultCompactionTotalSize = 10 * MiB
DefaultCompactionTotalSizeMultiplier = 10.0
DefaultCompressionType = SnappyCompression
DefaultIteratorSamplingRate = 1 * MiB
DefaultOpenFilesCacher = LRUCacher
DefaultOpenFilesCacheCapacity = 500
DefaultWriteBuffer = 4 * MiB
DefaultWriteL0PauseTrigger = 12
DefaultWriteL0SlowdownTrigger = 8
)
// Cacher is a caching algorithm.
type Cacher interface {
New(capacity int) cache.Cacher
}
type CacherFunc struct {
NewFunc func(capacity int) cache.Cacher
}
func (f *CacherFunc) New(capacity int) cache.Cacher {
if f.NewFunc != nil {
return f.NewFunc(capacity)
}
return nil
}
func noCacher(int) cache.Cacher { return nil }
var (
// LRUCacher is the LRU-cache algorithm.
LRUCacher = &CacherFunc{cache.NewLRU}
// NoCacher is the value to disable caching algorithm.
NoCacher = &CacherFunc{}
)
// Compression is the 'sorted table' block compression algorithm to use.
type Compression uint
func (c Compression) String() string {
switch c {
case DefaultCompression:
return "default"
case NoCompression:
return "none"
case SnappyCompression:
return "snappy"
}
return "invalid"
}
const (
DefaultCompression Compression = iota
NoCompression
SnappyCompression
nCompression
)
// Strict is the DB 'strict level'.
type Strict uint
const (
// If present then a corrupted or invalid chunk or block in manifest
// journal will cause an error instead of being dropped.
// This will prevent database with corrupted manifest to be opened.
StrictManifest Strict = 1 << iota
// If present then journal chunk checksum will be verified.
StrictJournalChecksum
// If present then a corrupted or invalid chunk or block in journal
// will cause an error instead of being dropped.
// This will prevent database with corrupted journal to be opened.
StrictJournal
// If present then 'sorted table' block checksum will be verified.
// This has effect on both 'read operation' and compaction.
StrictBlockChecksum
// If present then a corrupted 'sorted table' will fails compaction.
// The database will enter read-only mode.
StrictCompaction
// If present then a corrupted 'sorted table' will halts 'read operation'.
StrictReader
// If present then leveldb.Recover will drop corrupted 'sorted table'.
StrictRecovery
// This only applicable for ReadOptions, if present then this ReadOptions
// 'strict level' will override global ones.
StrictOverride
// StrictAll enables all strict flags.
StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery
// DefaultStrict is the default strict flags. Specify any strict flags
// will override default strict flags as whole (i.e. not OR'ed).
DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader
// NoStrict disables all strict flags. Override default strict flags.
NoStrict = ^StrictAll
)
// Options holds the optional parameters for the DB at large.
type Options struct {
// AltFilters defines one or more 'alternative filters'.
// 'alternative filters' will be used during reads if a filter block
// does not match with the 'effective filter'.
//
// The default value is nil
AltFilters []filter.Filter
// BlockCacher provides cache algorithm for LevelDB 'sorted table' block caching.
// Specify NoCacher to disable caching algorithm.
//
// The default value is LRUCacher.
BlockCacher Cacher
// BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
// Use -1 for zero, this has same effect as specifying NoCacher to BlockCacher.
//
// The default value is 8MiB.
BlockCacheCapacity int
// BlockRestartInterval is the number of keys between restart points for
// delta encoding of keys.
//
// The default value is 16.
BlockRestartInterval int
// BlockSize is the minimum uncompressed size in bytes of each 'sorted table'
// block.
//
// The default value is 4KiB.
BlockSize int
// CompactionExpandLimitFactor limits compaction size after expanded.
// This will be multiplied by table size limit at compaction target level.
//
// The default value is 25.
CompactionExpandLimitFactor int
// CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a
// single 'sorted table' generates.
// This will be multiplied by table size limit at grandparent level.
//
// The default value is 10.
CompactionGPOverlapsFactor int
// CompactionL0Trigger defines number of 'sorted table' at level-0 that will
// trigger compaction.
//
// The default value is 4.
CompactionL0Trigger int
// CompactionSourceLimitFactor limits compaction source size. This doesn't apply to
// level-0.
// This will be multiplied by table size limit at compaction target level.
//
// The default value is 1.
CompactionSourceLimitFactor int
// CompactionTableSize limits size of 'sorted table' that compaction generates.
// The limits for each level will be calculated as:
// CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
// The multiplier for each level can also fine-tuned using CompactionTableSizeMultiplierPerLevel.
//
// The default value is 2MiB.
CompactionTableSize int
// CompactionTableSizeMultiplier defines multiplier for CompactionTableSize.
//
// The default value is 1.
CompactionTableSizeMultiplier float64
// CompactionTableSizeMultiplierPerLevel defines per-level multiplier for
// CompactionTableSize.
// Use zero to skip a level.
//
// The default value is nil.
CompactionTableSizeMultiplierPerLevel []float64
// CompactionTotalSize limits total size of 'sorted table' for each level.
// The limits for each level will be calculated as:
// CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
// The multiplier for each level can also fine-tuned using
// CompactionTotalSizeMultiplierPerLevel.
//
// The default value is 10MiB.
CompactionTotalSize int
// CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize.
//
// The default value is 10.
CompactionTotalSizeMultiplier float64
// CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for
// CompactionTotalSize.
// Use zero to skip a level.
//
// The default value is nil.
CompactionTotalSizeMultiplierPerLevel []float64
// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship. The same comparison algorithm must be used for reads
// and writes over the lifetime of the DB.
//
// The default value uses the same ordering as bytes.Compare.
Comparer comparer.Comparer
// Compression defines the 'sorted table' block compression to use.
//
// The default value (DefaultCompression) uses snappy compression.
Compression Compression
// DisableBufferPool allows disable use of util.BufferPool functionality.
//
// The default value is false.
DisableBufferPool bool
// DisableBlockCache allows disable use of cache.Cache functionality on
// 'sorted table' block.
//
// The default value is false.
DisableBlockCache bool
// DisableCompactionBackoff allows disable compaction retry backoff.
//
// The default value is false.
DisableCompactionBackoff bool
// DisableLargeBatchTransaction allows disabling switch-to-transaction mode
// on large batch write. If enable batch writes large than WriteBuffer will
// use transaction.
//
// The default is false.
DisableLargeBatchTransaction bool
// ErrorIfExist defines whether an error should returned if the DB already
// exist.
//
// The default value is false.
ErrorIfExist bool
// ErrorIfMissing defines whether an error should returned if the DB is
// missing. If false then the database will be created if missing, otherwise
// an error will be returned.
//
// The default value is false.
ErrorIfMissing bool
// Filter defines an 'effective filter' to use. An 'effective filter'
// if defined will be used to generate per-table filter block.
// The filter name will be stored on disk.
// During reads LevelDB will try to find matching filter from
// 'effective filter' and 'alternative filters'.
//
// Filter can be changed after a DB has been created. It is recommended
// to put old filter to the 'alternative filters' to mitigate lack of
// filter during transition period.
//
// A filter is used to reduce disk reads when looking for a specific key.
//
// The default value is nil.
Filter filter.Filter
// IteratorSamplingRate defines approximate gap (in bytes) between read
// sampling of an iterator. The samples will be used to determine when
// compaction should be triggered.
//
// The default is 1MiB.
IteratorSamplingRate int
// NoSync allows completely disable fsync.
//
// The default is false.
NoSync bool
// NoWriteMerge allows disabling write merge.
//
// The default is false.
NoWriteMerge bool
// OpenFilesCacher provides cache algorithm for open files caching.
// Specify NoCacher to disable caching algorithm.
//
// The default value is LRUCacher.
OpenFilesCacher Cacher
// OpenFilesCacheCapacity defines the capacity of the open files caching.
// Use -1 for zero, this has same effect as specifying NoCacher to OpenFilesCacher.
//
// The default value is 500.
OpenFilesCacheCapacity int
// If true then opens DB in read-only mode.
//
// The default value is false.
ReadOnly bool
// Strict defines the DB strict level.
Strict Strict
// WriteBuffer defines maximum size of a 'memdb' before flushed to
// 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk
// unsorted journal.
//
// LevelDB may held up to two 'memdb' at the same time.
//
// The default value is 4MiB.
WriteBuffer int
// WriteL0StopTrigger defines number of 'sorted table' at level-0 that will
// pause write.
//
// The default value is 12.
WriteL0PauseTrigger int
// WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that
// will trigger write slowdown.
//
// The default value is 8.
WriteL0SlowdownTrigger int
}
func (o *Options) GetAltFilters() []filter.Filter {
if o == nil {
return nil
}
return o.AltFilters
}
func (o *Options) GetBlockCacher() Cacher {
if o == nil || o.BlockCacher == nil {
return DefaultBlockCacher
} else if o.BlockCacher == NoCacher {
return nil
}
return o.BlockCacher
}
func (o *Options) GetBlockCacheCapacity() int {
if o == nil || o.BlockCacheCapacity == 0 {
return DefaultBlockCacheCapacity
} else if o.BlockCacheCapacity < 0 {
return 0
}
return o.BlockCacheCapacity
}
func (o *Options) GetBlockRestartInterval() int {
if o == nil || o.BlockRestartInterval <= 0 {
return DefaultBlockRestartInterval
}
return o.BlockRestartInterval
}
func (o *Options) GetBlockSize() int {
if o == nil || o.BlockSize <= 0 {
return DefaultBlockSize
}
return o.BlockSize
}
func (o *Options) GetCompactionExpandLimit(level int) int {
factor := DefaultCompactionExpandLimitFactor
if o != nil && o.CompactionExpandLimitFactor > 0 {
factor = o.CompactionExpandLimitFactor
}
return o.GetCompactionTableSize(level+1) * factor
}
func (o *Options) GetCompactionGPOverlaps(level int) int {
factor := DefaultCompactionGPOverlapsFactor
if o != nil && o.CompactionGPOverlapsFactor > 0 {
factor = o.CompactionGPOverlapsFactor
}
return o.GetCompactionTableSize(level+2) * factor
}
func (o *Options) GetCompactionL0Trigger() int {
if o == nil || o.CompactionL0Trigger == 0 {
return DefaultCompactionL0Trigger
}
return o.CompactionL0Trigger
}
func (o *Options) GetCompactionSourceLimit(level int) int {
factor := DefaultCompactionSourceLimitFactor
if o != nil && o.CompactionSourceLimitFactor > 0 {
factor = o.CompactionSourceLimitFactor
}
return o.GetCompactionTableSize(level+1) * factor
}
func (o *Options) GetCompactionTableSize(level int) int {
var (
base = DefaultCompactionTableSize
mult float64
)
if o != nil {
if o.CompactionTableSize > 0 {
base = o.CompactionTableSize
}
if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
mult = o.CompactionTableSizeMultiplierPerLevel[level]
} else if o.CompactionTableSizeMultiplier > 0 {
mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
}
}
if mult == 0 {
mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level))
}
return int(float64(base) * mult)
}
func (o *Options) GetCompactionTotalSize(level int) int64 {
var (
base = DefaultCompactionTotalSize
mult float64
)
if o != nil {
if o.CompactionTotalSize > 0 {
base = o.CompactionTotalSize
}
if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
mult = o.CompactionTotalSizeMultiplierPerLevel[level]
} else if o.CompactionTotalSizeMultiplier > 0 {
mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
}
}
if mult == 0 {
mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level))
}
return int64(float64(base) * mult)
}
func (o *Options) GetComparer() comparer.Comparer {
if o == nil || o.Comparer == nil {
return comparer.DefaultComparer
}
return o.Comparer
}
func (o *Options) GetCompression() Compression {
if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression {
return DefaultCompressionType
}
return o.Compression
}
func (o *Options) GetDisableBufferPool() bool {
if o == nil {
return false
}
return o.DisableBufferPool
}
func (o *Options) GetDisableBlockCache() bool {
if o == nil {
return false
}
return o.DisableBlockCache
}
func (o *Options) GetDisableCompactionBackoff() bool {
if o == nil {
return false
}
return o.DisableCompactionBackoff
}
func (o *Options) GetDisableLargeBatchTransaction() bool {
if o == nil {
return false
}
return o.DisableLargeBatchTransaction
}
func (o *Options) GetErrorIfExist() bool {
if o == nil {
return false
}
return o.ErrorIfExist
}
func (o *Options) GetErrorIfMissing() bool {
if o == nil {
return false
}
return o.ErrorIfMissing
}
func (o *Options) GetFilter() filter.Filter {
if o == nil {
return nil
}
return o.Filter
}
func (o *Options) GetIteratorSamplingRate() int {
if o == nil || o.IteratorSamplingRate <= 0 {
return DefaultIteratorSamplingRate
}
return o.IteratorSamplingRate
}
func (o *Options) GetNoSync() bool {
if o == nil {
return false
}
return o.NoSync
}
func (o *Options) GetNoWriteMerge() bool {
if o == nil {
return false
}
return o.NoWriteMerge
}
func (o *Options) GetOpenFilesCacher() Cacher {
if o == nil || o.OpenFilesCacher == nil {
return DefaultOpenFilesCacher
}
if o.OpenFilesCacher == NoCacher {
return nil
}
return o.OpenFilesCacher
}
func (o *Options) GetOpenFilesCacheCapacity() int {
if o == nil || o.OpenFilesCacheCapacity == 0 {
return DefaultOpenFilesCacheCapacity
} else if o.OpenFilesCacheCapacity < 0 {
return 0
}
return o.OpenFilesCacheCapacity
}
func (o *Options) GetReadOnly() bool {
if o == nil {
return false
}
return o.ReadOnly
}
func (o *Options) GetStrict(strict Strict) bool {
if o == nil || o.Strict == 0 {
return DefaultStrict&strict != 0
}
return o.Strict&strict != 0
}
func (o *Options) GetWriteBuffer() int {
if o == nil || o.WriteBuffer <= 0 {
return DefaultWriteBuffer
}
return o.WriteBuffer
}
func (o *Options) GetWriteL0PauseTrigger() int {
if o == nil || o.WriteL0PauseTrigger == 0 {
return DefaultWriteL0PauseTrigger
}
return o.WriteL0PauseTrigger
}
func (o *Options) GetWriteL0SlowdownTrigger() int {
if o == nil || o.WriteL0SlowdownTrigger == 0 {
return DefaultWriteL0SlowdownTrigger
}
return o.WriteL0SlowdownTrigger
}
// ReadOptions holds the optional parameters for 'read operation'. The
// 'read operation' includes Get, Find and NewIterator.
type ReadOptions struct {
// DontFillCache defines whether block reads for this 'read operation'
// should be cached. If false then the block will be cached. This does
// not affects already cached block.
//
// The default value is false.
DontFillCache bool
// Strict will be OR'ed with global DB 'strict level' unless StrictOverride
// is present. Currently only StrictReader that has effect here.
Strict Strict
}
func (ro *ReadOptions) GetDontFillCache() bool {
if ro == nil {
return false
}
return ro.DontFillCache
}
func (ro *ReadOptions) GetStrict(strict Strict) bool {
if ro == nil {
return false
}
return ro.Strict&strict != 0
}
// WriteOptions holds the optional parameters for 'write operation'. The
// 'write operation' includes Write, Put and Delete.
type WriteOptions struct {
// NoWriteMerge allows disabling write merge.
//
// The default is false.
NoWriteMerge bool
// Sync is whether to sync underlying writes from the OS buffer cache
// through to actual disk, if applicable. Setting Sync can result in
// slower writes.
//
// If false, and the machine crashes, then some recent writes may be lost.
// Note that if it is just the process that crashes (and the machine does
// not) then no writes will be lost.
//
// In other words, Sync being false has the same semantics as a write
// system call. Sync being true means write followed by fsync.
//
// The default value is false.
Sync bool
}
func (wo *WriteOptions) GetNoWriteMerge() bool {
if wo == nil {
return false
}
return wo.NoWriteMerge
}
func (wo *WriteOptions) GetSync() bool {
if wo == nil {
return false
}
return wo.Sync
}
func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool {
if ro.GetStrict(StrictOverride) {
return ro.GetStrict(strict)
} else {
return o.GetStrict(strict) || ro.GetStrict(strict)
}
}

View file

@ -1,107 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/opt"
)
func dupOptions(o *opt.Options) *opt.Options {
newo := &opt.Options{}
if o != nil {
*newo = *o
}
if newo.Strict == 0 {
newo.Strict = opt.DefaultStrict
}
return newo
}
func (s *session) setOptions(o *opt.Options) {
no := dupOptions(o)
// Alternative filters.
if filters := o.GetAltFilters(); len(filters) > 0 {
no.AltFilters = make([]filter.Filter, len(filters))
for i, filter := range filters {
no.AltFilters[i] = &iFilter{filter}
}
}
// Comparer.
s.icmp = &iComparer{o.GetComparer()}
no.Comparer = s.icmp
// Filter.
if filter := o.GetFilter(); filter != nil {
no.Filter = &iFilter{filter}
}
s.o = &cachedOptions{Options: no}
s.o.cache()
}
const optCachedLevel = 7
type cachedOptions struct {
*opt.Options
compactionExpandLimit []int
compactionGPOverlaps []int
compactionSourceLimit []int
compactionTableSize []int
compactionTotalSize []int64
}
func (co *cachedOptions) cache() {
co.compactionExpandLimit = make([]int, optCachedLevel)
co.compactionGPOverlaps = make([]int, optCachedLevel)
co.compactionSourceLimit = make([]int, optCachedLevel)
co.compactionTableSize = make([]int, optCachedLevel)
co.compactionTotalSize = make([]int64, optCachedLevel)
for level := 0; level < optCachedLevel; level++ {
co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level)
co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level)
co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level)
}
}
func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
if level < optCachedLevel {
return co.compactionExpandLimit[level]
}
return co.Options.GetCompactionExpandLimit(level)
}
func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
if level < optCachedLevel {
return co.compactionGPOverlaps[level]
}
return co.Options.GetCompactionGPOverlaps(level)
}
func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
if level < optCachedLevel {
return co.compactionSourceLimit[level]
}
return co.Options.GetCompactionSourceLimit(level)
}
func (co *cachedOptions) GetCompactionTableSize(level int) int {
if level < optCachedLevel {
return co.compactionTableSize[level]
}
return co.Options.GetCompactionTableSize(level)
}
func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
if level < optCachedLevel {
return co.compactionTotalSize[level]
}
return co.Options.GetCompactionTotalSize(level)
}

View file

@ -1,208 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"fmt"
"io"
"os"
"sync"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
)
// ErrManifestCorrupted records manifest corruption. This error will be
// wrapped with errors.ErrCorrupted.
type ErrManifestCorrupted struct {
Field string
Reason string
}
func (e *ErrManifestCorrupted) Error() string {
return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason)
}
func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error {
return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason})
}
// session represent a persistent database session.
type session struct {
// Need 64-bit alignment.
stNextFileNum int64 // current unused file number
stJournalNum int64 // current journal file number; need external synchronization
stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb
stTempFileNum int64
stSeqNum uint64 // last mem compacted seq; need external synchronization
stor storage.Storage
storLock storage.Locker
o *cachedOptions
icmp *iComparer
tops *tOps
manifest *journal.Writer
manifestWriter storage.Writer
manifestFd storage.FileDesc
stCompPtrs []internalKey // compaction pointers; need external synchronization
stVersion *version // current version
vmu sync.Mutex
}
// Creates new initialized session instance.
func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
if stor == nil {
return nil, os.ErrInvalid
}
storLock, err := stor.Lock()
if err != nil {
return
}
s = &session{
stor: stor,
storLock: storLock,
}
s.setOptions(o)
s.tops = newTableOps(s)
s.setVersion(newVersion(s))
s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed")
return
}
// Close session.
func (s *session) close() {
s.tops.close()
if s.manifest != nil {
s.manifest.Close()
}
if s.manifestWriter != nil {
s.manifestWriter.Close()
}
s.manifest = nil
s.manifestWriter = nil
s.setVersion(&version{s: s, closing: true})
}
// Release session lock.
func (s *session) release() {
s.storLock.Unlock()
}
// Create a new database session; need external synchronization.
func (s *session) create() error {
// create manifest
return s.newManifest(nil, nil)
}
// Recover a database session; need external synchronization.
func (s *session) recover() (err error) {
defer func() {
if os.IsNotExist(err) {
// Don't return os.ErrNotExist if the underlying storage contains
// other files that belong to LevelDB. So the DB won't get trashed.
if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 {
err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
}
}
}()
fd, err := s.stor.GetMeta()
if err != nil {
return
}
reader, err := s.stor.Open(fd)
if err != nil {
return
}
defer reader.Close()
var (
// Options.
strict = s.o.GetStrict(opt.StrictManifest)
jr = journal.NewReader(reader, dropper{s, fd}, strict, true)
rec = &sessionRecord{}
staging = s.stVersion.newStaging()
)
for {
var r io.Reader
r, err = jr.Next()
if err != nil {
if err == io.EOF {
err = nil
break
}
return errors.SetFd(err, fd)
}
err = rec.decode(r)
if err == nil {
// save compact pointers
for _, r := range rec.compPtrs {
s.setCompPtr(r.level, internalKey(r.ikey))
}
// commit record to version staging
staging.commit(rec)
} else {
err = errors.SetFd(err, fd)
if strict || !errors.IsCorrupted(err) {
return
}
s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd))
}
rec.resetCompPtrs()
rec.resetAddedTables()
rec.resetDeletedTables()
}
switch {
case !rec.has(recComparer):
return newErrManifestCorrupted(fd, "comparer", "missing")
case rec.comparer != s.icmp.uName():
return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
case !rec.has(recNextFileNum):
return newErrManifestCorrupted(fd, "next-file-num", "missing")
case !rec.has(recJournalNum):
return newErrManifestCorrupted(fd, "journal-file-num", "missing")
case !rec.has(recSeqNum):
return newErrManifestCorrupted(fd, "seq-num", "missing")
}
s.manifestFd = fd
s.setVersion(staging.finish())
s.setNextFileNum(rec.nextFileNum)
s.recordCommited(rec)
return nil
}
// Commit session; need external synchronization.
func (s *session) commit(r *sessionRecord) (err error) {
v := s.version()
defer v.release()
// spawn new version based on current version
nv := v.spawn(r)
if s.manifest == nil {
// manifest journal writer not yet created, create one
err = s.newManifest(r, nv)
} else {
err = s.flushManifest(r)
}
// finally, apply new version if no error rise
if err == nil {
s.setVersion(nv)
}
return
}

View file

@ -1,302 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int {
v := s.version()
defer v.release()
return v.pickMemdbLevel(umin, umax, maxLevel)
}
func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) {
// Create sorted table.
iter := mdb.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
if err != nil {
return 0, err
}
// Pick level other than zero can cause compaction issue with large
// bulk insert and delete on strictly incrementing key-space. The
// problem is that the small deletion markers trapped at lower level,
// while key/value entries keep growing at higher level. Since the
// key-space is strictly incrementing it will not overlaps with
// higher level, thus maximum possible level is always picked, while
// overlapping deletion marker pushed into lower level.
// See: https://github.com/syndtr/goleveldb/issues/127.
flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel)
rec.addTableFile(flushLevel, t)
s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
return flushLevel, nil
}
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version()
var sourceLevel int
var t0 tFiles
if v.cScore >= 1 {
sourceLevel = v.cLevel
cptr := s.getCompPtr(sourceLevel)
tables := v.levels[sourceLevel]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
t0 = append(t0, tables[0])
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
sourceLevel = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
return nil
}
}
return newCompaction(s, v, sourceLevel, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction {
v := s.version()
if sourceLevel >= len(v.levels) {
v.release()
return nil
}
t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0)
if len(t0) == 0 {
v.release()
return nil
}
// Avoid compacting too much in one shot in case the range is large.
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if !noLimit && sourceLevel > 0 {
limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel))
total := int64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
t0 = t0[:i+1]
break
}
}
}
return newCompaction(s, v, sourceLevel, t0)
}
func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
sourceLevel: sourceLevel,
levels: [2]tFiles{t0, nil},
maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)),
tPtrs: make([]int, len(v.levels)),
}
c.expand()
c.save()
return c
}
// compaction represent a compaction state.
type compaction struct {
s *session
v *version
sourceLevel int
levels [2]tFiles
maxGPOverlaps int64
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes int64
imin, imax internalKey
tPtrs []int
released bool
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes int64
snapTPtrs []int
}
func (c *compaction) save() {
c.snapGPI = c.gpi
c.snapSeenKey = c.seenKey
c.snapGPOverlappedBytes = c.gpOverlappedBytes
c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...)
}
func (c *compaction) restore() {
c.gpi = c.snapGPI
c.seenKey = c.snapSeenKey
c.gpOverlappedBytes = c.snapGPOverlappedBytes
c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...)
}
func (c *compaction) release() {
if !c.released {
c.released = true
c.v.release()
}
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel))
vt0 := c.v.levels[c.sourceLevel]
vt1 := tFiles{}
if level := c.sourceLevel + 1; level < len(c.v.levels) {
vt1 = c.v.levels[level]
}
t0, t1 := c.levels[0], c.levels[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just incase ukey hop across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0)
if len(t0) != len(c.levels[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "sourceLevel" without
// changing the number of "sourceLevel+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
amin, amax = append(t0, t1...).getRange(c.s.icmp)
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == sourceLevel+1; grandparent == sourceLevel+2)
if level := c.sourceLevel + 2; level < len(c.v.levels) {
c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.levels[0], c.levels[1] = t0, t1
c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level := c.sourceLevel + 2; level < len(c.v.levels); level++ {
tables := c.v.levels[level]
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
// We've advanced far enough.
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
// Key falls in this file's range, so definitely not base level.
return false
}
break
}
c.tPtrs[level]++
}
}
return true
}
func (c *compaction) shouldStopBefore(ikey internalKey) bool {
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
break
}
if c.seenKey {
c.gpOverlappedBytes += gp.size
}
}
c.seenKey = true
if c.gpOverlappedBytes > c.maxGPOverlaps {
// Too much overlap for current output; start new output.
c.gpOverlappedBytes = 0
return true
}
return false
}
// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
// Creates iterator slice.
icap := len(c.levels)
if c.sourceLevel == 0 {
// Special case for level-0.
icap = len(c.levels[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
// Options.
ro := &opt.ReadOptions{
DontFillCache: true,
Strict: opt.StrictOverride,
}
strict := c.s.o.GetStrict(opt.StrictCompaction)
if strict {
ro.Strict |= opt.StrictReader
}
for i, tables := range c.levels {
if len(tables) == 0 {
continue
}
// Level-0 is not sorted and may overlaps each other.
if c.sourceLevel+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}
} else {
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict)
its = append(its, it)
}
}
return iterator.NewMergedIterator(its, c.s.icmp, strict)
}

View file

@ -1,323 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"bufio"
"encoding/binary"
"io"
"strings"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/storage"
)
type byteReader interface {
io.Reader
io.ByteReader
}
// These numbers are written to disk and should not be changed.
const (
recComparer = 1
recJournalNum = 2
recNextFileNum = 3
recSeqNum = 4
recCompPtr = 5
recDelTable = 6
recAddTable = 7
// 8 was used for large value refs
recPrevJournalNum = 9
)
type cpRecord struct {
level int
ikey internalKey
}
type atRecord struct {
level int
num int64
size int64
imin internalKey
imax internalKey
}
type dtRecord struct {
level int
num int64
}
type sessionRecord struct {
hasRec int
comparer string
journalNum int64
prevJournalNum int64
nextFileNum int64
seqNum uint64
compPtrs []cpRecord
addedTables []atRecord
deletedTables []dtRecord
scratch [binary.MaxVarintLen64]byte
err error
}
func (p *sessionRecord) has(rec int) bool {
return p.hasRec&(1<<uint(rec)) != 0
}
func (p *sessionRecord) setComparer(name string) {
p.hasRec |= 1 << recComparer
p.comparer = name
}
func (p *sessionRecord) setJournalNum(num int64) {
p.hasRec |= 1 << recJournalNum
p.journalNum = num
}
func (p *sessionRecord) setPrevJournalNum(num int64) {
p.hasRec |= 1 << recPrevJournalNum
p.prevJournalNum = num
}
func (p *sessionRecord) setNextFileNum(num int64) {
p.hasRec |= 1 << recNextFileNum
p.nextFileNum = num
}
func (p *sessionRecord) setSeqNum(num uint64) {
p.hasRec |= 1 << recSeqNum
p.seqNum = num
}
func (p *sessionRecord) addCompPtr(level int, ikey internalKey) {
p.hasRec |= 1 << recCompPtr
p.compPtrs = append(p.compPtrs, cpRecord{level, ikey})
}
func (p *sessionRecord) resetCompPtrs() {
p.hasRec &= ^(1 << recCompPtr)
p.compPtrs = p.compPtrs[:0]
}
func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) {
p.hasRec |= 1 << recAddTable
p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax})
}
func (p *sessionRecord) addTableFile(level int, t *tFile) {
p.addTable(level, t.fd.Num, t.size, t.imin, t.imax)
}
func (p *sessionRecord) resetAddedTables() {
p.hasRec &= ^(1 << recAddTable)
p.addedTables = p.addedTables[:0]
}
func (p *sessionRecord) delTable(level int, num int64) {
p.hasRec |= 1 << recDelTable
p.deletedTables = append(p.deletedTables, dtRecord{level, num})
}
func (p *sessionRecord) resetDeletedTables() {
p.hasRec &= ^(1 << recDelTable)
p.deletedTables = p.deletedTables[:0]
}
func (p *sessionRecord) putUvarint(w io.Writer, x uint64) {
if p.err != nil {
return
}
n := binary.PutUvarint(p.scratch[:], x)
_, p.err = w.Write(p.scratch[:n])
}
func (p *sessionRecord) putVarint(w io.Writer, x int64) {
if x < 0 {
panic("invalid negative value")
}
p.putUvarint(w, uint64(x))
}
func (p *sessionRecord) putBytes(w io.Writer, x []byte) {
if p.err != nil {
return
}
p.putUvarint(w, uint64(len(x)))
if p.err != nil {
return
}
_, p.err = w.Write(x)
}
func (p *sessionRecord) encode(w io.Writer) error {
p.err = nil
if p.has(recComparer) {
p.putUvarint(w, recComparer)
p.putBytes(w, []byte(p.comparer))
}
if p.has(recJournalNum) {
p.putUvarint(w, recJournalNum)
p.putVarint(w, p.journalNum)
}
if p.has(recNextFileNum) {
p.putUvarint(w, recNextFileNum)
p.putVarint(w, p.nextFileNum)
}
if p.has(recSeqNum) {
p.putUvarint(w, recSeqNum)
p.putUvarint(w, p.seqNum)
}
for _, r := range p.compPtrs {
p.putUvarint(w, recCompPtr)
p.putUvarint(w, uint64(r.level))
p.putBytes(w, r.ikey)
}
for _, r := range p.deletedTables {
p.putUvarint(w, recDelTable)
p.putUvarint(w, uint64(r.level))
p.putVarint(w, r.num)
}
for _, r := range p.addedTables {
p.putUvarint(w, recAddTable)
p.putUvarint(w, uint64(r.level))
p.putVarint(w, r.num)
p.putVarint(w, r.size)
p.putBytes(w, r.imin)
p.putBytes(w, r.imax)
}
return p.err
}
func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 {
if p.err != nil {
return 0
}
x, err := binary.ReadUvarint(r)
if err != nil {
if err == io.ErrUnexpectedEOF || (mayEOF == false && err == io.EOF) {
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
} else if strings.HasPrefix(err.Error(), "binary:") {
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, err.Error()})
} else {
p.err = err
}
return 0
}
return x
}
func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 {
return p.readUvarintMayEOF(field, r, false)
}
func (p *sessionRecord) readVarint(field string, r io.ByteReader) int64 {
x := int64(p.readUvarintMayEOF(field, r, false))
if x < 0 {
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "invalid negative value"})
}
return x
}
func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
if p.err != nil {
return nil
}
n := p.readUvarint(field, r)
if p.err != nil {
return nil
}
x := make([]byte, n)
_, p.err = io.ReadFull(r, x)
if p.err != nil {
if p.err == io.ErrUnexpectedEOF {
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
}
return nil
}
return x
}
func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
if p.err != nil {
return 0
}
x := p.readUvarint(field, r)
if p.err != nil {
return 0
}
return int(x)
}
func (p *sessionRecord) decode(r io.Reader) error {
br, ok := r.(byteReader)
if !ok {
br = bufio.NewReader(r)
}
p.err = nil
for p.err == nil {
rec := p.readUvarintMayEOF("field-header", br, true)
if p.err != nil {
if p.err == io.EOF {
return nil
}
return p.err
}
switch rec {
case recComparer:
x := p.readBytes("comparer", br)
if p.err == nil {
p.setComparer(string(x))
}
case recJournalNum:
x := p.readVarint("journal-num", br)
if p.err == nil {
p.setJournalNum(x)
}
case recPrevJournalNum:
x := p.readVarint("prev-journal-num", br)
if p.err == nil {
p.setPrevJournalNum(x)
}
case recNextFileNum:
x := p.readVarint("next-file-num", br)
if p.err == nil {
p.setNextFileNum(x)
}
case recSeqNum:
x := p.readUvarint("seq-num", br)
if p.err == nil {
p.setSeqNum(x)
}
case recCompPtr:
level := p.readLevel("comp-ptr.level", br)
ikey := p.readBytes("comp-ptr.ikey", br)
if p.err == nil {
p.addCompPtr(level, internalKey(ikey))
}
case recAddTable:
level := p.readLevel("add-table.level", br)
num := p.readVarint("add-table.num", br)
size := p.readVarint("add-table.size", br)
imin := p.readBytes("add-table.imin", br)
imax := p.readBytes("add-table.imax", br)
if p.err == nil {
p.addTable(level, num, size, imin, imax)
}
case recDelTable:
level := p.readLevel("del-table.level", br)
num := p.readVarint("del-table.num", br)
if p.err == nil {
p.delTable(level, num)
}
}
}
return p.err
}

View file

@ -1,258 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"fmt"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/storage"
)
// Logging.
type dropper struct {
s *session
fd storage.FileDesc
}
func (d dropper) Drop(err error) {
if e, ok := err.(*journal.ErrCorrupted); ok {
d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason)
} else {
d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err)
}
}
func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) }
func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) }
// File utils.
func (s *session) newTemp() storage.FileDesc {
num := atomic.AddInt64(&s.stTempFileNum, 1) - 1
return storage.FileDesc{storage.TypeTemp, num}
}
// Session state.
// Get current version. This will incr version ref, must call
// version.release (exactly once) after use.
func (s *session) version() *version {
s.vmu.Lock()
defer s.vmu.Unlock()
s.stVersion.ref++
return s.stVersion
}
func (s *session) tLen(level int) int {
s.vmu.Lock()
defer s.vmu.Unlock()
return s.stVersion.tLen(level)
}
// Set current version to v.
func (s *session) setVersion(v *version) {
s.vmu.Lock()
v.ref = 1 // Holds by session.
if old := s.stVersion; old != nil {
v.ref++ // Holds by old version.
old.next = v
old.releaseNB()
}
s.stVersion = v
s.vmu.Unlock()
}
// Get current unused file number.
func (s *session) nextFileNum() int64 {
return atomic.LoadInt64(&s.stNextFileNum)
}
// Set current unused file number to num.
func (s *session) setNextFileNum(num int64) {
atomic.StoreInt64(&s.stNextFileNum, num)
}
// Mark file number as used.
func (s *session) markFileNum(num int64) {
nextFileNum := num + 1
for {
old, x := s.stNextFileNum, nextFileNum
if old > x {
x = old
}
if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
break
}
}
}
// Allocate a file number.
func (s *session) allocFileNum() int64 {
return atomic.AddInt64(&s.stNextFileNum, 1) - 1
}
// Reuse given file number.
func (s *session) reuseFileNum(num int64) {
for {
old, x := s.stNextFileNum, num
if old != x+1 {
x = old
}
if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
break
}
}
}
// Set compaction ptr at given level; need external synchronization.
func (s *session) setCompPtr(level int, ik internalKey) {
if level >= len(s.stCompPtrs) {
newCompPtrs := make([]internalKey, level+1)
copy(newCompPtrs, s.stCompPtrs)
s.stCompPtrs = newCompPtrs
}
s.stCompPtrs[level] = append(internalKey{}, ik...)
}
// Get compaction ptr at given level; need external synchronization.
func (s *session) getCompPtr(level int) internalKey {
if level >= len(s.stCompPtrs) {
return nil
}
return s.stCompPtrs[level]
}
// Manifest related utils.
// Fill given session record obj with current states; need external
// synchronization.
func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
r.setNextFileNum(s.nextFileNum())
if snapshot {
if !r.has(recJournalNum) {
r.setJournalNum(s.stJournalNum)
}
if !r.has(recSeqNum) {
r.setSeqNum(s.stSeqNum)
}
for level, ik := range s.stCompPtrs {
if ik != nil {
r.addCompPtr(level, ik)
}
}
r.setComparer(s.icmp.uName())
}
}
// Mark if record has been committed, this will update session state;
// need external synchronization.
func (s *session) recordCommited(rec *sessionRecord) {
if rec.has(recJournalNum) {
s.stJournalNum = rec.journalNum
}
if rec.has(recPrevJournalNum) {
s.stPrevJournalNum = rec.prevJournalNum
}
if rec.has(recSeqNum) {
s.stSeqNum = rec.seqNum
}
for _, r := range rec.compPtrs {
s.setCompPtr(r.level, internalKey(r.ikey))
}
}
// Create a new manifest file; need external synchronization.
func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
fd := storage.FileDesc{storage.TypeManifest, s.allocFileNum()}
writer, err := s.stor.Create(fd)
if err != nil {
return
}
jw := journal.NewWriter(writer)
if v == nil {
v = s.version()
defer v.release()
}
if rec == nil {
rec = &sessionRecord{}
}
s.fillRecord(rec, true)
v.fillRecord(rec)
defer func() {
if err == nil {
s.recordCommited(rec)
if s.manifest != nil {
s.manifest.Close()
}
if s.manifestWriter != nil {
s.manifestWriter.Close()
}
if !s.manifestFd.Zero() {
s.stor.Remove(s.manifestFd)
}
s.manifestFd = fd
s.manifestWriter = writer
s.manifest = jw
} else {
writer.Close()
s.stor.Remove(fd)
s.reuseFileNum(fd.Num)
}
}()
w, err := jw.Next()
if err != nil {
return
}
err = rec.encode(w)
if err != nil {
return
}
err = jw.Flush()
if err != nil {
return
}
err = s.stor.SetMeta(fd)
return
}
// Flush record to disk.
func (s *session) flushManifest(rec *sessionRecord) (err error) {
s.fillRecord(rec, false)
w, err := s.manifest.Next()
if err != nil {
return
}
err = rec.encode(w)
if err != nil {
return
}
err = s.manifest.Flush()
if err != nil {
return
}
if !s.o.GetNoSync() {
err = s.manifestWriter.Sync()
if err != nil {
return
}
}
s.recordCommited(rec)
return
}

View file

@ -1,583 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reservefs.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package storage
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
)
var (
errFileOpen = errors.New("leveldb/storage: file still open")
errReadOnly = errors.New("leveldb/storage: storage is read-only")
)
type fileLock interface {
release() error
}
type fileStorageLock struct {
fs *fileStorage
}
func (lock *fileStorageLock) Unlock() {
if lock.fs != nil {
lock.fs.mu.Lock()
defer lock.fs.mu.Unlock()
if lock.fs.slock == lock {
lock.fs.slock = nil
}
}
}
const logSizeThreshold = 1024 * 1024 // 1 MiB
// fileStorage is a file-system backed storage.
type fileStorage struct {
path string
readOnly bool
mu sync.Mutex
flock fileLock
slock *fileStorageLock
logw *os.File
logSize int64
buf []byte
// Opened file counter; if open < 0 means closed.
open int
day int
}
// OpenFile returns a new filesytem-backed storage implementation with the given
// path. This also acquire a file lock, so any subsequent attempt to open the
// same path will fail.
//
// The storage must be closed after use, by calling Close method.
func OpenFile(path string, readOnly bool) (Storage, error) {
if fi, err := os.Stat(path); err == nil {
if !fi.IsDir() {
return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path)
}
} else if os.IsNotExist(err) && !readOnly {
if err := os.MkdirAll(path, 0755); err != nil {
return nil, err
}
} else {
return nil, err
}
flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
flock.release()
}
}()
var (
logw *os.File
logSize int64
)
if !readOnly {
logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return nil, err
}
logSize, err = logw.Seek(0, os.SEEK_END)
if err != nil {
logw.Close()
return nil, err
}
}
fs := &fileStorage{
path: path,
readOnly: readOnly,
flock: flock,
logw: logw,
logSize: logSize,
}
runtime.SetFinalizer(fs, (*fileStorage).Close)
return fs, nil
}
func (fs *fileStorage) Lock() (Locker, error) {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
}
if fs.readOnly {
return &fileStorageLock{}, nil
}
if fs.slock != nil {
return nil, ErrLocked
}
fs.slock = &fileStorageLock{fs: fs}
return fs.slock, nil
}
func itoa(buf []byte, i int, wid int) []byte {
u := uint(i)
if u == 0 && wid <= 1 {
return append(buf, '0')
}
// Assemble decimal in reverse order.
var b [32]byte
bp := len(b)
for ; u > 0 || wid > 0; u /= 10 {
bp--
wid--
b[bp] = byte(u%10) + '0'
}
return append(buf, b[bp:]...)
}
func (fs *fileStorage) printDay(t time.Time) {
if fs.day == t.Day() {
return
}
fs.day = t.Day()
fs.logw.Write([]byte("=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n"))
}
func (fs *fileStorage) doLog(t time.Time, str string) {
if fs.logSize > logSizeThreshold {
// Rotate log file.
fs.logw.Close()
fs.logw = nil
fs.logSize = 0
rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old"))
}
if fs.logw == nil {
var err error
fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return
}
// Force printDay on new log file.
fs.day = 0
}
fs.printDay(t)
hour, min, sec := t.Clock()
msec := t.Nanosecond() / 1e3
// time
fs.buf = itoa(fs.buf[:0], hour, 2)
fs.buf = append(fs.buf, ':')
fs.buf = itoa(fs.buf, min, 2)
fs.buf = append(fs.buf, ':')
fs.buf = itoa(fs.buf, sec, 2)
fs.buf = append(fs.buf, '.')
fs.buf = itoa(fs.buf, msec, 6)
fs.buf = append(fs.buf, ' ')
// write
fs.buf = append(fs.buf, []byte(str)...)
fs.buf = append(fs.buf, '\n')
fs.logw.Write(fs.buf)
}
func (fs *fileStorage) Log(str string) {
if !fs.readOnly {
t := time.Now()
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return
}
fs.doLog(t, str)
}
}
func (fs *fileStorage) log(str string) {
if !fs.readOnly {
fs.doLog(time.Now(), str)
}
}
func (fs *fileStorage) SetMeta(fd FileDesc) (err error) {
if !FileDescOk(fd) {
return ErrInvalidFile
}
if fs.readOnly {
return errReadOnly
}
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return ErrClosed
}
defer func() {
if err != nil {
fs.log(fmt.Sprintf("CURRENT: %v", err))
}
}()
path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return
}
_, err = fmt.Fprintln(w, fsGenName(fd))
// Close the file first.
if cerr := w.Close(); cerr != nil {
fs.log(fmt.Sprintf("close CURRENT.%d: %v", fd.Num, cerr))
}
if err != nil {
return
}
return rename(path, filepath.Join(fs.path, "CURRENT"))
}
func (fs *fileStorage) GetMeta() (fd FileDesc, err error) {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return FileDesc{}, ErrClosed
}
dir, err := os.Open(fs.path)
if err != nil {
return
}
names, err := dir.Readdirnames(0)
// Close the dir first before checking for Readdirnames error.
if ce := dir.Close(); ce != nil {
fs.log(fmt.Sprintf("close dir: %v", ce))
}
if err != nil {
return
}
// Find latest CURRENT file.
var rem []string
var pend bool
var cerr error
for _, name := range names {
if strings.HasPrefix(name, "CURRENT") {
pend1 := len(name) > 7
var pendNum int64
// Make sure it is valid name for a CURRENT file, otherwise skip it.
if pend1 {
if name[7] != '.' || len(name) < 9 {
fs.log(fmt.Sprintf("skipping %s: invalid file name", name))
continue
}
var e1 error
if pendNum, e1 = strconv.ParseInt(name[8:], 10, 0); e1 != nil {
fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", name, e1))
continue
}
}
path := filepath.Join(fs.path, name)
r, e1 := os.OpenFile(path, os.O_RDONLY, 0)
if e1 != nil {
return FileDesc{}, e1
}
b, e1 := ioutil.ReadAll(r)
if e1 != nil {
r.Close()
return FileDesc{}, e1
}
var fd1 FileDesc
if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd1) {
fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", name))
if pend1 {
rem = append(rem, name)
}
if !pend1 || cerr == nil {
metaFd, _ := fsParseName(name)
cerr = &ErrCorrupted{
Fd: metaFd,
Err: errors.New("leveldb/storage: corrupted or incomplete meta file"),
}
}
} else if pend1 && pendNum != fd1.Num {
fs.log(fmt.Sprintf("skipping %s: inconsistent pending-file num: %d vs %d", name, pendNum, fd1.Num))
rem = append(rem, name)
} else if fd1.Num < fd.Num {
fs.log(fmt.Sprintf("skipping %s: obsolete", name))
if pend1 {
rem = append(rem, name)
}
} else {
fd = fd1
pend = pend1
}
if err := r.Close(); err != nil {
fs.log(fmt.Sprintf("close %s: %v", name, err))
}
}
}
// Don't remove any files if there is no valid CURRENT file.
if fd.Zero() {
if cerr != nil {
err = cerr
} else {
err = os.ErrNotExist
}
return
}
if !fs.readOnly {
// Rename pending CURRENT file to an effective CURRENT.
if pend {
path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil {
fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", fd.Num, err))
}
}
// Remove obsolete or incomplete pending CURRENT files.
for _, name := range rem {
path := filepath.Join(fs.path, name)
if err := os.Remove(path); err != nil {
fs.log(fmt.Sprintf("remove %s: %v", name, err))
}
}
}
return
}
func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
}
dir, err := os.Open(fs.path)
if err != nil {
return
}
names, err := dir.Readdirnames(0)
// Close the dir first before checking for Readdirnames error.
if cerr := dir.Close(); cerr != nil {
fs.log(fmt.Sprintf("close dir: %v", cerr))
}
if err == nil {
for _, name := range names {
if fd, ok := fsParseName(name); ok && fd.Type&ft != 0 {
fds = append(fds, fd)
}
}
}
return
}
func (fs *fileStorage) Open(fd FileDesc) (Reader, error) {
if !FileDescOk(fd) {
return nil, ErrInvalidFile
}
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
}
of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0)
if err != nil {
if fsHasOldName(fd) && os.IsNotExist(err) {
of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0)
if err == nil {
goto ok
}
}
return nil, err
}
ok:
fs.open++
return &fileWrap{File: of, fs: fs, fd: fd}, nil
}
func (fs *fileStorage) Create(fd FileDesc) (Writer, error) {
if !FileDescOk(fd) {
return nil, ErrInvalidFile
}
if fs.readOnly {
return nil, errReadOnly
}
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
}
of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return nil, err
}
fs.open++
return &fileWrap{File: of, fs: fs, fd: fd}, nil
}
func (fs *fileStorage) Remove(fd FileDesc) error {
if !FileDescOk(fd) {
return ErrInvalidFile
}
if fs.readOnly {
return errReadOnly
}
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return ErrClosed
}
err := os.Remove(filepath.Join(fs.path, fsGenName(fd)))
if err != nil {
if fsHasOldName(fd) && os.IsNotExist(err) {
if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) {
fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err))
err = e1
}
} else {
fs.log(fmt.Sprintf("remove %s: %v", fd, err))
}
}
return err
}
func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error {
if !FileDescOk(oldfd) || !FileDescOk(newfd) {
return ErrInvalidFile
}
if oldfd == newfd {
return nil
}
if fs.readOnly {
return errReadOnly
}
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return ErrClosed
}
return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd)))
}
func (fs *fileStorage) Close() error {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return ErrClosed
}
// Clear the finalizer.
runtime.SetFinalizer(fs, nil)
if fs.open > 0 {
fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open))
}
fs.open = -1
if fs.logw != nil {
fs.logw.Close()
}
return fs.flock.release()
}
type fileWrap struct {
*os.File
fs *fileStorage
fd FileDesc
closed bool
}
func (fw *fileWrap) Sync() error {
if err := fw.File.Sync(); err != nil {
return err
}
if fw.fd.Type == TypeManifest {
// Also sync parent directory if file type is manifest.
// See: https://code.google.com/p/leveldb/issues/detail?id=190.
if err := syncDir(fw.fs.path); err != nil {
fw.fs.log(fmt.Sprintf("syncDir: %v", err))
return err
}
}
return nil
}
func (fw *fileWrap) Close() error {
fw.fs.mu.Lock()
defer fw.fs.mu.Unlock()
if fw.closed {
return ErrClosed
}
fw.closed = true
fw.fs.open--
err := fw.File.Close()
if err != nil {
fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err))
}
return err
}
func fsGenName(fd FileDesc) string {
switch fd.Type {
case TypeManifest:
return fmt.Sprintf("MANIFEST-%06d", fd.Num)
case TypeJournal:
return fmt.Sprintf("%06d.log", fd.Num)
case TypeTable:
return fmt.Sprintf("%06d.ldb", fd.Num)
case TypeTemp:
return fmt.Sprintf("%06d.tmp", fd.Num)
default:
panic("invalid file type")
}
}
func fsHasOldName(fd FileDesc) bool {
return fd.Type == TypeTable
}
func fsGenOldName(fd FileDesc) string {
switch fd.Type {
case TypeTable:
return fmt.Sprintf("%06d.sst", fd.Num)
}
return fsGenName(fd)
}
func fsParseName(name string) (fd FileDesc, ok bool) {
var tail string
_, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail)
if err == nil {
switch tail {
case "log":
fd.Type = TypeJournal
case "ldb", "sst":
fd.Type = TypeTable
case "tmp":
fd.Type = TypeTemp
default:
return
}
return fd, true
}
n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail)
if n == 1 {
fd.Type = TypeManifest
return fd, true
}
return
}
func fsParseNamePtr(name string, fd *FileDesc) bool {
_fd, ok := fsParseName(name)
if fd != nil {
*fd = _fd
}
return ok
}

View file

@ -1,34 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build nacl
package storage
import (
"os"
"syscall"
)
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
return nil, syscall.ENOTSUP
}
func setFileLock(f *os.File, readOnly, lock bool) error {
return syscall.ENOTSUP
}
func rename(oldpath, newpath string) error {
return syscall.ENOTSUP
}
func isErrInvalid(err error) bool {
return false
}
func syncDir(name string) error {
return syscall.ENOTSUP
}

View file

@ -1,65 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package storage
import (
"os"
"path/filepath"
)
type plan9FileLock struct {
f *os.File
}
func (fl *plan9FileLock) release() error {
return fl.f.Close()
}
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
var (
flag int
perm os.FileMode
)
if readOnly {
flag = os.O_RDONLY
} else {
flag = os.O_RDWR
perm = os.ModeExclusive
}
f, err := os.OpenFile(path, flag, perm)
if os.IsNotExist(err) {
f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644)
}
if err != nil {
return
}
fl = &plan9FileLock{f: f}
return
}
func rename(oldpath, newpath string) error {
if _, err := os.Stat(newpath); err == nil {
if err := os.Remove(newpath); err != nil {
return err
}
}
_, fname := filepath.Split(newpath)
return os.Rename(oldpath, fname)
}
func syncDir(name string) error {
f, err := os.Open(name)
if err != nil {
return err
}
defer f.Close()
if err := f.Sync(); err != nil {
return err
}
return nil
}

View file

@ -1,81 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build solaris
package storage
import (
"os"
"syscall"
)
type unixFileLock struct {
f *os.File
}
func (fl *unixFileLock) release() error {
if err := setFileLock(fl.f, false, false); err != nil {
return err
}
return fl.f.Close()
}
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
var flag int
if readOnly {
flag = os.O_RDONLY
} else {
flag = os.O_RDWR
}
f, err := os.OpenFile(path, flag, 0)
if os.IsNotExist(err) {
f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
}
if err != nil {
return
}
err = setFileLock(f, readOnly, true)
if err != nil {
f.Close()
return
}
fl = &unixFileLock{f: f}
return
}
func setFileLock(f *os.File, readOnly, lock bool) error {
flock := syscall.Flock_t{
Type: syscall.F_UNLCK,
Start: 0,
Len: 0,
Whence: 1,
}
if lock {
if readOnly {
flock.Type = syscall.F_RDLCK
} else {
flock.Type = syscall.F_WRLCK
}
}
return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock)
}
func rename(oldpath, newpath string) error {
return os.Rename(oldpath, newpath)
}
func syncDir(name string) error {
f, err := os.Open(name)
if err != nil {
return err
}
defer f.Close()
if err := f.Sync(); err != nil {
return err
}
return nil
}

View file

@ -1,86 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build darwin dragonfly freebsd linux netbsd openbsd
package storage
import (
"os"
"syscall"
)
type unixFileLock struct {
f *os.File
}
func (fl *unixFileLock) release() error {
if err := setFileLock(fl.f, false, false); err != nil {
return err
}
return fl.f.Close()
}
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
var flag int
if readOnly {
flag = os.O_RDONLY
} else {
flag = os.O_RDWR
}
f, err := os.OpenFile(path, flag, 0)
if os.IsNotExist(err) {
f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
}
if err != nil {
return
}
err = setFileLock(f, readOnly, true)
if err != nil {
f.Close()
return
}
fl = &unixFileLock{f: f}
return
}
func setFileLock(f *os.File, readOnly, lock bool) error {
how := syscall.LOCK_UN
if lock {
if readOnly {
how = syscall.LOCK_SH
} else {
how = syscall.LOCK_EX
}
}
return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB)
}
func rename(oldpath, newpath string) error {
return os.Rename(oldpath, newpath)
}
func isErrInvalid(err error) bool {
if err == os.ErrInvalid {
return true
}
if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL {
return true
}
return false
}
func syncDir(name string) error {
f, err := os.Open(name)
if err != nil {
return err
}
defer f.Close()
if err := f.Sync(); err != nil && !isErrInvalid(err) {
return err
}
return nil
}

View file

@ -1,78 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package storage
import (
"syscall"
"unsafe"
)
var (
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
procMoveFileExW = modkernel32.NewProc("MoveFileExW")
)
const (
_MOVEFILE_REPLACE_EXISTING = 1
)
type windowsFileLock struct {
fd syscall.Handle
}
func (fl *windowsFileLock) release() error {
return syscall.Close(fl.fd)
}
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return
}
var access, shareMode uint32
if readOnly {
access = syscall.GENERIC_READ
shareMode = syscall.FILE_SHARE_READ
} else {
access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
}
fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0)
if err == syscall.ERROR_FILE_NOT_FOUND {
fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
}
if err != nil {
return
}
fl = &windowsFileLock{fd: fd}
return
}
func moveFileEx(from *uint16, to *uint16, flags uint32) error {
r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags))
if r1 == 0 {
if e1 != 0 {
return error(e1)
}
return syscall.EINVAL
}
return nil
}
func rename(oldpath, newpath string) error {
from, err := syscall.UTF16PtrFromString(oldpath)
if err != nil {
return err
}
to, err := syscall.UTF16PtrFromString(newpath)
if err != nil {
return err
}
return moveFileEx(from, to, _MOVEFILE_REPLACE_EXISTING)
}
func syncDir(name string) error { return nil }

View file

@ -1,218 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package storage
import (
"bytes"
"os"
"sync"
)
const typeShift = 3
type memStorageLock struct {
ms *memStorage
}
func (lock *memStorageLock) Unlock() {
ms := lock.ms
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.slock == lock {
ms.slock = nil
}
return
}
// memStorage is a memory-backed storage.
type memStorage struct {
mu sync.Mutex
slock *memStorageLock
files map[uint64]*memFile
meta FileDesc
}
// NewMemStorage returns a new memory-backed storage implementation.
func NewMemStorage() Storage {
return &memStorage{
files: make(map[uint64]*memFile),
}
}
func (ms *memStorage) Lock() (Locker, error) {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.slock != nil {
return nil, ErrLocked
}
ms.slock = &memStorageLock{ms: ms}
return ms.slock, nil
}
func (*memStorage) Log(str string) {}
func (ms *memStorage) SetMeta(fd FileDesc) error {
if !FileDescOk(fd) {
return ErrInvalidFile
}
ms.mu.Lock()
ms.meta = fd
ms.mu.Unlock()
return nil
}
func (ms *memStorage) GetMeta() (FileDesc, error) {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.meta.Zero() {
return FileDesc{}, os.ErrNotExist
}
return ms.meta, nil
}
func (ms *memStorage) List(ft FileType) ([]FileDesc, error) {
ms.mu.Lock()
var fds []FileDesc
for x := range ms.files {
fd := unpackFile(x)
if fd.Type&ft != 0 {
fds = append(fds, fd)
}
}
ms.mu.Unlock()
return fds, nil
}
func (ms *memStorage) Open(fd FileDesc) (Reader, error) {
if !FileDescOk(fd) {
return nil, ErrInvalidFile
}
ms.mu.Lock()
defer ms.mu.Unlock()
if m, exist := ms.files[packFile(fd)]; exist {
if m.open {
return nil, errFileOpen
}
m.open = true
return &memReader{Reader: bytes.NewReader(m.Bytes()), ms: ms, m: m}, nil
}
return nil, os.ErrNotExist
}
func (ms *memStorage) Create(fd FileDesc) (Writer, error) {
if !FileDescOk(fd) {
return nil, ErrInvalidFile
}
x := packFile(fd)
ms.mu.Lock()
defer ms.mu.Unlock()
m, exist := ms.files[x]
if exist {
if m.open {
return nil, errFileOpen
}
m.Reset()
} else {
m = &memFile{}
ms.files[x] = m
}
m.open = true
return &memWriter{memFile: m, ms: ms}, nil
}
func (ms *memStorage) Remove(fd FileDesc) error {
if !FileDescOk(fd) {
return ErrInvalidFile
}
x := packFile(fd)
ms.mu.Lock()
defer ms.mu.Unlock()
if _, exist := ms.files[x]; exist {
delete(ms.files, x)
return nil
}
return os.ErrNotExist
}
func (ms *memStorage) Rename(oldfd, newfd FileDesc) error {
if FileDescOk(oldfd) || FileDescOk(newfd) {
return ErrInvalidFile
}
if oldfd == newfd {
return nil
}
oldx := packFile(oldfd)
newx := packFile(newfd)
ms.mu.Lock()
defer ms.mu.Unlock()
oldm, exist := ms.files[oldx]
if !exist {
return os.ErrNotExist
}
newm, exist := ms.files[newx]
if (exist && newm.open) || oldm.open {
return errFileOpen
}
delete(ms.files, oldx)
ms.files[newx] = oldm
return nil
}
func (*memStorage) Close() error { return nil }
type memFile struct {
bytes.Buffer
open bool
}
type memReader struct {
*bytes.Reader
ms *memStorage
m *memFile
closed bool
}
func (mr *memReader) Close() error {
mr.ms.mu.Lock()
defer mr.ms.mu.Unlock()
if mr.closed {
return ErrClosed
}
mr.m.open = false
return nil
}
type memWriter struct {
*memFile
ms *memStorage
closed bool
}
func (*memWriter) Sync() error { return nil }
func (mw *memWriter) Close() error {
mw.ms.mu.Lock()
defer mw.ms.mu.Unlock()
if mw.closed {
return ErrClosed
}
mw.memFile.open = false
return nil
}
func packFile(fd FileDesc) uint64 {
return uint64(fd.Num)<<typeShift | uint64(fd.Type)
}
func unpackFile(x uint64) FileDesc {
return FileDesc{FileType(x) & TypeAll, int64(x >> typeShift)}
}

View file

@ -1,179 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package storage provides storage abstraction for LevelDB.
package storage
import (
"errors"
"fmt"
"io"
)
// FileType represent a file type.
type FileType int
// File types.
const (
TypeManifest FileType = 1 << iota
TypeJournal
TypeTable
TypeTemp
TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp
)
func (t FileType) String() string {
switch t {
case TypeManifest:
return "manifest"
case TypeJournal:
return "journal"
case TypeTable:
return "table"
case TypeTemp:
return "temp"
}
return fmt.Sprintf("<unknown:%d>", t)
}
// Common error.
var (
ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument")
ErrLocked = errors.New("leveldb/storage: already locked")
ErrClosed = errors.New("leveldb/storage: closed")
)
// ErrCorrupted is the type that wraps errors that indicate corruption of
// a file. Package storage has its own type instead of using
// errors.ErrCorrupted to prevent circular import.
type ErrCorrupted struct {
Fd FileDesc
Err error
}
func (e *ErrCorrupted) Error() string {
if !e.Fd.Zero() {
return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
}
return e.Err.Error()
}
// Syncer is the interface that wraps basic Sync method.
type Syncer interface {
// Sync commits the current contents of the file to stable storage.
Sync() error
}
// Reader is the interface that groups the basic Read, Seek, ReadAt and Close
// methods.
type Reader interface {
io.ReadSeeker
io.ReaderAt
io.Closer
}
// Writer is the interface that groups the basic Write, Sync and Close
// methods.
type Writer interface {
io.WriteCloser
Syncer
}
// Locker is the interface that wraps Unlock method.
type Locker interface {
Unlock()
}
// FileDesc is a 'file descriptor'.
type FileDesc struct {
Type FileType
Num int64
}
func (fd FileDesc) String() string {
switch fd.Type {
case TypeManifest:
return fmt.Sprintf("MANIFEST-%06d", fd.Num)
case TypeJournal:
return fmt.Sprintf("%06d.log", fd.Num)
case TypeTable:
return fmt.Sprintf("%06d.ldb", fd.Num)
case TypeTemp:
return fmt.Sprintf("%06d.tmp", fd.Num)
default:
return fmt.Sprintf("%#x-%d", fd.Type, fd.Num)
}
}
// Zero returns true if fd == (FileDesc{}).
func (fd FileDesc) Zero() bool {
return fd == (FileDesc{})
}
// FileDescOk returns true if fd is a valid 'file descriptor'.
func FileDescOk(fd FileDesc) bool {
switch fd.Type {
case TypeManifest:
case TypeJournal:
case TypeTable:
case TypeTemp:
default:
return false
}
return fd.Num >= 0
}
// Storage is the storage. A storage instance must be safe for concurrent use.
type Storage interface {
// Lock locks the storage. Any subsequent attempt to call Lock will fail
// until the last lock released.
// Caller should call Unlock method after use.
Lock() (Locker, error)
// Log logs a string. This is used for logging.
// An implementation may write to a file, stdout or simply do nothing.
Log(str string)
// SetMeta store 'file descriptor' that can later be acquired using GetMeta
// method. The 'file descriptor' should point to a valid file.
// SetMeta should be implemented in such way that changes should happen
// atomically.
SetMeta(fd FileDesc) error
// GetMeta returns 'file descriptor' stored in meta. The 'file descriptor'
// can be updated using SetMeta method.
// Returns os.ErrNotExist if meta doesn't store any 'file descriptor', or
// 'file descriptor' point to nonexistent file.
GetMeta() (FileDesc, error)
// List returns file descriptors that match the given file types.
// The file types may be OR'ed together.
List(ft FileType) ([]FileDesc, error)
// Open opens file with the given 'file descriptor' read-only.
// Returns os.ErrNotExist error if the file does not exist.
// Returns ErrClosed if the underlying storage is closed.
Open(fd FileDesc) (Reader, error)
// Create creates file with the given 'file descriptor', truncate if already
// exist and opens write-only.
// Returns ErrClosed if the underlying storage is closed.
Create(fd FileDesc) (Writer, error)
// Remove removes file with the given 'file descriptor'.
// Returns ErrClosed if the underlying storage is closed.
Remove(fd FileDesc) error
// Rename renames file from oldfd to newfd.
// Returns ErrClosed if the underlying storage is closed.
Rename(oldfd, newfd FileDesc) error
// Close closes the storage.
// It is valid to call Close multiple times. Other methods should not be
// called after the storage has been closed.
Close() error
}

View file

@ -1,529 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"fmt"
"sort"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/table"
"github.com/syndtr/goleveldb/leveldb/util"
)
// tFile holds basic information about a table.
type tFile struct {
fd storage.FileDesc
seekLeft int32
size int64
imin, imax internalKey
}
// Returns true if given key is after largest key of this table.
func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0
}
// Returns true if given key is before smallest key of this table.
func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0
}
// Returns true if given key range overlaps with this table key range.
func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
return !t.after(icmp, umin) && !t.before(icmp, umax)
}
// Cosumes one seek and return current seeks left.
func (t *tFile) consumeSeek() int32 {
return atomic.AddInt32(&t.seekLeft, -1)
}
// Creates new tFile.
func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
f := &tFile{
fd: fd,
size: size,
imin: imin,
imax: imax,
}
// We arrange to automatically compact this file after
// a certain number of seeks. Let's assume:
// (1) One seek costs 10ms
// (2) Writing or reading 1MB costs 10ms (100MB/s)
// (3) A compaction of 1MB does 25MB of IO:
// 1MB read from this level
// 10-12MB read from next level (boundaries may be misaligned)
// 10-12MB written to next level
// This implies that 25 seeks cost the same as the compaction
// of 1MB of data. I.e., one seek costs approximately the
// same as the compaction of 40KB of data. We are a little
// conservative and allow approximately one seek for every 16KB
// of data before triggering a compaction.
f.seekLeft = int32(size / 16384)
if f.seekLeft < 100 {
f.seekLeft = 100
}
return f
}
func tableFileFromRecord(r atRecord) *tFile {
return newTableFile(storage.FileDesc{storage.TypeTable, r.num}, r.size, r.imin, r.imax)
}
// tFiles hold multiple tFile.
type tFiles []*tFile
func (tf tFiles) Len() int { return len(tf) }
func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
func (tf tFiles) nums() string {
x := "[ "
for i, f := range tf {
if i != 0 {
x += ", "
}
x += fmt.Sprint(f.fd.Num)
}
x += " ]"
return x
}
// Returns true if i smallest key is less than j.
// This used for sort by key in ascending order.
func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
a, b := tf[i], tf[j]
n := icmp.Compare(a.imin, b.imin)
if n == 0 {
return a.fd.Num < b.fd.Num
}
return n < 0
}
// Returns true if i file number is greater than j.
// This used for sort by file number in descending order.
func (tf tFiles) lessByNum(i, j int) bool {
return tf[i].fd.Num > tf[j].fd.Num
}
// Sorts tables by key in ascending order.
func (tf tFiles) sortByKey(icmp *iComparer) {
sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
}
// Sorts tables by file number in descending order.
func (tf tFiles) sortByNum() {
sort.Sort(&tFilesSortByNum{tFiles: tf})
}
// Returns sum of all tables size.
func (tf tFiles) size() (sum int64) {
for _, t := range tf {
sum += t.size
}
return sum
}
// Searches smallest index of tables whose its smallest
// key is after or equal with given key.
func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
return sort.Search(len(tf), func(i int) bool {
return icmp.Compare(tf[i].imin, ikey) >= 0
})
}
// Searches smallest index of tables whose its largest
// key is after or equal with given key.
func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
return sort.Search(len(tf), func(i int) bool {
return icmp.Compare(tf[i].imax, ikey) >= 0
})
}
// Returns true if given key range overlaps with one or more
// tables key range. If unsorted is true then binary search will not be used.
func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
if unsorted {
// Check against all files.
for _, t := range tf {
if t.overlaps(icmp, umin, umax) {
return true
}
}
return false
}
i := 0
if len(umin) > 0 {
// Find the earliest possible internal key for min.
i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
}
if i >= len(tf) {
// Beginning of range is after all files, so no overlap.
return false
}
return !tf[i].before(icmp, umax)
}
// Returns tables whose its key range overlaps with given key range.
// Range will be expanded if ukey found hop across tables.
// If overlapped is true then the search will be restarted if umax
// expanded.
// The dst content will be overwritten.
func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
dst = dst[:0]
for i := 0; i < len(tf); {
t := tf[i]
if t.overlaps(icmp, umin, umax) {
if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
umin = t.imin.ukey()
dst = dst[:0]
i = 0
continue
} else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
umax = t.imax.ukey()
// Restart search if it is overlapped.
if overlapped {
dst = dst[:0]
i = 0
continue
}
}
dst = append(dst, t)
}
i++
}
return dst
}
// Returns tables key range.
func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
for i, t := range tf {
if i == 0 {
imin, imax = t.imin, t.imax
continue
}
if icmp.Compare(t.imin, imin) < 0 {
imin = t.imin
}
if icmp.Compare(t.imax, imax) > 0 {
imax = t.imax
}
}
return
}
// Creates iterator index from tables.
func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
if slice != nil {
var start, limit int
if slice.Start != nil {
start = tf.searchMax(icmp, internalKey(slice.Start))
}
if slice.Limit != nil {
limit = tf.searchMin(icmp, internalKey(slice.Limit))
} else {
limit = tf.Len()
}
tf = tf[start:limit]
}
return iterator.NewArrayIndexer(&tFilesArrayIndexer{
tFiles: tf,
tops: tops,
icmp: icmp,
slice: slice,
ro: ro,
})
}
// Tables iterator index.
type tFilesArrayIndexer struct {
tFiles
tops *tOps
icmp *iComparer
slice *util.Range
ro *opt.ReadOptions
}
func (a *tFilesArrayIndexer) Search(key []byte) int {
return a.searchMax(a.icmp, internalKey(key))
}
func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
if i == 0 || i == a.Len()-1 {
return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
}
return a.tops.newIterator(a.tFiles[i], nil, a.ro)
}
// Helper type for sortByKey.
type tFilesSortByKey struct {
tFiles
icmp *iComparer
}
func (x *tFilesSortByKey) Less(i, j int) bool {
return x.lessByKey(x.icmp, i, j)
}
// Helper type for sortByNum.
type tFilesSortByNum struct {
tFiles
}
func (x *tFilesSortByNum) Less(i, j int) bool {
return x.lessByNum(i, j)
}
// Table operations.
type tOps struct {
s *session
noSync bool
cache *cache.Cache
bcache *cache.Cache
bpool *util.BufferPool
}
// Creates an empty table and returns table writer.
func (t *tOps) create() (*tWriter, error) {
fd := storage.FileDesc{storage.TypeTable, t.s.allocFileNum()}
fw, err := t.s.stor.Create(fd)
if err != nil {
return nil, err
}
return &tWriter{
t: t,
fd: fd,
w: fw,
tw: table.NewWriter(fw, t.s.o.Options),
}, nil
}
// Builds table from src iterator.
func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
w, err := t.create()
if err != nil {
return
}
defer func() {
if err != nil {
w.drop()
}
}()
for src.Next() {
err = w.append(src.Key(), src.Value())
if err != nil {
return
}
}
err = src.Error()
if err != nil {
return
}
n = w.tw.EntriesLen()
f, err = w.finish()
return
}
// Opens table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
var r storage.Reader
r, err = t.s.stor.Open(f.fd)
if err != nil {
return 0, nil
}
var bcache *cache.NamespaceGetter
if t.bcache != nil {
bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
}
var tr *table.Reader
tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
if err != nil {
r.Close()
return 0, nil
}
return 1, tr
})
if ch == nil && err == nil {
err = ErrClosed
}
return
}
// Finds key/value pair whose key is greater than or equal to the
// given key.
func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
ch, err := t.open(f)
if err != nil {
return nil, nil, err
}
defer ch.Release()
return ch.Value().(*table.Reader).Find(key, true, ro)
}
// Finds key that is greater than or equal to the given key.
func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
ch, err := t.open(f)
if err != nil {
return nil, err
}
defer ch.Release()
return ch.Value().(*table.Reader).FindKey(key, true, ro)
}
// Returns approximate offset of the given key.
func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
ch, err := t.open(f)
if err != nil {
return
}
defer ch.Release()
return ch.Value().(*table.Reader).OffsetOf(key)
}
// Creates an iterator from the given table.
func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
ch, err := t.open(f)
if err != nil {
return iterator.NewEmptyIterator(err)
}
iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
iter.SetReleaser(ch)
return iter
}
// Removes table from persistent storage. It waits until
// no one use the the table.
func (t *tOps) remove(f *tFile) {
t.cache.Delete(0, uint64(f.fd.Num), func() {
if err := t.s.stor.Remove(f.fd); err != nil {
t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
} else {
t.s.logf("table@remove removed @%d", f.fd.Num)
}
if t.bcache != nil {
t.bcache.EvictNS(uint64(f.fd.Num))
}
})
}
// Closes the table ops instance. It will close all tables,
// regadless still used or not.
func (t *tOps) close() {
t.bpool.Close()
t.cache.Close()
if t.bcache != nil {
t.bcache.CloseWeak()
}
}
// Creates new initialized table ops instance.
func newTableOps(s *session) *tOps {
var (
cacher cache.Cacher
bcache *cache.Cache
bpool *util.BufferPool
)
if s.o.GetOpenFilesCacheCapacity() > 0 {
cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
}
if !s.o.GetDisableBlockCache() {
var bcacher cache.Cacher
if s.o.GetBlockCacheCapacity() > 0 {
bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
}
bcache = cache.NewCache(bcacher)
}
if !s.o.GetDisableBufferPool() {
bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
}
return &tOps{
s: s,
noSync: s.o.GetNoSync(),
cache: cache.NewCache(cacher),
bcache: bcache,
bpool: bpool,
}
}
// tWriter wraps the table writer. It keep track of file descriptor
// and added key range.
type tWriter struct {
t *tOps
fd storage.FileDesc
w storage.Writer
tw *table.Writer
first, last []byte
}
// Append key/value pair to the table.
func (w *tWriter) append(key, value []byte) error {
if w.first == nil {
w.first = append([]byte{}, key...)
}
w.last = append(w.last[:0], key...)
return w.tw.Append(key, value)
}
// Returns true if the table is empty.
func (w *tWriter) empty() bool {
return w.first == nil
}
// Closes the storage.Writer.
func (w *tWriter) close() {
if w.w != nil {
w.w.Close()
w.w = nil
}
}
// Finalizes the table and returns table file.
func (w *tWriter) finish() (f *tFile, err error) {
defer w.close()
err = w.tw.Close()
if err != nil {
return
}
if !w.t.noSync {
err = w.w.Sync()
if err != nil {
return
}
}
f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
return
}
// Drops the table.
func (w *tWriter) drop() {
w.close()
w.t.s.stor.Remove(w.fd)
w.t.s.reuseFileNum(w.fd.Num)
w.tw = nil
w.first = nil
w.last = nil
}

File diff suppressed because it is too large Load diff

View file

@ -1,177 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package table allows read and write sorted key/value.
package table
import (
"encoding/binary"
)
/*
Table:
Table is consist of one or more data blocks, an optional filter block
a metaindex block, an index block and a table footer. Metaindex block
is a special block used to keep parameters of the table, such as filter
block name and its block handle. Index block is a special block used to
keep record of data blocks offset and length, index block use one as
restart interval. The key used by index block are the last key of preceding
block, shorter separator of adjacent blocks or shorter successor of the
last key of the last block. Filter block is an optional block contains
sequence of filter data generated by a filter generator.
Table data structure:
+ optional
/
+--------------+--------------+--------------+------+-------+-----------------+-------------+--------+
| data block 1 | ... | data block n | filter block | metaindex block | index block | footer |
+--------------+--------------+--------------+--------------+-----------------+-------------+--------+
Each block followed by a 5-bytes trailer contains compression type and checksum.
Table block trailer:
+---------------------------+-------------------+
| compression type (1-byte) | checksum (4-byte) |
+---------------------------+-------------------+
The checksum is a CRC-32 computed using Castagnoli's polynomial. Compression
type also included in the checksum.
Table footer:
+------------------- 40-bytes -------------------+
/ \
+------------------------+--------------------+------+-----------------+
| metaindex block handle / index block handle / ---- | magic (8-bytes) |
+------------------------+--------------------+------+-----------------+
The magic are first 64-bit of SHA-1 sum of "http://code.google.com/p/leveldb/".
NOTE: All fixed-length integer are little-endian.
*/
/*
Block:
Block is consist of one or more key/value entries and a block trailer.
Block entry shares key prefix with its preceding key until a restart
point reached. A block should contains at least one restart point.
First restart point are always zero.
Block data structure:
+ restart point + restart point (depends on restart interval)
/ /
+---------------+---------------+---------------+---------------+---------+
| block entry 1 | block entry 2 | ... | block entry n | trailer |
+---------------+---------------+---------------+---------------+---------+
Key/value entry:
+---- key len ----+
/ \
+-------+---------+-----------+---------+--------------------+--------------+----------------+
| shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) |
+-----------------+---------------------+--------------------+--------------+----------------+
Block entry shares key prefix with its preceding key:
Conditions:
restart_interval=2
entry one : key=deck,value=v1
entry two : key=dock,value=v2
entry three: key=duck,value=v3
The entries will be encoded as follow:
+ restart point (offset=0) + restart point (offset=16)
/ /
+-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
| 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" |
+-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+
\ / \ / \ /
+----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+
The block trailer will contains two restart points:
+------------+-----------+--------+
| 0 | 16 | 2 |
+------------+-----------+---+----+
\ / \
+-- restart points --+ + restart points length
Block trailer:
+-- 4-bytes --+
/ \
+-----------------+-----------------+-----------------+------------------------------+
| restart point 1 | .... | restart point n | restart points len (4-bytes) |
+-----------------+-----------------+-----------------+------------------------------+
NOTE: All fixed-length integer are little-endian.
*/
/*
Filter block:
Filter block consist of one or more filter data and a filter block trailer.
The trailer contains filter data offsets, a trailer offset and a 1-byte base Lg.
Filter block data structure:
+ offset 1 + offset 2 + offset n + trailer offset
/ / / /
+---------------+---------------+---------------+---------+
| filter data 1 | ... | filter data n | trailer |
+---------------+---------------+---------------+---------+
Filter block trailer:
+- 4-bytes -+
/ \
+---------------+---------------+---------------+-------------------------------+------------------+
| data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) |
+-------------- +---------------+---------------+-------------------------------+------------------+
NOTE: All fixed-length integer are little-endian.
*/
const (
blockTrailerLen = 5
footerLen = 48
magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb"
// The block type gives the per-block compression format.
// These constants are part of the file format and should not be changed.
blockTypeNoCompression = 0
blockTypeSnappyCompression = 1
// Generate new filter every 2KB of data
filterBaseLg = 11
filterBase = 1 << filterBaseLg
)
type blockHandle struct {
offset, length uint64
}
func decodeBlockHandle(src []byte) (blockHandle, int) {
offset, n := binary.Uvarint(src)
length, m := binary.Uvarint(src[n:])
if n == 0 || m == 0 {
return blockHandle{}, 0
}
return blockHandle{offset, length}, n + m
}
func encodeBlockHandle(dst []byte, b blockHandle) int {
n := binary.PutUvarint(dst, b.offset)
m := binary.PutUvarint(dst[n:], b.length)
return n + m
}

View file

@ -1,375 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package table
import (
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/golang/snappy"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
func sharedPrefixLen(a, b []byte) int {
i, n := 0, len(a)
if n > len(b) {
n = len(b)
}
for i < n && a[i] == b[i] {
i++
}
return i
}
type blockWriter struct {
restartInterval int
buf util.Buffer
nEntries int
prevKey []byte
restarts []uint32
scratch []byte
}
func (w *blockWriter) append(key, value []byte) {
nShared := 0
if w.nEntries%w.restartInterval == 0 {
w.restarts = append(w.restarts, uint32(w.buf.Len()))
} else {
nShared = sharedPrefixLen(w.prevKey, key)
}
n := binary.PutUvarint(w.scratch[0:], uint64(nShared))
n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared))
n += binary.PutUvarint(w.scratch[n:], uint64(len(value)))
w.buf.Write(w.scratch[:n])
w.buf.Write(key[nShared:])
w.buf.Write(value)
w.prevKey = append(w.prevKey[:0], key...)
w.nEntries++
}
func (w *blockWriter) finish() {
// Write restarts entry.
if w.nEntries == 0 {
// Must have at least one restart entry.
w.restarts = append(w.restarts, 0)
}
w.restarts = append(w.restarts, uint32(len(w.restarts)))
for _, x := range w.restarts {
buf4 := w.buf.Alloc(4)
binary.LittleEndian.PutUint32(buf4, x)
}
}
func (w *blockWriter) reset() {
w.buf.Reset()
w.nEntries = 0
w.restarts = w.restarts[:0]
}
func (w *blockWriter) bytesLen() int {
restartsLen := len(w.restarts)
if restartsLen == 0 {
restartsLen = 1
}
return w.buf.Len() + 4*restartsLen + 4
}
type filterWriter struct {
generator filter.FilterGenerator
buf util.Buffer
nKeys int
offsets []uint32
}
func (w *filterWriter) add(key []byte) {
if w.generator == nil {
return
}
w.generator.Add(key)
w.nKeys++
}
func (w *filterWriter) flush(offset uint64) {
if w.generator == nil {
return
}
for x := int(offset / filterBase); x > len(w.offsets); {
w.generate()
}
}
func (w *filterWriter) finish() {
if w.generator == nil {
return
}
// Generate last keys.
if w.nKeys > 0 {
w.generate()
}
w.offsets = append(w.offsets, uint32(w.buf.Len()))
for _, x := range w.offsets {
buf4 := w.buf.Alloc(4)
binary.LittleEndian.PutUint32(buf4, x)
}
w.buf.WriteByte(filterBaseLg)
}
func (w *filterWriter) generate() {
// Record offset.
w.offsets = append(w.offsets, uint32(w.buf.Len()))
// Generate filters.
if w.nKeys > 0 {
w.generator.Generate(&w.buf)
w.nKeys = 0
}
}
// Writer is a table writer.
type Writer struct {
writer io.Writer
err error
// Options
cmp comparer.Comparer
filter filter.Filter
compression opt.Compression
blockSize int
dataBlock blockWriter
indexBlock blockWriter
filterBlock filterWriter
pendingBH blockHandle
offset uint64
nEntries int
// Scratch allocated enough for 5 uvarint. Block writer should not use
// first 20-bytes since it will be used to encode block handle, which
// then passed to the block writer itself.
scratch [50]byte
comparerScratch []byte
compressionScratch []byte
}
func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) {
// Compress the buffer if necessary.
var b []byte
if compression == opt.SnappyCompression {
// Allocate scratch enough for compression and block trailer.
if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n {
w.compressionScratch = make([]byte, n)
}
compressed := snappy.Encode(w.compressionScratch, buf.Bytes())
n := len(compressed)
b = compressed[:n+blockTrailerLen]
b[n] = blockTypeSnappyCompression
} else {
tmp := buf.Alloc(blockTrailerLen)
tmp[0] = blockTypeNoCompression
b = buf.Bytes()
}
// Calculate the checksum.
n := len(b) - 4
checksum := util.NewCRC(b[:n]).Value()
binary.LittleEndian.PutUint32(b[n:], checksum)
// Write the buffer to the file.
_, err = w.writer.Write(b)
if err != nil {
return
}
bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)}
w.offset += uint64(len(b))
return
}
func (w *Writer) flushPendingBH(key []byte) {
if w.pendingBH.length == 0 {
return
}
var separator []byte
if len(key) == 0 {
separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey)
} else {
separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key)
}
if separator == nil {
separator = w.dataBlock.prevKey
} else {
w.comparerScratch = separator
}
n := encodeBlockHandle(w.scratch[:20], w.pendingBH)
// Append the block handle to the index block.
w.indexBlock.append(separator, w.scratch[:n])
// Reset prev key of the data block.
w.dataBlock.prevKey = w.dataBlock.prevKey[:0]
// Clear pending block handle.
w.pendingBH = blockHandle{}
}
func (w *Writer) finishBlock() error {
w.dataBlock.finish()
bh, err := w.writeBlock(&w.dataBlock.buf, w.compression)
if err != nil {
return err
}
w.pendingBH = bh
// Reset the data block.
w.dataBlock.reset()
// Flush the filter block.
w.filterBlock.flush(w.offset)
return nil
}
// Append appends key/value pair to the table. The keys passed must
// be in increasing order.
//
// It is safe to modify the contents of the arguments after Append returns.
func (w *Writer) Append(key, value []byte) error {
if w.err != nil {
return w.err
}
if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 {
w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key)
return w.err
}
w.flushPendingBH(key)
// Append key/value pair to the data block.
w.dataBlock.append(key, value)
// Add key to the filter block.
w.filterBlock.add(key)
// Finish the data block if block size target reached.
if w.dataBlock.bytesLen() >= w.blockSize {
if err := w.finishBlock(); err != nil {
w.err = err
return w.err
}
}
w.nEntries++
return nil
}
// BlocksLen returns number of blocks written so far.
func (w *Writer) BlocksLen() int {
n := w.indexBlock.nEntries
if w.pendingBH.length > 0 {
// Includes the pending block.
n++
}
return n
}
// EntriesLen returns number of entries added so far.
func (w *Writer) EntriesLen() int {
return w.nEntries
}
// BytesLen returns number of bytes written so far.
func (w *Writer) BytesLen() int {
return int(w.offset)
}
// Close will finalize the table. Calling Append is not possible
// after Close, but calling BlocksLen, EntriesLen and BytesLen
// is still possible.
func (w *Writer) Close() error {
if w.err != nil {
return w.err
}
// Write the last data block. Or empty data block if there
// aren't any data blocks at all.
if w.dataBlock.nEntries > 0 || w.nEntries == 0 {
if err := w.finishBlock(); err != nil {
w.err = err
return w.err
}
}
w.flushPendingBH(nil)
// Write the filter block.
var filterBH blockHandle
w.filterBlock.finish()
if buf := &w.filterBlock.buf; buf.Len() > 0 {
filterBH, w.err = w.writeBlock(buf, opt.NoCompression)
if w.err != nil {
return w.err
}
}
// Write the metaindex block.
if filterBH.length > 0 {
key := []byte("filter." + w.filter.Name())
n := encodeBlockHandle(w.scratch[:20], filterBH)
w.dataBlock.append(key, w.scratch[:n])
}
w.dataBlock.finish()
metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression)
if err != nil {
w.err = err
return w.err
}
// Write the index block.
w.indexBlock.finish()
indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression)
if err != nil {
w.err = err
return w.err
}
// Write the table footer.
footer := w.scratch[:footerLen]
for i := range footer {
footer[i] = 0
}
n := encodeBlockHandle(footer, metaindexBH)
encodeBlockHandle(footer[n:], indexBH)
copy(footer[footerLen-len(magic):], magic)
if _, err := w.writer.Write(footer); err != nil {
w.err = err
return w.err
}
w.offset += footerLen
w.err = errors.New("leveldb/table: writer is closed")
return nil
}
// NewWriter creates a new initialized table writer for the file.
//
// Table writer is not safe for concurrent use.
func NewWriter(f io.Writer, o *opt.Options) *Writer {
w := &Writer{
writer: f,
cmp: o.GetComparer(),
filter: o.GetFilter(),
compression: o.GetCompression(),
blockSize: o.GetBlockSize(),
comparerScratch: make([]byte, 0),
}
// data block
w.dataBlock.restartInterval = o.GetBlockRestartInterval()
// The first 20-bytes are used for encoding block handle.
w.dataBlock.scratch = w.scratch[20:]
// index block
w.indexBlock.restartInterval = 1
w.indexBlock.scratch = w.scratch[20:]
// filter block
if w.filter != nil {
w.filterBlock.generator = w.filter.NewGenerator()
w.filterBlock.flush(0)
}
return w
}

View file

@ -1,98 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"fmt"
"sort"
"github.com/syndtr/goleveldb/leveldb/storage"
)
func shorten(str string) string {
if len(str) <= 8 {
return str
}
return str[:3] + ".." + str[len(str)-3:]
}
var bunits = [...]string{"", "Ki", "Mi", "Gi"}
func shortenb(bytes int) string {
i := 0
for ; bytes > 1024 && i < 4; i++ {
bytes /= 1024
}
return fmt.Sprintf("%d%sB", bytes, bunits[i])
}
func sshortenb(bytes int) string {
if bytes == 0 {
return "~"
}
sign := "+"
if bytes < 0 {
sign = "-"
bytes *= -1
}
i := 0
for ; bytes > 1024 && i < 4; i++ {
bytes /= 1024
}
return fmt.Sprintf("%s%d%sB", sign, bytes, bunits[i])
}
func sint(x int) string {
if x == 0 {
return "~"
}
sign := "+"
if x < 0 {
sign = "-"
x *= -1
}
return fmt.Sprintf("%s%d", sign, x)
}
func minInt(a, b int) int {
if a < b {
return a
}
return b
}
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
type fdSorter []storage.FileDesc
func (p fdSorter) Len() int {
return len(p)
}
func (p fdSorter) Less(i, j int) bool {
return p[i].Num < p[j].Num
}
func (p fdSorter) Swap(i, j int) {
p[i], p[j] = p[j], p[i]
}
func sortFds(fds []storage.FileDesc) {
sort.Sort(fdSorter(fds))
}
func ensureBuffer(b []byte, n int) []byte {
if cap(b) < n {
return make([]byte, n)
}
return b[:n]
}

View file

@ -1,293 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package util
// This a copy of Go std bytes.Buffer with some modification
// and some features stripped.
import (
"bytes"
"io"
)
// A Buffer is a variable-sized buffer of bytes with Read and Write methods.
// The zero value for Buffer is an empty buffer ready to use.
type Buffer struct {
buf []byte // contents are the bytes buf[off : len(buf)]
off int // read at &buf[off], write at &buf[len(buf)]
bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation.
}
// Bytes returns a slice of the contents of the unread portion of the buffer;
// len(b.Bytes()) == b.Len(). If the caller changes the contents of the
// returned slice, the contents of the buffer will change provided there
// are no intervening method calls on the Buffer.
func (b *Buffer) Bytes() []byte { return b.buf[b.off:] }
// String returns the contents of the unread portion of the buffer
// as a string. If the Buffer is a nil pointer, it returns "<nil>".
func (b *Buffer) String() string {
if b == nil {
// Special case, useful in debugging.
return "<nil>"
}
return string(b.buf[b.off:])
}
// Len returns the number of bytes of the unread portion of the buffer;
// b.Len() == len(b.Bytes()).
func (b *Buffer) Len() int { return len(b.buf) - b.off }
// Truncate discards all but the first n unread bytes from the buffer.
// It panics if n is negative or greater than the length of the buffer.
func (b *Buffer) Truncate(n int) {
switch {
case n < 0 || n > b.Len():
panic("leveldb/util.Buffer: truncation out of range")
case n == 0:
// Reuse buffer space.
b.off = 0
}
b.buf = b.buf[0 : b.off+n]
}
// Reset resets the buffer so it has no content.
// b.Reset() is the same as b.Truncate(0).
func (b *Buffer) Reset() { b.Truncate(0) }
// grow grows the buffer to guarantee space for n more bytes.
// It returns the index where bytes should be written.
// If the buffer can't grow it will panic with bytes.ErrTooLarge.
func (b *Buffer) grow(n int) int {
m := b.Len()
// If buffer is empty, reset to recover space.
if m == 0 && b.off != 0 {
b.Truncate(0)
}
if len(b.buf)+n > cap(b.buf) {
var buf []byte
if b.buf == nil && n <= len(b.bootstrap) {
buf = b.bootstrap[0:]
} else if m+n <= cap(b.buf)/2 {
// We can slide things down instead of allocating a new
// slice. We only need m+n <= cap(b.buf) to slide, but
// we instead let capacity get twice as large so we
// don't spend all our time copying.
copy(b.buf[:], b.buf[b.off:])
buf = b.buf[:m]
} else {
// not enough space anywhere
buf = makeSlice(2*cap(b.buf) + n)
copy(buf, b.buf[b.off:])
}
b.buf = buf
b.off = 0
}
b.buf = b.buf[0 : b.off+m+n]
return b.off + m
}
// Alloc allocs n bytes of slice from the buffer, growing the buffer as
// needed. If n is negative, Alloc will panic.
// If the buffer can't grow it will panic with bytes.ErrTooLarge.
func (b *Buffer) Alloc(n int) []byte {
if n < 0 {
panic("leveldb/util.Buffer.Alloc: negative count")
}
m := b.grow(n)
return b.buf[m:]
}
// Grow grows the buffer's capacity, if necessary, to guarantee space for
// another n bytes. After Grow(n), at least n bytes can be written to the
// buffer without another allocation.
// If n is negative, Grow will panic.
// If the buffer can't grow it will panic with bytes.ErrTooLarge.
func (b *Buffer) Grow(n int) {
if n < 0 {
panic("leveldb/util.Buffer.Grow: negative count")
}
m := b.grow(n)
b.buf = b.buf[0:m]
}
// Write appends the contents of p to the buffer, growing the buffer as
// needed. The return value n is the length of p; err is always nil. If the
// buffer becomes too large, Write will panic with bytes.ErrTooLarge.
func (b *Buffer) Write(p []byte) (n int, err error) {
m := b.grow(len(p))
return copy(b.buf[m:], p), nil
}
// MinRead is the minimum slice size passed to a Read call by
// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond
// what is required to hold the contents of r, ReadFrom will not grow the
// underlying buffer.
const MinRead = 512
// ReadFrom reads data from r until EOF and appends it to the buffer, growing
// the buffer as needed. The return value n is the number of bytes read. Any
// error except io.EOF encountered during the read is also returned. If the
// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge.
func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) {
// If buffer is empty, reset to recover space.
if b.off >= len(b.buf) {
b.Truncate(0)
}
for {
if free := cap(b.buf) - len(b.buf); free < MinRead {
// not enough space at end
newBuf := b.buf
if b.off+free < MinRead {
// not enough space using beginning of buffer;
// double buffer capacity
newBuf = makeSlice(2*cap(b.buf) + MinRead)
}
copy(newBuf, b.buf[b.off:])
b.buf = newBuf[:len(b.buf)-b.off]
b.off = 0
}
m, e := r.Read(b.buf[len(b.buf):cap(b.buf)])
b.buf = b.buf[0 : len(b.buf)+m]
n += int64(m)
if e == io.EOF {
break
}
if e != nil {
return n, e
}
}
return n, nil // err is EOF, so return nil explicitly
}
// makeSlice allocates a slice of size n. If the allocation fails, it panics
// with bytes.ErrTooLarge.
func makeSlice(n int) []byte {
// If the make fails, give a known error.
defer func() {
if recover() != nil {
panic(bytes.ErrTooLarge)
}
}()
return make([]byte, n)
}
// WriteTo writes data to w until the buffer is drained or an error occurs.
// The return value n is the number of bytes written; it always fits into an
// int, but it is int64 to match the io.WriterTo interface. Any error
// encountered during the write is also returned.
func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) {
if b.off < len(b.buf) {
nBytes := b.Len()
m, e := w.Write(b.buf[b.off:])
if m > nBytes {
panic("leveldb/util.Buffer.WriteTo: invalid Write count")
}
b.off += m
n = int64(m)
if e != nil {
return n, e
}
// all bytes should have been written, by definition of
// Write method in io.Writer
if m != nBytes {
return n, io.ErrShortWrite
}
}
// Buffer is now empty; reset.
b.Truncate(0)
return
}
// WriteByte appends the byte c to the buffer, growing the buffer as needed.
// The returned error is always nil, but is included to match bufio.Writer's
// WriteByte. If the buffer becomes too large, WriteByte will panic with
// bytes.ErrTooLarge.
func (b *Buffer) WriteByte(c byte) error {
m := b.grow(1)
b.buf[m] = c
return nil
}
// Read reads the next len(p) bytes from the buffer or until the buffer
// is drained. The return value n is the number of bytes read. If the
// buffer has no data to return, err is io.EOF (unless len(p) is zero);
// otherwise it is nil.
func (b *Buffer) Read(p []byte) (n int, err error) {
if b.off >= len(b.buf) {
// Buffer is empty, reset to recover space.
b.Truncate(0)
if len(p) == 0 {
return
}
return 0, io.EOF
}
n = copy(p, b.buf[b.off:])
b.off += n
return
}
// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
// If there are fewer than n bytes in the buffer, Next returns the entire buffer.
// The slice is only valid until the next call to a read or write method.
func (b *Buffer) Next(n int) []byte {
m := b.Len()
if n > m {
n = m
}
data := b.buf[b.off : b.off+n]
b.off += n
return data
}
// ReadByte reads and returns the next byte from the buffer.
// If no byte is available, it returns error io.EOF.
func (b *Buffer) ReadByte() (c byte, err error) {
if b.off >= len(b.buf) {
// Buffer is empty, reset to recover space.
b.Truncate(0)
return 0, io.EOF
}
c = b.buf[b.off]
b.off++
return c, nil
}
// ReadBytes reads until the first occurrence of delim in the input,
// returning a slice containing the data up to and including the delimiter.
// If ReadBytes encounters an error before finding a delimiter,
// it returns the data read before the error and the error itself (often io.EOF).
// ReadBytes returns err != nil if and only if the returned data does not end in
// delim.
func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) {
slice, err := b.readSlice(delim)
// return a copy of slice. The buffer's backing array may
// be overwritten by later calls.
line = append(line, slice...)
return
}
// readSlice is like ReadBytes but returns a reference to internal buffer data.
func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
i := bytes.IndexByte(b.buf[b.off:], delim)
end := b.off + i + 1
if i < 0 {
end = len(b.buf)
err = io.EOF
}
line = b.buf[b.off:end]
b.off = end
return line, err
}
// NewBuffer creates and initializes a new Buffer using buf as its initial
// contents. It is intended to prepare a Buffer to read existing data. It
// can also be used to size the internal buffer for writing. To do that,
// buf should have the desired capacity but a length of zero.
//
// In most cases, new(Buffer) (or just declaring a Buffer variable) is
// sufficient to initialize a Buffer.
func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} }

View file

@ -1,239 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package util
import (
"fmt"
"sync"
"sync/atomic"
"time"
)
type buffer struct {
b []byte
miss int
}
// BufferPool is a 'buffer pool'.
type BufferPool struct {
pool [6]chan []byte
size [5]uint32
sizeMiss [5]uint32
sizeHalf [5]uint32
baseline [4]int
baseline0 int
mu sync.RWMutex
closed bool
closeC chan struct{}
get uint32
put uint32
half uint32
less uint32
equal uint32
greater uint32
miss uint32
}
func (p *BufferPool) poolNum(n int) int {
if n <= p.baseline0 && n > p.baseline0/2 {
return 0
}
for i, x := range p.baseline {
if n <= x {
return i + 1
}
}
return len(p.baseline) + 1
}
// Get returns buffer with length of n.
func (p *BufferPool) Get(n int) []byte {
if p == nil {
return make([]byte, n)
}
p.mu.RLock()
defer p.mu.RUnlock()
if p.closed {
return make([]byte, n)
}
atomic.AddUint32(&p.get, 1)
poolNum := p.poolNum(n)
pool := p.pool[poolNum]
if poolNum == 0 {
// Fast path.
select {
case b := <-pool:
switch {
case cap(b) > n:
if cap(b)-n >= n {
atomic.AddUint32(&p.half, 1)
select {
case pool <- b:
default:
}
return make([]byte, n)
} else {
atomic.AddUint32(&p.less, 1)
return b[:n]
}
case cap(b) == n:
atomic.AddUint32(&p.equal, 1)
return b[:n]
default:
atomic.AddUint32(&p.greater, 1)
}
default:
atomic.AddUint32(&p.miss, 1)
}
return make([]byte, n, p.baseline0)
} else {
sizePtr := &p.size[poolNum-1]
select {
case b := <-pool:
switch {
case cap(b) > n:
if cap(b)-n >= n {
atomic.AddUint32(&p.half, 1)
sizeHalfPtr := &p.sizeHalf[poolNum-1]
if atomic.AddUint32(sizeHalfPtr, 1) == 20 {
atomic.StoreUint32(sizePtr, uint32(cap(b)/2))
atomic.StoreUint32(sizeHalfPtr, 0)
} else {
select {
case pool <- b:
default:
}
}
return make([]byte, n)
} else {
atomic.AddUint32(&p.less, 1)
return b[:n]
}
case cap(b) == n:
atomic.AddUint32(&p.equal, 1)
return b[:n]
default:
atomic.AddUint32(&p.greater, 1)
if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) {
select {
case pool <- b:
default:
}
}
}
default:
atomic.AddUint32(&p.miss, 1)
}
if size := atomic.LoadUint32(sizePtr); uint32(n) > size {
if size == 0 {
atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n))
} else {
sizeMissPtr := &p.sizeMiss[poolNum-1]
if atomic.AddUint32(sizeMissPtr, 1) == 20 {
atomic.StoreUint32(sizePtr, uint32(n))
atomic.StoreUint32(sizeMissPtr, 0)
}
}
return make([]byte, n)
} else {
return make([]byte, n, size)
}
}
}
// Put adds given buffer to the pool.
func (p *BufferPool) Put(b []byte) {
if p == nil {
return
}
p.mu.RLock()
defer p.mu.RUnlock()
if p.closed {
return
}
atomic.AddUint32(&p.put, 1)
pool := p.pool[p.poolNum(cap(b))]
select {
case pool <- b:
default:
}
}
func (p *BufferPool) Close() {
if p == nil {
return
}
p.mu.Lock()
if !p.closed {
p.closed = true
p.closeC <- struct{}{}
}
p.mu.Unlock()
}
func (p *BufferPool) String() string {
if p == nil {
return "<nil>"
}
return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}",
p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss)
}
func (p *BufferPool) drain() {
ticker := time.NewTicker(2 * time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
for _, ch := range p.pool {
select {
case <-ch:
default:
}
}
case <-p.closeC:
close(p.closeC)
for _, ch := range p.pool {
close(ch)
}
return
}
}
}
// NewBufferPool creates a new initialized 'buffer pool'.
func NewBufferPool(baseline int) *BufferPool {
if baseline <= 0 {
panic("baseline can't be <= 0")
}
p := &BufferPool{
baseline0: baseline,
baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4},
closeC: make(chan struct{}, 1),
}
for i, cap := range []int{2, 2, 4, 4, 2, 1} {
p.pool[i] = make(chan []byte, cap)
}
go p.drain()
return p
}

View file

@ -1,30 +0,0 @@
// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package util
import (
"hash/crc32"
)
var table = crc32.MakeTable(crc32.Castagnoli)
// CRC is a CRC-32 checksum computed using Castagnoli's polynomial.
type CRC uint32
// NewCRC creates a new crc based on the given bytes.
func NewCRC(b []byte) CRC {
return CRC(0).Update(b)
}
// Update updates the crc with the given bytes.
func (c CRC) Update(b []byte) CRC {
return CRC(crc32.Update(uint32(c), table, b))
}
// Value returns a masked crc.
func (c CRC) Value() uint32 {
return uint32(c>>15|c<<17) + 0xa282ead8
}

View file

@ -1,48 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package util
import (
"encoding/binary"
)
// Hash return hash of the given data.
func Hash(data []byte, seed uint32) uint32 {
// Similar to murmur hash
const (
m = uint32(0xc6a4a793)
r = uint32(24)
)
var (
h = seed ^ (uint32(len(data)) * m)
i int
)
for n := len(data) - len(data)%4; i < n; i += 4 {
h += binary.LittleEndian.Uint32(data[i:])
h *= m
h ^= (h >> 16)
}
switch len(data) - i {
default:
panic("not reached")
case 3:
h += uint32(data[i+2]) << 16
fallthrough
case 2:
h += uint32(data[i+1]) << 8
fallthrough
case 1:
h += uint32(data[i])
h *= m
h ^= (h >> r)
case 0:
}
return h
}

View file

@ -1,32 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package util
// Range is a key range.
type Range struct {
// Start of the key range, include in the range.
Start []byte
// Limit of the key range, not include in the range.
Limit []byte
}
// BytesPrefix returns key range that satisfy the given prefix.
// This only applicable for the standard 'bytes comparer'.
func BytesPrefix(prefix []byte) *Range {
var limit []byte
for i := len(prefix) - 1; i >= 0; i-- {
c := prefix[i]
if c < 0xff {
limit = make([]byte, i+1)
copy(limit, prefix)
limit[i] = c + 1
break
}
}
return &Range{prefix, limit}
}

View file

@ -1,73 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package util provides utilities used throughout leveldb.
package util
import (
"errors"
)
var (
ErrReleased = errors.New("leveldb: resource already relesed")
ErrHasReleaser = errors.New("leveldb: releaser already defined")
)
// Releaser is the interface that wraps the basic Release method.
type Releaser interface {
// Release releases associated resources. Release should always success
// and can be called multipe times without causing error.
Release()
}
// ReleaseSetter is the interface that wraps the basic SetReleaser method.
type ReleaseSetter interface {
// SetReleaser associates the given releaser to the resources. The
// releaser will be called once coresponding resources released.
// Calling SetReleaser with nil will clear the releaser.
//
// This will panic if a releaser already present or coresponding
// resource is already released. Releaser should be cleared first
// before assigned a new one.
SetReleaser(releaser Releaser)
}
// BasicReleaser provides basic implementation of Releaser and ReleaseSetter.
type BasicReleaser struct {
releaser Releaser
released bool
}
// Released returns whether Release method already called.
func (r *BasicReleaser) Released() bool {
return r.released
}
// Release implements Releaser.Release.
func (r *BasicReleaser) Release() {
if !r.released {
if r.releaser != nil {
r.releaser.Release()
r.releaser = nil
}
r.released = true
}
}
// SetReleaser implements ReleaseSetter.SetReleaser.
func (r *BasicReleaser) SetReleaser(releaser Releaser) {
if r.released {
panic(ErrReleased)
}
if r.releaser != nil && releaser != nil {
panic(ErrHasReleaser)
}
r.releaser = releaser
}
type NoopReleaser struct{}
func (NoopReleaser) Release() {}

View file

@ -1,524 +0,0 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"fmt"
"sync/atomic"
"unsafe"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
type tSet struct {
level int
table *tFile
}
type version struct {
s *session
levels []tFiles
// Level that should be compacted next and its compaction score.
// Score < 1 means compaction is not strictly needed. These fields
// are initialized by computeCompaction()
cLevel int
cScore float64
cSeek unsafe.Pointer
closing bool
ref int
// Succeeding version.
next *version
}
func newVersion(s *session) *version {
return &version{s: s}
}
func (v *version) releaseNB() {
v.ref--
if v.ref > 0 {
return
}
if v.ref < 0 {
panic("negative version ref")
}
nextTables := make(map[int64]bool)
for _, tt := range v.next.levels {
for _, t := range tt {
num := t.fd.Num
nextTables[num] = true
}
}
for _, tt := range v.levels {
for _, t := range tt {
num := t.fd.Num
if _, ok := nextTables[num]; !ok {
v.s.tops.remove(t)
}
}
}
v.next.releaseNB()
v.next = nil
}
func (v *version) release() {
v.s.vmu.Lock()
v.releaseNB()
v.s.vmu.Unlock()
}
func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
ukey := ikey.ukey()
// Aux level.
if aux != nil {
for _, t := range aux {
if t.overlaps(v.s.icmp, ukey, ukey) {
if !f(-1, t) {
return
}
}
}
if lf != nil && !lf(-1) {
return
}
}
// Walk tables level-by-level.
for level, tables := range v.levels {
if len(tables) == 0 {
continue
}
if level == 0 {
// Level-0 files may overlap each other. Find all files that
// overlap ukey.
for _, t := range tables {
if t.overlaps(v.s.icmp, ukey, ukey) {
if !f(level, t) {
return
}
}
}
} else {
if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
t := tables[i]
if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
if !f(level, t) {
return
}
}
}
}
if lf != nil && !lf(level) {
return
}
}
}
func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
if v.closing {
return nil, false, ErrClosed
}
ukey := ikey.ukey()
var (
tset *tSet
tseek bool
// Level-0.
zfound bool
zseq uint64
zkt keyType
zval []byte
)
err = ErrNotFound
// Since entries never hop across level, finding key/value
// in smaller level make later levels irrelevant.
v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
if level >= 0 && !tseek {
if tset == nil {
tset = &tSet{level, t}
} else {
tseek = true
}
}
var (
fikey, fval []byte
ferr error
)
if noValue {
fikey, ferr = v.s.tops.findKey(t, ikey, ro)
} else {
fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
}
switch ferr {
case nil:
case ErrNotFound:
return true
default:
err = ferr
return false
}
if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
if v.s.icmp.uCompare(ukey, fukey) == 0 {
// Level <= 0 may overlaps each-other.
if level <= 0 {
if fseq >= zseq {
zfound = true
zseq = fseq
zkt = fkt
zval = fval
}
} else {
switch fkt {
case keyTypeVal:
value = fval
err = nil
case keyTypeDel:
default:
panic("leveldb: invalid internalKey type")
}
return false
}
}
} else {
err = fkerr
return false
}
return true
}, func(level int) bool {
if zfound {
switch zkt {
case keyTypeVal:
value = zval
err = nil
case keyTypeDel:
default:
panic("leveldb: invalid internalKey type")
}
return false
}
return true
})
if tseek && tset.table.consumeSeek() <= 0 {
tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
}
return
}
func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
var tset *tSet
v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
if tset == nil {
tset = &tSet{level, t}
return true
}
if tset.table.consumeSeek() <= 0 {
tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
}
return false
}, nil)
return
}
func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
for level, tables := range v.levels {
if level == 0 {
// Merge all level zero files together since they may overlap.
for _, t := range tables {
its = append(its, v.s.tops.newIterator(t, slice, ro))
}
} else if len(tables) != 0 {
its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
}
}
return
}
func (v *version) newStaging() *versionStaging {
return &versionStaging{base: v}
}
// Spawn a new version based on this version.
func (v *version) spawn(r *sessionRecord) *version {
staging := v.newStaging()
staging.commit(r)
return staging.finish()
}
func (v *version) fillRecord(r *sessionRecord) {
for level, tables := range v.levels {
for _, t := range tables {
r.addTableFile(level, t)
}
}
}
func (v *version) tLen(level int) int {
if level < len(v.levels) {
return len(v.levels[level])
}
return 0
}
func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
for level, tables := range v.levels {
for _, t := range tables {
if v.s.icmp.Compare(t.imax, ikey) <= 0 {
// Entire file is before "ikey", so just add the file size
n += t.size
} else if v.s.icmp.Compare(t.imin, ikey) > 0 {
// Entire file is after "ikey", so ignore
if level > 0 {
// Files other than level 0 are sorted by meta->min, so
// no further files in this level will contain data for
// "ikey".
break
}
} else {
// "ikey" falls in the range for this table. Add the
// approximate offset of "ikey" within the table.
if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
n += m
} else {
return 0, err
}
}
}
}
return
}
func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
if maxLevel > 0 {
if len(v.levels) == 0 {
return maxLevel
}
if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
var overlaps tFiles
for ; level < maxLevel; level++ {
if pLevel := level + 1; pLevel >= len(v.levels) {
return maxLevel
} else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
break
}
if gpLevel := level + 2; gpLevel < len(v.levels) {
overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
break
}
}
}
}
}
return
}
func (v *version) computeCompaction() {
// Precomputed best level for next compaction
bestLevel := int(-1)
bestScore := float64(-1)
statFiles := make([]int, len(v.levels))
statSizes := make([]string, len(v.levels))
statScore := make([]string, len(v.levels))
statTotSize := int64(0)
for level, tables := range v.levels {
var score float64
size := tables.size()
if level == 0 {
// We treat level-0 specially by bounding the number of files
// instead of number of bytes for two reasons:
//
// (1) With larger write-buffer sizes, it is nice not to do too
// many level-0 compaction.
//
// (2) The files in level-0 are merged on every read and
// therefore we wish to avoid too many files when the individual
// file size is small (perhaps because of a small write-buffer
// setting, or very high compression ratios, or lots of
// overwrites/deletions).
score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
} else {
score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
}
if score > bestScore {
bestLevel = level
bestScore = score
}
statFiles[level] = len(tables)
statSizes[level] = shortenb(int(size))
statScore[level] = fmt.Sprintf("%.2f", score)
statTotSize += size
}
v.cLevel = bestLevel
v.cScore = bestScore
v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
}
func (v *version) needCompaction() bool {
return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
}
type tablesScratch struct {
added map[int64]atRecord
deleted map[int64]struct{}
}
type versionStaging struct {
base *version
levels []tablesScratch
}
func (p *versionStaging) getScratch(level int) *tablesScratch {
if level >= len(p.levels) {
newLevels := make([]tablesScratch, level+1)
copy(newLevels, p.levels)
p.levels = newLevels
}
return &(p.levels[level])
}
func (p *versionStaging) commit(r *sessionRecord) {
// Deleted tables.
for _, r := range r.deletedTables {
scratch := p.getScratch(r.level)
if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
if scratch.deleted == nil {
scratch.deleted = make(map[int64]struct{})
}
scratch.deleted[r.num] = struct{}{}
}
if scratch.added != nil {
delete(scratch.added, r.num)
}
}
// New tables.
for _, r := range r.addedTables {
scratch := p.getScratch(r.level)
if scratch.added == nil {
scratch.added = make(map[int64]atRecord)
}
scratch.added[r.num] = r
if scratch.deleted != nil {
delete(scratch.deleted, r.num)
}
}
}
func (p *versionStaging) finish() *version {
// Build new version.
nv := newVersion(p.base.s)
numLevel := len(p.levels)
if len(p.base.levels) > numLevel {
numLevel = len(p.base.levels)
}
nv.levels = make([]tFiles, numLevel)
for level := 0; level < numLevel; level++ {
var baseTabels tFiles
if level < len(p.base.levels) {
baseTabels = p.base.levels[level]
}
if level < len(p.levels) {
scratch := p.levels[level]
var nt tFiles
// Prealloc list if possible.
if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
nt = make(tFiles, 0, n)
}
// Base tables.
for _, t := range baseTabels {
if _, ok := scratch.deleted[t.fd.Num]; ok {
continue
}
if _, ok := scratch.added[t.fd.Num]; ok {
continue
}
nt = append(nt, t)
}
// New tables.
for _, r := range scratch.added {
nt = append(nt, tableFileFromRecord(r))
}
if len(nt) != 0 {
// Sort tables.
if level == 0 {
nt.sortByNum()
} else {
nt.sortByKey(p.base.s.icmp)
}
nv.levels[level] = nt
}
} else {
nv.levels[level] = baseTabels
}
}
// Trim levels.
n := len(nv.levels)
for ; n > 0 && nv.levels[n-1] == nil; n-- {
}
nv.levels = nv.levels[:n]
// Compute compaction score for new version.
nv.computeCompaction()
return nv
}
type versionReleaser struct {
v *version
once bool
}
func (vr *versionReleaser) Release() {
v := vr.v
v.s.vmu.Lock()
if !vr.once {
v.releaseNB()
vr.once = true
}
v.s.vmu.Unlock()
}

72
vendor/vendor.json vendored
View file

@ -582,78 +582,6 @@
"revision": "d77da356e56a7428ad25149ca77381849a6a5232",
"revisionTime": "2016-06-15T09:26:46Z"
},
{
"checksumSHA1": "VhcnDY37sYAnL8WjfYQN9YYl+W4=",
"path": "github.com/syndtr/goleveldb/leveldb",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "EKIow7XkgNdWvR/982ffIZxKG8Y=",
"path": "github.com/syndtr/goleveldb/leveldb/cache",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "5KPgnvCPlR0ysDAqo6jApzRQ3tw=",
"path": "github.com/syndtr/goleveldb/leveldb/comparer",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "1DRAxdlWzS4U0xKN/yQ/fdNN7f0=",
"path": "github.com/syndtr/goleveldb/leveldb/errors",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "eqKeD6DS7eNCtxVYZEHHRKkyZrw=",
"path": "github.com/syndtr/goleveldb/leveldb/filter",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "8dXuAVIsbtaMiGGuHjzGR6Ny/5c=",
"path": "github.com/syndtr/goleveldb/leveldb/iterator",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "gJY7bRpELtO0PJpZXgPQ2BYFJ88=",
"path": "github.com/syndtr/goleveldb/leveldb/journal",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "j+uaQ6DwJ50dkIdfMQu1TXdlQcY=",
"path": "github.com/syndtr/goleveldb/leveldb/memdb",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "UmQeotV+m8/FduKEfLOhjdp18rs=",
"path": "github.com/syndtr/goleveldb/leveldb/opt",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "/Wvv9HeJTN9UUjdjwUlz7X4ioIo=",
"path": "github.com/syndtr/goleveldb/leveldb/storage",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "JTJA+u8zk7EXy1UUmpFPNGvtO2A=",
"path": "github.com/syndtr/goleveldb/leveldb/table",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "4zil8Gwg8VPkDn1YzlgCvtukJFU=",
"path": "github.com/syndtr/goleveldb/leveldb/util",
"revision": "6b4daa5362b502898ddf367c5c11deb9e7a5c727",
"revisionTime": "2016-10-11T05:00:08Z"
},
{
"checksumSHA1": "f6Aew+ZA+HBAXCw6/xTST3mB0Lw=",
"origin": "k8s.io/client-go/1.5/vendor/github.com/ugorji/go/codec",