2013-02-07 02:38:01 -08:00
// Copyright 2013 Prometheus Team
2012-11-26 11:11:34 -08:00
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2012-11-26 10:56:51 -08:00
package leveldb
2012-11-24 03:33:34 -08:00
import (
2013-01-27 11:28:37 -08:00
"flag"
2013-04-01 04:22:38 -07:00
"fmt"
2012-11-24 03:33:34 -08:00
"github.com/jmhodges/levigo"
2013-01-27 09:49:45 -08:00
"github.com/prometheus/prometheus/coding"
2013-02-06 08:05:23 -08:00
"github.com/prometheus/prometheus/storage"
2013-01-27 09:49:45 -08:00
"github.com/prometheus/prometheus/storage/raw"
2013-04-01 04:22:38 -07:00
"time"
2012-11-24 03:33:34 -08:00
)
2013-01-27 11:28:37 -08:00
// Command-line flags that tune how the LevelDB databases opened by this
// package are configured.
var (
	// leveldbUseSnappy selects Snappy compression instead of no compression.
	leveldbUseSnappy = flag.Bool("leveldbUseSnappy", true, "Whether LevelDB attempts to use Snappy for compressing elements (bool).")

	// leveldbUseParanoidChecks is forwarded to the database options'
	// paranoid-checks setting.
	leveldbUseParanoidChecks = flag.Bool("leveldbUseParanoidChecks", true, "Whether LevelDB uses expensive checks (bool).")

	// leveldbFlushOnMutate is forwarded to the write options' sync setting.
	leveldbFlushOnMutate = flag.Bool("leveldbFlushOnMutate", false, "Whether LevelDB should flush every operation to disk upon mutation before returning (bool).")
)
2013-03-04 11:43:07 -08:00
// LevelDBPersistence is a disk-backed sorted key-value store.
2012-11-28 11:22:49 -08:00
type LevelDBPersistence struct {
2012-11-24 03:33:34 -08:00
cache * levigo . Cache
filterPolicy * levigo . FilterPolicy
options * levigo . Options
storage * levigo . DB
readOptions * levigo . ReadOptions
writeOptions * levigo . WriteOptions
}
2013-03-25 02:24:59 -07:00
// levigoIterator wraps the LevelDB resources in a convenient manner for uniform
// resource access and closing through the raw.Iterator protocol.
type levigoIterator struct {
// iterator is the receiver of most proxied operation calls.
iterator * levigo . Iterator
// readOptions is only set if the iterator is a snapshot of an underlying
// database. This signals that it needs to be explicitly reaped upon the
// end of this iterator's life.
2012-11-26 10:56:51 -08:00
readOptions * levigo . ReadOptions
2013-03-25 02:24:59 -07:00
// snapshot is only set if the iterator is a snapshot of an underlying
// database. This signals that it needs to be explicitly reaped upon the
// end of this this iterator's life.
snapshot * levigo . Snapshot
// storage is only set if the iterator is a snapshot of an underlying
// database. This signals that it needs to be explicitly reaped upon the
// end of this this iterator's life. The snapshot must be freed in the
// context of an actual database.
storage * levigo . DB
// closed indicates whether the iterator has been closed before.
closed bool
2013-04-01 04:22:38 -07:00
// valid indicates whether the iterator may be used. If a LevelDB iterator
// ever becomes invalid, it must be disposed of and cannot be reused.
valid bool
// creationTime provides the time at which the iterator was made.
creationTime time . Time
2013-03-25 02:24:59 -07:00
}
2013-04-01 04:22:38 -07:00
// String renders a human-readable summary of the iterator: when it was
// created, whether it is open or closed, valid or invalid, and whether it
// reads from a snapshot.
func (i levigoIterator) String() string {
	openness := "open"
	if i.closed {
		openness = "closed"
	}

	validity := "valid"
	if !i.valid {
		validity = "invalid"
	}

	snapshotState := "snapshotted"
	if i.snapshot == nil {
		snapshotState = "unsnapshotted"
	}

	return fmt.Sprintf("levigoIterator created at %s that is %s and %s and %s", i.creationTime, openness, validity, snapshotState)
}
func ( i * levigoIterator ) Close ( ) {
2013-03-25 02:24:59 -07:00
if i . closed {
return
}
if i . iterator != nil {
i . iterator . Close ( )
}
if i . readOptions != nil {
i . readOptions . Close ( )
}
if i . snapshot != nil {
i . storage . ReleaseSnapshot ( i . snapshot )
}
// Explicitly dereference the pointers to prevent cycles, however unlikely.
i . iterator = nil
i . readOptions = nil
i . snapshot = nil
i . storage = nil
i . closed = true
2013-04-01 04:22:38 -07:00
i . valid = false
2013-03-25 02:24:59 -07:00
return
}
2013-04-01 04:22:38 -07:00
func ( i * levigoIterator ) Seek ( key [ ] byte ) bool {
2013-03-25 02:24:59 -07:00
i . iterator . Seek ( key )
2013-04-01 04:22:38 -07:00
i . valid = i . iterator . Valid ( )
return i . valid
2013-03-25 02:24:59 -07:00
}
2013-04-01 04:22:38 -07:00
func ( i * levigoIterator ) SeekToFirst ( ) bool {
2013-03-25 02:24:59 -07:00
i . iterator . SeekToFirst ( )
2013-04-01 04:22:38 -07:00
i . valid = i . iterator . Valid ( )
return i . valid
2013-03-25 02:24:59 -07:00
}
2013-04-01 04:22:38 -07:00
func ( i * levigoIterator ) SeekToLast ( ) bool {
2013-03-25 02:24:59 -07:00
i . iterator . SeekToLast ( )
2013-04-01 04:22:38 -07:00
i . valid = i . iterator . Valid ( )
return i . valid
2013-03-25 02:24:59 -07:00
}
2013-04-01 04:22:38 -07:00
func ( i * levigoIterator ) Next ( ) bool {
2013-03-25 02:24:59 -07:00
i . iterator . Next ( )
2013-04-01 04:22:38 -07:00
i . valid = i . iterator . Valid ( )
return i . valid
2013-03-25 02:24:59 -07:00
}
2013-04-01 04:22:38 -07:00
func ( i * levigoIterator ) Previous ( ) bool {
2013-03-25 02:24:59 -07:00
i . iterator . Prev ( )
2013-04-01 04:22:38 -07:00
i . valid = i . iterator . Valid ( )
return i . valid
2013-03-25 02:24:59 -07:00
}
// Key returns the key reported by the underlying iterator.
func (i levigoIterator) Key() (key []byte) {
	key = i.iterator.Key()
	return key
}
// Value returns the value reported by the underlying iterator.
func (i levigoIterator) Value() (value []byte) {
	value = i.iterator.Value()
	return value
}
func ( i levigoIterator ) GetError ( ) ( err error ) {
return i . iterator . GetError ( )
2012-11-26 10:56:51 -08:00
}
2012-12-25 04:50:36 -08:00
func NewLevelDBPersistence ( storageRoot string , cacheCapacity , bitsPerBloomFilterEncoded int ) ( p * LevelDBPersistence , err error ) {
2012-11-24 03:33:34 -08:00
options := levigo . NewOptions ( )
options . SetCreateIfMissing ( true )
2013-02-01 04:35:07 -08:00
options . SetParanoidChecks ( * leveldbUseParanoidChecks )
compression := levigo . NoCompression
if * leveldbUseSnappy {
compression = levigo . SnappyCompression
}
options . SetCompression ( compression )
2012-11-24 03:33:34 -08:00
cache := levigo . NewLRUCache ( cacheCapacity )
options . SetCache ( cache )
filterPolicy := levigo . NewBloomFilter ( bitsPerBloomFilterEncoded )
options . SetFilterPolicy ( filterPolicy )
2012-12-25 04:50:36 -08:00
storage , err := levigo . Open ( storageRoot , options )
if err != nil {
return
}
2012-11-24 03:33:34 -08:00
2013-03-25 02:24:59 -07:00
var (
readOptions = levigo . NewReadOptions ( )
writeOptions = levigo . NewWriteOptions ( )
)
2013-01-27 11:28:37 -08:00
writeOptions . SetSync ( * leveldbFlushOnMutate )
2012-12-25 04:50:36 -08:00
p = & LevelDBPersistence {
2012-11-24 03:33:34 -08:00
cache : cache ,
filterPolicy : filterPolicy ,
2013-05-10 07:41:02 -07:00
2012-11-24 03:33:34 -08:00
options : options ,
readOptions : readOptions ,
writeOptions : writeOptions ,
2013-05-10 07:41:02 -07:00
storage : storage ,
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
return
2012-11-24 03:33:34 -08:00
}
2013-04-01 04:22:38 -07:00
func ( l * LevelDBPersistence ) Close ( ) {
2012-12-25 04:50:36 -08:00
// These are deferred to take advantage of forced closing in case of stack
// unwinding due to anomalies.
defer func ( ) {
if l . storage != nil {
l . storage . Close ( )
}
} ( )
2012-11-24 03:33:34 -08:00
defer func ( ) {
if l . filterPolicy != nil {
l . filterPolicy . Close ( )
}
} ( )
defer func ( ) {
if l . cache != nil {
l . cache . Close ( )
}
} ( )
defer func ( ) {
if l . options != nil {
l . options . Close ( )
}
} ( )
defer func ( ) {
if l . readOptions != nil {
l . readOptions . Close ( )
}
} ( )
defer func ( ) {
if l . writeOptions != nil {
l . writeOptions . Close ( )
}
} ( )
2012-12-25 04:50:36 -08:00
return
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
func ( l * LevelDBPersistence ) Get ( value coding . Encoder ) ( b [ ] byte , err error ) {
key , err := value . Encode ( )
if err != nil {
return
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
return l . storage . Get ( l . readOptions , key )
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
func ( l * LevelDBPersistence ) Has ( value coding . Encoder ) ( h bool , err error ) {
raw , err := l . Get ( value )
if err != nil {
return
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
h = raw != nil
2012-11-24 03:33:34 -08:00
2012-12-25 04:50:36 -08:00
return
}
2012-11-24 03:33:34 -08:00
2012-12-25 04:50:36 -08:00
func ( l * LevelDBPersistence ) Drop ( value coding . Encoder ) ( err error ) {
key , err := value . Encode ( )
if err != nil {
return
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
err = l . storage . Delete ( l . writeOptions , key )
return
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
func ( l * LevelDBPersistence ) Put ( key , value coding . Encoder ) ( err error ) {
keyEncoded , err := key . Encode ( )
if err != nil {
return
}
2012-11-24 03:33:34 -08:00
2012-12-25 04:50:36 -08:00
valueEncoded , err := value . Encode ( )
if err != nil {
return
2012-11-24 03:33:34 -08:00
}
2012-12-25 04:50:36 -08:00
err = l . storage . Put ( l . writeOptions , keyEncoded , valueEncoded )
return
2012-11-24 03:33:34 -08:00
}
2013-03-23 23:00:17 -07:00
func ( l * LevelDBPersistence ) Commit ( b raw . Batch ) ( err error ) {
// XXX: This is a wart to clean up later. Ideally, after doing extensive
// tests, we could create a Batch struct that journals pending
// operations which the given Persistence implementation could convert
// to its specific commit requirements.
2013-04-05 09:03:45 -07:00
batch , ok := b . ( * batch )
2013-03-23 23:00:17 -07:00
if ! ok {
panic ( "leveldb.batch expected" )
}
return l . storage . Write ( l . writeOptions , batch . batch )
2013-02-08 09:03:26 -08:00
}
2013-05-10 16:02:57 -07:00
// CompactKeyspace compacts the entire database's keyspace.
2013-05-10 07:41:02 -07:00
//
// Beware that it would probably be imprudent to run this on a live user-facing
// server due to latency implications.
2013-05-10 16:02:57 -07:00
func ( l * LevelDBPersistence ) CompactKeyspace ( ) {
// Magic values per https://code.google.com/p/leveldb/source/browse/include/leveldb/db.h#131.
keyspace := levigo . Range {
Start : nil ,
Limit : nil ,
}
l . storage . CompactRange ( keyspace )
}
func ( l * LevelDBPersistence ) ApproximateSize ( ) ( uint64 , error ) {
2013-05-10 07:41:02 -07:00
iterator := l . NewIterator ( false )
defer iterator . Close ( )
if ! iterator . SeekToFirst ( ) {
2013-05-10 16:02:57 -07:00
return 0 , fmt . Errorf ( "could not seek to first key" )
2013-05-10 07:41:02 -07:00
}
keyspace := levigo . Range { }
keyspace . Start = iterator . Key ( )
if ! iterator . SeekToLast ( ) {
2013-05-10 16:02:57 -07:00
return 0 , fmt . Errorf ( "could not seek to last key" )
2013-05-10 07:41:02 -07:00
}
keyspace . Limit = iterator . Key ( )
2013-05-10 16:02:57 -07:00
sizes := l . storage . GetApproximateSizes ( [ ] levigo . Range { keyspace } )
total := uint64 ( 0 )
for _ , size := range sizes {
total += size
}
2013-05-10 07:41:02 -07:00
2013-05-10 16:02:57 -07:00
return total , nil
2013-05-10 07:41:02 -07:00
}
2013-03-25 02:24:59 -07:00
// NewIterator creates a new levigoIterator, which follows the Iterator
// interface.
//
// Important notes:
//
// For each of the iterator methods that have a return signature of (ok bool),
// if ok == false, the iterator may not be used any further and must be closed.
// Further work with the database requires the creation of a new iterator. This
// is due to LevelDB and Levigo design. Please refer to Jeff and Sanjay's notes
// in the LevelDB documentation for this behavior's rationale.
//
// The returned iterator must explicitly be closed; otherwise non-managed memory
// will be leaked.
//
// The iterator is optionally snapshotable.
2013-04-01 04:22:38 -07:00
func ( l * LevelDBPersistence ) NewIterator ( snapshotted bool ) Iterator {
2013-03-25 02:24:59 -07:00
var (
snapshot * levigo . Snapshot
readOptions * levigo . ReadOptions
iterator * levigo . Iterator
)
if snapshotted {
snapshot = l . storage . NewSnapshot ( )
readOptions = levigo . NewReadOptions ( )
readOptions . SetSnapshot ( snapshot )
iterator = l . storage . NewIterator ( readOptions )
} else {
iterator = l . storage . NewIterator ( l . readOptions )
}
2012-11-24 03:33:34 -08:00
2013-04-01 04:22:38 -07:00
return & levigoIterator {
creationTime : time . Now ( ) ,
iterator : iterator ,
readOptions : readOptions ,
snapshot : snapshot ,
storage : l . storage ,
2012-11-24 03:33:34 -08:00
}
}
2013-02-06 08:05:23 -08:00
func ( l * LevelDBPersistence ) ForEach ( decoder storage . RecordDecoder , filter storage . RecordFilter , operator storage . RecordOperator ) ( scannedEntireCorpus bool , err error ) {
2013-03-25 02:24:59 -07:00
var (
iterator = l . NewIterator ( true )
valid bool
)
defer iterator . Close ( )
2013-02-06 08:05:23 -08:00
2013-03-25 02:24:59 -07:00
for valid = iterator . SeekToFirst ( ) ; valid ; valid = iterator . Next ( ) {
2013-02-06 08:05:23 -08:00
err = iterator . GetError ( )
if err != nil {
return
}
decodedKey , decodeErr := decoder . DecodeKey ( iterator . Key ( ) )
if decodeErr != nil {
continue
}
decodedValue , decodeErr := decoder . DecodeValue ( iterator . Value ( ) )
if decodeErr != nil {
continue
}
switch filter . Filter ( decodedKey , decodedValue ) {
case storage . STOP :
return
case storage . SKIP :
continue
case storage . ACCEPT :
opErr := operator . Operate ( decodedKey , decodedValue )
if opErr != nil {
if opErr . Continuable {
continue
}
break
}
}
}
scannedEntireCorpus = true
return
}