mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-13 14:57:40 -08:00
783 lines
20 KiB
Go
783 lines
20 KiB
Go
package pages
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"hash/fnv"
|
|
"math"
|
|
"os"
|
|
"runtime"
|
|
"sync"
|
|
"time"
|
|
"unsafe"
|
|
)
|
|
|
|
// Sentinel errors that may be reported while opening a DB or calling its
// methods.
var (
	// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
	// is opened or after it is closed.
	ErrDatabaseNotOpen = errors.New("database not open")

	// ErrDatabaseOpen is returned when opening a database that is
	// already open.
	ErrDatabaseOpen = errors.New("database already open")

	// ErrInvalid is returned when a meta page does not carry the expected
	// magic marker. This typically means the file is not a pagebuf database.
	ErrInvalid = errors.New("invalid database")

	// ErrVersionMismatch is returned when the data file was created with a
	// different data file version than the one this package writes.
	ErrVersionMismatch = errors.New("version mismatch")

	// ErrChecksum is returned when a meta page's stored checksum does not
	// match the checksum computed over its contents.
	ErrChecksum = errors.New("checksum error")

	// ErrTimeout is returned when a database cannot obtain an exclusive lock
	// on the data file after the timeout passed to Open().
	ErrTimeout = errors.New("timeout")

	// ErrNotFound is returned when a user page for an ID could not be found.
	ErrNotFound = errors.New("not found")

	// ErrTxClosed carries the message "transaction closed".
	// NOTE(review): presumably returned when a finished transaction is used
	// again; confirm against the Tx implementation.
	ErrTxClosed = errors.New("transaction closed")

	// ErrTxNotWritable carries the message "transaction not writable".
	// NOTE(review): presumably returned when a read-only transaction attempts
	// a mutation; confirm against the Tx implementation.
	ErrTxNotWritable = errors.New("transaction not writable")
)
|
|
|
|
const (
	// magic is the marker value that identifies a file as a pagebuf file.
	magic uint32 = 0xAFFEAFFE

	// version is the data file format version written into meta pages.
	version = 1

	// maxMmapStep is the largest step that can be taken when remapping the
	// mmap (1GB).
	maxMmapStep = 1 << 30
)

// defaultPageSize of the underlying buffers is the OS page size, queried
// once at package initialization.
var defaultPageSize = os.Getpagesize()
|
|
|
|
// DB is an interface providing access to persistent byte chunks that
|
|
// are backed by memory-mapped pages.
|
|
type DB struct {
|
|
// If you want to read the entire database fast, you can set MmapFlag to
|
|
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
|
|
MmapFlags int
|
|
|
|
// AllocSize is the amount of space allocated when the database
|
|
// needs to create new pages. This is done to amortize the cost
|
|
// of truncate() and fsync() when growing the data file.
|
|
AllocSize int
|
|
|
|
path string // location of the pagebuf file
|
|
file *os.File // the opened file of path
|
|
opened bool
|
|
data *[maxMapSize]byte
|
|
dataref []byte // mmap'ed readonly, write throws SEGV
|
|
datasz int
|
|
filesz int // current on disk file size
|
|
pageSize int
|
|
meta0 *meta
|
|
meta1 *meta
|
|
freelist *freelist
|
|
rwtx *Tx
|
|
txs []*Tx
|
|
|
|
pagePool sync.Pool
|
|
|
|
rwlock sync.Mutex // Allows only one writer at a time.
|
|
metalock sync.Mutex // Protects meta page access.
|
|
mmaplock sync.RWMutex // Protects mmap access during remapping
|
|
|
|
ops struct {
|
|
writeAt func(b []byte, off int64) (n int, err error)
|
|
}
|
|
}
|
|
|
|
// Options holds the configuration parameters with which a DB is opened.
type Options struct {
	// Timeout is the amount of time to wait to obtain a file lock.
	// When set to zero it will wait indefinitely. This option is only
	// available on Darwin and Linux.
	Timeout time.Duration

	// MmapFlags is copied to DB.MmapFlags before the file is memory mapped.
	MmapFlags int

	// XXX(fabxc): potentially allow setting different allocation strategies
	// to fit different use cases.

	// InitialMmapSize is the initial mmap size of the database
	// in bytes.
	//
	// If <=0, the initial map size is 0.
	// If InitialMmapSize is smaller than the previous database size,
	// it takes no effect.
	InitialMmapSize int

	// PageSize defines a custom page size. It is only applied when a new
	// file is initialized and cannot be changed later.
	// Must be a multiple of the operating system's default page size.
	PageSize int
}
|
|
|
|
// DefaultOptions specifies a set of default parameters used when a pagebuf
|
|
// is opened without explicit options.
|
|
var DefaultOptions = Options{
|
|
// Use the OS's default page size.
|
|
PageSize: defaultPageSize,
|
|
}
|
|
|
|
// Default values applied to a DB instance by Open.
const (
	// DefaultAllocSize is the initial DB.AllocSize: 16 MiB.
	DefaultAllocSize = 16 << 20
)
|
|
|
|
// Open and create a new database under the given path.
|
|
func Open(path string, mode os.FileMode, o *Options) (*DB, error) {
|
|
db := &DB{
|
|
opened: true,
|
|
}
|
|
|
|
// Set default options if no options are provided.
|
|
if o == nil {
|
|
o = &DefaultOptions
|
|
}
|
|
db.MmapFlags = o.MmapFlags
|
|
|
|
db.AllocSize = DefaultAllocSize
|
|
|
|
flag := os.O_RDWR
|
|
|
|
// Open data file and separate sync handler for metadata writes.
|
|
db.path = path
|
|
var err error
|
|
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
|
|
_ = db.close()
|
|
return nil, err
|
|
}
|
|
|
|
// Lock file so that other processes using pagebuf in read-write mode cannot
|
|
// use the underlying data at the same time.
|
|
if err := flock(db, mode, true, o.Timeout); err != nil {
|
|
_ = db.close()
|
|
return nil, err
|
|
}
|
|
|
|
// Default values for test hooks
|
|
db.ops.writeAt = db.file.WriteAt
|
|
|
|
// Initialize the database if it doesn't exist.
|
|
if info, err := db.file.Stat(); err != nil {
|
|
return nil, err
|
|
} else if info.Size() == 0 {
|
|
// Initialize new files with meta pages.
|
|
if err := db.init(o.PageSize); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
// Read the first meta page to determine the page size.
|
|
var buf [0x1000]byte
|
|
if _, err := db.file.ReadAt(buf[:], 0); err == nil {
|
|
m := db.pageInBuffer(buf[:], 0).meta()
|
|
if err := m.validate(); err != nil {
|
|
// We cannot verify which page sizes are used.
|
|
return nil, fmt.Errorf("cannot read page size: %s", err)
|
|
} else {
|
|
db.pageSize = int(m.pageSize)
|
|
}
|
|
} else {
|
|
return nil, fmt.Errorf("reading first meta page failed: %s", err)
|
|
}
|
|
}
|
|
|
|
// Initialize page pool.
|
|
db.pagePool = sync.Pool{
|
|
New: func() interface{} {
|
|
return make([]byte, db.pageSize)
|
|
},
|
|
}
|
|
|
|
// Memory map the data file.
|
|
if err := db.mmap(o.InitialMmapSize); err != nil {
|
|
_ = db.close()
|
|
return nil, err
|
|
}
|
|
|
|
// Read in the freelist.
|
|
db.freelist = newFreelist()
|
|
db.freelist.read(db.page(db.meta().freelist))
|
|
|
|
// Mark the database as opened and return.
|
|
return db, nil
|
|
}
|
|
|
|
func validatePageSize(psz int) error {
|
|
// Max value the content length can hold.
|
|
if defaultPageSize > math.MaxUint16 {
|
|
return fmt.Errorf("invalid page size %d", psz)
|
|
}
|
|
// Page size must be a multiple of OS page size so we stay
|
|
// page aligned.
|
|
if psz < defaultPageSize {
|
|
if defaultPageSize%psz != 0 {
|
|
return fmt.Errorf("invalid page size %d", psz)
|
|
}
|
|
} else if psz > defaultPageSize {
|
|
if psz%defaultPageSize != 0 {
|
|
return fmt.Errorf("invalid page size %d", psz)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// init creates a new database file and initializes its meta pages.
|
|
func (db *DB) init(psz int) error {
|
|
if err := validatePageSize(psz); err != nil {
|
|
return err
|
|
}
|
|
// Set the page size to the OS page size.
|
|
db.pageSize = psz
|
|
|
|
// Create two meta pages on a buffer.
|
|
buf := make([]byte, db.pageSize*4)
|
|
for i := 0; i < 2; i++ {
|
|
p := db.pageInBuffer(buf[:], pgid(i))
|
|
p.id = pgid(i)
|
|
p.flags = pageFlagMeta
|
|
|
|
// Initialize the meta page.
|
|
m := p.meta()
|
|
m.magic = magic
|
|
m.version = version
|
|
m.pageSize = uint32(db.pageSize)
|
|
m.freelist = 2
|
|
m.txid = txid(i)
|
|
m.pgid = 4 // TODO(fabxc): we initialize with zero pages, what to do here?
|
|
m.checksum = m.sum64()
|
|
}
|
|
|
|
// Write an empty freelist at page 3.
|
|
p := db.pageInBuffer(buf[:], pgid(2))
|
|
p.id = pgid(2)
|
|
p.flags = pageFlagFreelist
|
|
p.count = 0
|
|
|
|
// Write the first empty page.
|
|
p = db.pageInBuffer(buf[:], pgid(3))
|
|
p.id = pgid(3)
|
|
p.flags = pageFlagData
|
|
p.count = 0
|
|
|
|
// Write the buffer to our data file.
|
|
if _, err := db.ops.writeAt(buf, 0); err != nil {
|
|
return err
|
|
}
|
|
if err := fdatasync(db); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Sync executes fdatasync() against the database file handle.
|
|
func (db *DB) Sync() error { return fdatasync(db) }
|
|
|
|
// Close synchronizes and closes the memory-mapped pagebuf file.
|
|
func (db *DB) Close() error {
|
|
db.rwlock.Lock()
|
|
defer db.rwlock.Unlock()
|
|
|
|
db.metalock.Lock()
|
|
defer db.metalock.Unlock()
|
|
|
|
db.mmaplock.RLock()
|
|
defer db.mmaplock.RUnlock()
|
|
|
|
return db.close()
|
|
}
|
|
|
|
func (db *DB) close() error {
|
|
if !db.opened {
|
|
return nil
|
|
}
|
|
|
|
db.opened = false
|
|
db.freelist = nil
|
|
db.ops.writeAt = nil
|
|
|
|
// Close the mmap.
|
|
if err := db.munmap(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Close file handles.
|
|
if db.file != nil {
|
|
// Close the file descriptor.
|
|
if err := db.file.Close(); err != nil {
|
|
return fmt.Errorf("db file close: %s", err)
|
|
}
|
|
db.file = nil
|
|
}
|
|
|
|
db.path = ""
|
|
return nil
|
|
}
|
|
|
|
// Update executes a function within the context of a read-write managed transaction.
|
|
// If no error is returned from the function then the transaction is committed.
|
|
// If an error is returned then the entire transaction is rolled back.
|
|
// Any error that is returned from the function or returned from the commit is
|
|
// returned from the Update() method.
|
|
//
|
|
// Attempting to manually commit or rollback within the function will cause a panic.
|
|
func (db *DB) Update(fn func(*Tx) error) error {
|
|
t, err := db.Begin(true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Make sure the transaction rolls back in the event of a panic.
|
|
defer func() {
|
|
if t.db != nil {
|
|
t.rollback()
|
|
}
|
|
}()
|
|
|
|
// Mark as a managed tx so that the inner function cannot manually commit.
|
|
t.managed = true
|
|
|
|
// If an error is returned from the function then rollback and return error.
|
|
err = fn(t)
|
|
t.managed = false
|
|
if err != nil {
|
|
_ = t.Rollback()
|
|
return err
|
|
}
|
|
|
|
return t.Commit()
|
|
}
|
|
|
|
// View executes a function within the context of a managed read-only transaction.
|
|
// Any error that is returned from the function is returned from the View() method.
|
|
//
|
|
// Attempting to manually rollback within the function will cause a panic.
|
|
func (db *DB) View(fn func(*Tx) error) error {
|
|
t, err := db.Begin(false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Make sure the transaction rolls back in the event of a panic.
|
|
defer func() {
|
|
if t.db != nil {
|
|
t.rollback()
|
|
}
|
|
}()
|
|
|
|
// Mark as a managed tx so that the inner function cannot manually rollback.
|
|
t.managed = true
|
|
|
|
// If an error is returned from the function then pass it through.
|
|
err = fn(t)
|
|
t.managed = false
|
|
if err != nil {
|
|
_ = t.Rollback()
|
|
return err
|
|
}
|
|
|
|
if err := t.Rollback(); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// pageExists checks whether the page with the given id exists.
|
|
func (db *DB) pageExists(id pgid) bool {
|
|
// The page exists if it is not in the freelist or out of the data range.
|
|
return !db.freelist.cache[pgid(id)] && int(id+1)*db.pageSize < db.datasz
|
|
}
|
|
|
|
// page retrieves a page reference from the mmap based on the current page size.
|
|
func (db *DB) page(id pgid) *page {
|
|
pos := id * pgid(db.pageSize)
|
|
return (*page)(unsafe.Pointer(&db.data[pos]))
|
|
}
|
|
|
|
// pageInBuffer retrieves a page reference from a given byte array based on the current
|
|
// page size.
|
|
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
|
|
pos := id * pgid(db.pageSize)
|
|
return (*page)(unsafe.Pointer(&b[pos]))
|
|
}
|
|
|
|
// meta retrieves the current meta page reference.
|
|
func (db *DB) meta() *meta {
|
|
// We have to return the meta with the highest txid which doesn't fail
|
|
// validation. Otherwise, we can cause errors when in fact the database is
|
|
// in a consistent state. metaA is the one with the higher txid.
|
|
metaA := db.meta0
|
|
metaB := db.meta1
|
|
if db.meta1.txid > db.meta0.txid {
|
|
metaA = db.meta1
|
|
metaB = db.meta0
|
|
}
|
|
|
|
// Use higher meta page if valid. Otherwise fallback to previous, if valid.
|
|
if err := metaA.validate(); err == nil {
|
|
return metaA
|
|
} else if err := metaB.validate(); err == nil {
|
|
return metaB
|
|
}
|
|
|
|
// This should never be reached, because both meta1 and meta0 were validated
|
|
// on mmap() and we do fsync() on every write.
|
|
panic("pagebuf.PageBuf.meta(): invalid meta pages")
|
|
}
|
|
|
|
// allocate returns a contiguous block of memory starting at a given page.
|
|
func (db *DB) allocate(count int) (*page, error) {
|
|
// Allocate a temporary buffer for the page.
|
|
var buf []byte
|
|
if count == 1 {
|
|
buf = db.pagePool.Get().([]byte)
|
|
} else {
|
|
buf = make([]byte, count*db.pageSize)
|
|
}
|
|
p := (*page)(unsafe.Pointer(&buf[0]))
|
|
p.overflow = uint32(count - 1)
|
|
|
|
// Use pages from the freelist if they are available.
|
|
if p.id = db.freelist.allocate(count); p.id != 0 {
|
|
return p, nil
|
|
}
|
|
|
|
// Resize mmap() if we're at the end.
|
|
p.id = db.rwtx.meta.pgid
|
|
var minsz = int((p.id+pgid(count))+1) * db.pageSize
|
|
if minsz >= db.datasz {
|
|
if err := db.mmap(minsz); err != nil {
|
|
return nil, fmt.Errorf("mmap allocate error: %s", err)
|
|
}
|
|
}
|
|
|
|
// Move the page id high water mark.
|
|
db.rwtx.meta.pgid += pgid(count)
|
|
return p, nil
|
|
}
|
|
|
|
// grow grows the size of the database to the given sz.
|
|
func (db *DB) grow(sz int) error {
|
|
// Ignore if the new size is less than available file size.
|
|
if sz <= db.filesz {
|
|
return nil
|
|
}
|
|
|
|
// If the data is smaller than the alloc size then only allocate what's needed.
|
|
// Once it goes over the allocation size then allocate in chunks.
|
|
if db.datasz < db.AllocSize {
|
|
sz = db.datasz
|
|
} else {
|
|
sz += db.AllocSize
|
|
}
|
|
|
|
// Truncate and fsync to ensure file size metadata is flushed.
|
|
// https://github.com/boltdb/bolt/issues/284
|
|
if runtime.GOOS != "windows" {
|
|
if err := db.file.Truncate(int64(sz)); err != nil {
|
|
return fmt.Errorf("file resize error: %s", err)
|
|
}
|
|
}
|
|
if err := db.file.Sync(); err != nil {
|
|
return fmt.Errorf("file sync error: %s", err)
|
|
}
|
|
|
|
db.filesz = sz
|
|
return nil
|
|
}
|
|
|
|
// mmap opens the underlying memory-mapped file and initializes it.
|
|
// minsz is the minimum size that the mmap can be.
|
|
func (db *DB) mmap(minsz int) error {
|
|
db.mmaplock.Lock()
|
|
defer db.mmaplock.Unlock()
|
|
|
|
info, err := db.file.Stat()
|
|
if err != nil {
|
|
return fmt.Errorf("mmap stat error: %s", err)
|
|
} else if int(info.Size()) < db.pageSize*2 {
|
|
return fmt.Errorf("file size too small")
|
|
}
|
|
|
|
// Ensure the size is at least the minimum size.
|
|
var size = int(info.Size())
|
|
if size < minsz {
|
|
size = minsz
|
|
}
|
|
size, err = db.mmapSize(size)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Unmap existing data before continuing.
|
|
if err := db.munmap(); err != nil {
|
|
return err
|
|
}
|
|
// Memory-map the data file as a byte slice.
|
|
if err := mmap(db, size); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Save references to the meta pages.
|
|
db.meta0 = db.page(0).meta()
|
|
db.meta1 = db.page(1).meta()
|
|
|
|
// Validate the meta pages. We only return an error if both meta pages fail
|
|
// validation, since meta0 failing validation means that it wasn't saved
|
|
// properly -- but we can recover using meta1. And vice-versa.
|
|
err0 := db.meta0.validate()
|
|
err1 := db.meta1.validate()
|
|
if err0 != nil && err1 != nil {
|
|
return err0
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// munmap unmaps the data file from memory.
|
|
func (db *DB) munmap() error {
|
|
if err := munmap(db); err != nil {
|
|
return fmt.Errorf("unmap error: %s", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// mmapSize determines the appropriate size for the mmap given the current size
|
|
// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
|
|
// Returns an error if the new mmap size is greater than the max allowed.
|
|
func (db *DB) mmapSize(size int) (int, error) {
|
|
// Double the size from 32KB until 1GB.
|
|
for i := uint(15); i <= 30; i++ {
|
|
if size <= 1<<i {
|
|
return 1 << i, nil
|
|
}
|
|
}
|
|
|
|
// Verify the requested size is not above the maximum allowed.
|
|
if size > maxMapSize {
|
|
return 0, fmt.Errorf("mmap too large")
|
|
}
|
|
|
|
// If larger than 1GB then grow by 1GB at a time.
|
|
sz := int64(size)
|
|
if remainder := sz % int64(maxMmapStep); remainder > 0 {
|
|
sz += int64(maxMmapStep) - remainder
|
|
}
|
|
|
|
// Ensure that the mmap size is a multiple of the page size.
|
|
// This should always be true since we're incrementing in MBs.
|
|
pageSize := int64(db.pageSize)
|
|
if (sz % pageSize) != 0 {
|
|
sz = ((sz / pageSize) + 1) * pageSize
|
|
}
|
|
|
|
// If we've exceeded the max size then only grow up to the max size.
|
|
if sz > maxMapSize {
|
|
sz = maxMapSize
|
|
}
|
|
|
|
return int(sz), nil
|
|
}
|
|
|
|
func (db *DB) String() string {
|
|
return fmt.Sprintf("PageBuf<%s>", db.path)
|
|
}
|
|
|
|
// Path returns the path to the currently opened pagebuf file.
|
|
func (db *DB) Path() string {
|
|
return db.path
|
|
}
|
|
|
|
// Begin starts a new transaction.
|
|
// Multiple read-only transactions can be used concurrently but only one
|
|
// write transaction can be used at a time. Starting multiple write transactions
|
|
// will cause the calls to block and be serialized until the current write
|
|
// transaction finishes.
|
|
//
|
|
// Transactions should not be dependent on one another. Opening a read
|
|
// transaction and a write transaction in the same goroutine can cause the
|
|
// writer to deadlock because the database periodically needs to re-mmap itself
|
|
// as it grows and it cannot do that while a read transaction is open.
|
|
//
|
|
// If a long running read transaction (for example, a snapshot transaction) is
|
|
// needed, you might want to set PageBuf.InitialMmapSize to a large enough value
|
|
// to avoid potential blocking of write transaction.
|
|
//
|
|
// IMPORTANT: You must close read-only transactions after you are finished or
|
|
// else the database will not reclaim old pages.
|
|
func (db *DB) Begin(writable bool) (*Tx, error) {
|
|
if writable {
|
|
return db.beginRWTx()
|
|
}
|
|
return db.beginTx()
|
|
}
|
|
|
|
func (db *DB) beginTx() (*Tx, error) {
|
|
// Lock the meta pages while we initialize the transaction. We obtain
|
|
// the meta lock before the mmap lock because that's the order that the
|
|
// write transaction will obtain them.
|
|
db.metalock.Lock()
|
|
|
|
// Obtain a read-only lock on the mmap. When the mmap is remapped it will
|
|
// obtain a write lock so all transactions must finish before it can be
|
|
// remapped.
|
|
db.mmaplock.RLock()
|
|
|
|
// Exit if the database is not open yet.
|
|
if !db.opened {
|
|
db.mmaplock.RUnlock()
|
|
db.metalock.Unlock()
|
|
return nil, ErrDatabaseNotOpen
|
|
}
|
|
|
|
// Create a transaction associated with the database.
|
|
t := &Tx{}
|
|
t.init(db)
|
|
|
|
// Keep track of transaction until it closes.
|
|
db.txs = append(db.txs, t)
|
|
|
|
// Unlock the meta pages.
|
|
db.metalock.Unlock()
|
|
|
|
return t, nil
|
|
}
|
|
|
|
func (db *DB) beginRWTx() (*Tx, error) {
|
|
// Obtain writer lock. This is released by the transaction when it closes.
|
|
// This enforces only one writer transaction at a time.
|
|
db.rwlock.Lock()
|
|
|
|
// Once we have the writer lock then we can lock the meta pages so that
|
|
// we can set up the transaction.
|
|
db.metalock.Lock()
|
|
defer db.metalock.Unlock()
|
|
|
|
// Exit if the database is not open yet.
|
|
if !db.opened {
|
|
db.rwlock.Unlock()
|
|
return nil, ErrDatabaseNotOpen
|
|
}
|
|
|
|
// Create a transaction associated with the database.
|
|
t := &Tx{writable: true}
|
|
t.init(db)
|
|
db.rwtx = t
|
|
|
|
// Free any pages associated with closed read-only transactions.
|
|
var minid txid = 0xFFFFFFFFFFFFFFFF
|
|
for _, t := range db.txs {
|
|
if t.meta.txid < minid {
|
|
minid = t.meta.txid
|
|
}
|
|
}
|
|
if minid > 0 {
|
|
db.freelist.release(minid - 1)
|
|
}
|
|
|
|
return t, nil
|
|
}
|
|
|
|
// removeTx removes a transaction from the database.
|
|
func (db *DB) removeTx(tx *Tx) {
|
|
// Release the read lock on the mmap.
|
|
db.mmaplock.RUnlock()
|
|
|
|
// Use the meta lock to restrict access to the DB object.
|
|
db.metalock.Lock()
|
|
|
|
// Remove the transaction.
|
|
for i, t := range db.txs {
|
|
if t == tx {
|
|
db.txs = append(db.txs[:i], db.txs[i+1:]...)
|
|
break
|
|
}
|
|
}
|
|
// Unlock the meta pages.
|
|
db.metalock.Unlock()
|
|
}
|
|
|
|
// Size represents a valid page size. Its numeric value is the power-of-two
// exponent relative to 4096 bytes, e.g. Size512 is -3 because
// 512 == 4096 >> 3 and Size8192 is 1 because 8192 == 4096 << 1.
type Size int8

// The valid sizes for allocated pages. Every constant is explicitly typed as
// Size; without the type on each line only the first would be a Size and the
// rest would be untyped integer constants.
const (
	Size512  Size = -3
	Size1024 Size = -2
	Size2048 Size = -1
	Size4096 Size = 0
	Size8192 Size = 1
)

// Bounds of the valid Size range.
const (
	upageSizeMin = Size512
	upageSizeMax = Size8192
)
|
|
|
|
type meta struct {
|
|
magic uint32
|
|
version uint32
|
|
pageSize uint32
|
|
flags uint32
|
|
freelist pgid
|
|
txid txid
|
|
pgid pgid
|
|
checksum uint64
|
|
}
|
|
|
|
// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
|
|
func (m *meta) validate() error {
|
|
if m.magic != magic {
|
|
return ErrInvalid
|
|
} else if m.version != version {
|
|
return ErrVersionMismatch
|
|
} else if m.checksum != 0 && m.checksum != m.sum64() {
|
|
return ErrChecksum
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// copy copies one meta object to another.
|
|
func (m *meta) copy(dest *meta) {
|
|
*dest = *m
|
|
}
|
|
|
|
// write writes the meta onto a page.
|
|
func (m *meta) write(p *page) {
|
|
if m.freelist >= m.pgid {
|
|
panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
|
|
}
|
|
|
|
// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
|
|
p.id = pgid(m.txid % 2)
|
|
p.flags |= pageFlagMeta
|
|
|
|
// Calculate the checksum.
|
|
m.checksum = m.sum64()
|
|
|
|
m.copy(p.meta())
|
|
}
|
|
|
|
// generates the checksum for the meta.
|
|
func (m *meta) sum64() uint64 {
|
|
var h = fnv.New64a()
|
|
_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
|
|
return h.Sum64()
|
|
}
|
|
|
|
// _assert panics when condition is false. The panic value is the string
// "assertion failed: " followed by msg formatted with the arguments v.
func _assert(condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}
	panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
|