vendor: update tsdb (#2840)

This commit is contained in:
Fabian Reinartz 2017-06-13 07:44:07 +02:00 committed by GitHub
parent baf5b0f0fc
commit 7640960469
11 changed files with 1116 additions and 202 deletions

View file

@ -1,4 +1,5 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -21,6 +22,7 @@ import (
"github.com/oklog/ulid"
"github.com/pkg/errors"
"github.com/prometheus/tsdb/labels"
)
// DiskBlock handles reads against a Block of time series data.
@ -37,6 +39,12 @@ type DiskBlock interface {
// Chunks returns a ChunkReader over the block's data.
Chunks() ChunkReader
// Tombstones returns a TombstoneReader over the block's deleted data.
Tombstones() TombstoneReader
// Delete deletes data from the block.
Delete(mint, maxt int64, ms ...labels.Matcher) error
// Close releases all underlying resources of the block.
Close() error
}
@ -45,6 +53,7 @@ type DiskBlock interface {
type Block interface {
DiskBlock
Queryable
Snapshottable
}
// headBlock is a regular block that can still be appended to.
@ -53,6 +62,11 @@ type headBlock interface {
Appendable
}
// Snapshottable defines an entity that can be backed up online.
type Snapshottable interface {
// Snapshot writes a backup of the entity's current data beneath dir.
Snapshot(dir string) error
}
// Appendable defines an entity to which data can be appended.
type Appendable interface {
// Appender returns a new Appender against an underlying store.
@ -78,16 +92,27 @@ type BlockMeta struct {
MaxTime int64 `json:"maxTime"`
// Stats about the contents of the block.
Stats struct {
NumSamples uint64 `json:"numSamples,omitempty"`
NumSeries uint64 `json:"numSeries,omitempty"`
NumChunks uint64 `json:"numChunks,omitempty"`
} `json:"stats,omitempty"`
Stats BlockStats `json:"stats,omitempty"`
// Information on compactions the block was created from.
Compaction struct {
Generation int `json:"generation"`
} `json:"compaction"`
Compaction BlockMetaCompaction `json:"compaction"`
}
// BlockStats contains stats about contents of a block.
// All counters are omitted from the JSON encoding when they are zero.
type BlockStats struct {
// NumSamples is the number of samples held by the block.
NumSamples uint64 `json:"numSamples,omitempty"`
// NumSeries is the number of series held by the block.
NumSeries uint64 `json:"numSeries,omitempty"`
// NumChunks is the number of chunks held by the block.
NumChunks uint64 `json:"numChunks,omitempty"`
// NumTombstones is set to the length of the block's tombstone collection
// each time a Delete is applied to the block.
NumTombstones uint64 `json:"numTombstones,omitempty"`
}
// BlockMetaCompaction holds information about compactions a block went through.
type BlockMetaCompaction struct {
// Maximum number of compaction cycles any source block has
// gone through.
// A block with generation 0 has not been compacted yet; compacting a set
// of blocks yields the highest source generation plus one.
Generation int `json:"generation"`
// ULIDs of all source head blocks that went into the block.
// Omitted from the JSON encoding when empty.
Sources []ulid.ULID `json:"sources,omitempty"`
}
const (
@ -136,7 +161,7 @@ func writeMetaFile(dir string, meta *BlockMeta) error {
var merr MultiError
if merr.Add(enc.Encode(&blockMeta{Version: 1, BlockMeta: meta})); merr.Err() != nil {
merr.Add(f.Close())
return merr
return merr.Err()
}
if err := f.Close(); err != nil {
return err
@ -150,6 +175,8 @@ type persistedBlock struct {
chunkr *chunkReader
indexr *indexReader
tombstones tombstoneReader
}
func newPersistedBlock(dir string) (*persistedBlock, error) {
@ -167,11 +194,17 @@ func newPersistedBlock(dir string) (*persistedBlock, error) {
return nil, err
}
tr, err := readTombstones(dir)
if err != nil {
return nil, err
}
pb := &persistedBlock{
dir: dir,
meta: *meta,
chunkr: cr,
indexr: ir,
dir: dir,
meta: *meta,
chunkr: cr,
indexr: ir,
tombstones: tr,
}
return pb, nil
}
@ -191,21 +224,124 @@ func (pb *persistedBlock) String() string {
func (pb *persistedBlock) Querier(mint, maxt int64) Querier {
return &blockQuerier{
mint: mint,
maxt: maxt,
index: pb.Index(),
chunks: pb.Chunks(),
mint: mint,
maxt: maxt,
index: pb.Index(),
chunks: pb.Chunks(),
tombstones: pb.Tombstones(),
}
}
func (pb *persistedBlock) Dir() string { return pb.dir }
func (pb *persistedBlock) Index() IndexReader { return pb.indexr }
func (pb *persistedBlock) Chunks() ChunkReader { return pb.chunkr }
func (pb *persistedBlock) Meta() BlockMeta { return pb.meta }
// Tombstones hands out the tombstones that were loaded for this block.
func (pb *persistedBlock) Tombstones() TombstoneReader { return pb.tombstones }
func (pb *persistedBlock) Meta() BlockMeta { return pb.meta }
// Delete records tombstones for all series selected by the matchers ms that
// have at least one chunk overlapping the time range [mint, maxt].
//
// The tombstone interval of a series is the requested range clamped to the
// span between the start of its first chunk and the end of its last chunk.
// New tombstones are merged into the block's existing ones, after which the
// tombstone file and the meta file (with an updated tombstone count) are
// rewritten in the block directory. Nothing is merged or written if reading
// the index or iterating the postings fails.
func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error {
	index := pb.indexr

	pr := newPostingsReader(index)
	postings, absent := pr.Select(ms...)

	// Pending tombstones, keyed by series reference. They are only applied
	// once the postings have been consumed without error.
	pending := make(map[uint32]interval)

	for postings.Next() {
		ref := postings.At()

		lset, chks, err := index.Series(ref)
		if err != nil {
			return err
		}

		// Series carrying any of the labels that must be absent do not match.
		excluded := false
		for _, name := range absent {
			if lset.Get(name) != "" {
				excluded = true
				break
			}
		}
		if excluded {
			continue
		}

		// Choose only series which have chunks in the time range.
		for _, chk := range chks {
			if !intervalOverlap(mint, maxt, chk.MinTime, chk.MaxTime) {
				continue
			}
			// Delete only until the current values and not beyond.
			tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime)
			pending[ref] = interval{tmin, tmax}
			break
		}
	}
	if err := postings.Err(); err != nil {
		return err
	}

	// Merge the current and new tombstones.
	for ref, itv := range pending {
		pb.tombstones.add(ref, itv)
	}

	if err := writeTombstoneFile(pb.dir, pb.tombstones); err != nil {
		return err
	}

	pb.meta.Stats.NumTombstones = uint64(len(pb.tombstones))
	return writeMetaFile(pb.dir, &pb.meta)
}
// Snapshot replicates the block into a directory named after the block's ULID
// beneath dir. No data is copied: the meta, index and tombstone files as well
// as every file in the chunk directory are hard-linked into the new location.
func (pb *persistedBlock) Snapshot(dir string) error {
	dstBlock := filepath.Join(dir, pb.meta.ULID.String())
	if err := os.MkdirAll(dstBlock, 0777); err != nil {
		return errors.Wrap(err, "create snapshot block dir")
	}

	dstChunks := chunkDir(dstBlock)
	if err := os.MkdirAll(dstChunks, 0777); err != nil {
		return errors.Wrap(err, "create snapshot chunk dir")
	}

	// Hardlink meta, index and tombstones
	topLevel := []string{metaFilename, indexFilename, tombstoneFilename}
	for _, name := range topLevel {
		src := filepath.Join(pb.dir, name)
		dst := filepath.Join(dstBlock, name)

		if err := os.Link(src, dst); err != nil {
			return errors.Wrapf(err, "create snapshot %s", name)
		}
	}

	// Hardlink the chunks
	srcChunks := chunkDir(pb.dir)

	entries, err := ioutil.ReadDir(srcChunks)
	if err != nil {
		return errors.Wrap(err, "ReadDir the current chunk dir")
	}
	for _, entry := range entries {
		src := filepath.Join(srcChunks, entry.Name())
		dst := filepath.Join(dstChunks, entry.Name())

		if err := os.Link(src, dst); err != nil {
			return errors.Wrap(err, "hardlink a chunk")
		}
	}

	return nil
}
// chunkDir returns the path of the "chunks" sub-directory of dir.
func chunkDir(dir string) string {
	const sub = "chunks"
	return filepath.Join(dir, sub)
}

// walDir returns the path of the "wal" sub-directory of dir.
func walDir(dir string) string {
	const sub = "wal"
	return filepath.Join(dir, sub)
}
// clampInterval restricts the interval [a, b] to the bounds [mint, maxt]:
// the lower end is raised to mint and the upper end lowered to maxt where
// they fall outside. If the two intervals do not overlap, the returned lower
// end is greater than the returned upper end.
func clampInterval(a, b, mint, maxt int64) (int64, int64) {
	lo, hi := a, b

	if mint > lo {
		lo = mint
	}
	if maxt < hi {
		hi = maxt
	}
	return lo, hi
}
type mmapFile struct {
f *os.File
b []byte

View file

@ -54,6 +54,46 @@ func (cm *ChunkMeta) writeHash(h hash.Hash) error {
return nil
}
// deletedIterator wraps an Iterator and makes sure any deleted samples are not
// returned.
type deletedIterator struct {
// it is the wrapped iterator that yields all samples, deleted or not.
it chunks.Iterator
// intervals holds the deleted time ranges still to be considered. Next
// drops entries from the front once the iteration has moved past them.
// NOTE(review): this presumably requires the intervals to be sorted by
// time — confirm against the producers of the slice.
intervals intervals
}
// At returns the timestamp and value of the sample the wrapped iterator
// currently points at.
func (it *deletedIterator) At() (int64, float64) {
return it.it.At()
}
// Next advances the wrapped iterator to the next sample whose timestamp is
// not covered by a deleted interval. It returns false once the wrapped
// iterator is exhausted.
func (it *deletedIterator) Next() bool {
	for it.it.Next() {
		ts, _ := it.it.At()

		// Discard leading intervals the iteration has already moved past.
		for len(it.intervals) > 0 {
			head := it.intervals[0]
			if head.inBounds(ts) || ts <= head.maxt {
				break
			}
			it.intervals = it.intervals[1:]
		}

		// The sample is deleted if the first remaining interval covers it.
		if len(it.intervals) > 0 && it.intervals[0].inBounds(ts) {
			continue
		}
		return true
	}

	return false
}
// Err returns the error reported by the wrapped iterator.
func (it *deletedIterator) Err() error {
return it.it.Err()
}
// ChunkWriter serializes a time block of chunked series data.
type ChunkWriter interface {
// WriteChunks writes several chunks. The Chunk field of the ChunkMetas

View file

@ -26,6 +26,7 @@ import (
"github.com/oklog/ulid"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/tsdb/chunks"
"github.com/prometheus/tsdb/labels"
)
@ -70,7 +71,7 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
Name: "tsdb_compactions_failed_total",
Help: "Total number of compactions that failed for the partition.",
})
m.duration = prometheus.NewHistogram(prometheus.HistogramOpts{
m.duration = prometheus.NewSummary(prometheus.SummaryOpts{
Name: "tsdb_compaction_duration",
Help: "Duration of compaction runs.",
})
@ -165,17 +166,35 @@ func (c *compactor) match(dirs []dirMeta) bool {
return uint64(dirs[len(dirs)-1].meta.MaxTime-dirs[0].meta.MinTime) <= c.opts.maxBlockRange
}
func mergeBlockMetas(blocks ...Block) (res BlockMeta) {
m0 := blocks[0].Meta()
func compactBlockMetas(blocks ...BlockMeta) (res BlockMeta) {
res.MinTime = blocks[0].MinTime
res.MaxTime = blocks[len(blocks)-1].MaxTime
res.MinTime = m0.MinTime
res.MaxTime = blocks[len(blocks)-1].Meta().MaxTime
res.Compaction.Generation = m0.Compaction.Generation + 1
sources := map[ulid.ULID]struct{}{}
for _, b := range blocks {
res.Stats.NumSamples += b.Meta().Stats.NumSamples
res.Stats.NumSamples += b.Stats.NumSamples
if b.Compaction.Generation > res.Compaction.Generation {
res.Compaction.Generation = b.Compaction.Generation
}
for _, s := range b.Compaction.Sources {
sources[s] = struct{}{}
}
// If it's an in memory block, its ULID goes into the sources.
if b.Compaction.Generation == 0 {
sources[b.ULID] = struct{}{}
}
}
res.Compaction.Generation++
for s := range sources {
res.Compaction.Sources = append(res.Compaction.Sources, s)
}
sort.Slice(res.Compaction.Sources, func(i, j int) bool {
return res.Compaction.Sources[i].Compare(res.Compaction.Sources[j]) < 0
})
return res
}
@ -219,6 +238,7 @@ func (c *compactor) write(uid ulid.ULID, blocks ...Block) (err error) {
if err != nil {
c.metrics.failed.Inc()
}
c.metrics.ran.Inc()
c.metrics.duration.Observe(time.Since(t).Seconds())
}(time.Now())
@ -244,7 +264,7 @@ func (c *compactor) write(uid ulid.ULID, blocks ...Block) (err error) {
return errors.Wrap(err, "open index writer")
}
meta, err := c.populate(blocks, indexw, chunkw)
meta, err := populateBlock(blocks, indexw, chunkw)
if err != nil {
return errors.Wrap(err, "write compaction")
}
@ -261,6 +281,11 @@ func (c *compactor) write(uid ulid.ULID, blocks ...Block) (err error) {
return errors.Wrap(err, "close index writer")
}
// Create an empty tombstones file.
if err := writeTombstoneFile(tmp, newEmptyTombstoneReader()); err != nil {
return errors.Wrap(err, "write new tombstones file")
}
// Block successfully written, make visible and remove old ones.
if err := renameFile(tmp, dir); err != nil {
return errors.Wrap(err, "rename block dir")
@ -275,6 +300,8 @@ func (c *compactor) write(uid ulid.ULID, blocks ...Block) (err error) {
if err != nil {
return errors.Wrap(err, "sync block dir")
}
defer df.Close()
if err := fileutil.Fsync(df); err != nil {
return errors.Wrap(err, "sync block dir")
}
@ -282,17 +309,20 @@ func (c *compactor) write(uid ulid.ULID, blocks ...Block) (err error) {
return nil
}
// populate fills the index and chunk writers with new data gathered as the union
// populateBlock fills the index and chunk writers with new data gathered as the union
// of the provided blocks. It returns meta information for the new block.
func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWriter) (*BlockMeta, error) {
func populateBlock(blocks []Block, indexw IndexWriter, chunkw ChunkWriter) (*BlockMeta, error) {
var set compactionSet
var metas []BlockMeta
for i, b := range blocks {
metas = append(metas, b.Meta())
all, err := b.Index().Postings("", "")
if err != nil {
return nil, err
}
s := newCompactionSeriesSet(b.Index(), b.Chunks(), all)
s := newCompactionSeriesSet(b.Index(), b.Chunks(), b.Tombstones(), all)
if i == 0 {
set = s
@ -309,18 +339,40 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri
postings = &memPostings{m: make(map[term][]uint32, 512)}
values = map[string]stringset{}
i = uint32(0)
meta = mergeBlockMetas(blocks...)
meta = compactBlockMetas(metas...)
)
for set.Next() {
lset, chunks := set.At()
if err := chunkw.WriteChunks(chunks...); err != nil {
lset, chks, dranges := set.At() // The chunks here are not fully deleted.
if len(dranges) > 0 {
// Re-encode the chunk to not have deleted values.
for _, chk := range chks {
if intervalOverlap(dranges[0].mint, dranges[len(dranges)-1].maxt, chk.MinTime, chk.MaxTime) {
newChunk := chunks.NewXORChunk()
app, err := newChunk.Appender()
if err != nil {
return nil, err
}
it := &deletedIterator{it: chk.Chunk.Iterator(), intervals: dranges}
for it.Next() {
ts, v := it.At()
app.Append(ts, v)
}
chk.Chunk = newChunk
}
}
}
if err := chunkw.WriteChunks(chks...); err != nil {
return nil, err
}
indexw.AddSeries(i, lset, chunks...)
indexw.AddSeries(i, lset, chks...)
meta.Stats.NumChunks += uint64(len(chunks))
meta.Stats.NumChunks += uint64(len(chks))
meta.Stats.NumSeries++
for _, l := range lset {
@ -370,25 +422,28 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri
type compactionSet interface {
Next() bool
At() (labels.Labels, []*ChunkMeta)
At() (labels.Labels, []*ChunkMeta, intervals)
Err() error
}
type compactionSeriesSet struct {
p Postings
index IndexReader
chunks ChunkReader
p Postings
index IndexReader
chunks ChunkReader
tombstones TombstoneReader
l labels.Labels
c []*ChunkMeta
err error
l labels.Labels
c []*ChunkMeta
intervals intervals
err error
}
func newCompactionSeriesSet(i IndexReader, c ChunkReader, p Postings) *compactionSeriesSet {
func newCompactionSeriesSet(i IndexReader, c ChunkReader, t TombstoneReader, p Postings) *compactionSeriesSet {
return &compactionSeriesSet{
index: i,
chunks: c,
p: p,
index: i,
chunks: c,
tombstones: t,
p: p,
}
}
@ -397,10 +452,25 @@ func (c *compactionSeriesSet) Next() bool {
return false
}
c.intervals = c.tombstones.Get(c.p.At())
c.l, c.c, c.err = c.index.Series(c.p.At())
if c.err != nil {
return false
}
// Remove completely deleted chunks.
if len(c.intervals) > 0 {
chks := make([]*ChunkMeta, 0, len(c.c))
for _, chk := range c.c {
if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) {
chks = append(chks, chk)
}
}
c.c = chks
}
for _, chk := range c.c {
chk.Chunk, c.err = c.chunks.Chunk(chk.Ref)
if c.err != nil {
@ -418,16 +488,17 @@ func (c *compactionSeriesSet) Err() error {
return c.p.Err()
}
func (c *compactionSeriesSet) At() (labels.Labels, []*ChunkMeta) {
return c.l, c.c
func (c *compactionSeriesSet) At() (labels.Labels, []*ChunkMeta, intervals) {
return c.l, c.c, c.intervals
}
type compactionMerger struct {
a, b compactionSet
aok, bok bool
l labels.Labels
c []*ChunkMeta
aok, bok bool
l labels.Labels
c []*ChunkMeta
intervals intervals
}
type compactionSeries struct {
@ -455,8 +526,8 @@ func (c *compactionMerger) compare() int {
if !c.bok {
return -1
}
a, _ := c.a.At()
b, _ := c.b.At()
a, _, _ := c.a.At()
b, _, _ := c.b.At()
return labels.Compare(a, b)
}
@ -468,17 +539,21 @@ func (c *compactionMerger) Next() bool {
d := c.compare()
// Both sets contain the current series. Chain them into a single one.
if d > 0 {
c.l, c.c = c.b.At()
c.l, c.c, c.intervals = c.b.At()
c.bok = c.b.Next()
} else if d < 0 {
c.l, c.c = c.a.At()
c.l, c.c, c.intervals = c.a.At()
c.aok = c.a.Next()
} else {
l, ca := c.a.At()
_, cb := c.b.At()
l, ca, ra := c.a.At()
_, cb, rb := c.b.At()
for _, r := range rb {
ra = ra.add(r)
}
c.l = l
c.c = append(ca, cb...)
c.intervals = ra
c.aok = c.a.Next()
c.bok = c.b.Next()
@ -493,8 +568,8 @@ func (c *compactionMerger) Err() error {
return c.b.Err()
}
func (c *compactionMerger) At() (labels.Labels, []*ChunkMeta) {
return c.l, c.c
func (c *compactionMerger) At() (labels.Labels, []*ChunkMeta, intervals) {
return c.l, c.c, c.intervals
}
func renameFile(from, to string) error {
@ -510,6 +585,8 @@ func renameFile(from, to string) error {
if err != nil {
return err
}
defer pdir.Close()
if err = fileutil.Fsync(pdir); err != nil {
return err
}

View file

@ -119,16 +119,49 @@ type DB struct {
compactc chan struct{}
donec chan struct{}
stopc chan struct{}
// cmtx is used to control compactions and deletions.
cmtx sync.Mutex
compacting bool
}
// dbMetrics bundles the Prometheus metrics the DB reports about itself.
type dbMetrics struct {
// activeAppenders is the number of currently active appender transactions.
activeAppenders prometheus.Gauge
// loadedBlocks is the number of currently loaded data blocks.
loadedBlocks prometheus.GaugeFunc
// reloads counts how often the database reloaded block data from disk.
reloads prometheus.Counter
// reloadsFailed counts the block reloads that returned an error.
reloadsFailed prometheus.Counter
// reloadDuration observes the duration of block reloads in seconds.
reloadDuration prometheus.Summary
// samplesAppended is the total number of appended samples.
samplesAppended prometheus.Counter
// compactionsTriggered presumably counts triggered compactions; its
// definition is not part of this view — TODO confirm.
compactionsTriggered prometheus.Counter
}
func newDBMetrics(r prometheus.Registerer) *dbMetrics {
func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
m := &dbMetrics{}
m.activeAppenders = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "tsdb_active_appenders",
Help: "Number of currently active appender transactions",
})
m.loadedBlocks = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "tsdb_blocks_loaded",
Help: "Number of currently loaded data blocks",
}, func() float64 {
db.mtx.RLock()
defer db.mtx.RUnlock()
return float64(len(db.blocks))
})
m.reloads = prometheus.NewCounter(prometheus.CounterOpts{
Name: "tsdb_reloads_total",
Help: "Number of times the database reloaded block data from disk.",
})
m.reloadsFailed = prometheus.NewCounter(prometheus.CounterOpts{
Name: "tsdb_reloads_failures_total",
Help: "Number of times the database failed to reload black data from disk.",
})
m.reloadDuration = prometheus.NewSummary(prometheus.SummaryOpts{
Name: "tsdb_reload_duration_seconds",
Help: "Duration of block reloads.",
})
m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
Name: "tsdb_samples_appended_total",
Help: "Total number of appended sampledb.",
@ -140,6 +173,11 @@ func newDBMetrics(r prometheus.Registerer) *dbMetrics {
if r != nil {
r.MustRegister(
m.activeAppenders,
m.loadedBlocks,
m.reloads,
m.reloadsFailed,
m.reloadDuration,
m.samplesAppended,
m.compactionsTriggered,
)
@ -163,14 +201,16 @@ func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options) (db
}
db = &DB{
dir: dir,
logger: l,
metrics: newDBMetrics(r),
opts: opts,
compactc: make(chan struct{}, 1),
donec: make(chan struct{}),
stopc: make(chan struct{}),
dir: dir,
logger: l,
opts: opts,
compactc: make(chan struct{}, 1),
donec: make(chan struct{}),
stopc: make(chan struct{}),
compacting: true,
}
db.metrics = newDBMetrics(db, r)
if !opts.NoLockfile {
absdir, err := filepath.Abs(dir)
if err != nil {
@ -198,6 +238,11 @@ func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options) (db
return db, nil
}
// Dir returns the directory of the database, i.e. the value stored in the
// DB's dir field when it was opened.
func (db *DB) Dir() string {
return db.dir
}
func (db *DB) run() {
defer close(db.donec)
@ -261,6 +306,9 @@ func (db *DB) retentionCutoff() (bool, error) {
}
func (db *DB) compact() (changes bool, err error) {
db.cmtx.Lock()
defer db.cmtx.Unlock()
db.headmtx.RLock()
// Check whether we have pending head blocks that are ready to be persisted.
@ -338,6 +386,8 @@ func retentionCutoff(dir string, mint int64) (bool, error) {
if err != nil {
return false, errors.Wrapf(err, "open directory")
}
defer df.Close()
dirs, err := blockDirs(dir)
if err != nil {
return false, errors.Wrapf(err, "list block dirs %s", dir)
@ -374,7 +424,15 @@ func (db *DB) getBlock(id ulid.ULID) (Block, bool) {
return nil, false
}
func (db *DB) reloadBlocks() error {
func (db *DB) reloadBlocks() (err error) {
defer func(t time.Time) {
if err != nil {
db.metrics.reloadsFailed.Inc()
}
db.metrics.reloads.Inc()
db.metrics.reloadDuration.Observe(time.Since(t).Seconds())
}(time.Now())
var cs []io.Closer
defer func() { closeAll(cs...) }()
@ -418,6 +476,7 @@ func (db *DB) reloadBlocks() error {
if err := validateBlockSequence(blocks); err != nil {
return errors.Wrap(err, "invalid block sequence")
}
// Close all opened blocks that no longer exist after we returned all locks.
for _, b := range db.blocks {
if _, ok := exist[b.Meta().ULID]; !ok {
@ -447,7 +506,7 @@ func validateBlockSequence(bs []Block) error {
prev := bs[0]
for _, b := range bs[1:] {
if b.Meta().MinTime < prev.Meta().MaxTime {
return errors.Errorf("block time ranges overlap", b.Meta().MinTime, prev.Meta().MaxTime)
return errors.Errorf("block time ranges overlap (%d, %d)", b.Meta().MinTime, prev.Meta().MaxTime)
}
}
return nil
@ -478,8 +537,47 @@ func (db *DB) Close() error {
return merr.Err()
}
// DisableCompactions disables compactions.
//
// It does so by acquiring db.cmtx and keeping it locked after returning; the
// lock is only released again by EnableCompactions. Because compact, Delete
// and Snapshot acquire db.cmtx as well, they block for as long as
// compactions stay disabled. Calling it while compactions are already
// disabled is a no-op.
//
// NOTE(review): db.compacting is read and written here without dedicated
// synchronization, so concurrent calls to DisableCompactions and
// EnableCompactions look racy — confirm that callers serialize them.
func (db *DB) DisableCompactions() {
if db.compacting {
db.cmtx.Lock()
db.compacting = false
db.logger.Log("msg", "compactions disabled")
}
}
// EnableCompactions enables compactions.
//
// It releases db.cmtx, which DisableCompactions left locked. Calling it
// while compactions are already enabled is a no-op.
//
// NOTE(review): db.compacting is read and written here without dedicated
// synchronization, so concurrent calls to DisableCompactions and
// EnableCompactions look racy — confirm that callers serialize them.
func (db *DB) EnableCompactions() {
if !db.compacting {
db.cmtx.Unlock()
db.compacting = true
db.logger.Log("msg", "compactions enabled")
}
}
// Snapshot writes the current data to the directory.
//
// Every block currently loaded by the database is asked to snapshot itself
// into dir; the first failure aborts the snapshot and is returned wrapped.
//
// Compactions and deletions are held off via db.cmtx and appenders via
// db.mtx for the duration of the call. The locks are taken in the order
// cmtx, then mtx, which is the same order Delete uses. Taking them the other
// way round could deadlock against a concurrent Delete, and would keep all
// appenders blocked while waiting for a running compaction (or for
// compactions to be re-enabled).
func (db *DB) Snapshot(dir string) error {
	db.cmtx.Lock()
	defer db.cmtx.Unlock()

	db.mtx.Lock() // To block any appenders.
	defer db.mtx.Unlock()

	for _, b := range db.blocks {
		db.logger.Log("msg", "snapshotting block", "block", b)

		if err := b.Snapshot(dir); err != nil {
			return errors.Wrap(err, "error snapshotting headblock")
		}
	}

	return nil
}
// Appender returns a new Appender on the database.
//
// It increments the active-appender gauge and takes a read lock on db.mtx.
// Both are undone by Commit or Rollback of the returned appender, so the
// read lock stays held until one of them is called.
func (db *DB) Appender() Appender {
db.metrics.activeAppenders.Inc()
db.mtx.RLock()
return &dbAppender{db: db}
}
@ -619,6 +717,7 @@ func (db *DB) ensureHead(t int64) error {
}
func (a *dbAppender) Commit() error {
defer a.db.metrics.activeAppenders.Dec()
defer a.db.mtx.RUnlock()
// Commits to partial appenders must be concurrent as concurrent appenders
@ -649,6 +748,7 @@ func (a *dbAppender) Commit() error {
}
func (a *dbAppender) Rollback() error {
defer a.db.metrics.activeAppenders.Dec()
defer a.db.mtx.RUnlock()
var g errgroup.Group
@ -660,6 +760,30 @@ func (a *dbAppender) Rollback() error {
return g.Wait()
}
// Delete implements deletion of metrics.
//
// The deletion of the time range [mint, maxt] for all series selected by ms
// is forwarded concurrently to every block returned by blocksForInterval for
// that range. It waits for all blocks to finish and returns the first error
// reported, if any. Compactions and appenders are locked out meanwhile.
func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error {
	db.cmtx.Lock()
	defer db.cmtx.Unlock()

	db.mtx.Lock()
	defer db.mtx.Unlock()

	var g errgroup.Group

	for _, blk := range db.blocksForInterval(mint, maxt) {
		blk := blk // Pin the block for the goroutine started below.

		g.Go(func() error {
			return blk.Delete(mint, maxt, ms...)
		})
	}

	return g.Wait()
}
// appendable returns a copy of a slice of HeadBlocks that can still be appended to.
func (db *DB) appendable() (r []headBlock) {
switch len(db.heads) {
@ -673,13 +797,8 @@ func (db *DB) appendable() (r []headBlock) {
}
// intervalOverlap reports whether the closed intervals [amin, amax] and
// [bmin, bmax] have at least one point in common. Intervals that merely touch
// at an endpoint count as overlapping.
func intervalOverlap(amin, amax, bmin, bmax int64) bool {
	// Checks Overlap: http://stackoverflow.com/questions/3269434/
	return amin <= bmax && bmin <= amax
}
func intervalContains(min, max, t int64) bool {

View file

@ -22,6 +22,7 @@ func (e *encbuf) putByte(c byte) { e.b = append(e.b, c) }
func (e *encbuf) putBE32int(x int) { e.putBE32(uint32(x)) }
func (e *encbuf) putBE64int(x int) { e.putBE64(uint64(x)) }
func (e *encbuf) putBE64int64(x int64) { e.putBE64(uint64(x)) }
func (e *encbuf) putUvarint32(x uint32) { e.putUvarint64(uint64(x)) }
func (e *encbuf) putUvarint(x int) { e.putUvarint64(uint64(x)) }
@ -71,8 +72,10 @@ type decbuf struct {
e error
}
func (d *decbuf) uvarint() int { return int(d.uvarint64()) }
func (d *decbuf) be32int() int { return int(d.be32()) }
func (d *decbuf) uvarint() int { return int(d.uvarint64()) }
func (d *decbuf) uvarint32() uint32 { return uint32(d.uvarint64()) }
func (d *decbuf) be32int() int { return int(d.be32()) }
func (d *decbuf) be64int64() int64 { return int64(d.be64()) }
func (d *decbuf) uvarintStr() string {
l := d.uvarint64()
@ -140,6 +143,19 @@ func (d *decbuf) be32() uint32 {
return x
}
// byte consumes the first byte of the buffer and returns it.
// It returns 0 without consuming anything if the buffer already carries an
// error; if the buffer is empty, it additionally records errInvalidSize.
func (d *decbuf) byte() byte {
	switch {
	case d.e != nil:
		return 0
	case len(d.b) == 0:
		d.e = errInvalidSize
		return 0
	}

	c := d.b[0]
	d.b = d.b[1:]
	return c
}
func (d *decbuf) decbuf(l int) decbuf {
if d.e != nil {
return decbuf{e: d.e}

View file

@ -69,6 +69,8 @@ type HeadBlock struct {
values map[string]stringset // label names to possible values
postings *memPostings // postings lists for terms
tombstones tombstoneReader
meta BlockMeta
}
@ -97,6 +99,7 @@ func TouchHeadBlock(dir string, mint, maxt int64) (string, error) {
}); err != nil {
return "", err
}
return dir, renameFile(tmp, dir)
}
@ -108,13 +111,14 @@ func OpenHeadBlock(dir string, l log.Logger, wal WAL) (*HeadBlock, error) {
}
h := &HeadBlock{
dir: dir,
wal: wal,
series: []*memSeries{nil}, // 0 is not a valid posting, filled with nil.
hashes: map[uint64][]*memSeries{},
values: map[string]stringset{},
postings: &memPostings{m: make(map[term][]uint32)},
meta: *meta,
dir: dir,
wal: wal,
series: []*memSeries{nil}, // 0 is not a valid posting, filled with nil.
hashes: map[uint64][]*memSeries{},
values: map[string]stringset{},
postings: &memPostings{m: make(map[term][]uint32)},
meta: *meta,
tombstones: newEmptyTombstoneReader(),
}
return h, h.init()
}
@ -122,16 +126,19 @@ func OpenHeadBlock(dir string, l log.Logger, wal WAL) (*HeadBlock, error) {
func (h *HeadBlock) init() error {
r := h.wal.Reader()
for r.Next() {
series, samples := r.At()
seriesFunc := func(series []labels.Labels) error {
for _, lset := range series {
h.create(lset.Hash(), lset)
h.meta.Stats.NumSeries++
}
return nil
}
samplesFunc := func(samples []RefSample) error {
for _, s := range samples {
if int(s.Ref) >= len(h.series) {
return errors.Errorf("unknown series reference %d (max %d); abort WAL restore", s.Ref, len(h.series))
return errors.Errorf("unknown series reference %d (max %d); abort WAL restore",
s.Ref, len(h.series))
}
h.series[s.Ref].append(s.T, s.V)
@ -140,8 +147,24 @@ func (h *HeadBlock) init() error {
}
h.meta.Stats.NumSamples++
}
return nil
}
return errors.Wrap(r.Err(), "consume WAL")
deletesFunc := func(stones []Stone) error {
for _, s := range stones {
for _, itv := range s.intervals {
h.tombstones.add(s.ref, itv)
}
}
return nil
}
if err := r.Read(seriesFunc, samplesFunc, deletesFunc); err != nil {
return errors.Wrap(err, "consume WAL")
}
return nil
}
// inBounds returns true if the given timestamp is within the valid
@ -195,6 +218,114 @@ func (h *HeadBlock) Meta() BlockMeta {
return m
}
// Tombstones returns the TombstoneReader against the block.
// The returned reader is the head block's own in-memory tombstone set, which
// Delete and the WAL restore add to.
func (h *HeadBlock) Tombstones() TombstoneReader {
return h.tombstones
}
// Delete implements headBlock.
//
// It creates a tombstone for every series selected by the matchers ms. The
// tombstone interval is the range [mint, maxt] clamped to the span between
// the start of the series' first chunk and the end of its head chunk. The
// tombstones are logged to the WAL first and only added to the in-memory
// tombstones if that succeeded; the tombstone count in the block meta is
// updated afterwards.
//
// Series that hold no chunks yet are skipped: they have no samples that could
// be deleted and no chunk bounds to clamp the interval against.
func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error {
	ir := h.Index()

	pr := newPostingsReader(ir)
	p, absent := pr.Select(ms...)

	var stones []Stone

Outer:
	for p.Next() {
		ref := p.At()
		s := h.series[ref]

		// Series carrying any of the labels that must be absent do not match.
		for _, abs := range absent {
			if s.lset.Get(abs) != "" {
				continue Outer
			}
		}

		// A series is created without an initial chunk, so it may be empty,
		// e.g. when its samples never made it into the block. Indexing its
		// chunks would panic.
		if len(s.chunks) == 0 {
			continue
		}

		// Delete only until the current values and not beyond.
		tmin, tmax := clampInterval(mint, maxt, s.chunks[0].minTime, s.head().maxTime)
		stones = append(stones, Stone{ref, intervals{{tmin, tmax}}})
	}

	if p.Err() != nil {
		return p.Err()
	}
	if err := h.wal.LogDeletes(stones); err != nil {
		return err
	}

	for _, s := range stones {
		h.tombstones.add(s.ref, s.intervals[0])
	}

	h.meta.Stats.NumTombstones = uint64(len(h.tombstones))
	return nil
}
// Snapshot persists the current state of the headblock to the given directory.
// TODO(gouthamve): Snapshot must be called when there are no active appenders.
// This has been ensured by acquiring a Lock on DB.mtx, but this limitation should
// be removed in the future.
//
// The head block is written out as a regular block under a freshly generated
// ULID: index and chunks are populated into a temporary directory, completed
// with a meta file and an empty tombstones file, and the directory is then
// renamed to its final name inside snapshotDir. A head block without any
// series produces no snapshot and returns nil.
//
// If any step fails, writers that are still open are closed and the temporary
// directory is removed. Since its name derives from a fresh ULID, no later
// call would ever clean it up otherwise.
func (h *HeadBlock) Snapshot(snapshotDir string) (err error) {
	if h.meta.Stats.NumSeries == 0 {
		return nil
	}

	entropy := rand.New(rand.NewSource(time.Now().UnixNano()))
	uid := ulid.MustNew(ulid.Now(), entropy)

	dir := filepath.Join(snapshotDir, uid.String())
	tmp := dir + ".tmp"

	if err := os.RemoveAll(tmp); err != nil {
		return err
	}
	if err := os.MkdirAll(tmp, 0777); err != nil {
		return err
	}

	// closers holds the Close methods of the writers that are currently open.
	// On failure they are invoked before the temporary directory is removed.
	// Errors during this cleanup are ignored on purpose: the error that caused
	// the failure is the one worth reporting.
	var closers []func() error
	defer func() {
		if err == nil {
			return
		}
		for _, closeFn := range closers {
			closeFn()
		}
		os.RemoveAll(tmp)
	}()

	// Populate chunk and index files into temporary directory with
	// data of all blocks.
	chunkw, err := newChunkWriter(chunkDir(tmp))
	if err != nil {
		return errors.Wrap(err, "open chunk writer")
	}
	closers = append(closers, chunkw.Close)

	indexw, err := newIndexWriter(tmp)
	if err != nil {
		return errors.Wrap(err, "open index writer")
	}
	closers = append(closers, indexw.Close)

	meta, err := populateBlock([]Block{h}, indexw, chunkw)
	if err != nil {
		return errors.Wrap(err, "write snapshot")
	}
	meta.ULID = uid

	if err = writeMetaFile(tmp, meta); err != nil {
		return errors.Wrap(err, "write merged meta")
	}

	// The writers are closed explicitly from here on; take each one off the
	// cleanup list first so that it is never closed twice.
	closers = closers[1:]
	if err = chunkw.Close(); err != nil {
		return errors.Wrap(err, "close chunk writer")
	}
	closers = nil
	if err = indexw.Close(); err != nil {
		return errors.Wrap(err, "close index writer")
	}

	// Create an empty tombstones file.
	if err := writeTombstoneFile(tmp, newEmptyTombstoneReader()); err != nil {
		return errors.Wrap(err, "write new tombstones file")
	}

	// Block successfully written, make visible
	if err := renameFile(tmp, dir); err != nil {
		return errors.Wrap(err, "rename block dir")
	}

	return nil
}
// Dir returns the directory of the block.
func (h *HeadBlock) Dir() string { return h.dir }
@ -217,10 +348,12 @@ func (h *HeadBlock) Querier(mint, maxt int64) Querier {
series := h.series[:]
return &blockQuerier{
mint: mint,
maxt: maxt,
index: h.Index(),
chunks: h.Chunks(),
mint: mint,
maxt: maxt,
index: h.Index(),
chunks: h.Chunks(),
tombstones: h.Tombstones(),
postingsMapper: func(p Postings) Postings {
ep := make([]uint32, 0, 64)
@ -388,15 +521,17 @@ func (a *headAppender) AddFast(ref string, t int64, v float64) error {
return nil
}
func (a *headAppender) createSeries() {
func (a *headAppender) createSeries() error {
if len(a.newSeries) == 0 {
return
return nil
}
a.newLabels = make([]labels.Labels, 0, len(a.newSeries))
base0 := len(a.series)
a.mtx.RUnlock()
defer a.mtx.RLock()
a.mtx.Lock()
defer a.mtx.Unlock()
base1 := len(a.series)
@ -416,15 +551,22 @@ func (a *headAppender) createSeries() {
a.create(l.hash, l.labels)
}
a.mtx.Unlock()
a.mtx.RLock()
// Write all new series to the WAL.
if err := a.wal.LogSeries(a.newLabels); err != nil {
return errors.Wrap(err, "WAL log series")
}
return nil
}
func (a *headAppender) Commit() error {
defer atomic.AddUint64(&a.activeWriters, ^uint64(0))
defer putHeadAppendBuffer(a.samples)
defer a.mtx.RUnlock()
a.createSeries()
if err := a.createSeries(); err != nil {
return err
}
// We have to update the refs of samples for series we just created.
for i := range a.samples {
@ -434,11 +576,10 @@ func (a *headAppender) Commit() error {
}
}
// Write all new series and samples to the WAL and add it to the
// Write all new samples to the WAL and add them to the
// in-mem database on success.
if err := a.wal.Log(a.newLabels, a.samples); err != nil {
a.mtx.RUnlock()
return err
if err := a.wal.LogSamples(a.samples); err != nil {
return errors.Wrap(err, "WAL log samples")
}
total := uint64(len(a.samples))
@ -449,8 +590,6 @@ func (a *headAppender) Commit() error {
}
}
a.mtx.RUnlock()
atomic.AddUint64(&a.meta.Stats.NumSamples, total)
atomic.AddUint64(&a.meta.Stats.NumSeries, uint64(len(a.newSeries)))
@ -538,6 +677,7 @@ func (h *headIndexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error
if int(ref) >= len(h.series) {
return nil, nil, ErrNotFound
}
s := h.series[ref]
if s == nil {
return nil, nil, ErrNotFound
@ -584,12 +724,7 @@ func (h *HeadBlock) get(hash uint64, lset labels.Labels) *memSeries {
}
func (h *HeadBlock) create(hash uint64, lset labels.Labels) *memSeries {
s := &memSeries{
lset: lset,
ref: uint32(len(h.series)),
}
// create the initial chunk and appender
s.cut()
s := newMemSeries(lset, uint32(len(h.series)), h.meta.MaxTime)
// Allocate empty space until we can insert at the given index.
h.series = append(h.series, s)
@ -624,15 +759,18 @@ type memSeries struct {
lset labels.Labels
chunks []*memChunk
nextAt int64 // timestamp at which to cut the next chunk.
maxt int64 // maximum timestamp for the series.
lastValue float64
sampleBuf [4]sample
app chunks.Appender // Current appender for the chunk.
}
func (s *memSeries) cut() *memChunk {
func (s *memSeries) cut(mint int64) *memChunk {
c := &memChunk{
chunk: chunks.NewXORChunk(),
minTime: mint,
maxTime: math.MinInt64,
}
s.chunks = append(s.chunks, c)
@ -641,32 +779,47 @@ func (s *memSeries) cut() *memChunk {
if err != nil {
panic(err)
}
s.app = app
return c
}
// newMemSeries returns an empty in-memory series for the label set lset with
// the reference id. maxt is stored as the series' maximum timestamp. No chunk
// is allocated yet, and nextAt starts at the smallest possible timestamp.
func newMemSeries(lset labels.Labels, id uint32, maxt int64) *memSeries {
	return &memSeries{
		lset:   lset,
		ref:    id,
		maxt:   maxt,
		nextAt: math.MinInt64,
	}
}
func (s *memSeries) append(t int64, v float64) bool {
const samplesPerChunk = 120
s.mtx.Lock()
defer s.mtx.Unlock()
var c *memChunk
if s.head().samples > 130 {
c = s.cut()
c.minTime = t
} else {
c = s.head()
// Skip duplicate and out of order samples.
if c.maxTime >= t {
return false
}
if len(s.chunks) == 0 {
c = s.cut(t)
}
c = s.head()
if c.maxTime >= t {
return false
}
if c.samples > samplesPerChunk/4 && t >= s.nextAt {
c = s.cut(t)
}
s.app.Append(t, v)
c.maxTime = t
c.samples++
if c.samples == samplesPerChunk/4 {
s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.maxt)
}
s.lastValue = v
s.sampleBuf[0] = s.sampleBuf[1]
@ -677,6 +830,17 @@ func (s *memSeries) append(t int64, v float64) bool {
return true
}
// computeChunkEndTime estimates the end timestamp of a chunk based on the
// beginning of the chunk (start), its current timestamp (cur) and the upper
// bound up to which data is inserted (max).
// It assumes that the time range is 1/4 full.
func computeChunkEndTime(start, cur, max int64) int64 {
	span := max - start
	// Number of chunks of the projected length (four times the time covered
	// so far) that fit between start and max.
	n := span / (4 * (cur - start + 1))
	if n != 0 {
		return start + span/n
	}
	// Not even one projected chunk fits: end at the upper bound.
	return max
}
func (s *memSeries) iterator(i int) chunks.Iterator {
c := s.chunks[i]

View file

@ -39,6 +39,8 @@ const (
indexFormatV1 = 1
)
const indexFilename = "index"
const compactionPageBytes = minSectorSize * 64
type indexWriterSeries struct {
@ -138,7 +140,7 @@ func newIndexWriter(dir string) (*indexWriter, error) {
if err != nil {
return nil, err
}
f, err := os.OpenFile(filepath.Join(dir, "index"), os.O_CREATE|os.O_WRONLY, 0666)
f, err := os.OpenFile(filepath.Join(dir, indexFilename), os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
return nil, err
}
@ -569,11 +571,7 @@ func newIndexReader(dir string) (*indexReader, error) {
return nil, errors.Wrap(err, "read label index table")
}
r.postings, err = r.readOffsetTable(r.toc.postingsTable)
if err != nil {
return nil, errors.Wrap(err, "read postings table")
}
return r, nil
return r, errors.Wrap(err, "read postings table")
}
func (r *indexReader) readTOC() error {

View file

@ -126,8 +126,9 @@ func (q *querier) Close() error {
// blockQuerier provides querying access to a single block database.
type blockQuerier struct {
index IndexReader
chunks ChunkReader
index IndexReader
chunks ChunkReader
tombstones TombstoneReader
postingsMapper func(Postings) Postings
@ -149,6 +150,8 @@ func (q *blockQuerier) Select(ms ...labels.Matcher) SeriesSet {
p: p,
index: q.index,
absent: absent,
tombstones: q.tombstones,
},
chunks: q.chunks,
mint: q.mint,
@ -366,29 +369,35 @@ func (s *mergedSeriesSet) Next() bool {
type chunkSeriesSet interface {
Next() bool
At() (labels.Labels, []*ChunkMeta)
At() (labels.Labels, []*ChunkMeta, intervals)
Err() error
}
// baseChunkSeries loads the label set and chunk references for a postings
// list from an index. It filters out series that have labels set that should be unset.
type baseChunkSeries struct {
p Postings
index IndexReader
absent []string // labels that must be unset in results.
p Postings
index IndexReader
tombstones TombstoneReader
absent []string // labels that must be unset in results.
lset labels.Labels
chks []*ChunkMeta
err error
lset labels.Labels
chks []*ChunkMeta
intervals intervals
err error
}
func (s *baseChunkSeries) At() (labels.Labels, []*ChunkMeta) { return s.lset, s.chks }
func (s *baseChunkSeries) Err() error { return s.err }
func (s *baseChunkSeries) At() (labels.Labels, []*ChunkMeta, intervals) {
return s.lset, s.chks, s.intervals
}
func (s *baseChunkSeries) Err() error { return s.err }
func (s *baseChunkSeries) Next() bool {
Outer:
for s.p.Next() {
lset, chunks, err := s.index.Series(s.p.At())
ref := s.p.At()
lset, chunks, err := s.index.Series(ref)
if err != nil {
s.err = err
return false
@ -403,6 +412,19 @@ Outer:
s.lset = lset
s.chks = chunks
s.intervals = s.tombstones.Get(s.p.At())
if len(s.intervals) > 0 {
// Only those chunks that are not entirely deleted.
chks := make([]*ChunkMeta, 0, len(s.chks))
for _, chk := range s.chks {
if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(s.intervals)) {
chks = append(chks, chk)
}
}
s.chks = chks
}
return true
}
@ -420,17 +442,20 @@ type populatedChunkSeries struct {
chunks ChunkReader
mint, maxt int64
err error
chks []*ChunkMeta
lset labels.Labels
err error
chks []*ChunkMeta
lset labels.Labels
intervals intervals
}
func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta) { return s.lset, s.chks }
func (s *populatedChunkSeries) Err() error { return s.err }
func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta, intervals) {
return s.lset, s.chks, s.intervals
}
func (s *populatedChunkSeries) Err() error { return s.err }
func (s *populatedChunkSeries) Next() bool {
for s.set.Next() {
lset, chks := s.set.At()
lset, chks, dranges := s.set.At()
for len(chks) > 0 {
if chks[0].MaxTime >= s.mint {
@ -457,6 +482,7 @@ func (s *populatedChunkSeries) Next() bool {
s.lset = lset
s.chks = chks
s.intervals = dranges
return true
}
@ -477,8 +503,15 @@ type blockSeriesSet struct {
func (s *blockSeriesSet) Next() bool {
for s.set.Next() {
lset, chunks := s.set.At()
s.cur = &chunkSeries{labels: lset, chunks: chunks, mint: s.mint, maxt: s.maxt}
lset, chunks, dranges := s.set.At()
s.cur = &chunkSeries{
labels: lset,
chunks: chunks,
mint: s.mint,
maxt: s.maxt,
intervals: dranges,
}
return true
}
if s.set.Err() != nil {
@ -497,6 +530,8 @@ type chunkSeries struct {
chunks []*ChunkMeta // in-order chunk refs
mint, maxt int64
intervals intervals
}
func (s *chunkSeries) Labels() labels.Labels {
@ -504,7 +539,7 @@ func (s *chunkSeries) Labels() labels.Labels {
}
func (s *chunkSeries) Iterator() SeriesIterator {
return newChunkSeriesIterator(s.chunks, s.mint, s.maxt)
return newChunkSeriesIterator(s.chunks, s.intervals, s.mint, s.maxt)
}
// SeriesIterator iterates over the data of a time series.
@ -601,16 +636,24 @@ type chunkSeriesIterator struct {
cur chunks.Iterator
maxt, mint int64
intervals intervals
}
func newChunkSeriesIterator(cs []*ChunkMeta, mint, maxt int64) *chunkSeriesIterator {
func newChunkSeriesIterator(cs []*ChunkMeta, dranges intervals, mint, maxt int64) *chunkSeriesIterator {
it := cs[0].Chunk.Iterator()
if len(dranges) > 0 {
it = &deletedIterator{it: it, intervals: dranges}
}
return &chunkSeriesIterator{
chunks: cs,
i: 0,
cur: cs[0].Chunk.Iterator(),
cur: it,
mint: mint,
maxt: maxt,
intervals: dranges,
}
}
@ -645,6 +688,9 @@ func (it *chunkSeriesIterator) Seek(t int64) (ok bool) {
it.i = x
it.cur = it.chunks[x].Chunk.Iterator()
if len(it.intervals) > 0 {
it.cur = &deletedIterator{it: it.cur, intervals: it.intervals}
}
for it.cur.Next() {
t0, _ := it.cur.At()
@ -676,6 +722,9 @@ func (it *chunkSeriesIterator) Next() bool {
it.i++
it.cur = it.chunks[it.i].Chunk.Iterator()
if len(it.intervals) > 0 {
it.cur = &deletedIterator{it: it.cur, intervals: it.intervals}
}
return it.Next()
}

223
vendor/github.com/prometheus/tsdb/tombstones.go generated vendored Normal file
View file

@ -0,0 +1,223 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tsdb
import (
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"io/ioutil"
"os"
"path/filepath"
"github.com/pkg/errors"
)
const tombstoneFilename = "tombstones"
const (
// MagicTombstone is 4 bytes at the head of a tombstone file.
MagicTombstone = 0x130BA30
tombstoneFormatV1 = 1
)
// writeTombstoneFile persists the tombstones in tr to the tombstone file
// inside dir.
//
// The data is first written to "<path>.tmp". That file is synced and closed
// before it is renamed over the final path, so the rename never operates on
// a file that is still open and never publishes unflushed data.
//
// File layout: the 4-byte MagicTombstone and the 1-byte format flag, followed
// by one record per deleted interval (ref, mint, maxt) and finally the CRC32
// (Castagnoli) checksum of the records. The header is not part of the
// checksum.
//
// It returns the first error encountered while creating, writing, syncing,
// closing or renaming the file.
func writeTombstoneFile(dir string, tr tombstoneReader) error {
	path := filepath.Join(dir, tombstoneFilename)
	tmp := path + ".tmp"
	hash := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	f, err := os.Create(tmp)
	if err != nil {
		return err
	}
	// Only closes the file on early error returns; on the success path f is
	// closed explicitly below and reset to nil.
	defer func() {
		if f != nil {
			f.Close()
		}
	}()

	buf := encbuf{b: make([]byte, 3*binary.MaxVarintLen64)}
	buf.reset()
	// Write the meta.
	buf.putBE32(MagicTombstone)
	buf.putByte(tombstoneFormatV1)
	_, err = f.Write(buf.get())
	if err != nil {
		return err
	}

	// Records go to the file and the checksum at the same time.
	mw := io.MultiWriter(f, hash)

	for k, v := range tr {
		for _, itv := range v {
			buf.reset()
			buf.putUvarint32(k)
			buf.putVarint64(itv.mint)
			buf.putVarint64(itv.maxt)

			_, err = mw.Write(buf.get())
			if err != nil {
				return err
			}
		}
	}

	_, err = f.Write(hash.Sum(nil))
	if err != nil {
		return err
	}

	// Flush to stable storage and close before renaming; a failure of either
	// must not result in the temporary file replacing the existing one.
	if err = f.Sync(); err != nil {
		return err
	}
	err = f.Close()
	f = nil
	if err != nil {
		return err
	}

	return renameFile(tmp, path)
}
// Stone holds the information on the posting and time-range
// that is deleted.
type Stone struct {
// ref is the reference of the posting the deletion applies to.
ref uint32
// intervals holds the deleted time ranges for ref.
intervals intervals
}
// TombstoneReader provides lookup of tombstones by reference.
type TombstoneReader interface {
// Get returns the deleted intervals recorded for ref.
Get(ref uint32) intervals
}
// readTombstones loads the tombstone file from dir and returns its contents
// as a tombstoneReader.
//
// The file has to start with MagicTombstone and the tombstoneFormatV1 flag
// and end with a 4-byte big-endian CRC32 (Castagnoli) checksum. An error is
// returned if the file cannot be read, is too short, carries an unexpected
// header, fails the checksum verification or contains a malformed record.
func readTombstones(dir string) (tombstoneReader, error) {
	b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename))
	if err != nil {
		return nil, err
	}

	// The smallest valid file is the 5-byte header (4-byte magic plus 1-byte
	// format flag) followed by the 4-byte checksum. Checking for both up
	// front reports truncated files as a size error rather than depending on
	// how decbuf deals with an incomplete header.
	if len(b) < 5+4 {
		return nil, errors.Wrap(errInvalidSize, "tombstones header")
	}

	d := &decbuf{b: b[:len(b)-4]} // 4 for the checksum.
	if mg := d.be32(); mg != MagicTombstone {
		return nil, fmt.Errorf("invalid magic number %x", mg)
	}
	if flag := d.byte(); flag != tombstoneFormatV1 {
		return nil, fmt.Errorf("invalid tombstone format %x", flag)
	}

	if d.err() != nil {
		return nil, d.err()
	}

	// Verify checksum.
	// NOTE(review): d.get() is presumably the not yet decoded remainder, i.e.
	// everything after the header — confirm against decbuf.
	hash := crc32.New(crc32.MakeTable(crc32.Castagnoli))
	if _, err := hash.Write(d.get()); err != nil {
		return nil, errors.Wrap(err, "write to hash")
	}
	if binary.BigEndian.Uint32(b[len(b)-4:]) != hash.Sum32() {
		return nil, errors.New("checksum did not match")
	}

	stonesMap := newEmptyTombstoneReader()
	for d.len() > 0 {
		k := d.uvarint32()
		mint := d.varint64()
		maxt := d.varint64()
		if d.err() != nil {
			return nil, d.err()
		}

		stonesMap.add(k, interval{mint, maxt})
	}

	return newTombstoneReader(stonesMap), nil
}
// tombstoneReader is a map-backed TombstoneReader that associates a reference
// with the intervals deleted for it.
type tombstoneReader map[uint32]intervals

// newTombstoneReader returns ts as a tombstoneReader. The map is not copied.
func newTombstoneReader(ts map[uint32]intervals) tombstoneReader {
	var tr tombstoneReader = ts
	return tr
}

// newEmptyTombstoneReader returns a tombstoneReader without any entries.
func newEmptyTombstoneReader() tombstoneReader {
	return tombstoneReader{}
}

// Get returns the intervals stored for ref, or nil if there are none.
func (t tombstoneReader) Get(ref uint32) intervals {
	itvs := t[ref]
	return itvs
}

// add adds itv to the intervals stored for ref.
func (t tombstoneReader) add(ref uint32, itv interval) {
	merged := t[ref].add(itv)
	t[ref] = merged
}
// interval is a closed time range [mint, maxt].
type interval struct {
	mint, maxt int64
}

// inBounds reports whether t lies within the interval, both ends included.
func (tr interval) inBounds(t int64) bool {
	return t >= tr.mint && t <= tr.maxt
}

// isSubrange reports whether tr is fully contained in a single element of
// dranges.
func (tr interval) isSubrange(dranges intervals) bool {
	for _, r := range dranges {
		if r.inBounds(tr.mint) && r.inBounds(tr.maxt) {
			return true
		}
	}
	return false
}

// intervals is a list of time ranges.
type intervals []interval

// add adds the new time-range to the existing ones and returns the result.
// The existing ones must be sorted and must not overlap.
//
// Every existing interval that overlaps n or is directly adjacent to it
// (timestamps are integers, so [1,2] and [3,4] cover a contiguous range) is
// merged with n into a single interval. This includes existing intervals
// that n covers entirely.
//
// The backing array of the receiver may be modified; callers have to use the
// returned slice.
func (itvs intervals) add(n interval) intervals {
	// lo is the first interval that ends at or after n.mint-1, i.e. the first
	// one that is not strictly before n. The +1 cannot overflow as it is only
	// evaluated when maxt < n.mint.
	lo := 0
	for lo < len(itvs) && itvs[lo].maxt < n.mint && itvs[lo].maxt+1 != n.mint {
		lo++
	}
	// hi is the first interval at or after lo that starts after n.maxt+1,
	// i.e. the first one that is strictly after n. The -1 cannot overflow as
	// it is only evaluated when mint > n.maxt.
	hi := lo
	for hi < len(itvs) && (itvs[hi].mint <= n.maxt || itvs[hi].mint-1 == n.maxt) {
		hi++
	}

	// Nothing touches n: insert it at position lo.
	if lo == hi {
		if lo == len(itvs) {
			return append(itvs, n)
		}
		out := make(intervals, 0, len(itvs)+1)
		out = append(out, itvs[:lo]...)
		out = append(out, n)
		return append(out, itvs[lo:]...)
	}

	// itvs[lo:hi] all touch n: collapse them and n into one interval.
	merged := n
	if itvs[lo].mint < merged.mint {
		merged.mint = itvs[lo].mint
	}
	if itvs[hi-1].maxt > merged.maxt {
		merged.maxt = itvs[hi-1].maxt
	}
	itvs[lo] = merged
	return append(itvs[:lo+1], itvs[hi:]...)
}

View file

@ -46,8 +46,18 @@ const (
WALEntrySymbols WALEntryType = 1
WALEntrySeries WALEntryType = 2
WALEntrySamples WALEntryType = 3
WALEntryDeletes WALEntryType = 4
)
// SamplesCB is the callback after reading samples.
// It receives the samples decoded from one WAL entry.
type SamplesCB func([]RefSample) error
// SeriesCB is the callback after reading series.
// It receives the label sets decoded from one WAL entry.
type SeriesCB func([]labels.Labels) error
// DeletesCB is the callback after reading deletes.
// It receives the stones decoded from one WAL entry.
type DeletesCB func([]Stone) error
// SegmentWAL is a write ahead log for series data.
type SegmentWAL struct {
mtx sync.Mutex
@ -71,15 +81,15 @@ type SegmentWAL struct {
// It must be completely read before new entries are logged.
type WAL interface {
Reader() WALReader
Log([]labels.Labels, []RefSample) error
LogSeries([]labels.Labels) error
LogSamples([]RefSample) error
LogDeletes([]Stone) error
Close() error
}
// WALReader reads entries from a WAL.
type WALReader interface {
At() ([]labels.Labels, []RefSample)
Next() bool
Err() error
Read(SeriesCB, SamplesCB, DeletesCB) error
}
// RefSample is a timestamp/value pair associated with a reference to a series.
@ -141,13 +151,40 @@ func (w *SegmentWAL) Reader() WALReader {
}
// Log writes a batch of new series labels and samples to the log.
func (w *SegmentWAL) Log(series []labels.Labels, samples []RefSample) error {
//func (w *SegmentWAL) Log(series []labels.Labels, samples []RefSample) error {
//return nil
//}
// LogSeries writes a batch of new series labels to the log.
// When flushInterval is not positive the WAL is synced right away and the
// result of Sync is returned.
func (w *SegmentWAL) LogSeries(series []labels.Labels) error {
	err := w.encodeSeries(series)
	if err != nil {
		return err
	}
	if w.flushInterval > 0 {
		return nil
	}
	return w.Sync()
}
// LogSamples writes a batch of new samples to the log.
// When flushInterval is not positive the WAL is synced right away and the
// result of Sync is returned.
func (w *SegmentWAL) LogSamples(samples []RefSample) error {
	err := w.encodeSamples(samples)
	if err != nil {
		return err
	}
	if w.flushInterval > 0 {
		return nil
	}
	return w.Sync()
}
// LogDeletes writes a batch of new deletes to the log.
func (w *SegmentWAL) LogDeletes(stones []Stone) error {
if err := w.encodeDeletes(stones); err != nil {
return err
}
if w.flushInterval <= 0 {
return w.Sync()
}
@ -369,6 +406,7 @@ func (w *SegmentWAL) entry(et WALEntryType, flag byte, buf []byte) error {
const (
walSeriesSimple = 1
walSamplesSimple = 1
walDeletesSimple = 1
)
var walBuffers = sync.Pool{}
@ -445,6 +483,23 @@ func (w *SegmentWAL) encodeSamples(samples []RefSample) error {
return w.entry(WALEntrySamples, walSamplesSimple, buf)
}
// encodeDeletes serializes stones into a single WAL entry of type
// WALEntryDeletes with the walDeletesSimple flag.
// Each interval of each stone is written as its own record consisting of the
// stone's ref followed by the interval's mint and maxt.
func (w *SegmentWAL) encodeDeletes(stones []Stone) error {
	// scratch holds one record at a time and is reset for every interval.
	scratch := &encbuf{b: make([]byte, 2*binary.MaxVarintLen64)}
	out := getWALBuffer()

	for i := range stones {
		ref := stones[i].ref
		for _, itv := range stones[i].intervals {
			scratch.reset()
			scratch.putUvarint32(ref)
			scratch.putVarint64(itv.mint)
			scratch.putVarint64(itv.maxt)
			out = append(out, scratch.get()...)
		}
	}

	return w.entry(WALEntryDeletes, walDeletesSimple, out)
}
// walReader decodes and emits write ahead log entries.
type walReader struct {
logger log.Logger
@ -454,9 +509,11 @@ type walReader struct {
buf []byte
crc32 hash.Hash32
err error
labels []labels.Labels
samples []RefSample
curType WALEntryType
curFlag byte
curBuf []byte
err error
}
func newWALReader(w *SegmentWAL, l log.Logger) *walReader {
@ -471,18 +528,41 @@ func newWALReader(w *SegmentWAL, l log.Logger) *walReader {
}
}
// At returns the last decoded entry of labels or samples.
// The returned slices are only valid until the next call to Next(). Their elements
// have to be copied to preserve them.
func (r *walReader) At() ([]labels.Labels, []RefSample) {
return r.labels, r.samples
}
// Err returns the last error the reader encountered.
func (r *walReader) Err() error {
return r.err
}
// Read iterates over the remaining WAL entries and passes the decoded content
// of each one to the callback matching its type: seriesf for series entries,
// samplesf for sample entries and deletesf for delete entries. Entries of any
// other type are skipped.
//
// Reading stops at the first error, which is returned. That is either a
// decoding error or an error returned by one of the callbacks. If all entries
// were consumed, the reader's own error (r.Err()) is returned.
func (r *walReader) Read(seriesf SeriesCB, samplesf SamplesCB, deletesf DeletesCB) error {
	for r.next() {
		et, flag, b := r.at()
		// In decoding below we never return a walCorruptionErr for now.
		// Those should generally be caught by entry decoding before.
		switch et {
		case WALEntrySeries:
			s, err := r.decodeSeries(flag, b)
			if err != nil {
				return err
			}
			// A failing callback must not go unnoticed by the caller.
			if err := seriesf(s); err != nil {
				return err
			}
		case WALEntrySamples:
			s, err := r.decodeSamples(flag, b)
			if err != nil {
				return err
			}
			if err := samplesf(s); err != nil {
				return err
			}
		case WALEntryDeletes:
			s, err := r.decodeDeletes(flag, b)
			if err != nil {
				return err
			}
			if err := deletesf(s); err != nil {
				return err
			}
		}
	}

	return r.Err()
}
// nextEntry retrieves the next entry. It is also used as a testing hook.
func (r *walReader) nextEntry() (WALEntryType, byte, []byte, error) {
if r.cur >= len(r.wal.files) {
@ -505,12 +585,13 @@ func (r *walReader) nextEntry() (WALEntryType, byte, []byte, error) {
return et, flag, b, err
}
// Next returns decodes the next entry pair and returns true
// if it was succesful.
func (r *walReader) Next() bool {
r.labels = r.labels[:0]
r.samples = r.samples[:0]
// at returns the current entry as held in curType, curFlag and curBuf.
func (r *walReader) at() (WALEntryType, byte, []byte) {
return r.curType, r.curFlag, r.curBuf
}
// next decodes the next entry pair and returns true
// if it was successful.
func (r *walReader) next() bool {
if r.cur >= len(r.wal.files) {
return false
}
@ -537,7 +618,7 @@ func (r *walReader) Next() bool {
return false
}
r.cur++
return r.Next()
return r.next()
}
if err != nil {
r.err = err
@ -548,19 +629,9 @@ func (r *walReader) Next() bool {
return false
}
// In decoding below we never return a walCorruptionErr for now.
// Those should generally be catched by entry decoding before.
switch et {
case WALEntrySamples:
if err := r.decodeSamples(flag, b); err != nil {
r.err = err
}
case WALEntrySeries:
if err := r.decodeSeries(flag, b); err != nil {
r.err = err
}
}
r.curType = et
r.curFlag = flag
r.curBuf = b
return r.err == nil
}
@ -617,7 +688,7 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) {
if etype == 0 {
return 0, 0, nil, io.EOF
}
if etype != WALEntrySeries && etype != WALEntrySamples {
if etype != WALEntrySeries && etype != WALEntrySamples && etype != WALEntryDeletes {
return 0, 0, nil, walCorruptionErrf("invalid entry type %d", etype)
}
@ -644,11 +715,12 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) {
return etype, flag, buf, nil
}
func (r *walReader) decodeSeries(flag byte, b []byte) error {
func (r *walReader) decodeSeries(flag byte, b []byte) ([]labels.Labels, error) {
series := []labels.Labels{}
for len(b) > 0 {
l, n := binary.Uvarint(b)
if n < 1 {
return errors.Wrap(errInvalidSize, "number of labels")
return nil, errors.Wrap(errInvalidSize, "number of labels")
}
b = b[n:]
lset := make(labels.Labels, l)
@ -656,27 +728,29 @@ func (r *walReader) decodeSeries(flag byte, b []byte) error {
for i := 0; i < int(l); i++ {
nl, n := binary.Uvarint(b)
if n < 1 || len(b) < n+int(nl) {
return errors.Wrap(errInvalidSize, "label name")
return nil, errors.Wrap(errInvalidSize, "label name")
}
lset[i].Name = string(b[n : n+int(nl)])
b = b[n+int(nl):]
vl, n := binary.Uvarint(b)
if n < 1 || len(b) < n+int(vl) {
return errors.Wrap(errInvalidSize, "label value")
return nil, errors.Wrap(errInvalidSize, "label value")
}
lset[i].Value = string(b[n : n+int(vl)])
b = b[n+int(vl):]
}
r.labels = append(r.labels, lset)
series = append(series, lset)
}
return nil
return series, nil
}
func (r *walReader) decodeSamples(flag byte, b []byte) error {
func (r *walReader) decodeSamples(flag byte, b []byte) ([]RefSample, error) {
samples := []RefSample{}
if len(b) < 16 {
return errors.Wrap(errInvalidSize, "header length")
return nil, errors.Wrap(errInvalidSize, "header length")
}
var (
baseRef = binary.BigEndian.Uint64(b)
@ -689,7 +763,7 @@ func (r *walReader) decodeSamples(flag byte, b []byte) error {
dref, n := binary.Varint(b)
if n < 1 {
return errors.Wrap(errInvalidSize, "sample ref delta")
return nil, errors.Wrap(errInvalidSize, "sample ref delta")
}
b = b[n:]
@ -697,18 +771,36 @@ func (r *walReader) decodeSamples(flag byte, b []byte) error {
dtime, n := binary.Varint(b)
if n < 1 {
return errors.Wrap(errInvalidSize, "sample timestamp delta")
return nil, errors.Wrap(errInvalidSize, "sample timestamp delta")
}
b = b[n:]
smpl.T = baseTime + dtime
if len(b) < 8 {
return errors.Wrapf(errInvalidSize, "sample value bits %d", len(b))
return nil, errors.Wrapf(errInvalidSize, "sample value bits %d", len(b))
}
smpl.V = float64(math.Float64frombits(binary.BigEndian.Uint64(b)))
b = b[8:]
r.samples = append(r.samples, smpl)
samples = append(samples, smpl)
}
return nil
return samples, nil
}
// decodeDeletes decodes b into a list of stones. Every record in b consists
// of a ref followed by the mint and maxt of one interval and yields one Stone
// holding exactly that interval. The flag argument is not used.
// It returns the decoder's error as soon as a record cannot be decoded.
func (r *walReader) decodeDeletes(flag byte, b []byte) ([]Stone, error) {
	dec := &decbuf{b: b}
	out := []Stone{}

	for dec.len() > 0 {
		ref := dec.uvarint32()
		mint := dec.varint64()
		maxt := dec.varint64()
		if dec.err() != nil {
			return nil, dec.err()
		}
		out = append(out, Stone{
			ref:       ref,
			intervals: intervals{{mint, maxt}},
		})
	}
	return out, nil
}

6
vendor/vendor.json vendored
View file

@ -751,10 +751,10 @@
"revisionTime": "2016-04-11T19:08:41Z"
},
{
"checksumSHA1": "q2GxuO+ppV/gqBir/Z6ijx7aOOU=",
"checksumSHA1": "XXXDHMZe3Y3gosaF/1staHm3INc=",
"path": "github.com/prometheus/tsdb",
"revision": "4f2eb2057ee0a7f2b984503886bff970a9dab1a8",
"revisionTime": "2017-05-22T06:49:09Z"
"revision": "9963a4c7c3b2a742e00a63c54084b051e3174b06",
"revisionTime": "2017-06-12T09:17:49Z"
},
{
"checksumSHA1": "9EH3v+JdbikCUJAgD4VEOPIaWfs=",