Fix bug of unsorted postings lists being created

The former approach created unordered postings lists in two ways: iterating
over the map of new series happens in unsorted order (which would be fixable),
and concurrent writers could interleave the creation of new series.

We switch back to generating ephemeral references that are only valid within
a single batch. Newly created series have to be set again upon the next insert.
Fabian Reinartz 2017-01-13 16:14:40 +01:00
parent c7f5590a71
commit 1c80c33e72
4 changed files with 75 additions and 73 deletions
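The core of the change: instead of assigning final series IDs while a batch is still open (where map iteration order and interleaved writers can produce out-of-order IDs), the appender hands out a random, batch-local reference with a marker bit set, and only resolves it to the real, sequentially assigned ID at commit time. Below is a minimal, self-contained sketch of that lifecycle; the appender type, the ephemeralFlag name, and the string-typed labels are illustrative stand-ins, not the real API.

package main

import (
	"fmt"
	"math/rand"
)

// ephemeralFlag marks references that are only valid within one batch.
const ephemeralFlag = 1 << 32

type appender struct {
	series    []string          // stands in for the head block's series slice
	newSeries map[uint64]string // batch-local ref -> labels of a pending series
	refmap    map[uint64]uint64 // batch-local ref -> final ref, filled at commit
}

// setSeries hands out a random batch-local reference, as the diff does
// with rand.Int31() | (1 << 32).
func (a *appender) setSeries(lset string) uint64 {
	ref := uint64(rand.Int31()) | ephemeralFlag
	a.newSeries[ref] = lset
	return ref
}

// commit creates pending series with sequential final IDs and rewrites
// ephemeral sample refs through refmap, mirroring createSeries/Commit.
func (a *appender) commit(sampleRefs []uint64) {
	for ref, lset := range a.newSeries {
		a.refmap[ref] = uint64(len(a.series))
		a.series = append(a.series, lset)
	}
	for i, ref := range sampleRefs {
		if ref&ephemeralFlag > 0 {
			sampleRefs[i] = a.refmap[ref]
		}
	}
}

func main() {
	a := &appender{newSeries: map[uint64]string{}, refmap: map[uint64]uint64{}}
	refs := []uint64{a.setSeries(`{job="a"}`)}
	fmt.Printf("ephemeral: %#x\n", refs[0])
	a.commit(refs)
	fmt.Printf("final:     %#x\n", refs[0])
}

Because final IDs are handed out under the write lock in creation order, every postings list receives its IDs in increasing order, which is what allows the simplification in postings.go further down.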

db.go (10 lines changed)

@@ -352,7 +352,7 @@ func (a *dbAppender) SetSeries(lset labels.Labels) (uint64, error) {
 	if err != nil {
 		return 0, err
 	}
-	return ref | (uint64(a.gen) << 32), nil
+	return ref | (uint64(a.gen) << 40), nil
 }

 func (a *dbAppender) setSeries(hash uint64, lset labels.Labels) (uint64, error) {
@@ -360,13 +360,13 @@ func (a *dbAppender) setSeries(hash uint64, lset labels.Labels) (uint64, error)
 	if err != nil {
 		return 0, err
 	}
-	return ref | (uint64(a.gen) << 32), nil
+	return ref | (uint64(a.gen) << 40), nil
 }

 func (a *dbAppender) Add(ref uint64, t int64, v float64) error {
 	// We store the head generation in the 4th byte and use it to reject
 	// stale references.
-	gen := uint8((ref << 24) >> 56)
+	gen := uint8((ref << 16) >> 56)

 	if gen != a.gen {
 		return errNotFound
@@ -647,11 +647,11 @@ func (a *partitionedAppender) SetSeries(lset labels.Labels) (uint64, error) {
 	if err != nil {
 		return 0, err
 	}
-	return ref | (p << 40), nil
+	return ref | (p << 48), nil
 }

 func (a *partitionedAppender) Add(ref uint64, t int64, v float64) error {
-	p := uint8((ref << 16) >> 56)
+	p := uint8((ref << 8) >> 56)

 	return a.partitions[p].Add(ref, t, v)
 }
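Taken together, the new shift constants imply the following layout for a 64-bit reference, as far as one can read it off the diff: bits 0-31 hold the head-local series index, bit 32 flags batch-local references, bits 40-47 carry the head generation, and bits 48-55 the partition. The pack/unpack helpers below are illustrative, not part of the codebase.

package main

import "fmt"

// Reference layout implied by the new shifts (a reading of the diff,
// not an authoritative spec):
//
//   bits  0-31  series index within the head block
//   bit   32    set for batch-local (ephemeral) references
//   bits 40-47  head generation (dbAppender)
//   bits 48-55  partition number (partitionedAppender)

func pack(index uint32, gen, partition uint8) uint64 {
	return uint64(index) | uint64(gen)<<40 | uint64(partition)<<48
}

func unpack(ref uint64) (index uint32, gen, partition uint8) {
	// The same shift pairs the diff uses: the left shift discards
	// everything above the byte of interest, the right shift isolates it.
	gen = uint8((ref << 16) >> 56)
	partition = uint8((ref << 8) >> 56)
	index = uint32(ref) // low 4 bytes
	return
}

func main() {
	ref := pack(7, 3, 2)
	fmt.Println(unpack(ref)) // 7 3 2
}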

head.go (105 lines changed)

@@ -3,9 +3,9 @@ package tsdb
 import (
 	"errors"
 	"math"
+	"math/rand"
 	"sort"
 	"sync"
-	"sync/atomic"
 	"time"

 	"github.com/bradfitz/slice"
@@ -29,8 +29,6 @@ type headBlock struct {
 	// to their chunk descs.
 	hashes map[uint64][]*memSeries

-	nextSeriesID uint64
-
 	values   map[string]stringset // label names to possible values
 	postings *memPostings         // postings lists for terms
@@ -62,11 +60,10 @@ func openHeadBlock(dir string, l log.Logger) (*headBlock, error) {
 	err = wal.ReadAll(&walHandler{
 		series: func(lset labels.Labels) {
-			b.create(uint32(b.nextSeriesID), lset.Hash(), lset)
-			b.nextSeriesID++
+			b.create(lset.Hash(), lset)
 			b.stats.SeriesCount++
 		},
-		sample: func(s hashedSample) {
+		sample: func(s refdSample) {
 			si := s.ref
 			cd := b.series[si]
@@ -112,27 +109,27 @@ func (h *headBlock) Appender() Appender {

 var headPool = sync.Pool{}

-func getHeadAppendBuffer() []hashedSample {
+func getHeadAppendBuffer() []refdSample {
 	b := headPool.Get()
 	if b == nil {
-		return make([]hashedSample, 0, 512)
+		return make([]refdSample, 0, 512)
 	}
-	return b.([]hashedSample)
+	return b.([]refdSample)
 }

-func putHeadAppendBuffer(b []hashedSample) {
+func putHeadAppendBuffer(b []refdSample) {
 	headPool.Put(b[:0])
 }

 type headAppender struct {
 	*headBlock

-	newSeries map[uint32]hashedLabels
-	newHashes map[uint64]uint32
+	newSeries map[uint64]hashedLabels
+	newHashes map[uint64]uint64
+	refmap    map[uint64]uint64
 	newLabels []labels.Labels
-	newRefs   []uint32

-	samples []hashedSample
+	samples []refdSample
 }

 type hashedLabels struct {
@@ -140,6 +137,12 @@ type hashedLabels struct {
 	labels labels.Labels
 }

+type refdSample struct {
+	ref uint64
+	t   int64
+	v   float64
+}
+
 func (a *headAppender) SetSeries(lset labels.Labels) (uint64, error) {
 	return a.setSeries(lset.Hash(), lset)
 }
@@ -152,35 +155,41 @@ func (a *headAppender) setSeries(hash uint64, lset labels.Labels) (uint64, error
 		return uint64(ref), nil
 	}

-	id := atomic.AddUint64(&a.nextSeriesID, 1) - 1
-	if a.newSeries == nil {
-		a.newSeries = map[uint32]hashedLabels{}
-		a.newHashes = map[uint64]uint32{}
-	}
-	a.newSeries[uint32(id)] = hashedLabels{hash: hash, labels: lset}
-	a.newHashes[hash] = uint32(id)
-	a.newRefs = append(a.newRefs, uint32(id))
-
-	return id, nil
+	// We only know the actual reference after committing. We generate an
+	// intermediate reference only valid for this batch.
+	// It is indicated by the LSB of the 4th byte being set to 1.
+	// We use a random ID to avoid collisions when new series are created
+	// in two subsequent batches. (TODO(fabxc): safe enough?)
+	ref := uint64(rand.Int31()) | (1 << 32)
+
+	if a.newSeries == nil {
+		a.newSeries = map[uint64]hashedLabels{}
+		a.newHashes = map[uint64]uint64{}
+		a.refmap = map[uint64]uint64{}
+	}
+	a.newSeries[ref] = hashedLabels{hash: hash, labels: lset}
+	a.newHashes[hash] = ref
+
+	return ref, nil
 }

 func (a *headAppender) Add(ref uint64, t int64, v float64) error {
-	// We only act on the last 4 bytes. Anything before is used by higher-order
-	// appenders. We erase it to avoid issues.
-	ref = (ref << 32) >> 32
+	// We only own the first 5 bytes of the reference. Anything before is
+	// used by higher-order appenders. We erase it to avoid issues.
+	ref = (ref << 31) >> 31

 	// Distinguish between existing series and series created in
 	// this transaction.
-	if int(ref) >= len(a.series) {
-		if _, ok := a.newSeries[uint32(ref)]; !ok {
+	if ref&(1<<32) > 0 {
+		if _, ok := a.newSeries[ref]; !ok {
 			return errNotFound
 		}
 		// TODO(fabxc): we also have to validate here that the
 		// sample sequence is valid.
 		// We also have to revalidate it as we switch locks and create
 		// the new series.
-		a.samples = append(a.samples, hashedSample{
-			ref: uint32(ref),
+		a.samples = append(a.samples, refdSample{
+			ref: ref,
 			t:   t,
 			v:   v,
 		})
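The old code detected series created within the current batch by checking ref against len(a.series), which races with other appenders growing a.series; the dedicated flag bit makes the check unambiguous. A quick, standalone demonstration of the new masking and flag test (the concrete values are arbitrary):

package main

import "fmt"

func main() {
	// Arbitrary higher-order bytes (as a partitioned appender would set),
	// plus the batch-local flag bit and a series index of 42.
	ref := uint64(0xAABB)<<40 | 1<<32 | 42

	// (ref << 31) >> 31 keeps the low 33 bits: the index plus the flag.
	local := (ref << 31) >> 31
	fmt.Printf("%#x\n", local)     // 0x10000002a
	fmt.Println(local&(1<<32) > 0) // true: series was created in this batch
}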
@@ -202,8 +211,8 @@ func (a *headAppender) Add(ref uint64, t int64, v float64) error {
 		return ErrAmendSample
 	}

-	a.samples = append(a.samples, hashedSample{
-		ref: uint32(ref),
+	a.samples = append(a.samples, refdSample{
+		ref: ref,
 		t:   t,
 		v:   v,
 	})
@@ -215,21 +224,27 @@ func (a *headAppender) createSeries() {
 		return
 	}
 	a.newLabels = make([]labels.Labels, 0, len(a.newSeries))
+	base0 := len(a.series)

 	a.mtx.RUnlock()
 	a.mtx.Lock()

-	for _, ref := range a.newRefs {
-		l := a.newSeries[ref]
+	base1 := len(a.series)
+
+	for ref, l := range a.newSeries {
 		// We switched locks and have to re-validate that the series were not
 		// created by another goroutine in the meantime.
-		if int(ref) < len(a.series) && a.series[ref] != nil {
-			continue
+		if base1 > base0 {
+			if ms := a.get(l.hash, l.labels); ms != nil {
+				a.refmap[ref] = uint64(ms.ref)
+				continue
+			}
 		}
 		// Series is still new.
 		a.newLabels = append(a.newLabels, l.labels)
+		a.refmap[ref] = uint64(len(a.series))

-		a.create(ref, l.hash, l.labels)
+		a.create(l.hash, l.labels)
 	}

 	a.mtx.Unlock()
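createSeries upgrades from the read lock to the write lock, and since other goroutines may create series in the unlocked gap, everything decided under the read lock is re-validated (cheaply gated on base1 > base0). Below is a runnable sketch of that upgrade-and-revalidate pattern in isolation, with a plain map standing in for the real series storage:

package main

import (
	"fmt"
	"sync"
)

type registry struct {
	mtx    sync.RWMutex
	series map[uint64]int // label hash -> series index
}

// getOrCreate checks under the read lock first, then upgrades to the
// write lock and re-validates, because another goroutine may have
// created the same entry while no lock was held.
func (r *registry) getOrCreate(hash uint64) int {
	r.mtx.RLock()
	if id, ok := r.series[hash]; ok {
		r.mtx.RUnlock()
		return id
	}
	r.mtx.RUnlock()

	r.mtx.Lock()
	defer r.mtx.Unlock()
	// Re-check: the world may have changed between the two locks.
	if id, ok := r.series[hash]; ok {
		return id
	}
	id := len(r.series)
	r.series[hash] = id
	return id
}

func main() {
	r := &registry{series: map[uint64]int{}}
	fmt.Println(r.getOrCreate(0xfab), r.getOrCreate(0xfab)) // same index twice
}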
@@ -253,7 +268,12 @@ func (a *headAppender) Commit() error {
 		maxt = int64(math.MinInt64)
 	)

-	for _, s := range a.samples {
+	for i := range a.samples {
+		s := &a.samples[i]
+
+		if s.ref&(1<<32) > 0 {
+			s.ref = a.refmap[s.ref]
+		}
 		if !a.series[s.ref].append(s.t, s.v) {
 			total--
 		}
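Commit now iterates by index and takes a pointer (s := &a.samples[i]) because it rewrites s.ref in place; the old `for _, s := range a.samples` form would only mutate a copy. A toy reproduction of that Go pitfall:

package main

import "fmt"

type sample struct{ ref uint64 }

func main() {
	samples := []sample{{ref: 1 << 32}}

	for _, s := range samples {
		s.ref = 7 // mutates the loop variable, a copy
	}
	fmt.Println(samples[0].ref) // still 4294967296

	for i := range samples {
		s := &samples[i]
		s.ref = 7 // mutates the slice element itself
	}
	fmt.Println(samples[0].ref) // 7
}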
@@ -401,17 +421,14 @@ func (h *headBlock) get(hash uint64, lset labels.Labels) *memSeries {
 	return nil
 }

-func (h *headBlock) create(ref uint32, hash uint64, lset labels.Labels) *memSeries {
+func (h *headBlock) create(hash uint64, lset labels.Labels) *memSeries {
 	s := &memSeries{
-		ref:  ref,
 		lset: lset,
+		ref:  uint32(len(h.series)),
 	}
 	// Allocate empty space until we can insert at the given index.
-	for int(ref) >= len(h.series) {
-		h.series = append(h.series, nil)
-	}
-	h.series[ref] = s
+	h.series = append(h.series, s)

 	h.hashes[hash] = append(h.hashes[hash], s)

postings.go

@@ -26,23 +26,7 @@ func (p *memPostings) get(t term) Postings {
 // term argument appears twice.
 func (p *memPostings) add(id uint32, terms ...term) {
 	for _, t := range terms {
-		// We expect IDs to roughly be appended in order but some concurrency
-		// related out of order at the end. We do insertion sort from the end
-		// to account for it.
-		l := p.m[t]
-
-		i := len(l) - 1
-		for ; i >= 0; i-- {
-			if id > l[i] {
-				break
-			}
-		}
-		l = append(l, 0)
-		copy(l[i+2:], l[i+1:])
-		l[i+1] = id
-
-		p.m[t] = l
+		p.m[t] = append(p.m[t], id)
 	}
 }
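The removed insertion sort tried to compensate for IDs arriving slightly out of order, which, per the commit message, could not be fully guaranteed at this layer. With create() now assigning uint32(len(h.series)) while the write lock is held, each postings list sees strictly increasing IDs, so a plain append preserves sortedness. A toy model of that invariant (simplified types, not the real memPostings):

package main

import (
	"fmt"
	"sort"
)

func main() {
	postings := map[string][]uint32{}
	var series []string // stands in for h.series

	// create mimics headBlock.create: the ID is the current length of
	// the series slice, assigned while holding the write lock.
	create := func(lset string, terms ...string) {
		id := uint32(len(series))
		series = append(series, lset)
		for _, t := range terms {
			postings[t] = append(postings[t], id) // plain append suffices
		}
	}

	create(`{job="a",instance="1"}`, "job=a", "instance=1")
	create(`{job="a",instance="2"}`, "job=a", "instance=2")
	create(`{job="b",instance="1"}`, "job=b", "instance=1")

	for t, l := range postings {
		sorted := sort.SliceIsSorted(l, func(i, j int) bool { return l[i] < l[j] })
		fmt.Println(t, l, sorted)
	}
}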

wal.go (15 lines changed)

@@ -88,7 +88,7 @@ func OpenWAL(dir string, l log.Logger, flushInterval time.Duration) (*WAL, error
 }

 type walHandler struct {
-	sample func(hashedSample)
+	sample func(refdSample)
 	series func(labels.Labels)
 }
@@ -110,7 +110,7 @@ func (w *WAL) ReadAll(h *walHandler) error {
 }

 // Log writes a batch of new series labels and samples to the log.
-func (w *WAL) Log(series []labels.Labels, samples []hashedSample) error {
+func (w *WAL) Log(series []labels.Labels, samples []refdSample) error {
 	if err := w.enc.encodeSeries(series); err != nil {
 		return err
 	}
@@ -268,7 +268,7 @@ func (e *walEncoder) encodeSeries(series []labels.Labels) error {
 	return e.entry(WALEntrySeries, walSeriesSimple, buf)
 }

-func (e *walEncoder) encodeSamples(samples []hashedSample) error {
+func (e *walEncoder) encodeSamples(samples []refdSample) error {
 	if len(samples) == 0 {
 		return nil
 	}
@@ -282,7 +282,7 @@ func (e *walEncoder) encodeSamples(samples []hashedSample) error {
 	// TODO(fabxc): optimize for all samples having the same timestamp.
 	first := samples[0]

-	binary.BigEndian.PutUint32(b, first.ref)
+	binary.BigEndian.PutUint64(b, first.ref)
 	buf = append(buf, b[:4]...)
 	binary.BigEndian.PutUint64(b, uint64(first.t))
 	buf = append(buf, b[:8]...)
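encodeSamples stores the first sample's ref at fixed width and every sample's ref as a signed varint delta against that base, which is why widening refs to uint64 only touches the base write, the base read, and the delta reconstruction. A simplified round-trip of the base-plus-delta scheme, refs only; the real entries also interleave timestamps and values:

package main

import (
	"encoding/binary"
	"fmt"
)

// encode writes a fixed-width base ref followed by one signed varint
// delta per ref. Assumes len(refs) > 0.
func encode(refs []uint64) []byte {
	var buf []byte
	var b [binary.MaxVarintLen64]byte

	binary.BigEndian.PutUint64(b[:8], refs[0]) // base ref, fixed width
	buf = append(buf, b[:8]...)
	for _, r := range refs {
		n := binary.PutVarint(b[:], int64(r)-int64(refs[0]))
		buf = append(buf, b[:n]...)
	}
	return buf
}

// decode reverses encode: read the base, then add each delta back.
func decode(buf []byte) []uint64 {
	base := binary.BigEndian.Uint64(buf)
	buf = buf[8:]
	var refs []uint64
	for len(buf) > 0 {
		d, n := binary.Varint(buf)
		refs = append(refs, uint64(int64(base)+d))
		buf = buf[n:]
	}
	return refs
}

func main() {
	fmt.Println(decode(encode([]uint64{100, 103, 101}))) // [100 103 101]
}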
@@ -351,20 +351,21 @@ func (d *walDecoder) decodeSamples(flag byte, b []byte) error {
 		return errors.Wrap(errInvalidSize, "header length")
 	}
 	var (
-		baseRef  = binary.BigEndian.Uint32(b)
+		baseRef  = binary.BigEndian.Uint64(b)
 		baseTime = int64(binary.BigEndian.Uint64(b[4:]))
 	)
 	b = b[12:]

 	for len(b) > 0 {
-		var smpl hashedSample
+		var smpl refdSample

 		dref, n := binary.Varint(b)
 		if n < 1 {
 			return errors.Wrap(errInvalidSize, "sample ref delta")
 		}
 		b = b[n:]

-		smpl.ref = uint32(int64(baseRef) + dref)
+		smpl.ref = uint64(int64(baseRef) + dref)

 		dtime, n := binary.Varint(b)
 		if n < 1 {