Add postings wrapper that emits head postings in label set order

This adds a position mapper that takes series from a head block
in the order they were appended and creates a mapping representing
them in order of their label sets.

Write-repair of the postings list would make writes very expensive.
Hence, we keep them as they are and only apply the position mapping
at the very end, after a postings list has been sufficiently reduced
through intersections etc.
This commit is contained in:
Fabian Reinartz 2017-01-05 15:13:01 +01:00
parent 5aa7f7cce8
commit 9790aa98ac
4 changed files with 131 additions and 34 deletions

View file

@ -249,6 +249,10 @@ func (c *compactor) write(blocks []block, indexw IndexWriter, chunkw SeriesWrite
if err != nil { if err != nil {
return err return err
} }
// TODO(fabxc): find more transparent way of handling this.
if hb, ok := b.(*HeadBlock); ok {
all = hb.remapPostings(all)
}
s := newCompactionSeriesSet(b.index(), b.series(), all) s := newCompactionSeriesSet(b.index(), b.series(), all)
if i == 0 { if i == 0 {
@ -274,7 +278,6 @@ func (c *compactor) write(blocks []block, indexw IndexWriter, chunkw SeriesWrite
if err := chunkw.WriteSeries(i, lset, chunks); err != nil { if err := chunkw.WriteSeries(i, lset, chunks); err != nil {
return err return err
} }
fmt.Println("next", lset, chunks)
stats.ChunkCount += uint32(len(chunks)) stats.ChunkCount += uint32(len(chunks))
stats.SeriesCount++ stats.SeriesCount++

79
head.go
View file

@ -6,6 +6,7 @@ import (
"sort" "sort"
"sync" "sync"
"github.com/bradfitz/slice"
"github.com/fabxc/tsdb/chunks" "github.com/fabxc/tsdb/chunks"
"github.com/fabxc/tsdb/labels" "github.com/fabxc/tsdb/labels"
) )
@ -18,6 +19,9 @@ type HeadBlock struct {
// descs holds all chunk descs for the head block. Each chunk implicitly // descs holds all chunk descs for the head block. Each chunk implicitly
// is assigned the index as its ID. // is assigned the index as its ID.
descs []*chunkDesc descs []*chunkDesc
// mapper maps a series ID to its position in an ordered list
// of all series. The orderDirty flag indicates that it has gone stale.
mapper *positionMapper
// hashes contains a collision map of label set hashes of chunks // hashes contains a collision map of label set hashes of chunks
// to their chunk descs. // to their chunk descs.
hashes map[uint64][]*chunkDesc hashes map[uint64][]*chunkDesc
@ -60,6 +64,8 @@ func OpenHeadBlock(dir string, baseTime int64) (*HeadBlock, error) {
return nil, err return nil, err
} }
b.rewriteMapping()
return b, nil return b, nil
} }
@ -103,11 +109,7 @@ func (h *HeadBlock) LabelValues(names ...string) (StringTuples, error) {
} }
sort.Strings(sl) sort.Strings(sl)
t := &stringTuples{ return &stringTuples{l: len(names), s: sl}, nil
l: len(names),
s: sl,
}
return t, nil
} }
// Postings returns the postings list iterator for the label pair. // Postings returns the postings list iterator for the label pair.
@ -115,6 +117,23 @@ func (h *HeadBlock) Postings(name, value string) (Postings, error) {
return h.postings.get(term{name: name, value: value}), nil return h.postings.get(term{name: name, value: value}), nil
} }
// remapPostings reorders the given postings so that they are emitted in the
// order of the series they reference rather than in order of their IDs.
//
// The input is expanded into a list and sorted by the position that the
// head block's position mapper assigns to each ID. If expanding fails, the
// error is returned wrapped in an errPostings.
//
// The returned postings no longer have monotonically increasing IDs and
// MUST NOT be fed into regular postings set operations such as intersect
// and merge.
func (h *HeadBlock) remapPostings(p Postings) Postings {
	refs, err := expandPostings(p)
	if err != nil {
		return errPostings{err: err}
	}

	// Only the pointer is captured here; the mapper is dereferenced solely
	// inside the comparison function.
	m := h.mapper
	byPosition := func(a, b int) bool {
		return m.fw[refs[a]] < m.fw[refs[b]]
	}
	slice.Sort(refs, byPosition)

	return newListPostings(refs)
}
// Series returns the series for the given reference. // Series returns the series for the given reference.
func (h *HeadBlock) Series(ref uint32) (labels.Labels, []ChunkMeta, error) { func (h *HeadBlock) Series(ref uint32) (labels.Labels, []ChunkMeta, error) {
if int(ref) >= len(h.descs) { if int(ref) >= len(h.descs) {
@ -253,6 +272,11 @@ func (h *HeadBlock) appendBatch(samples []hashedSample) error {
for i, s := range newSeries { for i, s := range newSeries {
h.create(newHashes[i], s) h.create(newHashes[i], s)
} }
// TODO(fabxc): just mark as dirty instead and trigger a remapping
// periodically and upon querying.
if len(newSeries) > 0 {
h.rewriteMapping()
}
for _, s := range samples { for _, s := range samples {
cd := h.descs[s.ref] cd := h.descs[s.ref]
@ -270,3 +294,48 @@ func (h *HeadBlock) appendBatch(samples []hashedSample) error {
return nil return nil
} }
// rewriteMapping rebuilds the head block's position mapper from the current
// set of chunk descs.
//
// It works on a copy of h.descs so that the descs themselves keep their
// order. The copy is sorted by label set while the mapper is constructed.
func (h *HeadBlock) rewriteMapping() {
	ordered := make([]*chunkDesc, len(h.descs))
	copy(ordered, h.descs)

	byLabels := func(a, b int) bool {
		return labels.Compare(ordered[a].lset, ordered[b].lset) < 0
	}
	h.mapper = newPositionMapper(slice.SortInterface(ordered, byLabels))
}
// positionMapper records how the elements of a sortable collection move
// when that collection is sorted.
//
// After construction, iv[i] is the original index of the element that now
// sits at sorted position i, and fw[k] is the sorted position of the element
// that originally sat at index k.
type positionMapper struct {
	sortable sort.Interface
	iv, fw   []int
}

// newPositionMapper sorts s in place and returns a mapper holding the
// resulting index mappings in both directions.
func newPositionMapper(s sort.Interface) *positionMapper {
	n := s.Len()
	m := &positionMapper{
		sortable: s,
		iv:       make([]int, n),
		fw:       make([]int, n),
	}

	// Start from the identity permutation; Swap keeps it in lockstep with
	// the wrapped collection while sorting.
	for pos := 0; pos < n; pos++ {
		m.iv[pos] = pos
	}
	sort.Sort(m)

	// Invert iv to obtain the original-index -> sorted-position mapping.
	for pos, orig := range m.iv {
		m.fw[orig] = pos
	}
	return m
}

// Len returns the number of elements in the wrapped collection.
func (m *positionMapper) Len() int { return m.sortable.Len() }

// Less defers to the ordering of the wrapped collection.
func (m *positionMapper) Less(i, j int) bool { return m.sortable.Less(i, j) }

// Swap exchanges elements i and j of the wrapped collection and mirrors the
// exchange in iv.
func (m *positionMapper) Swap(i, j int) {
	m.sortable.Swap(i, j)
	m.iv[i], m.iv[j] = m.iv[j], m.iv[i]
}

35
head_test.go Normal file
View file

@ -0,0 +1,35 @@
package tsdb
import (
"sort"
"testing"
"github.com/stretchr/testify/require"
)
// TestPositionMapper verifies that newPositionMapper leaves the wrapped
// collection sorted and that fw maps each original index to the position
// the element occupies after sorting.
func TestPositionMapper(t *testing.T) {
	tests := []struct {
		in   []int
		want []int
	}{
		{in: []int{5, 4, 3, 2, 1, 0}, want: []int{5, 4, 3, 2, 1, 0}},
		{in: []int{1, 2, 0, 3}, want: []int{1, 2, 0, 3}},
		{in: []int{1, 2, 0, 3, 10, 100, -10}, want: []int{2, 3, 1, 4, 5, 6, 0}},
	}

	for _, tc := range tests {
		mapper := newPositionMapper(sort.IntSlice(tc.in))

		require.True(t, sort.IsSorted(mapper.sortable))
		require.Equal(t, tc.want, mapper.fw)
	}
}

View file

@ -6,7 +6,6 @@ import (
"sort" "sort"
"strings" "strings"
"github.com/bradfitz/slice"
"github.com/fabxc/tsdb/chunks" "github.com/fabxc/tsdb/chunks"
"github.com/fabxc/tsdb/labels" "github.com/fabxc/tsdb/labels"
) )
@ -147,19 +146,24 @@ func (s *Shard) Querier(mint, maxt int64) Querier {
} }
for _, b := range blocks { for _, b := range blocks {
sq.blocks = append(sq.blocks, &blockQuerier{ q := &blockQuerier{
mint: mint, mint: mint,
maxt: maxt, maxt: maxt,
index: b.index(), index: b.index(),
series: b.series(), series: b.series(),
}) }
sq.blocks = append(sq.blocks, q)
// TODO(fabxc): find nicer solution.
if hb, ok := b.(*HeadBlock); ok {
q.postingsMapper = hb.remapPostings
}
} }
return sq return sq
} }
func (q *shardQuerier) LabelValues(n string) ([]string, error) { func (q *shardQuerier) LabelValues(n string) ([]string, error) {
// TODO(fabxc): return the merged result of all blocks.
res, err := q.blocks[0].LabelValues(n) res, err := q.blocks[0].LabelValues(n)
if err != nil { if err != nil {
return nil, err return nil, err
@ -211,6 +215,8 @@ type blockQuerier struct {
index IndexReader index IndexReader
series SeriesReader series SeriesReader
postingsMapper func(Postings) Postings
mint, maxt int64 mint, maxt int64
} }
@ -238,36 +244,20 @@ func (q *blockQuerier) Select(ms ...labels.Matcher) SeriesSet {
its = append(its, q.selectSingle(m)) its = append(its, q.selectSingle(m))
} }
set := &blockSeriesSet{ p := Intersect(its...)
if q.postingsMapper != nil {
p = q.postingsMapper(p)
}
return &blockSeriesSet{
index: q.index, index: q.index,
chunks: q.series, chunks: q.series,
it: Intersect(its...), it: p,
absent: absent, absent: absent,
mint: q.mint, mint: q.mint,
maxt: q.maxt, maxt: q.maxt,
} }
// TODO(fabxc): the head block indexes new series in order they come in.
// SeriesSets are expected to emit labels in order of their label sets.
// We expand the set and sort it for now. This is not a scalable approach
// however, and the head block should re-sort itself eventually.
// This comes with an initial cost as long as new series come in but should
// flatten out quickly after a warmup.
// When cutting new head blocks, the index would ideally be transferred to
// the new head.
var all []Series
for set.Next() {
all = append(all, set.At())
}
if set.Err() != nil {
return errSeriesSet{err: set.Err()}
}
slice.Sort(all, func(i, j int) bool {
return labels.Compare(all[i].Labels(), all[j].Labels()) < 0
})
// TODO(fabxc): additionally bad because this static set uses function pointers
// in a mock series set.
return newListSeriesSet(all)
} }
func (q *blockQuerier) selectSingle(m labels.Matcher) Postings { func (q *blockQuerier) selectSingle(m labels.Matcher) Postings {