mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-11 13:57:36 -08:00
Add postings wrapper that emits head postings in label set order
This adds a position mapper that takes series from a head block in the order they were appended and creates a mapping representing them in order of their label sets. Write-repair of the postings list would cause very expensive writing. Hence, we keep them as they are and only apply the position mapping at the very end, after a postings list has been sufficiently reduced through intersections etc.
This commit is contained in:
parent
5aa7f7cce8
commit
9790aa98ac
|
@ -249,6 +249,10 @@ func (c *compactor) write(blocks []block, indexw IndexWriter, chunkw SeriesWrite
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
// TODO(fabxc): find more transparent way of handling this.
|
||||||
|
if hb, ok := b.(*HeadBlock); ok {
|
||||||
|
all = hb.remapPostings(all)
|
||||||
|
}
|
||||||
s := newCompactionSeriesSet(b.index(), b.series(), all)
|
s := newCompactionSeriesSet(b.index(), b.series(), all)
|
||||||
|
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
|
@ -274,7 +278,6 @@ func (c *compactor) write(blocks []block, indexw IndexWriter, chunkw SeriesWrite
|
||||||
if err := chunkw.WriteSeries(i, lset, chunks); err != nil {
|
if err := chunkw.WriteSeries(i, lset, chunks); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
fmt.Println("next", lset, chunks)
|
|
||||||
|
|
||||||
stats.ChunkCount += uint32(len(chunks))
|
stats.ChunkCount += uint32(len(chunks))
|
||||||
stats.SeriesCount++
|
stats.SeriesCount++
|
||||||
|
|
79
head.go
79
head.go
|
@ -6,6 +6,7 @@ import (
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/bradfitz/slice"
|
||||||
"github.com/fabxc/tsdb/chunks"
|
"github.com/fabxc/tsdb/chunks"
|
||||||
"github.com/fabxc/tsdb/labels"
|
"github.com/fabxc/tsdb/labels"
|
||||||
)
|
)
|
||||||
|
@ -18,6 +19,9 @@ type HeadBlock struct {
|
||||||
// descs holds all chunk descs for the head block. Each chunk implicitly
|
// descs holds all chunk descs for the head block. Each chunk implicitly
|
||||||
// is assigned the index as its ID.
|
// is assigned the index as its ID.
|
||||||
descs []*chunkDesc
|
descs []*chunkDesc
|
||||||
|
// mapper maps a series ID to its position in an ordered list
|
||||||
|
// of all series. The orderDirty flag indicates that it has gone stale.
|
||||||
|
mapper *positionMapper
|
||||||
// hashes contains a collision map of label set hashes of chunks
|
// hashes contains a collision map of label set hashes of chunks
|
||||||
// to their chunk descs.
|
// to their chunk descs.
|
||||||
hashes map[uint64][]*chunkDesc
|
hashes map[uint64][]*chunkDesc
|
||||||
|
@ -60,6 +64,8 @@ func OpenHeadBlock(dir string, baseTime int64) (*HeadBlock, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b.rewriteMapping()
|
||||||
|
|
||||||
return b, nil
|
return b, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,11 +109,7 @@ func (h *HeadBlock) LabelValues(names ...string) (StringTuples, error) {
|
||||||
}
|
}
|
||||||
sort.Strings(sl)
|
sort.Strings(sl)
|
||||||
|
|
||||||
t := &stringTuples{
|
return &stringTuples{l: len(names), s: sl}, nil
|
||||||
l: len(names),
|
|
||||||
s: sl,
|
|
||||||
}
|
|
||||||
return t, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Postings returns the postings list iterator for the label pair.
|
// Postings returns the postings list iterator for the label pair.
|
||||||
|
@ -115,6 +117,23 @@ func (h *HeadBlock) Postings(name, value string) (Postings, error) {
|
||||||
return h.postings.get(term{name: name, value: value}), nil
|
return h.postings.get(term{name: name, value: value}), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remapPostings changes the order of the postings from their ID to the ordering
|
||||||
|
// of the series they reference.
|
||||||
|
// Returned postings have no longer monotonic IDs and MUST NOT be used for regular
|
||||||
|
// postings set operations, i.e. intersect and merge.
|
||||||
|
func (h *HeadBlock) remapPostings(p Postings) Postings {
|
||||||
|
list, err := expandPostings(p)
|
||||||
|
if err != nil {
|
||||||
|
return errPostings{err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
slice.Sort(list, func(i, j int) bool {
|
||||||
|
return h.mapper.fw[list[i]] < h.mapper.fw[list[j]]
|
||||||
|
})
|
||||||
|
|
||||||
|
return newListPostings(list)
|
||||||
|
}
|
||||||
|
|
||||||
// Series returns the series for the given reference.
|
// Series returns the series for the given reference.
|
||||||
func (h *HeadBlock) Series(ref uint32) (labels.Labels, []ChunkMeta, error) {
|
func (h *HeadBlock) Series(ref uint32) (labels.Labels, []ChunkMeta, error) {
|
||||||
if int(ref) >= len(h.descs) {
|
if int(ref) >= len(h.descs) {
|
||||||
|
@ -253,6 +272,11 @@ func (h *HeadBlock) appendBatch(samples []hashedSample) error {
|
||||||
for i, s := range newSeries {
|
for i, s := range newSeries {
|
||||||
h.create(newHashes[i], s)
|
h.create(newHashes[i], s)
|
||||||
}
|
}
|
||||||
|
// TODO(fabxc): just mark as dirty instead and trigger a remapping
|
||||||
|
// periodically and upon querying.
|
||||||
|
if len(newSeries) > 0 {
|
||||||
|
h.rewriteMapping()
|
||||||
|
}
|
||||||
|
|
||||||
for _, s := range samples {
|
for _, s := range samples {
|
||||||
cd := h.descs[s.ref]
|
cd := h.descs[s.ref]
|
||||||
|
@ -270,3 +294,48 @@ func (h *HeadBlock) appendBatch(samples []hashedSample) error {
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *HeadBlock) rewriteMapping() {
|
||||||
|
cds := make([]*chunkDesc, len(h.descs))
|
||||||
|
copy(cds, h.descs)
|
||||||
|
|
||||||
|
s := slice.SortInterface(cds, func(i, j int) bool {
|
||||||
|
return labels.Compare(cds[i].lset, cds[j].lset) < 0
|
||||||
|
})
|
||||||
|
|
||||||
|
h.mapper = newPositionMapper(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// positionMapper records how the elements of a sortable collection move
// when that collection is sorted, mapping unsorted indices to sorted ones.
type positionMapper struct {
	// sortable is the wrapped collection; it is sorted in place on construction.
	sortable sort.Interface
	// iv[p] is the original index of the element that ends up at sorted position p.
	// fw[k] is the sorted position of the element that started at index k.
	iv, fw []int
}

// newPositionMapper sorts s in place and returns a mapper holding both the
// inverse (iv) and the forward (fw) index mapping of that sort.
func newPositionMapper(s sort.Interface) *positionMapper {
	n := s.Len()
	pm := &positionMapper{
		sortable: s,
		iv:       make([]int, n),
		fw:       make([]int, n),
	}

	// Start from the identity permutation; Swap keeps it in step with s.
	for idx := 0; idx < n; idx++ {
		pm.iv[idx] = idx
	}
	sort.Sort(pm)

	// Invert iv to obtain the forward mapping.
	for pos := 0; pos < n; pos++ {
		pm.fw[pm.iv[pos]] = pos
	}

	return pm
}

// Len implements sort.Interface by delegating to the wrapped collection.
func (m *positionMapper) Len() int {
	return m.sortable.Len()
}

// Less implements sort.Interface by delegating to the wrapped collection.
func (m *positionMapper) Less(i, j int) bool {
	return m.sortable.Less(i, j)
}

// Swap implements sort.Interface. It swaps the elements of the wrapped
// collection and mirrors the swap in iv so the original indices are tracked.
func (m *positionMapper) Swap(i, j int) {
	m.sortable.Swap(i, j)

	m.iv[j], m.iv[i] = m.iv[i], m.iv[j]
}
|
||||||
|
|
35
head_test.go
Normal file
35
head_test.go
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
package tsdb
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPositionMapper(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
in []int
|
||||||
|
res []int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
in: []int{5, 4, 3, 2, 1, 0},
|
||||||
|
res: []int{5, 4, 3, 2, 1, 0},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: []int{1, 2, 0, 3},
|
||||||
|
res: []int{1, 2, 0, 3},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
in: []int{1, 2, 0, 3, 10, 100, -10},
|
||||||
|
res: []int{2, 3, 1, 4, 5, 6, 0},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
m := newPositionMapper(sort.IntSlice(c.in))
|
||||||
|
|
||||||
|
require.True(t, sort.IsSorted(m.sortable))
|
||||||
|
require.Equal(t, c.res, m.fw)
|
||||||
|
}
|
||||||
|
}
|
46
querier.go
46
querier.go
|
@ -6,7 +6,6 @@ import (
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/bradfitz/slice"
|
|
||||||
"github.com/fabxc/tsdb/chunks"
|
"github.com/fabxc/tsdb/chunks"
|
||||||
"github.com/fabxc/tsdb/labels"
|
"github.com/fabxc/tsdb/labels"
|
||||||
)
|
)
|
||||||
|
@ -147,19 +146,24 @@ func (s *Shard) Querier(mint, maxt int64) Querier {
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, b := range blocks {
|
for _, b := range blocks {
|
||||||
sq.blocks = append(sq.blocks, &blockQuerier{
|
q := &blockQuerier{
|
||||||
mint: mint,
|
mint: mint,
|
||||||
maxt: maxt,
|
maxt: maxt,
|
||||||
index: b.index(),
|
index: b.index(),
|
||||||
series: b.series(),
|
series: b.series(),
|
||||||
})
|
}
|
||||||
|
sq.blocks = append(sq.blocks, q)
|
||||||
|
|
||||||
|
// TODO(fabxc): find nicer solution.
|
||||||
|
if hb, ok := b.(*HeadBlock); ok {
|
||||||
|
q.postingsMapper = hb.remapPostings
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return sq
|
return sq
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *shardQuerier) LabelValues(n string) ([]string, error) {
|
func (q *shardQuerier) LabelValues(n string) ([]string, error) {
|
||||||
// TODO(fabxc): return returned merged result.
|
|
||||||
res, err := q.blocks[0].LabelValues(n)
|
res, err := q.blocks[0].LabelValues(n)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -211,6 +215,8 @@ type blockQuerier struct {
|
||||||
index IndexReader
|
index IndexReader
|
||||||
series SeriesReader
|
series SeriesReader
|
||||||
|
|
||||||
|
postingsMapper func(Postings) Postings
|
||||||
|
|
||||||
mint, maxt int64
|
mint, maxt int64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -238,36 +244,20 @@ func (q *blockQuerier) Select(ms ...labels.Matcher) SeriesSet {
|
||||||
its = append(its, q.selectSingle(m))
|
its = append(its, q.selectSingle(m))
|
||||||
}
|
}
|
||||||
|
|
||||||
set := &blockSeriesSet{
|
p := Intersect(its...)
|
||||||
|
|
||||||
|
if q.postingsMapper != nil {
|
||||||
|
p = q.postingsMapper(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &blockSeriesSet{
|
||||||
index: q.index,
|
index: q.index,
|
||||||
chunks: q.series,
|
chunks: q.series,
|
||||||
it: Intersect(its...),
|
it: p,
|
||||||
absent: absent,
|
absent: absent,
|
||||||
mint: q.mint,
|
mint: q.mint,
|
||||||
maxt: q.maxt,
|
maxt: q.maxt,
|
||||||
}
|
}
|
||||||
// TODO(fabxc): the head block indexes new series in order they come in.
|
|
||||||
// SeriesSets are expected to emit labels in order of their label sets.
|
|
||||||
// We expand the set and sort it for now. This is not a scalable approach
|
|
||||||
// however, and the head block should re-sort itself eventually.
|
|
||||||
// This comes with an initial cost as long as new series come in but should
|
|
||||||
// flatten out quickly after a warump.
|
|
||||||
// When cutting new head blocks, the index would ideally be transferred to
|
|
||||||
// the new head.
|
|
||||||
var all []Series
|
|
||||||
for set.Next() {
|
|
||||||
all = append(all, set.At())
|
|
||||||
}
|
|
||||||
if set.Err() != nil {
|
|
||||||
return errSeriesSet{err: set.Err()}
|
|
||||||
}
|
|
||||||
slice.Sort(all, func(i, j int) bool {
|
|
||||||
return labels.Compare(all[i].Labels(), all[j].Labels()) < 0
|
|
||||||
})
|
|
||||||
|
|
||||||
// TODO(fabxc): additionally bad because this static set uses function pointers
|
|
||||||
// in a mock series set.
|
|
||||||
return newListSeriesSet(all)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *blockQuerier) selectSingle(m labels.Matcher) Postings {
|
func (q *blockQuerier) selectSingle(m labels.Matcher) Postings {
|
||||||
|
|
Loading…
Reference in a new issue