prometheus/tsdb/index/index_test.go
Brian Brazil 48d25e6fe7 Reduce memory used by postings offset table.
Rather than keeping the offset of each postings list, instead
keep the nth offset of the offset of the posting list. As postings
list offsets have always been sorted, we can then get to the closest
entry before the one we want an iterate forwards.

I haven't done much tuning on the 32 number, it was chosen to try
not to read through more than a 4k page of data.

Switch to a bulk interface for fetching postings. Use it to avoid having
to re-read parts of the posting offset table when querying lots of it.

For a index with what BenchmarkHeadPostingForMatchers uses RAM
for r.postings drops from 3.79MB to 80.19kB or about 48x.
Bytes allocated go down by 30%, and suprisingly CPU usage drops by
4-6% for typical queries too.

benchmark                                                               old ns/op      new ns/op      delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              35231          36673          +4.09%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      563380         540627         -4.04%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      536782         534186         -0.48%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     533990         541550         +1.42%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            113374598      117969608      +4.05%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            146329884      139651442      -4.56%
BenchmarkPostingsForMatchers/Block/i=~""-4                              50346510       44961127       -10.70%
BenchmarkPostingsForMatchers/Block/i!=""-4                              41261550       35356165       -14.31%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              112544418      116904010      +3.87%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       112487086      116864918      +3.89%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        41094758       35457904       -13.72%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                41906372       36151473       -13.73%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              147262414      140424800      -4.64%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             28615629       27872072       -2.60%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       147117177      140462403      -4.52%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     175096826      167902298      -4.11%

benchmark                                                               old allocs     new allocs     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              4              6              +50.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      7              11             +57.14%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      7              11             +57.14%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     15             17             +13.33%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            100010         100012         +0.00%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            200069         200040         -0.01%
BenchmarkPostingsForMatchers/Block/i=~""-4                              200072         200045         -0.01%
BenchmarkPostingsForMatchers/Block/i!=""-4                              200070         200041         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              100013         100017         +0.00%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       100017         100023         +0.01%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        200073         200046         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                200075         200050         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              200074         200049         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             111165         111150         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       200078         200055         -0.01%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     311282         311238         -0.01%

benchmark                                                               old bytes     new bytes     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              264           296           +12.12%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      360           424           +17.78%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      360           424           +17.78%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     520           552           +6.15%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            1600461       1600482       +0.00%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            24900801      17259077      -30.69%
BenchmarkPostingsForMatchers/Block/i=~""-4                              24900836      17259151      -30.69%
BenchmarkPostingsForMatchers/Block/i!=""-4                              24900760      17259048      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              1600557       1600621       +0.00%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       1600717       1600813       +0.01%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        24900856      17259176      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                24900952      17259304      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              24900993      17259333      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             3788311       3142630       -17.04%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       24901137      17259509      -30.69%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     28693086      20405680      -28.88%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-11 19:59:31 +00:00

523 lines
13 KiB
Go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package index
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"sort"
"testing"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/chunks"
"github.com/prometheus/prometheus/tsdb/encoding"
"github.com/prometheus/prometheus/util/testutil"
)
type series struct {
l labels.Labels
chunks []chunks.Meta
}
type mockIndex struct {
series map[uint64]series
labelIndex map[string][]string
postings map[labels.Label][]uint64
symbols map[string]struct{}
}
func newMockIndex() mockIndex {
ix := mockIndex{
series: make(map[uint64]series),
labelIndex: make(map[string][]string),
postings: make(map[labels.Label][]uint64),
symbols: make(map[string]struct{}),
}
return ix
}
func (m mockIndex) Symbols() (map[string]struct{}, error) {
return m.symbols, nil
}
func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta) error {
if _, ok := m.series[ref]; ok {
return errors.Errorf("series with reference %d already added", ref)
}
for _, lbl := range l {
m.symbols[lbl.Name] = struct{}{}
m.symbols[lbl.Value] = struct{}{}
}
s := series{l: l}
// Actual chunk data is not stored in the index.
for _, c := range chunks {
c.Chunk = nil
s.chunks = append(s.chunks, c)
}
m.series[ref] = s
return nil
}
func (m mockIndex) WriteLabelIndex(names []string, values []string) error {
// TODO support composite indexes
if len(names) != 1 {
return errors.New("composite indexes not supported yet")
}
sort.Strings(values)
m.labelIndex[names[0]] = values
return nil
}
func (m mockIndex) WritePostings(name, value string, it Postings) error {
l := labels.Label{Name: name, Value: value}
if _, ok := m.postings[l]; ok {
return errors.Errorf("postings for %s already added", l)
}
ep, err := ExpandPostings(it)
if err != nil {
return err
}
m.postings[l] = ep
return nil
}
func (m mockIndex) Close() error {
return nil
}
func (m mockIndex) LabelValues(names ...string) (StringTuples, error) {
// TODO support composite indexes
if len(names) != 1 {
return nil, errors.New("composite indexes not supported yet")
}
return NewStringTuples(m.labelIndex[names[0]], 1)
}
func (m mockIndex) Postings(name string, values ...string) (Postings, error) {
p := []Postings{}
for _, value := range values {
l := labels.Label{Name: name, Value: value}
p = append(p, NewListPostings(m.postings[l]))
}
return Merge(p...), nil
}
func (m mockIndex) SortedPostings(p Postings) Postings {
ep, err := ExpandPostings(p)
if err != nil {
return ErrPostings(errors.Wrap(err, "expand postings"))
}
sort.Slice(ep, func(i, j int) bool {
return labels.Compare(m.series[ep[i]].l, m.series[ep[j]].l) < 0
})
return NewListPostings(ep)
}
func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error {
s, ok := m.series[ref]
if !ok {
return errors.New("not found")
}
*lset = append((*lset)[:0], s.l...)
*chks = append((*chks)[:0], s.chunks...)
return nil
}
func (m mockIndex) LabelIndices() ([][]string, error) {
res := make([][]string, 0, len(m.labelIndex))
for k := range m.labelIndex {
res = append(res, []string{k})
}
return res, nil
}
func TestIndexRW_Create_Open(t *testing.T) {
dir, err := ioutil.TempDir("", "test_index_create")
testutil.Ok(t, err)
defer func() {
testutil.Ok(t, os.RemoveAll(dir))
}()
fn := filepath.Join(dir, indexFilename)
// An empty index must still result in a readable file.
iw, err := NewWriter(fn)
testutil.Ok(t, err)
testutil.Ok(t, iw.Close())
ir, err := NewFileReader(fn)
testutil.Ok(t, err)
testutil.Ok(t, ir.Close())
// Modify magic header must cause open to fail.
f, err := os.OpenFile(fn, os.O_WRONLY, 0666)
testutil.Ok(t, err)
_, err = f.WriteAt([]byte{0, 0}, 0)
testutil.Ok(t, err)
f.Close()
_, err = NewFileReader(dir)
testutil.NotOk(t, err)
}
func TestIndexRW_Postings(t *testing.T) {
dir, err := ioutil.TempDir("", "test_index_postings")
testutil.Ok(t, err)
defer func() {
testutil.Ok(t, os.RemoveAll(dir))
}()
fn := filepath.Join(dir, indexFilename)
iw, err := NewWriter(fn)
testutil.Ok(t, err)
series := []labels.Labels{
labels.FromStrings("a", "1", "b", "1"),
labels.FromStrings("a", "1", "b", "2"),
labels.FromStrings("a", "1", "b", "3"),
labels.FromStrings("a", "1", "b", "4"),
}
err = iw.AddSymbols(map[string]struct{}{
"a": {},
"b": {},
"1": {},
"2": {},
"3": {},
"4": {},
})
testutil.Ok(t, err)
// Postings lists are only written if a series with the respective
// reference was added before.
testutil.Ok(t, iw.AddSeries(1, series[0]))
testutil.Ok(t, iw.AddSeries(2, series[1]))
testutil.Ok(t, iw.AddSeries(3, series[2]))
testutil.Ok(t, iw.AddSeries(4, series[3]))
err = iw.WritePostings("a", "1", newListPostings(1, 2, 3, 4))
testutil.Ok(t, err)
testutil.Ok(t, iw.Close())
ir, err := NewFileReader(fn)
testutil.Ok(t, err)
p, err := ir.Postings("a", "1")
testutil.Ok(t, err)
var l labels.Labels
var c []chunks.Meta
for i := 0; p.Next(); i++ {
err := ir.Series(p.At(), &l, &c)
testutil.Ok(t, err)
testutil.Equals(t, 0, len(c))
testutil.Equals(t, series[i], l)
}
testutil.Ok(t, p.Err())
testutil.Ok(t, ir.Close())
}
func TestPostingsMany(t *testing.T) {
dir, err := ioutil.TempDir("", "test_postings_many")
testutil.Ok(t, err)
defer func() {
testutil.Ok(t, os.RemoveAll(dir))
}()
fn := filepath.Join(dir, indexFilename)
iw, err := NewWriter(fn)
testutil.Ok(t, err)
// Create a label in the index which has 999 values.
symbols := map[string]struct{}{}
series := []labels.Labels{}
for i := 1; i < 1000; i++ {
v := fmt.Sprintf("%03d", i)
series = append(series, labels.FromStrings("i", v, "foo", "bar"))
symbols[v] = struct{}{}
}
symbols["i"] = struct{}{}
symbols["foo"] = struct{}{}
symbols["bar"] = struct{}{}
testutil.Ok(t, iw.AddSymbols(symbols))
for i, s := range series {
testutil.Ok(t, iw.AddSeries(uint64(i), s))
}
for i, s := range series {
testutil.Ok(t, iw.WritePostings("i", s.Get("i"), newListPostings(uint64(i))))
}
testutil.Ok(t, iw.Close())
ir, err := NewFileReader(fn)
testutil.Ok(t, err)
cases := []struct {
in []string
}{
// Simple cases, everything is present.
{in: []string{"002"}},
{in: []string{"031", "032", "033"}},
{in: []string{"032", "033"}},
{in: []string{"127", "128"}},
{in: []string{"127", "128", "129"}},
{in: []string{"127", "129"}},
{in: []string{"128", "129"}},
{in: []string{"998", "999"}},
{in: []string{"999"}},
// Before actual values.
{in: []string{"000"}},
{in: []string{"000", "001"}},
{in: []string{"000", "002"}},
// After actual values.
{in: []string{"999a"}},
{in: []string{"999", "999a"}},
{in: []string{"998", "999", "999a"}},
// In the middle of actual values.
{in: []string{"126a", "127", "128"}},
{in: []string{"127", "127a", "128"}},
{in: []string{"127", "127a", "128", "128a", "129"}},
{in: []string{"127", "128a", "129"}},
{in: []string{"128", "128a", "129"}},
{in: []string{"128", "129", "129a"}},
{in: []string{"126a", "126b", "127", "127a", "127b", "128", "128a", "128b", "129", "129a", "129b"}},
}
for _, c := range cases {
it, err := ir.Postings("i", c.in...)
testutil.Ok(t, err)
got := []string{}
var lbls labels.Labels
var metas []chunks.Meta
for it.Next() {
testutil.Ok(t, ir.Series(it.At(), &lbls, &metas))
got = append(got, lbls.Get("i"))
}
testutil.Ok(t, it.Err())
exp := []string{}
for _, e := range c.in {
if _, ok := symbols[e]; ok && e != "l" {
exp = append(exp, e)
}
}
testutil.Equals(t, exp, got, fmt.Sprintf("input: %v", c.in))
}
}
func TestPersistence_index_e2e(t *testing.T) {
dir, err := ioutil.TempDir("", "test_persistence_e2e")
testutil.Ok(t, err)
defer func() {
testutil.Ok(t, os.RemoveAll(dir))
}()
lbls, err := labels.ReadLabels(filepath.Join("..", "testdata", "20kseries.json"), 20000)
testutil.Ok(t, err)
// Sort labels as the index writer expects series in sorted order.
sort.Sort(labels.Slice(lbls))
symbols := map[string]struct{}{}
for _, lset := range lbls {
for _, l := range lset {
symbols[l.Name] = struct{}{}
symbols[l.Value] = struct{}{}
}
}
var input indexWriterSeriesSlice
// Generate ChunkMetas for every label set.
for i, lset := range lbls {
var metas []chunks.Meta
for j := 0; j <= (i % 20); j++ {
metas = append(metas, chunks.Meta{
MinTime: int64(j * 10000),
MaxTime: int64((j + 1) * 10000),
Ref: rand.Uint64(),
Chunk: chunkenc.NewXORChunk(),
})
}
input = append(input, &indexWriterSeries{
labels: lset,
chunks: metas,
})
}
iw, err := NewWriter(filepath.Join(dir, indexFilename))
testutil.Ok(t, err)
testutil.Ok(t, iw.AddSymbols(symbols))
// Population procedure as done by compaction.
var (
postings = NewMemPostings()
values = map[string]map[string]struct{}{}
)
mi := newMockIndex()
for i, s := range input {
err = iw.AddSeries(uint64(i), s.labels, s.chunks...)
testutil.Ok(t, err)
testutil.Ok(t, mi.AddSeries(uint64(i), s.labels, s.chunks...))
for _, l := range s.labels {
valset, ok := values[l.Name]
if !ok {
valset = map[string]struct{}{}
values[l.Name] = valset
}
valset[l.Value] = struct{}{}
}
postings.Add(uint64(i), s.labels)
}
for k, v := range values {
var vals []string
for e := range v {
vals = append(vals, e)
}
sort.Strings(vals)
testutil.Ok(t, iw.WriteLabelIndex([]string{k}, vals))
testutil.Ok(t, mi.WriteLabelIndex([]string{k}, vals))
}
all := make([]uint64, len(lbls))
for i := range all {
all[i] = uint64(i)
}
err = iw.WritePostings("", "", newListPostings(all...))
testutil.Ok(t, err)
testutil.Ok(t, mi.WritePostings("", "", newListPostings(all...)))
for _, l := range postings.SortedKeys() {
err := iw.WritePostings(l.Name, l.Value, postings.Get(l.Name, l.Value))
testutil.Ok(t, err)
mi.WritePostings(l.Name, l.Value, postings.Get(l.Name, l.Value))
}
err = iw.Close()
testutil.Ok(t, err)
ir, err := NewFileReader(filepath.Join(dir, indexFilename))
testutil.Ok(t, err)
for p := range mi.postings {
gotp, err := ir.Postings(p.Name, p.Value)
testutil.Ok(t, err)
expp, err := mi.Postings(p.Name, p.Value)
testutil.Ok(t, err)
var lset, explset labels.Labels
var chks, expchks []chunks.Meta
for gotp.Next() {
testutil.Assert(t, expp.Next() == true, "")
ref := gotp.At()
err := ir.Series(ref, &lset, &chks)
testutil.Ok(t, err)
err = mi.Series(expp.At(), &explset, &expchks)
testutil.Ok(t, err)
testutil.Equals(t, explset, lset)
testutil.Equals(t, expchks, chks)
}
testutil.Assert(t, expp.Next() == false, "Unexpected Next() for "+p.Name+" "+p.Value)
testutil.Ok(t, gotp.Err())
}
for k, v := range mi.labelIndex {
tplsExp, err := NewStringTuples(v, 1)
testutil.Ok(t, err)
tplsRes, err := ir.LabelValues(k)
testutil.Ok(t, err)
testutil.Equals(t, tplsExp.Len(), tplsRes.Len())
for i := 0; i < tplsExp.Len(); i++ {
strsExp, err := tplsExp.At(i)
testutil.Ok(t, err)
strsRes, err := tplsRes.At(i)
testutil.Ok(t, err)
testutil.Equals(t, strsExp, strsRes)
}
}
gotSymbols, err := ir.Symbols()
testutil.Ok(t, err)
testutil.Equals(t, len(mi.symbols), len(gotSymbols))
for s := range mi.symbols {
_, ok := gotSymbols[s]
testutil.Assert(t, ok, "")
}
testutil.Ok(t, ir.Close())
}
func TestDecbufUvariantWithInvalidBuffer(t *testing.T) {
b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81})
db := encoding.NewDecbufUvarintAt(b, 0, castagnoliTable)
testutil.NotOk(t, db.Err())
}
func TestReaderWithInvalidBuffer(t *testing.T) {
b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81})
_, err := NewReader(b)
testutil.NotOk(t, err)
}
// TestNewFileReaderErrorNoOpenFiles ensures that in case of an error no file remains open.
func TestNewFileReaderErrorNoOpenFiles(t *testing.T) {
dir := testutil.NewTemporaryDirectory("block", t)
idxName := filepath.Join(dir.Path(), "index")
err := ioutil.WriteFile(idxName, []byte("corrupted contents"), 0644)
testutil.Ok(t, err)
_, err = NewFileReader(idxName)
testutil.NotOk(t, err)
// dir.Close will fail on Win if idxName fd is not closed on error path.
dir.Close()
}