prometheus/tsdb/block_test.go
Brian Brazil 767fa704b6 Load only some offsets into the symbol table into memory.
Rather than keeping the entire symbol table in memory, keep every nth
offset and walk from there to the entry we need. This ends up slightly
slower, ~360ms per 1M series returned from PostingsForMatchers which is
not much considering the rest of the CPU such a query would go on to
use.

Make LabelValues use the postings tables, rather than having
to do symbol lookups. Use yoloString, as PostingsForMatchers
doesn't need the strings to stick around and adjust the API
call to keep the Querier open until it's all marshalled.

Remove allocatedSymbols memory optimisation, we no longer keep all the
symbol strings in heap memory. Remove LabelValuesFor and LabelIndices,
they're dead code. Ensure we still have tests for label indices,
and add a missing test that we can work with old V1 format index files.

PostingsForMatchers performance is slightly better, with a big drop in
allocation counts due to using yoloString for LabelValues:

benchmark                                                               old ns/op     new ns/op     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              36698         36681         -0.05%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      522786        560887        +7.29%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      511652        537680        +5.09%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     522102        564239        +8.07%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            113689911     111795919     -1.67%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            135825572     132871085     -2.18%
BenchmarkPostingsForMatchers/Block/i=~""-4                              40782628      38038181      -6.73%
BenchmarkPostingsForMatchers/Block/i!=""-4                              31267869      29194327      -6.63%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              112733329     111568823     -1.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       112868153     111232029     -1.45%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        31338257      29349446      -6.35%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                32054482      29972436      -6.50%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              136504654     133968442     -1.86%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             27960350      27264997      -2.49%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       136765564     133860724     -2.12%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     163714583     159453668     -2.60%

benchmark                                                               old allocs     new allocs     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              6              6              +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      11             11             +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      11             11             +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     17             15             -11.76%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            100012         12             -99.99%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            200040         100040         -49.99%
BenchmarkPostingsForMatchers/Block/i=~""-4                              200045         100045         -49.99%
BenchmarkPostingsForMatchers/Block/i!=""-4                              200041         100041         -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              100017         17             -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       100023         23             -99.98%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        200046         100046         -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                200050         100050         -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              200049         100049         -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             111150         11150          -89.97%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       200055         100055         -49.99%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     311238         111238         -64.26%

benchmark                                                               old bytes     new bytes     delta
BenchmarkPostingsForMatchers/Block/n="1"-4                              296           296           +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4                      424           424           +0.00%
BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4                      424           424           +0.00%
BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4                     552           1544          +179.71%
BenchmarkPostingsForMatchers/Block/i=~".*"-4                            1600482       1606125       +0.35%
BenchmarkPostingsForMatchers/Block/i=~".+"-4                            17259065      17264709      +0.03%
BenchmarkPostingsForMatchers/Block/i=~""-4                              17259150      17264780      +0.03%
BenchmarkPostingsForMatchers/Block/i!=""-4                              17259048      17264680      +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4              1600610       1606242       +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4       1600813       1606434       +0.35%
BenchmarkPostingsForMatchers/Block/n="1",i!=""-4                        17259176      17264808      +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4                17259304      17264936      +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4              17259333      17264965      +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4             3142628       3148262       +0.18%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4       17259509      17265141      +0.03%
BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4     20405680      20416944      +0.06%

However overall Select performance is down and involves more allocs, due to
having to do more than a simple map lookup to resolve a symbol and that all the strings
returned are allocated:

benchmark                                           old ns/op     new ns/op      delta
BenchmarkQuerierSelect/Block/1of1000000-4           506092636     862678244      +70.46%
BenchmarkQuerierSelect/Block/10of1000000-4          505638968     860917636      +70.26%
BenchmarkQuerierSelect/Block/100of1000000-4         505229450     882150048      +74.60%
BenchmarkQuerierSelect/Block/1000of1000000-4        515905414     862241115      +67.13%
BenchmarkQuerierSelect/Block/10000of1000000-4       516785354     874841110      +69.29%
BenchmarkQuerierSelect/Block/100000of1000000-4      540742808     907030187      +67.74%
BenchmarkQuerierSelect/Block/1000000of1000000-4     815224288     1181236903     +44.90%

benchmark                                           old allocs     new allocs     delta
BenchmarkQuerierSelect/Block/1of1000000-4           4000020        6000020        +50.00%
BenchmarkQuerierSelect/Block/10of1000000-4          4000038        6000038        +50.00%
BenchmarkQuerierSelect/Block/100of1000000-4         4000218        6000218        +50.00%
BenchmarkQuerierSelect/Block/1000of1000000-4        4002018        6002018        +49.97%
BenchmarkQuerierSelect/Block/10000of1000000-4       4020018        6020018        +49.75%
BenchmarkQuerierSelect/Block/100000of1000000-4      4200018        6200018        +47.62%
BenchmarkQuerierSelect/Block/1000000of1000000-4     6000018        8000019        +33.33%

benchmark                                           old bytes     new bytes     delta
BenchmarkQuerierSelect/Block/1of1000000-4           176001468     227201476     +29.09%
BenchmarkQuerierSelect/Block/10of1000000-4          176002620     227202628     +29.09%
BenchmarkQuerierSelect/Block/100of1000000-4         176014140     227214148     +29.09%
BenchmarkQuerierSelect/Block/1000of1000000-4        176129340     227329348     +29.07%
BenchmarkQuerierSelect/Block/10000of1000000-4       177281340     228481348     +28.88%
BenchmarkQuerierSelect/Block/100000of1000000-4      188801340     240001348     +27.12%
BenchmarkQuerierSelect/Block/1000000of1000000-4     304001340     355201616     +16.84%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-12-17 18:56:58 +00:00

390 lines
12 KiB
Go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tsdb
import (
"context"
"encoding/binary"
"errors"
"hash/crc32"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"strconv"
"testing"
"github.com/go-kit/kit/log"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/tsdb/chunks"
"github.com/prometheus/prometheus/tsdb/fileutil"
"github.com/prometheus/prometheus/tsdb/tsdbutil"
"github.com/prometheus/prometheus/util/testutil"
)
// TestBlockMetaMustNeverBeVersion2 writes a default BlockMeta to disk, reads
// it back and asserts that the version found is not 2.
//
// Background: Prometheus 2.1.0 had a bug that falsely bumped the meta.json
// version to 2. A migration resets it to 1, and the next real bump should go
// straight to version 3 to avoid confusion and issues.
func TestBlockMetaMustNeverBeVersion2(t *testing.T) {
	tmp, err := ioutil.TempDir("", "metaversion")
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, os.RemoveAll(tmp))
	}()

	// Round-trip an empty meta through the meta file helpers.
	logger := log.NewNopLogger()
	_, err = writeMetaFile(logger, tmp, &BlockMeta{})
	testutil.Ok(t, err)

	written, _, err := readMetaFile(tmp)
	testutil.Ok(t, err)

	isVersion2 := written.Version == 2
	testutil.Assert(t, !isVersion2, "meta.json version must never be 2")
}
// TestSetCompactionFailed checks that marking a block as having failed
// compaction is visible on the open block and is still set after the block
// has been closed and opened again from the same directory.
func TestSetCompactionFailed(t *testing.T) {
	dir, err := ioutil.TempDir("", "test")
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, os.RemoveAll(dir))
	}()

	blockDir := createBlock(t, dir, genSeries(1, 1, 0, 1))

	// First open: the flag starts cleared and flips once it is set.
	blk, err := OpenBlock(nil, blockDir, nil)
	testutil.Ok(t, err)
	testutil.Equals(t, false, blk.meta.Compaction.Failed)
	testutil.Ok(t, blk.setCompactionFailed())
	testutil.Equals(t, true, blk.meta.Compaction.Failed)
	testutil.Ok(t, blk.Close())

	// Second open: the flag must have survived the close.
	blk, err = OpenBlock(nil, blockDir, nil)
	testutil.Ok(t, err)
	testutil.Equals(t, true, blk.meta.Compaction.Failed)
	testutil.Ok(t, blk.Close())
}
// TestCreateBlock creates a block from one generated series and checks that
// it can be opened and closed without error.
func TestCreateBlock(t *testing.T) {
	dir, err := ioutil.TempDir("", "test")
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, os.RemoveAll(dir))
	}()

	blockDir := createBlock(t, dir, genSeries(1, 1, 0, 10))
	blk, err := OpenBlock(nil, blockDir, nil)
	// A failed open stops the test here, so Close below only ever runs on a
	// successfully opened block.
	testutil.Ok(t, err)
	testutil.Ok(t, blk.Close())
}
// TestCorruptedChunk corrupts the first chunk segment file of a freshly
// created single-series block in several ways and checks that the damage is
// reported with the expected error, either when the block is opened or when
// the series is queried.
func TestCorruptedChunk(t *testing.T) {
	for name, tc := range map[string]struct {
		// corrFunc applies the corruption to the opened segment file. It
		// receives the subtest's *testing.T so that a failure inside it is
		// reported against, and stops, the subtest that is actually running.
		corrFunc func(t *testing.T, f *os.File)
		openErr  error // Expected OpenBlock error; nil when opening should succeed.
		queryErr error // Expected error from iterating the selected series set.
	}{
		"invalid header size": {
			func(t *testing.T, f *os.File) {
				testutil.Ok(t, f.Truncate(1))
			},
			errors.New("invalid segment header in segment 0: invalid size"),
			nil,
		},
		"invalid magic number": {
			func(t *testing.T, f *os.File) {
				magicChunksOffset := int64(0)
				_, err := f.Seek(magicChunksOffset, 0)
				testutil.Ok(t, err)

				// Set invalid magic number.
				b := make([]byte, chunks.MagicChunksSize)
				binary.BigEndian.PutUint32(b[:chunks.MagicChunksSize], 0x00000000)
				n, err := f.Write(b)
				testutil.Ok(t, err)
				testutil.Equals(t, chunks.MagicChunksSize, n)
			},
			errors.New("invalid magic number 0"),
			nil,
		},
		"invalid chunk format version": {
			func(t *testing.T, f *os.File) {
				chunksFormatVersionOffset := int64(4)
				_, err := f.Seek(chunksFormatVersionOffset, 0)
				testutil.Ok(t, err)

				// Set invalid chunk format version.
				b := make([]byte, chunks.ChunksFormatVersionSize)
				b[0] = 0
				n, err := f.Write(b)
				testutil.Ok(t, err)
				testutil.Equals(t, chunks.ChunksFormatVersionSize, n)
			},
			errors.New("invalid chunk format version 0"),
			nil,
		},
		"chunk not enough bytes to read the chunk length": {
			func(t *testing.T, f *os.File) {
				// Truncate one byte after the segment header.
				testutil.Ok(t, f.Truncate(chunks.SegmentHeaderSize+1))
			},
			nil,
			errors.New("segment doesn't include enough bytes to read the chunk size data field - required:13, available:9"),
		},
		"chunk not enough bytes to read the data": {
			func(t *testing.T, f *os.File) {
				fi, err := f.Stat()
				testutil.Ok(t, err)

				// Drop the last byte of the file.
				testutil.Ok(t, f.Truncate(fi.Size()-1))
			},
			nil,
			errors.New("segment doesn't include enough bytes to read the chunk - required:26, available:25"),
		},
		"checksum mismatch": {
			func(t *testing.T, f *os.File) {
				fi, err := f.Stat()
				testutil.Ok(t, err)

				// Get the chunk data end offset.
				chkEndOffset := int(fi.Size()) - crc32.Size

				// Seek to the last byte of chunk data and modify it.
				_, err = f.Seek(int64(chkEndOffset-1), 0)
				testutil.Ok(t, err)
				n, err := f.Write([]byte("x"))
				testutil.Ok(t, err)
				testutil.Equals(t, 1, n)
			},
			nil,
			errors.New("checksum mismatch expected:cfc0526c, actual:34815eae"),
		},
	} {
		t.Run(name, func(t *testing.T) {
			tmpdir, err := ioutil.TempDir("", "test_open_block_chunk_corrupted")
			testutil.Ok(t, err)
			defer func() {
				testutil.Ok(t, os.RemoveAll(tmpdir))
			}()

			series := newSeries(map[string]string{"a": "b"}, []tsdbutil.Sample{sample{1, 1}})
			blockDir := createBlock(t, tmpdir, []Series{series})
			files, err := sequenceFiles(chunkDir(blockDir))
			testutil.Ok(t, err)
			testutil.Assert(t, len(files) > 0, "No chunk created.")

			f, err := os.OpenFile(files[0], os.O_RDWR, 0666)
			testutil.Ok(t, err)

			// Apply corruption function.
			tc.corrFunc(t, f)
			testutil.Ok(t, f.Close())

			// Check open err.
			b, err := OpenBlock(nil, blockDir, nil)
			if tc.openErr != nil {
				if err == nil {
					// Unexpected success: release the block before failing.
					testutil.Ok(t, b.Close())
				}
				// Fail cleanly instead of dereferencing a nil error below.
				testutil.Assert(t, err != nil, "expected OpenBlock to fail with %q", tc.openErr.Error())
				testutil.Equals(t, tc.openErr.Error(), err.Error())
				return
			}
			testutil.Ok(t, err)
			// Deferred calls run last-in-first-out: the querier is closed
			// before the block, and both before the temp dir is removed.
			defer func() {
				testutil.Ok(t, b.Close())
			}()

			querier, err := NewBlockQuerier(b, 0, 1)
			testutil.Ok(t, err)
			defer func() {
				testutil.Ok(t, querier.Close())
			}()

			set, err := querier.Select(labels.MustNewMatcher(labels.MatchEqual, "a", "b"))
			testutil.Ok(t, err)

			// Check query err.
			testutil.Equals(t, false, set.Next())
			testutil.Assert(t, set.Err() != nil, "expected query to fail with %q", tc.queryErr.Error())
			testutil.Equals(t, tc.queryErr.Error(), set.Err().Error())
		})
	}
}
// TestBlockSize checks that the size a block reports agrees with the size of
// its directory on disk: right after creation, after a delete, and after the
// block has been compacted following that delete.
func TestBlockSize(t *testing.T) {
	tmpdir, err := ioutil.TempDir("", "test_blockSize")
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, os.RemoveAll(tmpdir))
	}()

	// Stage 1: create a block and compare the reported size vs actual disk size.
	initDir := createBlock(t, tmpdir, genSeries(10, 1, 1, 100))
	initBlock, err := OpenBlock(nil, initDir, nil)
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, initBlock.Close())
	}()

	reportedInit := initBlock.Size()
	onDiskInit, err := fileutil.DirSize(initBlock.Dir())
	testutil.Ok(t, err)
	testutil.Equals(t, reportedInit, onDiskInit)

	// Stage 2: delete some series and check the sizes again.
	matchAll := labels.MustNewMatcher(labels.MatchRegexp, "", ".*")
	testutil.Ok(t, initBlock.Delete(1, 10, matchAll))

	reportedAfterDelete := initBlock.Size()
	testutil.Assert(t, reportedAfterDelete > reportedInit, "after a delete the block size should be bigger as the tombstone file should grow %v > %v", reportedAfterDelete, reportedInit)
	onDiskAfterDelete, err := fileutil.DirSize(initDir)
	testutil.Ok(t, err)
	testutil.Equals(t, reportedAfterDelete, onDiskAfterDelete, "after a delete reported block size doesn't match actual disk size")

	// Stage 3: compact the block and check the size of the resulting block.
	compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil)
	testutil.Ok(t, err)
	compactedID, err := compactor.Compact(tmpdir, []string{initBlock.Dir()}, nil)
	testutil.Ok(t, err)

	compactedDir := filepath.Join(tmpdir, compactedID.String())
	compactedBlock, err := OpenBlock(nil, compactedDir, nil)
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, compactedBlock.Close())
	}()

	reportedAfterCompact := compactedBlock.Size()
	onDiskAfterCompact, err := fileutil.DirSize(compactedBlock.Dir())
	testutil.Ok(t, err)
	testutil.Assert(t, onDiskAfterDelete > onDiskAfterCompact, "after a delete and compaction the block size should be smaller %v,%v", onDiskAfterDelete, onDiskAfterCompact)
	testutil.Equals(t, reportedAfterCompact, onDiskAfterCompact, "after a delete and compaction reported block size doesn't match actual disk size")
}
// TestReadIndexFormatV1 opens the block stored under testdata/index_format_v1
// and checks that queries against it return the expected series and samples.
func TestReadIndexFormatV1(t *testing.T) {
	/* The block here was produced at commit
	   07ef80820ef1250db82f9544f3fcf7f0f63ccee0 with:
	   db, _ := Open("v1db", nil, nil, nil)
	   app := db.Appender()
	   app.Add(labels.FromStrings("foo", "bar"), 1, 2)
	   app.Add(labels.FromStrings("foo", "baz"), 3, 4)
	   app.Add(labels.FromStrings("foo", "meh"), 1000*3600*4, 4) // Not in the block.
	   app.Commit()
	   db.compact()
	   db.Close()
	*/

	blockDir := filepath.Join("testdata", "index_format_v1")
	block, err := OpenBlock(nil, blockDir, nil)
	testutil.Ok(t, err)
	// Release the block's readers once the test is done.
	// NOTE(review): this presumes the query helper closes the querier it is
	// handed (a fresh querier is created for every call below) - confirm.
	defer func() {
		testutil.Ok(t, block.Close())
	}()

	// Exact match on a single series. Expected value goes first so that a
	// failure message labels the two sides correctly.
	q, err := NewBlockQuerier(block, 0, 1000)
	testutil.Ok(t, err)
	testutil.Equals(t,
		map[string][]tsdbutil.Sample{`{foo="bar"}`: {sample{t: 1, v: 2}}},
		query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")))

	// Negated regexp matcher selecting both series stored in the block.
	q, err = NewBlockQuerier(block, 0, 1000)
	testutil.Ok(t, err)
	testutil.Equals(t,
		map[string][]tsdbutil.Sample{
			`{foo="bar"}`: {sample{t: 1, v: 2}},
			`{foo="baz"}`: {sample{t: 3, v: 4}},
		},
		query(t, q, labels.MustNewMatcher(labels.MatchNotRegexp, "foo", "^.$")))
}
// createBlock builds a head from the given series, writes it out as a block
// under dir and returns the directory of the new block.
func createBlock(tb testing.TB, dir string, series []Series) string {
	head := createHead(tb, series)
	return createBlockFromHead(tb, dir, head)
}
// createBlockFromHead writes the contents of head as a block under dir,
// creating dir if needed, and returns the path of the new block directory
// (dir joined with the block's ULID).
func createBlockFromHead(tb testing.TB, dir string, head *Head) string {
	c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil)
	testutil.Ok(tb, err)

	testutil.Ok(tb, os.MkdirAll(dir, 0777))

	// Block intervals are half-open, [b.MinTime, b.MaxTime), so the block's
	// maxt has to be one millisecond past the last sample it should include.
	mint, maxt := head.MinTime(), head.MaxTime()+1
	id, err := c.Write(dir, head, mint, maxt, nil)
	testutil.Ok(tb, err)

	return filepath.Join(dir, id.String())
}
// createHead creates a new Head, appends every sample of the given series to
// it in one appender and commits. Close is called on the head (deferred)
// before it is returned to the caller.
func createHead(tb testing.TB, series []Series) *Head {
	head, err := NewHead(nil, nil, nil, 2*60*60*1000)
	testutil.Ok(tb, err)
	defer head.Close()

	app := head.Appender()
	for _, s := range series {
		var (
			ref uint64
			it  = s.Iterator()
		)
		for it.Next() {
			ts, val := it.At()
			// Once a reference is known, try the fast path first and fall
			// back to a full Add only if it reports an error.
			if ref != 0 && app.AddFast(ref, ts, val) == nil {
				continue
			}
			ref, err = app.Add(s.Labels(), ts, val)
			testutil.Ok(tb, err)
		}
		testutil.Ok(tb, it.Err())
	}
	testutil.Ok(tb, app.Commit())

	return head
}
// Base label name and value used by genSeries when building label sets.
const (
// defaultLabelName is the label that carries the series index; genSeries
// also uses it, with a numeric suffix, as the name of each extra label.
defaultLabelName = "labelName"
// defaultLabelValue is the prefix of the values genSeries gives to the
// extra labels; a numeric suffix is appended to it.
defaultLabelValue = "labelValue"
)
// genSeries returns totalSeries series, each carrying labelCount labels and
// one random-valued sample per timestamp in the half-open range [mint, maxt).
// It returns nil when totalSeries or labelCount is zero.
func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
	if totalSeries == 0 || labelCount == 0 {
		return nil
	}

	out := make([]Series, totalSeries)
	for idx := range out {
		// The first label makes each series unique; the remaining ones are
		// numbered filler labels added until labelCount is reached.
		labelSet := make(map[string]string, labelCount)
		labelSet[defaultLabelName] = strconv.Itoa(idx)
		for n := 1; len(labelSet) < labelCount; n++ {
			suffix := strconv.Itoa(n)
			labelSet[defaultLabelName+suffix] = defaultLabelValue + suffix
		}

		samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
		for ts := mint; ts < maxt; ts++ {
			samples = append(samples, sample{t: ts, v: rand.Float64()})
		}

		out[idx] = newSeries(labelSet, samples)
	}
	return out
}
// populateSeries returns one series per non-empty label set in lbls, each
// with one random-valued sample per timestamp in the inclusive range
// [mint, maxt]. Empty label sets are skipped; nil is returned when lbls is
// empty.
func populateSeries(lbls []map[string]string, mint, maxt int64) []Series {
	if len(lbls) == 0 {
		return nil
	}

	out := make([]Series, 0, len(lbls))
	for _, labelSet := range lbls {
		if len(labelSet) > 0 {
			samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
			for ts := mint; ts <= maxt; ts++ {
				samples = append(samples, sample{t: ts, v: rand.Float64()})
			}
			out = append(out, newSeries(labelSet, samples))
		}
	}
	return out
}