From 767fa704b6c08309198ba028d2acccd566d219b7 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Thu, 12 Dec 2019 16:55:32 +0000 Subject: [PATCH] Load only some offsets into the symbol table into memory. Rather than keeping the entire symbol table in memory, keep every nth offset and walk from there to the entry we need. This ends up slightly slower, ~360ms per 1M series returned from PostingsForMatchers, which is not much considering the rest of the CPU such a query would go on to use. Make LabelValues use the postings tables, rather than having to do symbol lookups. Use yoloString, as PostingsForMatchers doesn't need the strings to stick around, and adjust the API call to keep the Querier open until it's all marshalled. Remove allocatedSymbols memory optimisation, we no longer keep all the symbol strings in heap memory. Remove LabelValuesFor and LabelIndices, they're dead code. Ensure we still have tests for label indices, and add a missing test that we can work with old V1 Format index files. 
PostingsForMatchers performance is slightly better, with a big drop in allocation counts due to using yoloString for LabelValues: benchmark old ns/op new ns/op delta BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05% BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29% BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09% BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07% BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67% BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18% BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73% BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45% BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35% BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86% BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60% benchmark old allocs new allocs delta BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00% BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76% BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99% BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99% BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99% BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99% 
BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98% BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26% benchmark old bytes new bytes delta BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00% BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71% BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35% BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03% BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03% BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35% BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06% However overall Select performance is down and involves more allocs, due to having to do more than a 
simple map lookup to resolve a symbol and that all the strings returned are allocated: benchmark old ns/op new ns/op delta BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46% BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26% BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60% BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13% BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29% BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74% BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90% benchmark old allocs new allocs delta BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00% BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00% BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00% BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97% BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75% BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62% BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 +33.33% benchmark old bytes new bytes delta BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09% BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09% BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09% BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07% BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88% BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12% BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84% Signed-off-by: Brian Brazil --- tsdb/block.go | 9 - tsdb/block_test.go | 31 ++ tsdb/head.go | 10 - tsdb/index/index.go | 339 +++++++++++--------- tsdb/index/index_test.go | 42 ++- tsdb/querier.go | 9 +- tsdb/querier_test.go | 8 - tsdb/testdata/index_format_v1/chunks/000001 | Bin 0 -> 44 bytes 
tsdb/testdata/index_format_v1/index | Bin 0 -> 238 bytes tsdb/testdata/index_format_v1/meta.json | 17 + tsdb/testdata/index_format_v1/tombstones | Bin 0 -> 9 bytes web/api/v1/api.go | 9 +- 12 files changed, 268 insertions(+), 206 deletions(-) create mode 100644 tsdb/testdata/index_format_v1/chunks/000001 create mode 100644 tsdb/testdata/index_format_v1/index create mode 100644 tsdb/testdata/index_format_v1/meta.json create mode 100644 tsdb/testdata/index_format_v1/tombstones diff --git a/tsdb/block.go b/tsdb/block.go index 8f8d5e0555..28b3148605 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -83,10 +83,6 @@ type IndexReader interface { // Returns ErrNotFound if the ref does not resolve to a known series. Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error - // LabelIndices returns a list of string tuples for which a label value index exists. - // NOTE: This is deprecated. Use `LabelNames()` instead. - LabelIndices() ([][]string, error) - // LabelNames returns all the unique label names present in the index in sorted order. 
LabelNames() ([]string, error) @@ -465,11 +461,6 @@ func (r blockIndexReader) Series(ref uint64, lset *labels.Labels, chks *[]chunks return nil } -func (r blockIndexReader) LabelIndices() ([][]string, error) { - ss, err := r.ir.LabelIndices() - return ss, errors.Wrapf(err, "block: %s", r.b.Meta().ULID) -} - func (r blockIndexReader) LabelNames() ([]string, error) { return r.b.LabelNames() } diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 41d022e91b..f746ccd4b1 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -264,6 +264,37 @@ func TestBlockSize(t *testing.T) { } } +func TestReadIndexFormatV1(t *testing.T) { + /* The block here was produced at commit + 07ef80820ef1250db82f9544f3fcf7f0f63ccee0 with: + db, _ := Open("v1db", nil, nil, nil) + app := db.Appender() + app.Add(labels.FromStrings("foo", "bar"), 1, 2) + app.Add(labels.FromStrings("foo", "baz"), 3, 4) + app.Add(labels.FromStrings("foo", "meh"), 1000*3600*4, 4) // Not in the block. + app.Commit() + db.compact() + db.Close() + */ + + blockDir := filepath.Join("testdata", "index_format_v1") + block, err := OpenBlock(nil, blockDir, nil) + testutil.Ok(t, err) + + q, err := NewBlockQuerier(block, 0, 1000) + testutil.Ok(t, err) + testutil.Equals(t, query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")), + map[string][]tsdbutil.Sample{`{foo="bar"}`: []tsdbutil.Sample{sample{t: 1, v: 2}}}) + + q, err = NewBlockQuerier(block, 0, 1000) + testutil.Ok(t, err) + testutil.Equals(t, query(t, q, labels.MustNewMatcher(labels.MatchNotRegexp, "foo", "^.$")), + map[string][]tsdbutil.Sample{ + `{foo="bar"}`: []tsdbutil.Sample{sample{t: 1, v: 2}}, + `{foo="baz"}`: []tsdbutil.Sample{sample{t: 3, v: 4}}, + }) +} + // createBlock creates a block with given set of series and returns its dir. 
func createBlock(tb testing.TB, dir string, series []Series) string { return createBlockFromHead(tb, dir, createHead(tb, series)) diff --git a/tsdb/head.go b/tsdb/head.go index 08379d614c..550e9a984c 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1456,16 +1456,6 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks return nil } -func (h *headIndexReader) LabelIndices() ([][]string, error) { - h.head.symMtx.RLock() - defer h.head.symMtx.RUnlock() - res := [][]string{} - for s := range h.head.values { - res = append(res, []string{s}) - } - return res, nil -} - func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { // Just using `getOrSet` below would be semantically sufficient, but we'd create // a new series on every sample inserted via Add(), which causes allocations diff --git a/tsdb/index/index.go b/tsdb/index/index.go index e51ff0e8d7..7a4cc3ae93 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -26,6 +26,7 @@ import ( "path/filepath" "sort" "strings" + "unsafe" "github.com/pkg/errors" "github.com/prometheus/prometheus/pkg/labels" @@ -46,8 +47,6 @@ const ( // FormatV2 represents 2 version of index. FormatV2 = 2 - labelNameSeparator = "\xff" - indexFilename = "index" ) @@ -790,20 +789,13 @@ type Reader struct { // Close that releases the underlying resources of the byte slice. c io.Closer - // Cached hashmaps of section offsets. - labels map[string]uint64 // Map of LabelName to a list of some LabelValues's position in the offset table. // The first and last values for each name are always present. postings map[string][]postingOffset - // Cache of read symbols. Strings that are returned when reading from the - // block are always backed by true strings held in here rather than - // strings that are backed by byte slices from the mmap'd index file. This - // prevents memory faults when applications work with read symbols after - // the block has been unmapped. 
The older format has sparse indexes so a map - // must be used, but the new format is not so we can use a slice. - symbolsV1 map[uint32]string - symbolsV2 []string - symbolsTableSize uint64 + + symbols *Symbols + nameSymbols map[uint32]string // Cache of the label name symbol lookups, + // as there are not many and they are half of all lookups. dec *Decoder @@ -862,7 +854,6 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { r := &Reader{ b: b, c: c, - labels: map[string]uint64{}, postings: map[string][]postingOffset{}, } @@ -885,35 +876,11 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { return nil, errors.Wrap(err, "read TOC") } - r.symbolsV2, r.symbolsV1, err = ReadSymbols(r.b, r.version, int(r.toc.Symbols)) + r.symbols, err = NewSymbols(r.b, r.version, int(r.toc.Symbols)) if err != nil { return nil, errors.Wrap(err, "read symbols") } - // Use the strings already allocated by symbols, rather than - // re-allocating them again below. - // Additionally, calculate symbolsTableSize. - allocatedSymbols := make(map[string]string, len(r.symbolsV1)+len(r.symbolsV2)) - for _, s := range r.symbolsV1 { - r.symbolsTableSize += uint64(len(s) + 8) - allocatedSymbols[s] = s - } - for _, s := range r.symbolsV2 { - r.symbolsTableSize += uint64(len(s) + 8) - allocatedSymbols[s] = s - } - - if err := ReadOffsetTable(r.b, r.toc.LabelIndicesTable, func(key []string, off uint64, _ int) error { - if len(key) != 1 { - return errors.Errorf("unexpected key length for label indices table %d", len(key)) - } - - r.labels[allocatedSymbols[key[0]]] = off - return nil - }); err != nil { - return nil, errors.Wrap(err, "read label index table") - } - var lastKey []string lastOff := 0 valueCount := 0 @@ -925,16 +892,16 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { } if _, ok := r.postings[key[0]]; !ok { // Next label name. 
- r.postings[allocatedSymbols[key[0]]] = []postingOffset{} + r.postings[key[0]] = []postingOffset{} if lastKey != nil { // Always include last value for each label name. - r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: allocatedSymbols[lastKey[1]], off: lastOff}) + r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) } lastKey = nil valueCount = 0 } if valueCount%32 == 0 { - r.postings[key[0]] = append(r.postings[key[0]], postingOffset{value: allocatedSymbols[key[1]], off: off}) + r.postings[key[0]] = append(r.postings[key[0]], postingOffset{value: key[1], off: off}) lastKey = nil } else { lastKey = key @@ -946,7 +913,7 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { return nil, errors.Wrap(err, "read postings table") } if lastKey != nil { - r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: allocatedSymbols[lastKey[1]], off: lastOff}) + r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) } // Trim any extra space in the slices. for k, v := range r.postings { @@ -955,6 +922,18 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { r.postings[k] = l } + r.nameSymbols = make(map[uint32]string, len(r.postings)) + for k := range r.postings { + if k == "" { + continue + } + off, err := r.symbols.ReverseLookup(k) + if err != nil { + return nil, errors.Wrap(err, "reverse symbol lookup") + } + r.nameSymbols[off] = k + } + r.dec = &Decoder{LookupSymbol: r.lookupSymbol} return r, nil @@ -993,39 +972,127 @@ func (r *Reader) PostingsRanges() (map[labels.Label]Range, error) { return m, nil } -// ReadSymbols reads the symbol table fully into memory and allocates proper strings for them. -// Strings backed by the mmap'd memory would cause memory faults if applications keep using them -// after the reader is closed. 
-func ReadSymbols(bs ByteSlice, version int, off int) ([]string, map[uint32]string, error) { +type Symbols struct { + bs ByteSlice + version int + off int + + offsets []int + seen int +} + +const symbolFactor = 32 + +// NewSymbols returns a Symbols object for symbol lookups. +func NewSymbols(bs ByteSlice, version int, off int) (*Symbols, error) { + s := &Symbols{ + bs: bs, + version: version, + off: off, + } if off == 0 { - return nil, nil, nil + // Only happens in some tests. + return nil, nil } d := encoding.NewDecbufAt(bs, off, castagnoliTable) - var ( - origLen = d.Len() - cnt = d.Be32int() - basePos = uint32(off) + 4 - nextPos = basePos + uint32(origLen-d.Len()) - symbolSlice []string - symbols = map[uint32]string{} + origLen = d.Len() + cnt = d.Be32int() + basePos = off + 4 ) - if version == FormatV2 { - symbolSlice = make([]string, 0, cnt) - } - - for d.Err() == nil && d.Len() > 0 && cnt > 0 { - s := d.UvarintStr() - - if version == FormatV2 { - symbolSlice = append(symbolSlice, s) - } else { - symbols[nextPos] = s - nextPos = basePos + uint32(origLen-d.Len()) + s.offsets = make([]int, 0, cnt/symbolFactor) + for d.Err() == nil && s.seen < cnt { + if s.seen%symbolFactor == 0 { + s.offsets = append(s.offsets, basePos+origLen-d.Len()) } + d.UvarintBytes() // The symbol. + s.seen++ + } + if d.Err() != nil { + return nil, d.Err() + } + return s, nil +} + +func (s Symbols) Lookup(o uint32) (string, error) { + d := encoding.Decbuf{ + B: s.bs.Range(0, s.bs.Len()), + } + if s.version == FormatV2 { + if int(o) > s.seen { + return "", errors.Errorf("unknown symbol offset %d", o) + } + d.Skip(s.offsets[int(o/symbolFactor)]) + // Walk until we find the one we want. 
+ for i := o - (o / symbolFactor * symbolFactor); i > 0; i-- { + d.UvarintBytes() + } + } else { + d.Skip(int(o)) + } + sym := d.UvarintStr() + if d.Err() != nil { + return "", d.Err() + } + return sym, nil +} + +func (s Symbols) ReverseLookup(sym string) (uint32, error) { + i := sort.Search(len(s.offsets), func(i int) bool { + // Any decoding errors here will be lost, however + // we already read through all of this at startup. + d := encoding.Decbuf{ + B: s.bs.Range(0, s.bs.Len()), + } + d.Skip(s.offsets[i]) + return yoloString(d.UvarintBytes()) > sym + }) + d := encoding.Decbuf{ + B: s.bs.Range(0, s.bs.Len()), + } + if i > 0 { + i-- + } + d.Skip(s.offsets[i]) + res := i * 32 + var lastLen int + var lastSymbol string + for d.Err() == nil && res <= s.seen { + lastLen = d.Len() + lastSymbol = yoloString(d.UvarintBytes()) + if lastSymbol >= sym { + break + } + res++ + } + if d.Err() != nil { + return 0, d.Err() + } + if lastSymbol != sym { + return 0, errors.Errorf("unknown symbol %q", sym) + } + if s.version == FormatV2 { + return uint32(res), nil + } + return uint32(s.bs.Len() - lastLen), nil +} + +func (s Symbols) All() (map[string]struct{}, error) { + d := encoding.NewDecbufAt(s.bs, s.off, castagnoliTable) + cnt := d.Be32int() + res := make(map[string]struct{}, cnt) + for d.Err() == nil && cnt > 0 { + res[d.UvarintStr()] = struct{}{} cnt-- } - return symbolSlice, symbols, errors.Wrap(d.Err(), "read symbols") + if d.Err() != nil { + return nil, d.Err() + } + return res, nil +} + +func (s Symbols) Size() int { + return len(s.offsets) * 8 } // ReadOffsetTable reads an offset table and at the given position calls f for each @@ -1064,60 +1131,65 @@ func (r *Reader) Close() error { } func (r *Reader) lookupSymbol(o uint32) (string, error) { - if int(o) < len(r.symbolsV2) { - return r.symbolsV2[o], nil + if s, ok := r.nameSymbols[o]; ok { + return s, nil } - s, ok := r.symbolsV1[o] - if !ok { - return "", errors.Errorf("unknown symbol offset %d", o) - } - return s, nil 
+ return r.symbols.Lookup(o) } // Symbols returns a set of symbols that exist within the index. func (r *Reader) Symbols() (map[string]struct{}, error) { - res := make(map[string]struct{}, len(r.symbolsV1)+len(r.symbolsV2)) - - for _, s := range r.symbolsV1 { - res[s] = struct{}{} - } - for _, s := range r.symbolsV2 { - res[s] = struct{}{} - } - return res, nil + return r.symbols.All() } // SymbolTableSize returns the symbol table size in bytes. func (r *Reader) SymbolTableSize() uint64 { - return r.symbolsTableSize + return uint64(r.symbols.Size()) } // LabelValues returns value tuples that exist for the given label name tuples. +// It is not safe to use the return value beyond the lifetime of the byte slice +// passed into the Reader. func (r *Reader) LabelValues(names ...string) (StringTuples, error) { - - key := strings.Join(names, labelNameSeparator) - off, ok := r.labels[key] + if len(names) != 1 { + return nil, errors.Errorf("only one label name supported") + } + e, ok := r.postings[names[0]] if !ok { - // XXX(fabxc): hot fix. Should return a partial data error and handle cases - // where the entire block has no data gracefully. return emptyStringTuples{}, nil - //return nil, fmt.Errorf("label index doesn't exist") } + if len(e) == 0 { + return emptyStringTuples{}, nil + } + values := make([]string, 0, len(e)*symbolFactor) - d := encoding.NewDecbufAt(r.b, int(off), castagnoliTable) - - nc := d.Be32int() - d.Be32() // consume unused value entry count. + d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) + d.Skip(e[0].off) + lastVal := e[len(e)-1].value + skip := 0 + for d.Err() == nil { + if skip == 0 { + // These are always the same number of bytes, + // and it's faster to skip than parse. + skip = d.Len() + d.Uvarint() // Keycount. + d.UvarintBytes() // Label name. + skip -= d.Len() + } else { + d.Skip(skip) + } + s := yoloString(d.UvarintBytes()) //Label value. 
+ values = append(values, s) + if s == lastVal { + break + } + d.Uvarint64() // Offset. + } if d.Err() != nil { - return nil, errors.Wrap(d.Err(), "read label value index") + return nil, errors.Wrap(d.Err(), "get postings offset entry") } - st := &serializedStringTuples{ - idsCount: nc, - idsBytes: d.Get(), - lookup: r.lookupSymbol, - } - return st, nil + return NewStringTuples(values, 1) } type emptyStringTuples struct{} @@ -1125,16 +1197,6 @@ type emptyStringTuples struct{} func (emptyStringTuples) At(i int) ([]string, error) { return nil, nil } func (emptyStringTuples) Len() int { return 0 } -// LabelIndices returns a slice of label names for which labels or label tuples value indices exist. -// NOTE: This is deprecated. Use `LabelNames()` instead. -func (r *Reader) LabelIndices() ([][]string, error) { - var res [][]string - for s := range r.labels { - res = append(res, strings.Split(s, labelNameSeparator)) - } - return res, nil -} - // Series reads the series with the given ID and writes its labels and chunks into lbls and chks. func (r *Reader) Series(id uint64, lbls *labels.Labels, chks *[]chunks.Meta) error { offset := id @@ -1241,22 +1303,12 @@ func (r *Reader) Size() int64 { // LabelNames returns all the unique label names present in the index. func (r *Reader) LabelNames() ([]string, error) { - labelNamesMap := make(map[string]struct{}, len(r.labels)) - for key := range r.labels { - // 'key' contains the label names concatenated with the - // delimiter 'labelNameSeparator'. - names := strings.Split(key, labelNameSeparator) - for _, name := range names { - if name == allPostingsKey.Name { - // This is not from any metric. - // It is basically an empty label name. - continue - } - labelNamesMap[name] = struct{}{} + labelNames := make([]string, 0, len(r.postings)) + for name := range r.postings { + if name == allPostingsKey.Name { + // This is not from any metric. 
+ continue } - } - labelNames := make([]string, 0, len(labelNamesMap)) - for name := range labelNamesMap { labelNames = append(labelNames, name) } sort.Strings(labelNames) @@ -1307,35 +1359,6 @@ func (t *stringTuples) Less(i, j int) bool { return false } -type serializedStringTuples struct { - idsCount int - idsBytes []byte // bytes containing the ids pointing to the string in the lookup table. - lookup func(uint32) (string, error) -} - -func (t *serializedStringTuples) Len() int { - return len(t.idsBytes) / (4 * t.idsCount) -} - -func (t *serializedStringTuples) At(i int) ([]string, error) { - if len(t.idsBytes) < (i+t.idsCount)*4 { - return nil, encoding.ErrInvalidSize - } - res := make([]string, 0, t.idsCount) - - for k := 0; k < t.idsCount; k++ { - offset := binary.BigEndian.Uint32(t.idsBytes[(i+k)*4:]) - - s, err := t.lookup(offset) - if err != nil { - return nil, errors.Wrap(err, "symbol lookup") - } - res = append(res, s) - } - - return res, nil -} - // Decoder provides decoding methods for the v1 and v2 index file format. 
// // It currently does not contain decoding methods for all entry types but can be extended @@ -1418,3 +1441,7 @@ func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]chunks.Meta) e } return d.Err() } + +func yoloString(b []byte) string { + return *((*string)(unsafe.Pointer(&b))) +} diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index d60af036cf..408fa78c2b 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -138,14 +138,6 @@ func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) return nil } -func (m mockIndex) LabelIndices() ([][]string, error) { - res := make([][]string, 0, len(m.labelIndex)) - for k := range m.labelIndex { - res = append(res, []string{k}) - } - return res, nil -} - func TestIndexRW_Create_Open(t *testing.T) { dir, err := ioutil.TempDir("", "test_index_create") testutil.Ok(t, err) @@ -211,10 +203,8 @@ func TestIndexRW_Postings(t *testing.T) { testutil.Ok(t, iw.AddSeries(3, series[2])) testutil.Ok(t, iw.AddSeries(4, series[3])) - err = iw.WriteLabelIndex([]string{"a"}, []string{"1"}) - testutil.Ok(t, err) - err = iw.WriteLabelIndex([]string{"b"}, []string{"1", "2", "3", "4"}) - testutil.Ok(t, err) + testutil.Ok(t, iw.WriteLabelIndex([]string{"a"}, []string{"1"})) + testutil.Ok(t, iw.WriteLabelIndex([]string{"b"}, []string{"1", "2", "3", "4"})) testutil.Ok(t, iw.Close()) @@ -236,6 +226,34 @@ func TestIndexRW_Postings(t *testing.T) { } testutil.Ok(t, p.Err()) + // The label incides are no longer used, so test them by hand here. 
+ labelIndices := map[string][]string{} + testutil.Ok(t, ReadOffsetTable(ir.b, ir.toc.LabelIndicesTable, func(key []string, off uint64, _ int) error { + if len(key) != 1 { + return errors.Errorf("unexpected key length for label indices table %d", len(key)) + } + + d := encoding.NewDecbufAt(ir.b, int(off), castagnoliTable) + vals := []string{} + nc := d.Be32int() + if nc != 1 { + return errors.Errorf("unexpected nuumber of label indices table names %d", nc) + } + for i := d.Be32(); i > 0; i-- { + v, err := ir.lookupSymbol(d.Be32()) + if err != nil { + return err + } + vals = append(vals, v) + } + labelIndices[key[0]] = vals + return d.Err() + })) + testutil.Equals(t, map[string][]string{ + "a": []string{"1"}, + "b": []string{"1", "2", "3", "4"}, + }, labelIndices) + testutil.Ok(t, ir.Close()) } diff --git a/tsdb/querier.go b/tsdb/querier.go index f1f2c55209..eff193beec 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -35,12 +35,9 @@ type Querier interface { Select(...*labels.Matcher) (SeriesSet, error) // LabelValues returns all potential values for a label name. + // It is not safe to use the strings beyond the lifefime of the querier. LabelValues(string) ([]string, error) - // LabelValuesFor returns all potential values for a label name. - // under the constraint of another label. - LabelValuesFor(string, labels.Label) ([]string, error) - // LabelNames returns all the unique label names present in the block in sorted order. 
LabelNames() ([]string, error) @@ -108,10 +105,6 @@ func (q *querier) lvals(qs []Querier, n string) ([]string, error) { return mergeStrings(s1, s2), nil } -func (q *querier) LabelValuesFor(string, labels.Label) ([]string, error) { - return nil, fmt.Errorf("not implemented") -} - func (q *querier) Select(ms ...*labels.Matcher) (SeriesSet, error) { if len(q.blocks) == 0 { return EmptySeriesSet(), nil diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 03fd867f04..df8a0cb3b4 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -1408,14 +1408,6 @@ func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) return nil } -func (m mockIndex) LabelIndices() ([][]string, error) { - res := make([][]string, 0, len(m.labelIndex)) - for k := range m.labelIndex { - res = append(res, []string{k}) - } - return res, nil -} - func (m mockIndex) LabelNames() ([]string, error) { labelNames := make([]string, 0, len(m.labelIndex)) for name := range m.labelIndex { diff --git a/tsdb/testdata/index_format_v1/chunks/000001 b/tsdb/testdata/index_format_v1/chunks/000001 new file mode 100644 index 0000000000000000000000000000000000000000..1f2250b865ef7234a1bcc92dee7f76e033aadc03 GIT binary patch literal 44 ncmZqO>u{Hmfq{XCk%5uP0muM@$f`?aAQ3hP0jS8o#C5I!rJ@Id literal 0 HcmV?d00001 diff --git a/tsdb/testdata/index_format_v1/index b/tsdb/testdata/index_format_v1/index new file mode 100644 index 0000000000000000000000000000000000000000..8804430fbcbc37b42932853488e5fd494e57bb8d GIT binary patch literal 238 zcmdnB>N*1>0|SEq5Hm9;B^Cj36?0mCzVvtbJa$G=UPdMc4sVkHCLmjok&QvBuYYeSO05M3dth9gFEF`|>8?z1*AfF2) z&j_;DLfOS}3y>!R#LPgW1DHUfAeW`USXCu;zg{i?%7Xwagkk{M1_GuKilGHc2SDj* JGkz-^2LQA1BPsv@ literal 0 HcmV?d00001 diff --git a/tsdb/testdata/index_format_v1/meta.json b/tsdb/testdata/index_format_v1/meta.json new file mode 100644 index 0000000000..62347db26c --- /dev/null +++ b/tsdb/testdata/index_format_v1/meta.json @@ -0,0 +1,17 @@ +{ + "version": 1, + "ulid": 
"01DVZX4CHY2EGZ6JQVS80AB9CF", + "minTime": 0, + "maxTime": 7200000, + "stats": { + "numSamples": 2, + "numSeries": 2, + "numChunks": 2 + }, + "compaction": { + "level": 1, + "sources": [ + "01DVZX4CHY2EGZ6JQVS80AB9CF" + ] + } +} diff --git a/tsdb/testdata/index_format_v1/tombstones b/tsdb/testdata/index_format_v1/tombstones new file mode 100644 index 0000000000000000000000000000000000000000..95fb83272e6f55edeee7e5d86bdec5fcd217eae0 GIT binary patch literal 9 OcmZQH*k!=T00aOB%>f+% literal 0 HcmV?d00001 diff --git a/web/api/v1/api.go b/web/api/v1/api.go index fc03e063b6..4909f98315 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -471,14 +471,17 @@ func (api *API) labelValues(r *http.Request) apiFuncResult { if err != nil { return apiFuncResult{nil, &apiError{errorExec, err}, nil, nil} } - defer q.Close() + + closer := func() { + q.Close() + } vals, warnings, err := q.LabelValues(name) if err != nil { - return apiFuncResult{nil, &apiError{errorExec, err}, warnings, nil} + return apiFuncResult{nil, &apiError{errorExec, err}, warnings, closer} } - return apiFuncResult{vals, nil, warnings, nil} + return apiFuncResult{vals, nil, warnings, closer} } var (