Reduce memory taken up by posting/symbol tables.

Reuse the string already allocated for symbols
in the posting tables.

Use a slice for symbols in v2 format.

Move symbol size logic into the index code.
Avoid duplication of lookupSymbol logic.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
Brian Brazil 2018-11-02 09:52:45 +00:00
parent 88ebd749dd
commit c93e261466
2 changed files with 37 additions and 38 deletions

View file

@ -15,7 +15,6 @@
package tsdb
import (
"encoding/binary"
"encoding/json"
"io/ioutil"
"os"
@ -278,23 +277,13 @@ func OpenBlock(dir string, pool chunkenc.Pool) (*Block, error) {
return nil, err
}
// Calculating symbol table size.
tmp := make([]byte, 8)
symTblSize := uint64(0)
for _, v := range ir.SymbolTable() {
// Size of varint length of the symbol.
symTblSize += uint64(binary.PutUvarint(tmp, uint64(len(v))))
// Size of the symbol.
symTblSize += uint64(len(v))
}
pb := &Block{
dir: dir,
meta: *meta,
chunkr: cr,
indexr: ir,
tombstones: tr,
symbolTableSize: symTblSize,
symbolTableSize: ir.SymbolTableSize(),
}
return pb, nil
}

View file

@ -549,8 +549,10 @@ type Reader struct {
// block are always backed by true strings held in here rather than
// strings that are backed by byte slices from the mmap'd index file. This
// prevents memory faults when applications work with read symbols after
// the block has been unmapped.
symbols map[uint32]string
// the block has been unmapped. The older format has sparse indexes so a map
// must be used, but the new format is not so we can use a slice.
symbols map[uint32]string
symbolSlice []string
dec *Decoder
@ -631,11 +633,21 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
}
var err error
// Use the strings already allocated by symbols, rather than
// re-allocating them again below.
symbols := make(map[string]string, len(r.symbols)+len(r.symbolSlice))
for _, s := range r.symbols {
symbols[s] = s
}
for _, s := range r.symbolSlice {
symbols[s] = s
}
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
if len(key) != 1 {
return errors.Errorf("unexpected key length %d", len(key))
}
r.labels[key[0]] = off
r.labels[symbols[key[0]]] = off
return nil
})
if err != nil {
@ -645,14 +657,14 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
if len(key) != 2 {
return errors.Errorf("unexpected key length %d", len(key))
}
r.postings[labels.Label{Name: key[0], Value: key[1]}] = off
r.postings[labels.Label{Name: symbols[key[0]], Value: symbols[key[1]]}] = off
return nil
})
if err != nil {
return nil, errors.Wrap(err, "read postings table")
}
r.dec = &Decoder{symbols: r.symbols}
r.dec = &Decoder{lookupSymbol: r.lookupSymbol}
return r, nil
}
@ -777,18 +789,17 @@ func (r *Reader) readSymbols(off int) error {
basePos = uint32(off) + 4
nextPos = basePos + uint32(origLen-d.len())
)
if r.version == 2 {
nextPos = 0
r.symbolSlice = make([]string, 0, cnt)
}
for d.err() == nil && d.len() > 0 && cnt > 0 {
s := d.uvarintStr()
r.symbols[nextPos] = s
if r.version == 2 {
nextPos++
r.symbolSlice = append(r.symbolSlice, s)
} else {
r.symbols[nextPos] = s
nextPos = basePos + uint32(origLen-d.len())
}
cnt--
@ -828,6 +839,9 @@ func (r *Reader) Close() error {
}
func (r *Reader) lookupSymbol(o uint32) (string, error) {
if int(o) < len(r.symbolSlice) {
return r.symbolSlice[o], nil
}
s, ok := r.symbols[o]
if !ok {
return "", errors.Errorf("unknown symbol offset %d", o)
@ -842,12 +856,22 @@ func (r *Reader) Symbols() (map[string]struct{}, error) {
for _, s := range r.symbols {
res[s] = struct{}{}
}
for _, s := range r.symbolSlice {
res[s] = struct{}{}
}
return res, nil
}
// SymbolTable returns the symbol table that is used to resolve symbol references.
func (r *Reader) SymbolTable() map[uint32]string {
return r.symbols
func (r *Reader) SymbolTableSize() uint64 {
var size int
for _, s := range r.symbols {
size += len(s) + 8
}
for _, s := range r.symbolSlice {
size += len(s) + 8
}
return uint64(size)
}
// LabelValues returns value tuples that exist for the given label name tuples.
@ -1031,21 +1055,7 @@ func (t *serializedStringTuples) At(i int) ([]string, error) {
// It currently does not contain decoding methods for all entry types but can be extended
// by them if there's demand.
type Decoder struct {
symbols map[uint32]string
}
func (dec *Decoder) lookupSymbol(o uint32) (string, error) {
s, ok := dec.symbols[o]
if !ok {
return "", errors.Errorf("unknown symbol offset %d", o)
}
return s, nil
}
// SetSymbolTable set the symbol table to be used for lookups when decoding series
// and label indices
func (dec *Decoder) SetSymbolTable(t map[uint32]string) {
dec.symbols = t
lookupSymbol func(uint32) (string, error)
}
// Postings returns a postings list for b and its number of elements.