Reduce memory taken up by posting/symbol tables.

Reuse the strings already allocated for symbols
in the posting tables.

Use a slice for symbols in v2 format.

Move symbol size logic into the index code.
Avoid duplication of lookupSymbol logic.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
Brian Brazil 2018-11-02 09:52:45 +00:00
parent 88ebd749dd
commit c93e261466
2 changed files with 37 additions and 38 deletions

View file

@ -15,7 +15,6 @@
package tsdb package tsdb
import ( import (
"encoding/binary"
"encoding/json" "encoding/json"
"io/ioutil" "io/ioutil"
"os" "os"
@ -278,23 +277,13 @@ func OpenBlock(dir string, pool chunkenc.Pool) (*Block, error) {
return nil, err return nil, err
} }
// Calculating symbol table size.
tmp := make([]byte, 8)
symTblSize := uint64(0)
for _, v := range ir.SymbolTable() {
// Size of varint length of the symbol.
symTblSize += uint64(binary.PutUvarint(tmp, uint64(len(v))))
// Size of the symbol.
symTblSize += uint64(len(v))
}
pb := &Block{ pb := &Block{
dir: dir, dir: dir,
meta: *meta, meta: *meta,
chunkr: cr, chunkr: cr,
indexr: ir, indexr: ir,
tombstones: tr, tombstones: tr,
symbolTableSize: symTblSize, symbolTableSize: ir.SymbolTableSize(),
} }
return pb, nil return pb, nil
} }

View file

@ -549,8 +549,10 @@ type Reader struct {
// block are always backed by true strings held in here rather than // block are always backed by true strings held in here rather than
// strings that are backed by byte slices from the mmap'd index file. This // strings that are backed by byte slices from the mmap'd index file. This
// prevents memory faults when applications work with read symbols after // prevents memory faults when applications work with read symbols after
// the block has been unmapped. // the block has been unmapped. The older format has sparse indexes so a map
// must be used, but the new format does not, so we can use a slice.
symbols map[uint32]string symbols map[uint32]string
symbolSlice []string
dec *Decoder dec *Decoder
@ -631,11 +633,21 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
} }
var err error var err error
// Use the strings already allocated by symbols, rather than
// re-allocating them again below.
symbols := make(map[string]string, len(r.symbols)+len(r.symbolSlice))
for _, s := range r.symbols {
symbols[s] = s
}
for _, s := range r.symbolSlice {
symbols[s] = s
}
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error { err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
if len(key) != 1 { if len(key) != 1 {
return errors.Errorf("unexpected key length %d", len(key)) return errors.Errorf("unexpected key length %d", len(key))
} }
r.labels[key[0]] = off r.labels[symbols[key[0]]] = off
return nil return nil
}) })
if err != nil { if err != nil {
@ -645,14 +657,14 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
if len(key) != 2 { if len(key) != 2 {
return errors.Errorf("unexpected key length %d", len(key)) return errors.Errorf("unexpected key length %d", len(key))
} }
r.postings[labels.Label{Name: key[0], Value: key[1]}] = off r.postings[labels.Label{Name: symbols[key[0]], Value: symbols[key[1]]}] = off
return nil return nil
}) })
if err != nil { if err != nil {
return nil, errors.Wrap(err, "read postings table") return nil, errors.Wrap(err, "read postings table")
} }
r.dec = &Decoder{symbols: r.symbols} r.dec = &Decoder{lookupSymbol: r.lookupSymbol}
return r, nil return r, nil
} }
@ -777,18 +789,17 @@ func (r *Reader) readSymbols(off int) error {
basePos = uint32(off) + 4 basePos = uint32(off) + 4
nextPos = basePos + uint32(origLen-d.len()) nextPos = basePos + uint32(origLen-d.len())
) )
if r.version == 2 { if r.version == 2 {
nextPos = 0 r.symbolSlice = make([]string, 0, cnt)
} }
for d.err() == nil && d.len() > 0 && cnt > 0 { for d.err() == nil && d.len() > 0 && cnt > 0 {
s := d.uvarintStr() s := d.uvarintStr()
r.symbols[nextPos] = s
if r.version == 2 { if r.version == 2 {
nextPos++ r.symbolSlice = append(r.symbolSlice, s)
} else { } else {
r.symbols[nextPos] = s
nextPos = basePos + uint32(origLen-d.len()) nextPos = basePos + uint32(origLen-d.len())
} }
cnt-- cnt--
@ -828,6 +839,9 @@ func (r *Reader) Close() error {
} }
func (r *Reader) lookupSymbol(o uint32) (string, error) { func (r *Reader) lookupSymbol(o uint32) (string, error) {
if int(o) < len(r.symbolSlice) {
return r.symbolSlice[o], nil
}
s, ok := r.symbols[o] s, ok := r.symbols[o]
if !ok { if !ok {
return "", errors.Errorf("unknown symbol offset %d", o) return "", errors.Errorf("unknown symbol offset %d", o)
@ -842,12 +856,22 @@ func (r *Reader) Symbols() (map[string]struct{}, error) {
for _, s := range r.symbols { for _, s := range r.symbols {
res[s] = struct{}{} res[s] = struct{}{}
} }
for _, s := range r.symbolSlice {
res[s] = struct{}{}
}
return res, nil return res, nil
} }
// SymbolTable returns the symbol table that is used to resolve symbol references. // SymbolTable returns the symbol table that is used to resolve symbol references.
func (r *Reader) SymbolTable() map[uint32]string { func (r *Reader) SymbolTableSize() uint64 {
return r.symbols var size int
for _, s := range r.symbols {
size += len(s) + 8
}
for _, s := range r.symbolSlice {
size += len(s) + 8
}
return uint64(size)
} }
// LabelValues returns value tuples that exist for the given label name tuples. // LabelValues returns value tuples that exist for the given label name tuples.
@ -1031,21 +1055,7 @@ func (t *serializedStringTuples) At(i int) ([]string, error) {
// It currently does not contain decoding methods for all entry types but can be extended // It currently does not contain decoding methods for all entry types but can be extended
// by them if there's demand. // by them if there's demand.
type Decoder struct { type Decoder struct {
symbols map[uint32]string lookupSymbol func(uint32) (string, error)
}
func (dec *Decoder) lookupSymbol(o uint32) (string, error) {
s, ok := dec.symbols[o]
if !ok {
return "", errors.Errorf("unknown symbol offset %d", o)
}
return s, nil
}
// SetSymbolTable sets the symbol table to be used for lookups when decoding series
// and label indices.
func (dec *Decoder) SetSymbolTable(t map[uint32]string) {
dec.symbols = t
} }
// Postings returns a postings list for b and its number of elements. // Postings returns a postings list for b and its number of elements.