mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-26 05:01:23 -08:00
Reduce memory taken up by posting/symbol tables.
Reuse the string already allocated for symbols in the posting tables. Use a slice for symbols in v2 format. Move symbol size logic into the index code. Avoid duplication of lookupSymbol logic. Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
parent
88ebd749dd
commit
c93e261466
13
block.go
13
block.go
|
@ -15,7 +15,6 @@
|
||||||
package tsdb
|
package tsdb
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
@ -278,23 +277,13 @@ func OpenBlock(dir string, pool chunkenc.Pool) (*Block, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculating symbol table size.
|
|
||||||
tmp := make([]byte, 8)
|
|
||||||
symTblSize := uint64(0)
|
|
||||||
for _, v := range ir.SymbolTable() {
|
|
||||||
// Size of varint length of the symbol.
|
|
||||||
symTblSize += uint64(binary.PutUvarint(tmp, uint64(len(v))))
|
|
||||||
// Size of the symbol.
|
|
||||||
symTblSize += uint64(len(v))
|
|
||||||
}
|
|
||||||
|
|
||||||
pb := &Block{
|
pb := &Block{
|
||||||
dir: dir,
|
dir: dir,
|
||||||
meta: *meta,
|
meta: *meta,
|
||||||
chunkr: cr,
|
chunkr: cr,
|
||||||
indexr: ir,
|
indexr: ir,
|
||||||
tombstones: tr,
|
tombstones: tr,
|
||||||
symbolTableSize: symTblSize,
|
symbolTableSize: ir.SymbolTableSize(),
|
||||||
}
|
}
|
||||||
return pb, nil
|
return pb, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -549,8 +549,10 @@ type Reader struct {
|
||||||
// block are always backed by true strings held in here rather than
|
// block are always backed by true strings held in here rather than
|
||||||
// strings that are backed by byte slices from the mmap'd index file. This
|
// strings that are backed by byte slices from the mmap'd index file. This
|
||||||
// prevents memory faults when applications work with read symbols after
|
// prevents memory faults when applications work with read symbols after
|
||||||
// the block has been unmapped.
|
// the block has been unmapped. The older format has sparse indexes so a map
|
||||||
|
// must be used, but the new format is not so we can use a slice.
|
||||||
symbols map[uint32]string
|
symbols map[uint32]string
|
||||||
|
symbolSlice []string
|
||||||
|
|
||||||
dec *Decoder
|
dec *Decoder
|
||||||
|
|
||||||
|
@ -631,11 +633,21 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
|
||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
|
// Use the strings already allocated by symbols, rather than
|
||||||
|
// re-allocating them again below.
|
||||||
|
symbols := make(map[string]string, len(r.symbols)+len(r.symbolSlice))
|
||||||
|
for _, s := range r.symbols {
|
||||||
|
symbols[s] = s
|
||||||
|
}
|
||||||
|
for _, s := range r.symbolSlice {
|
||||||
|
symbols[s] = s
|
||||||
|
}
|
||||||
|
|
||||||
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
|
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
|
||||||
if len(key) != 1 {
|
if len(key) != 1 {
|
||||||
return errors.Errorf("unexpected key length %d", len(key))
|
return errors.Errorf("unexpected key length %d", len(key))
|
||||||
}
|
}
|
||||||
r.labels[key[0]] = off
|
r.labels[symbols[key[0]]] = off
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -645,14 +657,14 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
|
||||||
if len(key) != 2 {
|
if len(key) != 2 {
|
||||||
return errors.Errorf("unexpected key length %d", len(key))
|
return errors.Errorf("unexpected key length %d", len(key))
|
||||||
}
|
}
|
||||||
r.postings[labels.Label{Name: key[0], Value: key[1]}] = off
|
r.postings[labels.Label{Name: symbols[key[0]], Value: symbols[key[1]]}] = off
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "read postings table")
|
return nil, errors.Wrap(err, "read postings table")
|
||||||
}
|
}
|
||||||
|
|
||||||
r.dec = &Decoder{symbols: r.symbols}
|
r.dec = &Decoder{lookupSymbol: r.lookupSymbol}
|
||||||
|
|
||||||
return r, nil
|
return r, nil
|
||||||
}
|
}
|
||||||
|
@ -777,18 +789,17 @@ func (r *Reader) readSymbols(off int) error {
|
||||||
basePos = uint32(off) + 4
|
basePos = uint32(off) + 4
|
||||||
nextPos = basePos + uint32(origLen-d.len())
|
nextPos = basePos + uint32(origLen-d.len())
|
||||||
)
|
)
|
||||||
|
|
||||||
if r.version == 2 {
|
if r.version == 2 {
|
||||||
nextPos = 0
|
r.symbolSlice = make([]string, 0, cnt)
|
||||||
}
|
}
|
||||||
|
|
||||||
for d.err() == nil && d.len() > 0 && cnt > 0 {
|
for d.err() == nil && d.len() > 0 && cnt > 0 {
|
||||||
s := d.uvarintStr()
|
s := d.uvarintStr()
|
||||||
r.symbols[nextPos] = s
|
|
||||||
|
|
||||||
if r.version == 2 {
|
if r.version == 2 {
|
||||||
nextPos++
|
r.symbolSlice = append(r.symbolSlice, s)
|
||||||
} else {
|
} else {
|
||||||
|
r.symbols[nextPos] = s
|
||||||
nextPos = basePos + uint32(origLen-d.len())
|
nextPos = basePos + uint32(origLen-d.len())
|
||||||
}
|
}
|
||||||
cnt--
|
cnt--
|
||||||
|
@ -828,6 +839,9 @@ func (r *Reader) Close() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Reader) lookupSymbol(o uint32) (string, error) {
|
func (r *Reader) lookupSymbol(o uint32) (string, error) {
|
||||||
|
if int(o) < len(r.symbolSlice) {
|
||||||
|
return r.symbolSlice[o], nil
|
||||||
|
}
|
||||||
s, ok := r.symbols[o]
|
s, ok := r.symbols[o]
|
||||||
if !ok {
|
if !ok {
|
||||||
return "", errors.Errorf("unknown symbol offset %d", o)
|
return "", errors.Errorf("unknown symbol offset %d", o)
|
||||||
|
@ -842,12 +856,22 @@ func (r *Reader) Symbols() (map[string]struct{}, error) {
|
||||||
for _, s := range r.symbols {
|
for _, s := range r.symbols {
|
||||||
res[s] = struct{}{}
|
res[s] = struct{}{}
|
||||||
}
|
}
|
||||||
|
for _, s := range r.symbolSlice {
|
||||||
|
res[s] = struct{}{}
|
||||||
|
}
|
||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SymbolTable returns the symbol table that is used to resolve symbol references.
|
// SymbolTable returns the symbol table that is used to resolve symbol references.
|
||||||
func (r *Reader) SymbolTable() map[uint32]string {
|
func (r *Reader) SymbolTableSize() uint64 {
|
||||||
return r.symbols
|
var size int
|
||||||
|
for _, s := range r.symbols {
|
||||||
|
size += len(s) + 8
|
||||||
|
}
|
||||||
|
for _, s := range r.symbolSlice {
|
||||||
|
size += len(s) + 8
|
||||||
|
}
|
||||||
|
return uint64(size)
|
||||||
}
|
}
|
||||||
|
|
||||||
// LabelValues returns value tuples that exist for the given label name tuples.
|
// LabelValues returns value tuples that exist for the given label name tuples.
|
||||||
|
@ -1031,21 +1055,7 @@ func (t *serializedStringTuples) At(i int) ([]string, error) {
|
||||||
// It currently does not contain decoding methods for all entry types but can be extended
|
// It currently does not contain decoding methods for all entry types but can be extended
|
||||||
// by them if there's demand.
|
// by them if there's demand.
|
||||||
type Decoder struct {
|
type Decoder struct {
|
||||||
symbols map[uint32]string
|
lookupSymbol func(uint32) (string, error)
|
||||||
}
|
|
||||||
|
|
||||||
func (dec *Decoder) lookupSymbol(o uint32) (string, error) {
|
|
||||||
s, ok := dec.symbols[o]
|
|
||||||
if !ok {
|
|
||||||
return "", errors.Errorf("unknown symbol offset %d", o)
|
|
||||||
}
|
|
||||||
return s, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetSymbolTable set the symbol table to be used for lookups when decoding series
|
|
||||||
// and label indices
|
|
||||||
func (dec *Decoder) SetSymbolTable(t map[uint32]string) {
|
|
||||||
dec.symbols = t
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Postings returns a postings list for b and its number of elements.
|
// Postings returns a postings list for b and its number of elements.
|
||||||
|
|
Loading…
Reference in a new issue