mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-27 05:32:27 -08:00
Merge pull request #262 from cstyan/callum-249
Reduce index file size caused by symbol references by using indicies rather than offsets
This commit is contained in:
commit
467948f3c3
|
@ -43,7 +43,7 @@ Most of the sections described below start with a `len` field. It always specifi
|
|||
The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size.
|
||||
|
||||
The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are utf-8 encoded.
|
||||
Strings are referenced by pointing to the beginning of their length field. The strings are sorted in lexicographically ascending order.
|
||||
Strings are referenced by sequential indexing. The strings are sorted in lexicographically ascending order.
|
||||
|
||||
```
|
||||
┌────────────────────┬─────────────────────┐
|
||||
|
|
|
@ -273,17 +273,18 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
|
|||
w.buf2.putUvarint(len(lset))
|
||||
|
||||
for _, l := range lset {
|
||||
offset, ok := w.symbols[l.Name]
|
||||
// here we have an index for the symbol file if v2, otherwise it's an offset
|
||||
index, ok := w.symbols[l.Name]
|
||||
if !ok {
|
||||
return errors.Errorf("symbol entry for %q does not exist", l.Name)
|
||||
}
|
||||
w.buf2.putUvarint32(offset)
|
||||
w.buf2.putUvarint32(index)
|
||||
|
||||
offset, ok = w.symbols[l.Value]
|
||||
index, ok = w.symbols[l.Value]
|
||||
if !ok {
|
||||
return errors.Errorf("symbol entry for %q does not exist", l.Value)
|
||||
}
|
||||
w.buf2.putUvarint32(offset)
|
||||
w.buf2.putUvarint32(index)
|
||||
}
|
||||
|
||||
w.buf2.putUvarint(len(chunks))
|
||||
|
@ -341,8 +342,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
|
|||
|
||||
w.symbols = make(map[string]uint32, len(symbols))
|
||||
|
||||
for _, s := range symbols {
|
||||
w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len())
|
||||
for index, s := range symbols {
|
||||
w.symbols[s] = uint32(index)
|
||||
w.buf2.putUvarintStr(s)
|
||||
}
|
||||
|
||||
|
@ -381,12 +382,13 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error {
|
|||
w.buf2.putBE32int(len(names))
|
||||
w.buf2.putBE32int(valt.Len())
|
||||
|
||||
// here we have an index for the symbol file if v2, otherwise it's an offset
|
||||
for _, v := range valt.s {
|
||||
offset, ok := w.symbols[v]
|
||||
index, ok := w.symbols[v]
|
||||
if !ok {
|
||||
return errors.Errorf("symbol entry for %q does not exist", v)
|
||||
}
|
||||
w.buf2.putBE32(offset)
|
||||
w.buf2.putBE32(index)
|
||||
}
|
||||
|
||||
w.buf1.reset()
|
||||
|
@ -756,11 +758,20 @@ func (r *Reader) readSymbols(off int) error {
|
|||
basePos = uint32(off) + 4
|
||||
nextPos = basePos + uint32(origLen-d.len())
|
||||
)
|
||||
|
||||
if r.version == 2 {
|
||||
nextPos = 0
|
||||
}
|
||||
|
||||
for d.err() == nil && d.len() > 0 && cnt > 0 {
|
||||
s := d.uvarintStr()
|
||||
r.symbols[uint32(nextPos)] = s
|
||||
|
||||
nextPos = basePos + uint32(origLen-d.len())
|
||||
if r.version == 2 {
|
||||
nextPos++
|
||||
} else {
|
||||
nextPos = basePos + uint32(origLen-d.len())
|
||||
}
|
||||
cnt--
|
||||
}
|
||||
return errors.Wrap(d.err(), "read symbols")
|
||||
|
|
Loading…
Reference in a new issue