mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-12 16:44:05 -08:00
Merge pull request #262 from cstyan/callum-249
Reduce index file size caused by symbol references by using indices rather than offsets
This commit is contained in:
commit
467948f3c3
|
@ -43,7 +43,7 @@ Most of the sections described below start with a `len` field. It always specifi
|
||||||
The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size.
|
The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size.
|
||||||
|
|
||||||
The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are utf-8 encoded.
|
The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are utf-8 encoded.
|
||||||
Strings are referenced by pointing to the beginning of their length field. The strings are sorted in lexicographically ascending order.
|
Strings are referenced by their sequential, zero-based index in the table. The strings are sorted in lexicographically ascending order.
|
||||||
|
|
||||||
```
|
```
|
||||||
┌────────────────────┬─────────────────────┐
|
┌────────────────────┬─────────────────────┐
|
||||||
|
|
|
@ -273,17 +273,18 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
|
||||||
w.buf2.putUvarint(len(lset))
|
w.buf2.putUvarint(len(lset))
|
||||||
|
|
||||||
for _, l := range lset {
|
for _, l := range lset {
|
||||||
offset, ok := w.symbols[l.Name]
|
// here we have an index for the symbol file if v2, otherwise it's an offset
|
||||||
|
index, ok := w.symbols[l.Name]
|
||||||
if !ok {
|
if !ok {
|
||||||
return errors.Errorf("symbol entry for %q does not exist", l.Name)
|
return errors.Errorf("symbol entry for %q does not exist", l.Name)
|
||||||
}
|
}
|
||||||
w.buf2.putUvarint32(offset)
|
w.buf2.putUvarint32(index)
|
||||||
|
|
||||||
offset, ok = w.symbols[l.Value]
|
index, ok = w.symbols[l.Value]
|
||||||
if !ok {
|
if !ok {
|
||||||
return errors.Errorf("symbol entry for %q does not exist", l.Value)
|
return errors.Errorf("symbol entry for %q does not exist", l.Value)
|
||||||
}
|
}
|
||||||
w.buf2.putUvarint32(offset)
|
w.buf2.putUvarint32(index)
|
||||||
}
|
}
|
||||||
|
|
||||||
w.buf2.putUvarint(len(chunks))
|
w.buf2.putUvarint(len(chunks))
|
||||||
|
@ -341,8 +342,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
|
||||||
|
|
||||||
w.symbols = make(map[string]uint32, len(symbols))
|
w.symbols = make(map[string]uint32, len(symbols))
|
||||||
|
|
||||||
for _, s := range symbols {
|
for index, s := range symbols {
|
||||||
w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len())
|
w.symbols[s] = uint32(index)
|
||||||
w.buf2.putUvarintStr(s)
|
w.buf2.putUvarintStr(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -381,12 +382,13 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error {
|
||||||
w.buf2.putBE32int(len(names))
|
w.buf2.putBE32int(len(names))
|
||||||
w.buf2.putBE32int(valt.Len())
|
w.buf2.putBE32int(valt.Len())
|
||||||
|
|
||||||
|
// here we have an index for the symbol file if v2, otherwise it's an offset
|
||||||
for _, v := range valt.s {
|
for _, v := range valt.s {
|
||||||
offset, ok := w.symbols[v]
|
index, ok := w.symbols[v]
|
||||||
if !ok {
|
if !ok {
|
||||||
return errors.Errorf("symbol entry for %q does not exist", v)
|
return errors.Errorf("symbol entry for %q does not exist", v)
|
||||||
}
|
}
|
||||||
w.buf2.putBE32(offset)
|
w.buf2.putBE32(index)
|
||||||
}
|
}
|
||||||
|
|
||||||
w.buf1.reset()
|
w.buf1.reset()
|
||||||
|
@ -756,11 +758,20 @@ func (r *Reader) readSymbols(off int) error {
|
||||||
basePos = uint32(off) + 4
|
basePos = uint32(off) + 4
|
||||||
nextPos = basePos + uint32(origLen-d.len())
|
nextPos = basePos + uint32(origLen-d.len())
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if r.version == 2 {
|
||||||
|
nextPos = 0
|
||||||
|
}
|
||||||
|
|
||||||
for d.err() == nil && d.len() > 0 && cnt > 0 {
|
for d.err() == nil && d.len() > 0 && cnt > 0 {
|
||||||
s := d.uvarintStr()
|
s := d.uvarintStr()
|
||||||
r.symbols[uint32(nextPos)] = s
|
r.symbols[uint32(nextPos)] = s
|
||||||
|
|
||||||
nextPos = basePos + uint32(origLen-d.len())
|
if r.version == 2 {
|
||||||
|
nextPos++
|
||||||
|
} else {
|
||||||
|
nextPos = basePos + uint32(origLen-d.len())
|
||||||
|
}
|
||||||
cnt--
|
cnt--
|
||||||
}
|
}
|
||||||
return errors.Wrap(d.err(), "read symbols")
|
return errors.Wrap(d.err(), "read symbols")
|
||||||
|
|
Loading…
Reference in a new issue