From a515b162990b5fff89baefa76933a942752d0d38 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 16 Jan 2018 19:19:01 -0800 Subject: [PATCH 1/4] use an incrementing index rather than an offset for symbols table --- index/index.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/index/index.go b/index/index.go index df8d7b5fe..73c7a2a1f 100644 --- a/index/index.go +++ b/index/index.go @@ -273,17 +273,17 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta w.buf2.putUvarint(len(lset)) for _, l := range lset { - offset, ok := w.symbols[l.Name] + index, ok := w.symbols[l.Name] if !ok { return errors.Errorf("symbol entry for %q does not exist", l.Name) } - w.buf2.putUvarint32(offset) + w.buf2.putUvarint32(index) - offset, ok = w.symbols[l.Value] + index, ok = w.symbols[l.Value] if !ok { return errors.Errorf("symbol entry for %q does not exist", l.Value) } - w.buf2.putUvarint32(offset) + w.buf2.putUvarint32(index) } w.buf2.putUvarint(len(chunks)) @@ -341,8 +341,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error { w.symbols = make(map[string]uint32, len(symbols)) - for _, s := range symbols { - w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len()) + for index, s := range symbols { + w.symbols[s] = uint32(index) w.buf2.putUvarintStr(s) } @@ -382,11 +382,11 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error { w.buf2.putBE32int(valt.Len()) for _, v := range valt.s { - offset, ok := w.symbols[v] + index, ok := w.symbols[v] if !ok { return errors.Errorf("symbol entry for %q does not exist", v) } - w.buf2.putBE32(offset) + w.buf2.putBE32(index) } w.buf1.reset() @@ -751,16 +751,14 @@ func (r *Reader) readSymbols(off int) error { d := r.decbufAt(off) var ( - origLen = d.len() cnt = d.be32int() - basePos = uint32(off) + 4 - nextPos = basePos + uint32(origLen-d.len()) + nextPos = 0 ) for d.err() == nil && d.len() > 0 && cnt > 0 { s := d.uvarintStr() 
r.symbols[uint32(nextPos)] = s - nextPos = basePos + uint32(origLen-d.len()) + nextPos++ cnt-- } return d.err() From 4792d2bbd1d9113a5b653799b8e37ae949797dd6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 16 Jan 2018 20:37:57 -0800 Subject: [PATCH 2/4] handle v1 vs v2 for symbol offset vs index changes --- index/index.go | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/index/index.go b/index/index.go index 73c7a2a1f..e77dd8a3d 100644 --- a/index/index.go +++ b/index/index.go @@ -273,6 +273,7 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta w.buf2.putUvarint(len(lset)) for _, l := range lset { + // here we have an index for the symbol file if v2, otherwise it's an offset index, ok := w.symbols[l.Name] if !ok { return errors.Errorf("symbol entry for %q does not exist", l.Name) @@ -342,7 +343,10 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error { w.symbols = make(map[string]uint32, len(symbols)) for index, s := range symbols { - w.symbols[s] = uint32(index) + w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len()) + if w.Version == 2 { + w.symbols[s] = uint32(index) + } w.buf2.putUvarintStr(s) } @@ -381,6 +385,7 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error { w.buf2.putBE32int(len(names)) w.buf2.putBE32int(valt.Len()) + // here we have an index for the symbol file if v2, otherwise it's an offset for _, v := range valt.s { index, ok := w.symbols[v] if !ok { @@ -751,14 +756,25 @@ func (r *Reader) readSymbols(off int) error { d := r.decbufAt(off) var ( + origLen = d.len() cnt = d.be32int() - nextPos = 0 + basePos = uint32(off) + 4 + nextPos = basePos + uint32(origLen-d.len()) ) + + if r.version == 2 { + nextPos = 0 + } + for d.err() == nil && d.len() > 0 && cnt > 0 { s := d.uvarintStr() r.symbols[uint32(nextPos)] = s - nextPos++ + if r.version == 2 { + nextPos++ + } else { + nextPos = basePos + uint32(origLen-d.len()) + } cnt-- } return 
d.err() From 87a9415bf173d29cb91a8900f047a266ead368b1 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 16 Jan 2018 20:41:25 -0800 Subject: [PATCH 3/4] update docs for symbols indexing change --- docs/format/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/format/index.md b/docs/format/index.md index 48366ef8d..331c20e16 100644 --- a/docs/format/index.md +++ b/docs/format/index.md @@ -43,7 +43,7 @@ Most of the sections described below start with a `len` field. It always specifi The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size. The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are utf-8 encoded. -Strings are referenced by pointing to the beginning of their length field. The strings are sorted in lexicographically ascending order. +Strings are referenced by their sequential index in the symbol table, starting at 0 for the first string; in version 1 of the format they were instead referenced by the offset of their length field. The strings are sorted in lexicographically ascending order. ``` ┌────────────────────┬─────────────────────┐ From cd6758481797fe26ba36f59b06679918e56a8029 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 17 Jan 2018 22:46:22 -0800 Subject: [PATCH 4/4] we don't want to write to index file in older version formats --- index/index.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/index/index.go b/index/index.go index e77dd8a3d..998f009d5 100644 --- a/index/index.go +++ b/index/index.go @@ -343,10 +343,7 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error { w.symbols = make(map[string]uint32, len(symbols)) for index, s := range symbols { - w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len()) - if w.Version == 2 { - w.symbols[s] = uint32(index) - } + w.symbols[s] = uint32(index) w.buf2.putUvarintStr(s) }