From b09d90c79c4821063f9151daf82c4266f73f9c9e Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Fri, 8 Sep 2017 15:15:12 +0200 Subject: [PATCH] Add decoding method to retrieve unsafe strings When decoding data from mmapped blocks, we would like to retrieve a string backed by the mmapped region. As the underlying byte slice never changes, this is safe. --- encoding_helpers.go | 16 ++++++++++++++++ index.go | 10 +++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/encoding_helpers.go b/encoding_helpers.go index 9aa4ba409..17c3ff081 100644 --- a/encoding_helpers.go +++ b/encoding_helpers.go @@ -77,6 +77,22 @@ func (d *decbuf) uvarint32() uint32 { return uint32(d.uvarint64()) } func (d *decbuf) be32int() int { return int(d.be32()) } func (d *decbuf) be64int64() int64 { return int64(d.be64()) } +// uvarintTempStr decodes like uvarintStr but the returned string is +// not safe to use if the underlying buffer changes. +func (d *decbuf) uvarintTempStr() string { + l := d.uvarint64() + if d.e != nil { + return "" + } + if len(d.b) < int(l) { + d.e = errInvalidSize + return "" + } + s := yoloString(d.b[:l]) + d.b = d.b[l:] + return s +} + func (d *decbuf) uvarintStr() string { l := d.uvarint64() if d.e != nil { diff --git a/index.go b/index.go index ddc2c4f52..e65f3fec1 100644 --- a/index.go +++ b/index.go @@ -335,10 +335,6 @@ func (w *indexWriter) AddSymbols(sym map[string]struct{}) error { for _, s := range symbols { w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len()) - - // NOTE: len(s) gives the number of runes, not the number of bytes. - // Therefore the read-back length for strings with unicode characters will - // be off when not using putUvarintStr. 
w.buf2.putUvarintStr(s) } @@ -636,7 +632,7 @@ func (r *indexReader) readOffsetTable(off uint64) (map[string]uint32, error) { keys := make([]string, 0, keyCount) for i := 0; i < keyCount; i++ { - keys = append(keys, d2.uvarintStr()) + keys = append(keys, d2.uvarintTempStr()) } res[strings.Join(keys, sep)] = uint32(d2.uvarint()) @@ -673,7 +669,7 @@ func (r *indexReader) section(o uint32) (byte, []byte, error) { func (r *indexReader) lookupSymbol(o uint32) (string, error) { d := r.decbufAt(int(o)) - s := d.uvarintStr() + s := d.uvarintTempStr() if d.err() != nil { return "", errors.Wrapf(d.err(), "read symbol at %d", o) } @@ -688,7 +684,7 @@ func (r *indexReader) Symbols() (map[string]struct{}, error) { sym := make(map[string]struct{}, count) for ; count > 0; count-- { - s := d2.uvarintStr() + s := d2.uvarintTempStr() sym[s] = struct{}{} }