Fix reader for large index files.

Currently the offsets are cast into uint32 even though the index can
grow larger than 4GiB.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
This commit is contained in:
Goutham Veeramachaneni 2018-02-02 13:16:23 +05:30
parent 44dd5e1202
commit bb0e74b343
No known key found for this signature in database
GPG key ID: F1C217E8E9023CAD

View file

@ -143,7 +143,11 @@ func NewWriter(fn string) (*Writer, error) {
if err != nil {
return nil, err
}
defer df.Close() // close for flatform windows
defer df.Close() // Close for platform windows.
if err := os.RemoveAll(fn); err != nil {
return nil, errors.Wrap(err, "remove any existing index at path")
}
f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
@ -530,8 +534,8 @@ type Reader struct {
c io.Closer
// Cached hashmaps of section offsets.
labels map[string]uint32
postings map[labels.Label]uint32
labels map[string]uint64
postings map[labels.Label]uint64
// Cache of read symbols. Strings that are returned when reading from the
// block are always backed by true strings held in here rather than
// strings that are backed by byte slices from the mmap'd index file. This
@ -595,8 +599,8 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
b: b,
c: c,
symbols: map[uint32]string{},
labels: map[string]uint32{},
postings: map[labels.Label]uint32{},
labels: map[string]uint64{},
postings: map[labels.Label]uint64{},
crc32: newCRC32(),
version: version,
}
@ -617,7 +621,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
}
var err error
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint32) error {
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
if len(key) != 1 {
return errors.Errorf("unexpected key length %d", len(key))
}
@ -627,7 +631,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
if err != nil {
return nil, errors.Wrap(err, "read label index table")
}
err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint32) error {
err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint64) error {
if len(key) != 2 {
return errors.Errorf("unexpected key length %d", len(key))
}
@ -780,7 +784,7 @@ func (r *Reader) readSymbols(off int) error {
// readOffsetTable reads an offset table at the given position calls f for each
// found entry.f
// If f returns an error it stops decoding and returns the received error,
func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) error {
func (r *Reader) readOffsetTable(off uint64, f func([]string, uint64) error) error {
d := r.decbufAt(int(off))
cnt := d.be32()
@ -791,7 +795,7 @@ func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) err
for i := 0; i < keyCount; i++ {
keys = append(keys, d.uvarintStr())
}
o := uint32(d.uvarint())
o := d.uvarint64()
if d.err() != nil {
break
}