Fix reader for large index files.

Currently the reader casts section offsets to uint32 even though the index
can grow larger than 4GiB, so offsets beyond that limit are truncated.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
This commit is contained in:
Goutham Veeramachaneni 2018-02-02 13:16:23 +05:30
parent 44dd5e1202
commit bb0e74b343
No known key found for this signature in database
GPG key ID: F1C217E8E9023CAD

View file

@ -143,7 +143,11 @@ func NewWriter(fn string) (*Writer, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer df.Close() // close for flatform windows defer df.Close() // Close for platform windows.
if err := os.RemoveAll(fn); err != nil {
return nil, errors.Wrap(err, "remove any existing index at path")
}
f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0666) f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil { if err != nil {
@ -530,8 +534,8 @@ type Reader struct {
c io.Closer c io.Closer
// Cached hashmaps of section offsets. // Cached hashmaps of section offsets.
labels map[string]uint32 labels map[string]uint64
postings map[labels.Label]uint32 postings map[labels.Label]uint64
// Cache of read symbols. Strings that are returned when reading from the // Cache of read symbols. Strings that are returned when reading from the
// block are always backed by true strings held in here rather than // block are always backed by true strings held in here rather than
// strings that are backed by byte slices from the mmap'd index file. This // strings that are backed by byte slices from the mmap'd index file. This
@ -595,8 +599,8 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
b: b, b: b,
c: c, c: c,
symbols: map[uint32]string{}, symbols: map[uint32]string{},
labels: map[string]uint32{}, labels: map[string]uint64{},
postings: map[labels.Label]uint32{}, postings: map[labels.Label]uint64{},
crc32: newCRC32(), crc32: newCRC32(),
version: version, version: version,
} }
@ -617,7 +621,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
} }
var err error var err error
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint32) error { err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
if len(key) != 1 { if len(key) != 1 {
return errors.Errorf("unexpected key length %d", len(key)) return errors.Errorf("unexpected key length %d", len(key))
} }
@ -627,7 +631,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
if err != nil { if err != nil {
return nil, errors.Wrap(err, "read label index table") return nil, errors.Wrap(err, "read label index table")
} }
err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint32) error { err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint64) error {
if len(key) != 2 { if len(key) != 2 {
return errors.Errorf("unexpected key length %d", len(key)) return errors.Errorf("unexpected key length %d", len(key))
} }
@ -780,7 +784,7 @@ func (r *Reader) readSymbols(off int) error {
// readOffsetTable reads an offset table at the given position and calls f for each // readOffsetTable reads an offset table at the given position and calls f for each
// found entry. // found entry.
// If f returns an error, it stops decoding and returns the received error. // If f returns an error, it stops decoding and returns the received error.
func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) error { func (r *Reader) readOffsetTable(off uint64, f func([]string, uint64) error) error {
d := r.decbufAt(int(off)) d := r.decbufAt(int(off))
cnt := d.be32() cnt := d.be32()
@ -791,7 +795,7 @@ func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) err
for i := 0; i < keyCount; i++ { for i := 0; i < keyCount; i++ {
keys = append(keys, d.uvarintStr()) keys = append(keys, d.uvarintStr())
} }
o := uint32(d.uvarint()) o := d.uvarint64()
if d.err() != nil { if d.err() != nil {
break break
} }