Factor out index file writing code.

Now that we have more than one file open at a time, deduplicate a bit.

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
Brian Brazil 2019-12-17 21:54:13 +00:00
parent 85964ce567
commit 1733724e30

View file

@ -111,16 +111,14 @@ func newCRC32() hash.Hash32 {
// Writer implements the IndexWriter interface for the standard // Writer implements the IndexWriter interface for the standard
// serialization format. // serialization format.
type Writer struct { type Writer struct {
ctx context.Context ctx context.Context
f *os.File
fbuf *bufio.Writer // For the main index file.
pos uint64 f *fileWriter
// Temporary file for posting offsets table. // Temporary file for posting offsets table.
fPO *os.File fPO *fileWriter
fbufPO *bufio.Writer cntPO uint64
posPO uint64
cntPO uint64
toc TOC toc TOC
stage indexWriterStage stage indexWriterStage
@ -200,12 +198,12 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) {
} }
// Main index file we are building. // Main index file we are building.
f, err := os.OpenFile(fn, os.O_CREATE|os.O_RDWR, 0666) f, err := newFileWriter(fn)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Temporary file for posting offset table. // Temporary file for posting offset table.
fPO, err := os.OpenFile(fn+"_tmp_po", os.O_CREATE|os.O_RDWR, 0666) fPO, err := newFileWriter(fn + "_tmp_po")
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -214,14 +212,10 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) {
} }
iw := &Writer{ iw := &Writer{
ctx: ctx, ctx: ctx,
f: f, f: f,
fbuf: bufio.NewWriterSize(f, 1<<22), fPO: fPO,
pos: 0, stage: idxStageNone,
fPO: fPO,
fbufPO: bufio.NewWriterSize(fPO, 1<<22),
posPO: 0,
stage: idxStageNone,
// Reusable memory. // Reusable memory.
buf1: encoding.Encbuf{B: make([]byte, 0, 1<<22)}, buf1: encoding.Encbuf{B: make([]byte, 0, 1<<22)},
@ -238,9 +232,41 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) {
} }
func (w *Writer) write(bufs ...[]byte) error { func (w *Writer) write(bufs ...[]byte) error {
return w.f.write(bufs...)
}
// writeAt writes buf at the absolute offset pos of the main index file by
// delegating to w.f.writeAt, which flushes buffered data first and does not
// advance the file's position counter.
func (w *Writer) writeAt(buf []byte, pos uint64) error {
return w.f.writeAt(buf, pos)
}
// addPadding pads the main index file with zero bytes until its write position
// is a multiple of size, by delegating to w.f.addPadding.
func (w *Writer) addPadding(size int) error {
return w.f.addPadding(size)
}
// fileWriter bundles an open file with a buffered writer on top of it and a
// running count of the bytes written through that buffer.
type fileWriter struct {
// f is the underlying file; positional writes (writeAt) go to it directly.
f *os.File
// fbuf is the buffered writer wrapping f; sequential writes go through it.
fbuf *bufio.Writer
// pos is the number of bytes accepted by fbuf so far, i.e. the logical
// offset at which the next sequential write will land.
pos uint64
// name is the path the file was opened with; it is used to remove the file
// and to reopen it via mmap.
name string
}
// newFileWriter opens the file at name for reading and writing, creating it
// with mode 0666 if it does not exist (an existing file is not truncated), and
// returns a fileWriter that buffers sequential writes in a 4 MiB buffer and
// starts counting written bytes from zero.
func newFileWriter(name string) (*fileWriter, error) {
	file, openErr := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0666)
	if openErr != nil {
		return nil, openErr
	}

	fw := new(fileWriter)
	fw.f = file
	fw.fbuf = bufio.NewWriterSize(file, 1<<22)
	fw.pos = 0
	fw.name = name
	return fw, nil
}
func (fw *fileWriter) write(bufs ...[]byte) error {
for _, b := range bufs { for _, b := range bufs {
n, err := w.fbuf.Write(b) n, err := fw.fbuf.Write(b)
w.pos += uint64(n) fw.pos += uint64(n)
if err != nil { if err != nil {
return err return err
} }
@ -248,29 +274,47 @@ func (w *Writer) write(bufs ...[]byte) error {
// offset references in v1 are only 4 bytes large. // offset references in v1 are only 4 bytes large.
// Once we move to compressed/varint representations in those areas, this limitation // Once we move to compressed/varint representations in those areas, this limitation
// can be lifted. // can be lifted.
if w.pos > 16*math.MaxUint32 { if fw.pos > 16*math.MaxUint32 {
return errors.Errorf("exceeding max size of 64GiB") return errors.Errorf("exceeding max size of 64GiB")
} }
} }
return nil return nil
} }
func (w *Writer) writeAt(buf []byte, pos uint64) error { func (fw *fileWriter) flush() error {
if err := w.fbuf.Flush(); err != nil { return fw.fbuf.Flush()
}
func (fw *fileWriter) writeAt(buf []byte, pos uint64) error {
if err := fw.flush(); err != nil {
return err return err
} }
_, err := w.f.WriteAt(buf, int64(pos)) _, err := fw.f.WriteAt(buf, int64(pos))
return err return err
} }
// addPadding adds zero byte padding until the file size is a multiple of size.
func (w *Writer) addPadding(size int) error { func (fw *fileWriter) addPadding(size int) error {
p := w.pos % uint64(size) p := fw.pos % uint64(size)
if p == 0 { if p == 0 {
return nil return nil
} }
p = uint64(size) - p p = uint64(size) - p
return errors.Wrap(w.write(make([]byte, p)), "add padding") return errors.Wrap(fw.write(make([]byte, p)), "add padding")
}
// close flushes buffered data, syncs the file to stable storage and closes it.
//
// The file descriptor is always released, even when flushing or syncing
// fails, so a failed close does not leak the descriptor. The first error
// encountered (flush, then sync, then close) is returned; nil means all three
// steps succeeded.
func (fw *fileWriter) close() error {
	err := fw.flush()
	if err == nil {
		// Only sync when everything buffered actually reached the file.
		err = fw.f.Sync()
	}
	// Close unconditionally; report its error only if nothing failed earlier.
	if cerr := fw.f.Close(); err == nil {
		err = cerr
	}
	return err
}
func (fw *fileWriter) remove() error {
return os.Remove(fw.name)
} }
// ensureStage handles transitions between write stages and ensures that IndexWriter // ensureStage handles transitions between write stages and ensures that IndexWriter
@ -292,7 +336,7 @@ func (w *Writer) ensureStage(s indexWriterStage) error {
// Mark start of sections in table of contents. // Mark start of sections in table of contents.
switch s { switch s {
case idxStageSymbols: case idxStageSymbols:
w.toc.Symbols = w.pos w.toc.Symbols = w.f.pos
if err := w.startSymbols(); err != nil { if err := w.startSymbols(); err != nil {
return err return err
} }
@ -300,22 +344,22 @@ func (w *Writer) ensureStage(s indexWriterStage) error {
if err := w.finishSymbols(); err != nil { if err := w.finishSymbols(); err != nil {
return err return err
} }
w.toc.Series = w.pos w.toc.Series = w.f.pos
case idxStageLabelIndex: case idxStageLabelIndex:
w.toc.LabelIndices = w.pos w.toc.LabelIndices = w.f.pos
case idxStageDone: case idxStageDone:
w.toc.Postings = w.pos w.toc.Postings = w.f.pos
if err := w.writePostings(); err != nil { if err := w.writePostings(); err != nil {
return err return err
} }
w.toc.LabelIndicesTable = w.pos w.toc.LabelIndicesTable = w.f.pos
if err := w.writeLabelIndexesOffsetTable(); err != nil { if err := w.writeLabelIndexesOffsetTable(); err != nil {
return err return err
} }
w.toc.PostingsTable = w.pos w.toc.PostingsTable = w.f.pos
if err := w.writePostingsOffsetTable(); err != nil { if err := w.writePostingsOffsetTable(); err != nil {
return err return err
} }
@ -354,8 +398,8 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
return errors.Errorf("failed to write padding bytes: %v", err) return errors.Errorf("failed to write padding bytes: %v", err)
} }
if w.pos%16 != 0 { if w.f.pos%16 != 0 {
return errors.Errorf("series write not 16-byte aligned at %d", w.pos) return errors.Errorf("series write not 16-byte aligned at %d", w.f.pos)
} }
w.buf2.Reset() w.buf2.Reset()
@ -436,24 +480,24 @@ func (w *Writer) AddSymbol(sym string) error {
func (w *Writer) finishSymbols() error { func (w *Writer) finishSymbols() error {
// Write out the length and symbol count. // Write out the length and symbol count.
w.buf1.Reset() w.buf1.Reset()
w.buf1.PutBE32int(int(w.pos - w.toc.Symbols - 4)) w.buf1.PutBE32int(int(w.f.pos - w.toc.Symbols - 4))
w.buf1.PutBE32int(int(w.numSymbols)) w.buf1.PutBE32int(int(w.numSymbols))
if err := w.writeAt(w.buf1.Get(), w.toc.Symbols); err != nil { if err := w.writeAt(w.buf1.Get(), w.toc.Symbols); err != nil {
return err return err
} }
hashPos := w.pos hashPos := w.f.pos
// Leave space for the hash. We can only calculate it // Leave space for the hash. We can only calculate it
// now that the number of symbols is known, so mmap and do it from there. // now that the number of symbols is known, so mmap and do it from there.
if err := w.write([]byte("hash")); err != nil { if err := w.write([]byte("hash")); err != nil {
return err return err
} }
if err := w.fbuf.Flush(); err != nil { if err := w.f.flush(); err != nil {
return err return err
} }
var err error var err error
w.symbolFile, err = fileutil.OpenMmapFile(w.f.Name()) w.symbolFile, err = fileutil.OpenMmapFile(w.f.name)
if err != nil { if err != nil {
return err return err
} }
@ -493,10 +537,10 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error {
w.labelIndexes = append(w.labelIndexes, labelIndexHashEntry{ w.labelIndexes = append(w.labelIndexes, labelIndexHashEntry{
keys: names, keys: names,
offset: w.pos, offset: w.f.pos,
}) })
startPos := w.pos startPos := w.f.pos
// Leave 4 bytes of space for the length, which will be calculated later. // Leave 4 bytes of space for the length, which will be calculated later.
if err := w.write([]byte("alen")); err != nil { if err := w.write([]byte("alen")); err != nil {
return err return err
@ -527,7 +571,7 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error {
// Write out the length. // Write out the length.
w.buf1.Reset() w.buf1.Reset()
w.buf1.PutBE32int(int(w.pos - startPos - 4)) w.buf1.PutBE32int(int(w.f.pos - startPos - 4))
if err := w.writeAt(w.buf1.Get(), startPos); err != nil { if err := w.writeAt(w.buf1.Get(), startPos); err != nil {
return err return err
} }
@ -539,7 +583,7 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error {
// writeLabelIndexesOffsetTable writes the label indices offset table. // writeLabelIndexesOffsetTable writes the label indices offset table.
func (w *Writer) writeLabelIndexesOffsetTable() error { func (w *Writer) writeLabelIndexesOffsetTable() error {
startPos := w.pos startPos := w.f.pos
// Leave 4 bytes of space for the length, which will be calculated later. // Leave 4 bytes of space for the length, which will be calculated later.
if err := w.write([]byte("alen")); err != nil { if err := w.write([]byte("alen")); err != nil {
return err return err
@ -567,7 +611,7 @@ func (w *Writer) writeLabelIndexesOffsetTable() error {
} }
// Write out the length. // Write out the length.
w.buf1.Reset() w.buf1.Reset()
w.buf1.PutBE32int(int(w.pos - startPos - 4)) w.buf1.PutBE32int(int(w.f.pos - startPos - 4))
if err := w.writeAt(w.buf1.Get(), startPos); err != nil { if err := w.writeAt(w.buf1.Get(), startPos); err != nil {
return err return err
} }
@ -580,12 +624,12 @@ func (w *Writer) writeLabelIndexesOffsetTable() error {
// writePostingsOffsetTable writes the postings offset table. // writePostingsOffsetTable writes the postings offset table.
func (w *Writer) writePostingsOffsetTable() error { func (w *Writer) writePostingsOffsetTable() error {
// Ensure everything is in the temporary file. // Ensure everything is in the temporary file.
if err := w.fbufPO.Flush(); err != nil { if err := w.fPO.flush(); err != nil {
return err return err
} }
w.buf1.Reset() w.buf1.Reset()
w.buf1.PutBE32int(int(w.posPO) + 4) // Length, including the count. w.buf1.PutBE32int(int(w.fPO.pos) + 4) // Length, including the count.
if err := w.write(w.buf1.Get()); err != nil { if err := w.write(w.buf1.Get()); err != nil {
return err return err
} }
@ -598,14 +642,14 @@ func (w *Writer) writePostingsOffsetTable() error {
return err return err
} }
// Copy temporary file into main index. // Copy temporary file into main index.
if _, err := w.fPO.Seek(0, 0); err != nil { if _, err := w.fPO.f.Seek(0, 0); err != nil {
return err return err
} }
buf := make([]byte, 1<<20) buf := make([]byte, 1<<20)
l := 0 l := 0
for { for {
n, err := w.fPO.Read(buf) n, err := w.fPO.f.Read(buf)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
return err return err
} }
@ -618,19 +662,18 @@ func (w *Writer) writePostingsOffsetTable() error {
return err return err
} }
} }
if w.posPO != uint64(l) { if w.fPO.pos != uint64(l) {
return errors.Errorf("wrote %d bytes to posting offset temporary file, but only read back %d", w.posPO, l) return errors.Errorf("wrote %d bytes to posting offset temporary file, but only read back %d", w.fPO.pos, l)
} }
// Cleanup temporary file. // Cleanup temporary file.
name := w.fPO.Name() if err := w.fPO.close(); err != nil {
if err := w.fPO.Close(); err != nil { return err
}
if err := w.fPO.remove(); err != nil {
return err return err
} }
w.fPO = nil w.fPO = nil
if err := os.Remove(name); err != nil {
return err
}
// Finally write the hash. // Finally write the hash.
w.buf1.Reset() w.buf1.Reset()
@ -662,10 +705,10 @@ func (w *Writer) writePostings() error {
} }
sort.Strings(names) sort.Strings(names)
if err := w.fbuf.Flush(); err != nil { if err := w.f.flush(); err != nil {
return err return err
} }
f, err := fileutil.OpenMmapFile(w.f.Name()) f, err := fileutil.OpenMmapFile(w.f.name)
if err != nil { if err != nil {
return err return err
} }
@ -790,11 +833,9 @@ func (w *Writer) writePosting(name, value string, offs []uint32) error {
w.buf1.PutUvarint(2) w.buf1.PutUvarint(2)
w.buf1.PutUvarintStr(name) w.buf1.PutUvarintStr(name)
w.buf1.PutUvarintStr(value) w.buf1.PutUvarintStr(value)
w.buf1.PutUvarint64(w.pos) w.buf1.PutUvarint64(w.f.pos)
if n, err := w.fbufPO.Write(w.buf1.Get()); err != nil { if err := w.fPO.write(w.buf1.Get()); err != nil {
return err return err
} else {
w.posPO += uint64(n)
} }
w.cntPO++ w.cntPO++
@ -840,17 +881,11 @@ func (w *Writer) Close() error {
} }
} }
if w.fPO != nil { if w.fPO != nil {
if err := w.fPO.Close(); err != nil { if err := w.fPO.close(); err != nil {
return err return err
} }
} }
if err := w.fbuf.Flush(); err != nil { return w.f.close()
return err
}
if err := w.f.Sync(); err != nil {
return err
}
return w.f.Close()
} }
// StringTuples provides access to a sorted list of string tuples. // StringTuples provides access to a sorted list of string tuples.