Fixed WAL corruption on partial writes within a page (#8125)

* Fixed WAL corruption on partial writes

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Renamed variable

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Fixed test

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Addressed review comments

Signed-off-by: Marco Pracucci <marco@pracucci.com>
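
For context on the diff below: when a write to the segment file failed part-way through, flushPage returned the error without advancing p.flushed, so the next flush re-sent bytes that were already in the file, leaving duplicated bytes in the on-disk page and corrupting the record framing. The fix accounts for the partially written bytes even on error. The following standalone sketch illustrates that bookkeeping pattern; the shortWriter type and all names in it are illustrative and not taken from the patch:

package main

import (
    "bytes"
    "errors"
    "fmt"
)

// shortWriter writes at most limit bytes per call and then fails,
// emulating a partial write to a segment file.
type shortWriter struct {
    dst   *bytes.Buffer
    limit int
}

func (s *shortWriter) Write(p []byte) (int, error) {
    if len(p) > s.limit {
        n, _ := s.dst.Write(p[:s.limit])
        return n, errors.New("injected partial write")
    }
    return s.dst.Write(p)
}

func main() {
    page := []byte("abcdefgh") // stands in for the in-memory WAL page buffer
    dst := &bytes.Buffer{}
    w := &shortWriter{dst: dst, limit: 3}

    flushed := 0

    // The first flush fails after 3 bytes; like the patched flushPage, we
    // still advance flushed by the bytes that did make it to the file.
    n, err := w.Write(page[flushed:])
    flushed += n
    fmt.Println(n, err) // 3 injected partial write

    // The retry resumes after the already-persisted bytes instead of
    // re-writing them from the start of the page.
    w.limit = len(page)
    n, _ = w.Write(page[flushed:])
    flushed += n
    fmt.Println(dst.String()) // "abcdefgh", not "abcabcdefgh" as it would be without advancing flushed
}

As the TODO comments in the diff note, a record that overlaps two pages can still be corrupted when the flush of the first page fails, which is why that test case remains commented out.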
Commit:  63be30dcee (parent 318190021d)
Author:  Marco Pracucci
Date:    2020-10-29 11:37:03 +01:00 (committed by GitHub)
2 changed files with 117 additions and 5 deletions

--- Changed file 1 of 2 ---

@@ -74,9 +74,18 @@ func (p *page) reset() {
     p.flushed = 0
 }
 
+// SegmentFile represents the underlying file used to store a segment.
+type SegmentFile interface {
+    Stat() (os.FileInfo, error)
+    Sync() error
+    io.Writer
+    io.Reader
+    io.Closer
+}
+
 // Segment represents a segment file.
 type Segment struct {
-    *os.File
+    SegmentFile
     dir string
     i   int
 }
@@ -130,7 +139,7 @@ func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) {
             return nil, errors.Wrap(err, "zero-pad torn page")
         }
     }
-    return &Segment{File: f, i: k, dir: dir}, nil
+    return &Segment{SegmentFile: f, i: k, dir: dir}, nil
 }
 
 // CreateSegment creates a new segment k in dir.
@@ -139,7 +148,7 @@ func CreateSegment(dir string, k int) (*Segment, error) {
     if err != nil {
         return nil, err
     }
-    return &Segment{File: f, i: k, dir: dir}, nil
+    return &Segment{SegmentFile: f, i: k, dir: dir}, nil
 }
 
 // OpenReadSegment opens the segment with the given filename.
@@ -152,7 +161,7 @@ func OpenReadSegment(fn string) (*Segment, error) {
     if err != nil {
         return nil, err
     }
-    return &Segment{File: f, i: k, dir: filepath.Dir(fn)}, nil
+    return &Segment{SegmentFile: f, i: k, dir: filepath.Dir(fn)}, nil
 }
 
 // WAL is a write ahead log that stores records in segment files.
@@ -517,8 +526,10 @@ func (w *WAL) flushPage(clear bool) error {
     if clear {
         p.alloc = pageSize // Write till end of page.
     }
+
     n, err := w.segment.Write(p.buf[p.flushed:p.alloc])
     if err != nil {
+        p.flushed += n
         return err
     }
     p.flushed += n
@@ -664,6 +675,9 @@ func (w *WAL) log(rec []byte, final bool) error {
         if w.page.full() {
             if err := w.flushPage(true); err != nil {
+                // TODO When the flushing fails at this point and the record has not been
+                // fully written to the buffer, we end up with a corrupted WAL because some part of the
+                // record have been written to the buffer, while the rest of the record will be discarded.
                 return err
             }
         }
@@ -705,7 +719,7 @@ func (w *WAL) Truncate(i int) (err error) {
 
 func (w *WAL) fsync(f *Segment) error {
     start := time.Now()
-    err := f.File.Sync()
+    err := f.Sync()
     w.metrics.fsyncDuration.Observe(time.Since(start).Seconds())
     return err
 }
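
Worth noting: the SegmentFile interface extracted above is what lets the test below swap the real segment file for a fault-injecting wrapper, while *os.File keeps satisfying the interface in production. A compile-time assertion makes that explicit (illustrative only, not part of this diff):

package wal

import "os"

// Illustrative compile-time check (not part of the patch): *os.File provides
// Stat, Sync, Write, Read and Close, so it satisfies SegmentFile and the
// Segment type keeps wrapping a real file outside of tests.
var _ SegmentFile = (*os.File)(nil)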

--- Changed file 2 of 2 ---

@@ -17,6 +17,7 @@ package wal
 import (
     "bytes"
     "fmt"
+    "io"
     "io/ioutil"
     "math/rand"
     "os"
@@ -424,6 +425,103 @@ func TestCompression(t *testing.T) {
     require.Greater(t, float64(uncompressedSize)*0.75, float64(compressedSize), "Compressing zeroes should save at least 25%% space - uncompressedSize: %d, compressedSize: %d", uncompressedSize, compressedSize)
 }
 
+func TestLogPartialWrite(t *testing.T) {
+    const segmentSize = pageSize * 2
+    record := []byte{1, 2, 3, 4, 5}
+
+    tests := map[string]struct {
+        numRecords   int
+        faultyRecord int
+    }{
+        "partial write when logging first record in a page": {
+            numRecords:   10,
+            faultyRecord: 1,
+        },
+        "partial write when logging record in the middle of a page": {
+            numRecords:   10,
+            faultyRecord: 3,
+        },
+        "partial write when logging last record of a page": {
+            numRecords:   (pageSize / (recordHeaderSize + len(record))) + 10,
+            faultyRecord: pageSize / (recordHeaderSize + len(record)),
+        },
+        // TODO the current implementation suffers this:
+        //"partial write when logging a record overlapping two pages": {
+        //    numRecords:   (pageSize / (recordHeaderSize + len(record))) + 10,
+        //    faultyRecord: pageSize/(recordHeaderSize+len(record)) + 1,
+        //},
+    }
+
+    for testName, testData := range tests {
+        t.Run(testName, func(t *testing.T) {
+            dirPath, err := ioutil.TempDir("", "")
+            require.NoError(t, err)
+
+            w, err := NewSize(nil, nil, dirPath, segmentSize, false)
+            require.NoError(t, err)
+
+            // Replace the underlying segment file with a mocked one that injects a failure.
+            w.segment.SegmentFile = &faultySegmentFile{
+                SegmentFile:       w.segment.SegmentFile,
+                writeFailureAfter: ((recordHeaderSize + len(record)) * (testData.faultyRecord - 1)) + 2,
+                writeFailureErr:   io.ErrShortWrite,
+            }
+
+            for i := 1; i <= testData.numRecords; i++ {
+                if err := w.Log(record); i == testData.faultyRecord {
+                    require.Error(t, io.ErrShortWrite, err)
+                } else {
+                    require.NoError(t, err)
+                }
+            }
+
+            require.NoError(t, w.Close())
+
+            // Read it back. We expect no corruption.
+            s, err := OpenReadSegment(SegmentName(dirPath, 0))
+            require.NoError(t, err)
+
+            r := NewReader(NewSegmentBufReader(s))
+            for i := 0; i < testData.numRecords; i++ {
+                require.True(t, r.Next())
+                require.NoError(t, r.Err())
+                require.Equal(t, record, r.Record())
+            }
+            require.False(t, r.Next())
+            require.NoError(t, r.Err())
+        })
+    }
+}
+
+type faultySegmentFile struct {
+    SegmentFile
+    written           int
+    writeFailureAfter int
+    writeFailureErr   error
+}
+
+func (f *faultySegmentFile) Write(p []byte) (int, error) {
+    if f.writeFailureAfter >= 0 && f.writeFailureAfter < f.written+len(p) {
+        partialLen := f.writeFailureAfter - f.written
+        if partialLen <= 0 || partialLen >= len(p) {
+            partialLen = 1
+        }
+
+        // Inject failure.
+        n, _ := f.SegmentFile.Write(p[:partialLen])
+        f.written += n
+        f.writeFailureAfter = -1
+        return n, f.writeFailureErr
+    }
+
+    // Proxy the write to the underlying file.
+    n, err := f.SegmentFile.Write(p)
+    f.written += n
+    return n, err
+}
+
 func BenchmarkWAL_LogBatched(b *testing.B) {
     for _, compress := range []bool{true, false} {
         b.Run(fmt.Sprintf("compress=%t", compress), func(b *testing.B) {