Merge pull request #880 from prometheus/beorn7/fix

Fix the storage corruption bug.
This commit is contained in:
Björn Rabenstein 2015-07-16 15:55:48 +02:00
commit c5d8730150
9 changed files with 94 additions and 23 deletions

View file

@ -25,6 +25,7 @@ import (
"github.com/prometheus/prometheus/storage/metric"
)
// The DefaultChunkEncoding can be changed via a flag.
var DefaultChunkEncoding = doubleDelta
type chunkEncoding byte

View file

@ -254,7 +254,7 @@ func (p *persistence) sanitizeSeries(
// disk. Treat this series as a freshly unarchived one
// by loading the chunkDescs and setting all parameters
// based on the loaded chunkDescs.
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
cds, err := p.loadChunkDescs(fp, 0)
if err != nil {
log.Errorf(
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
@ -286,8 +286,7 @@ func (p *persistence) sanitizeSeries(
// First, throw away the chunkDescs without chunks.
s.chunkDescs = s.chunkDescs[s.persistWatermark:]
numMemChunkDescs.Sub(float64(s.persistWatermark))
// Load all the chunk descs.
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
cds, err := p.loadChunkDescs(fp, 0)
if err != nil {
log.Errorf(
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
@ -407,7 +406,7 @@ func (p *persistence) cleanUpArchiveIndexes(
if _, err := p.archivedFingerprintToMetrics.Delete(fp); err != nil {
return err
}
cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), clientmodel.Latest)
cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), 0)
if err != nil {
return err
}

View file

@ -33,6 +33,7 @@ const (
labelPairToFingerprintsDir = "labelpair_to_fingerprints"
)
// LevelDB cache sizes, changeable via flags.
var (
FingerprintMetricCacheSize = 10 * 1024 * 1024
FingerprintTimeRangeCacheSize = 5 * 1024 * 1024

View file

@ -89,6 +89,8 @@ func init() {
var (
// Global counter, also used internally, so not implemented as
// metrics. Collected in memorySeriesStorage.Collect.
// TODO(beorn7): As it is used internally, it is actually very bad style
// to have it as a global variable.
numMemChunks int64
// Metric descriptors for the above.

View file

@ -444,10 +444,11 @@ func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int, inde
return chunks, nil
}
// loadChunkDescs loads chunkDescs for a series up until a given time. It is
// the caller's responsibility to not persist or drop anything for the same
// loadChunkDescs loads the chunkDescs for a series from disk. offsetFromEnd is
// the number of chunkDescs to skip from the end of the series file. It is the
// caller's responsibility to not persist or drop anything for the same
// fingerprint concurrently.
func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) {
func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, offsetFromEnd int) ([]*chunkDesc, error) {
f, err := p.openChunkFileForReading(fp)
if os.IsNotExist(err) {
return nil, nil
@ -469,8 +470,8 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
)
}
numChunks := int(fi.Size()) / chunkLenWithHeader
cds := make([]*chunkDesc, 0, numChunks)
numChunks := int(fi.Size())/chunkLenWithHeader - offsetFromEnd
cds := make([]*chunkDesc, numChunks)
chunkTimesBuf := make([]byte, 16)
for i := 0; i < numChunks; i++ {
_, err := f.Seek(offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, os.SEEK_SET)
@ -482,15 +483,10 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
if err != nil {
return nil, err
}
cd := &chunkDesc{
cds[i] = &chunkDesc{
chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)),
chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])),
}
if !cd.chunkLastTime.Before(beforeTime) {
// From here on, we have chunkDescs in memory already.
break
}
cds = append(cds, cd)
}
chunkDescOps.WithLabelValues(load).Add(float64(len(cds)))
numMemChunkDescs.Add(float64(len(cds)))

View file

@ -122,7 +122,7 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunkEncoding) {
}
}
// Load all chunk descs.
actualChunkDescs, err := p.loadChunkDescs(fp, 10)
actualChunkDescs, err := p.loadChunkDescs(fp, 0)
if len(actualChunkDescs) != 10 {
t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 10)
}
@ -974,7 +974,7 @@ func BenchmarkLoadChunkDescs(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, s := range fpStrings {
fp.LoadFromString(s)
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
cds, err := p.loadChunkDescs(fp, 0)
if err != nil {
b.Error(err)
}

View file

@ -384,7 +384,7 @@ func (s *memorySeries) preloadChunksForRange(
firstChunkDescTime = s.chunkDescs[0].firstTime()
}
if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) {
cds, err := mss.loadChunkDescs(fp, firstChunkDescTime)
cds, err := mss.loadChunkDescs(fp, s.persistWatermark)
if err != nil {
return nil, err
}

View file

@ -589,7 +589,7 @@ func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m cl
// end up with a series without any chunkDescs for a
// while (which is confusing as it makes the series
// appear as archived or purged).
cds, err = s.loadChunkDescs(fp, clientmodel.Latest)
cds, err = s.loadChunkDescs(fp, 0)
if err != nil {
log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
}
@ -979,7 +979,7 @@ func (s *memorySeriesStorage) maintainMemorySeries(
return
}
// If we are here, the series is not archived, so check for chunkDesc
// eviction next
// eviction next.
series.evictChunkDescs(iOldestNotEvicted)
return series.dirty && !seriesWasDirty
@ -1107,8 +1107,8 @@ func (s *memorySeriesStorage) loadChunks(fp clientmodel.Fingerprint, indexes []i
}
// See persistence.loadChunkDescs for detailed explanation.
func (s *memorySeriesStorage) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) {
return s.persistence.loadChunkDescs(fp, beforeTime)
func (s *memorySeriesStorage) loadChunkDescs(fp clientmodel.Fingerprint, offsetFromEnd int) ([]*chunkDesc, error) {
return s.persistence.loadChunkDescs(fp, offsetFromEnd)
}
// getNumChunksToPersist returns numChunksToPersist in a goroutine-safe way.

View file

@ -1116,6 +1116,78 @@ func TestEvictAndPurgeSeriesChunkType1(t *testing.T) {
testEvictAndPurgeSeries(t, 1)
}
// testEvictAndLoadChunkDescs appends 10000 samples to a single series with
// maxMemoryChunks set to 1, runs series maintenance so that chunkDescs get
// evicted, and then checks that preloading the whole time range brings the
// number of chunkDescs back to its previous value. Finally, it runs
// maintenance with a drop time of 100000 and expects exactly one chunkDesc to
// remain.
func testEvictAndLoadChunkDescs(t *testing.T, encoding chunkEncoding) {
	samples := make(clientmodel.Samples, 10000)
	for i := range samples {
		samples[i] = &clientmodel.Sample{
			Timestamp: clientmodel.Timestamp(2 * i),
			Value:     clientmodel.SampleValue(float64(i * i)),
		}
	}
	// Give last sample a timestamp of now so that the head chunk will not
	// be closed (which would then archive the time series later as
	// everything will get evicted).
	samples[len(samples)-1] = &clientmodel.Sample{
		Timestamp: clientmodel.Now(),
		Value:     clientmodel.SampleValue(3.14),
	}

	s, closer := NewTestStorage(t, encoding)
	defer closer.Close()

	// Adjust memory chunks to lower value to see evictions.
	s.maxMemoryChunks = 1

	for _, sample := range samples {
		s.Append(sample)
	}
	s.WaitForIndexing()

	fp := clientmodel.Metric{}.FastFingerprint()
	series, ok := s.fpToSeries.get(fp)
	if !ok {
		t.Fatal("could not find series")
	}

	oldLen := len(series.chunkDescs)
	// Maintain series without any dropped chunks.
	s.maintainMemorySeries(fp, 0)
	// Give the evict goroutine an opportunity to run.
	time.Sleep(10 * time.Millisecond)
	// Maintain series again to trigger chunkDesc eviction.
	s.maintainMemorySeries(fp, 0)

	if oldLen <= len(series.chunkDescs) {
		t.Errorf("Expected number of chunkDescs to decrease, old number %d, current number %d.", oldLen, len(series.chunkDescs))
	}

	// Load everything back. A preload error must fail the test right away;
	// otherwise it would only surface as a confusing length mismatch below.
	p := s.NewPreloader()
	if err := p.PreloadRange(fp, 0, 100000, time.Hour); err != nil {
		t.Fatalf("Error preloading chunks: %s", err)
	}

	if oldLen != len(series.chunkDescs) {
		t.Errorf("Expected number of chunkDescs to have reached old value again, old number %d, current number %d.", oldLen, len(series.chunkDescs))
	}

	// Close the preloader before the final maintenance run rather than
	// deferring it, keeping the original ordering of Close and maintenance.
	p.Close()

	// Now maintain series with drops to make sure nothing crazy happens.
	s.maintainMemorySeries(fp, 100000)

	if len(series.chunkDescs) != 1 {
		t.Errorf("Expected exactly one chunkDesc left, got %d.", len(series.chunkDescs))
	}
}
// TestEvictAndLoadChunkDescsType0 runs testEvictAndLoadChunkDescs with chunk
// encoding 0.
func TestEvictAndLoadChunkDescsType0(t *testing.T) {
testEvictAndLoadChunkDescs(t, 0)
}
// TestEvictAndLoadChunkDescsType1 runs testEvictAndLoadChunkDescs with chunk
// encoding 1.
func TestEvictAndLoadChunkDescsType1(t *testing.T) {
testEvictAndLoadChunkDescs(t, 1)
}
func benchmarkAppend(b *testing.B, encoding chunkEncoding) {
samples := make(clientmodel.Samples, b.N)
for i := range samples {
@ -1437,7 +1509,7 @@ func TestAppendOutOfOrder(t *testing.T) {
err = pl.PreloadRange(fp, 0, 2, 5*time.Minute)
if err != nil {
t.Fatalf("error preloading chunks: %s", err)
t.Fatalf("Error preloading chunks: %s", err)
}
it := s.NewIterator(fp)