mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 22:37:27 -08:00
Merge pull request #880 from prometheus/beorn7/fix
Fix the storage corruption bug.
This commit is contained in:
commit
c5d8730150
|
@ -25,6 +25,7 @@ import (
|
|||
"github.com/prometheus/prometheus/storage/metric"
|
||||
)
|
||||
|
||||
// The DefaultChunkEncoding can be changed via a flag.
|
||||
var DefaultChunkEncoding = doubleDelta
|
||||
|
||||
type chunkEncoding byte
|
||||
|
|
|
@ -254,7 +254,7 @@ func (p *persistence) sanitizeSeries(
|
|||
// disk. Treat this series as a freshly unarchived one
|
||||
// by loading the chunkDescs and setting all parameters
|
||||
// based on the loaded chunkDescs.
|
||||
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
|
||||
cds, err := p.loadChunkDescs(fp, 0)
|
||||
if err != nil {
|
||||
log.Errorf(
|
||||
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
|
||||
|
@ -286,8 +286,7 @@ func (p *persistence) sanitizeSeries(
|
|||
// First, throw away the chunkDescs without chunks.
|
||||
s.chunkDescs = s.chunkDescs[s.persistWatermark:]
|
||||
numMemChunkDescs.Sub(float64(s.persistWatermark))
|
||||
// Load all the chunk descs.
|
||||
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
|
||||
cds, err := p.loadChunkDescs(fp, 0)
|
||||
if err != nil {
|
||||
log.Errorf(
|
||||
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
|
||||
|
@ -407,7 +406,7 @@ func (p *persistence) cleanUpArchiveIndexes(
|
|||
if _, err := p.archivedFingerprintToMetrics.Delete(fp); err != nil {
|
||||
return err
|
||||
}
|
||||
cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), clientmodel.Latest)
|
||||
cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ const (
|
|||
labelPairToFingerprintsDir = "labelpair_to_fingerprints"
|
||||
)
|
||||
|
||||
// LevelDB cache sizes, changeable via flags.
|
||||
var (
|
||||
FingerprintMetricCacheSize = 10 * 1024 * 1024
|
||||
FingerprintTimeRangeCacheSize = 5 * 1024 * 1024
|
||||
|
|
|
@ -89,6 +89,8 @@ func init() {
|
|||
var (
|
||||
// Global counter, also used internally, so not implemented as
|
||||
// metrics. Collected in memorySeriesStorage.Collect.
|
||||
// TODO(beorn7): As it is used internally, it is actually very bad style
|
||||
// to have it as a global variable.
|
||||
numMemChunks int64
|
||||
|
||||
// Metric descriptors for the above.
|
||||
|
|
|
@ -444,10 +444,11 @@ func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int, inde
|
|||
return chunks, nil
|
||||
}
|
||||
|
||||
// loadChunkDescs loads chunkDescs for a series up until a given time. It is
|
||||
// the caller's responsibility to not persist or drop anything for the same
|
||||
// loadChunkDescs loads the chunkDescs for a series from disk. offsetFromEnd is
|
||||
// the number of chunkDescs to skip from the end of the series file. It is the
|
||||
// caller's responsibility to not persist or drop anything for the same
|
||||
// fingerprint concurrently.
|
||||
func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) {
|
||||
func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, offsetFromEnd int) ([]*chunkDesc, error) {
|
||||
f, err := p.openChunkFileForReading(fp)
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
|
@ -469,8 +470,8 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
|
|||
)
|
||||
}
|
||||
|
||||
numChunks := int(fi.Size()) / chunkLenWithHeader
|
||||
cds := make([]*chunkDesc, 0, numChunks)
|
||||
numChunks := int(fi.Size())/chunkLenWithHeader - offsetFromEnd
|
||||
cds := make([]*chunkDesc, numChunks)
|
||||
chunkTimesBuf := make([]byte, 16)
|
||||
for i := 0; i < numChunks; i++ {
|
||||
_, err := f.Seek(offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, os.SEEK_SET)
|
||||
|
@ -482,15 +483,10 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cd := &chunkDesc{
|
||||
cds[i] = &chunkDesc{
|
||||
chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)),
|
||||
chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])),
|
||||
}
|
||||
if !cd.chunkLastTime.Before(beforeTime) {
|
||||
// From here on, we have chunkDescs in memory already.
|
||||
break
|
||||
}
|
||||
cds = append(cds, cd)
|
||||
}
|
||||
chunkDescOps.WithLabelValues(load).Add(float64(len(cds)))
|
||||
numMemChunkDescs.Add(float64(len(cds)))
|
||||
|
|
|
@ -122,7 +122,7 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunkEncoding) {
|
|||
}
|
||||
}
|
||||
// Load all chunk descs.
|
||||
actualChunkDescs, err := p.loadChunkDescs(fp, 10)
|
||||
actualChunkDescs, err := p.loadChunkDescs(fp, 0)
|
||||
if len(actualChunkDescs) != 10 {
|
||||
t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 10)
|
||||
}
|
||||
|
@ -974,7 +974,7 @@ func BenchmarkLoadChunkDescs(b *testing.B) {
|
|||
for i := 0; i < b.N; i++ {
|
||||
for _, s := range fpStrings {
|
||||
fp.LoadFromString(s)
|
||||
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
|
||||
cds, err := p.loadChunkDescs(fp, 0)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
|
|
|
@ -384,7 +384,7 @@ func (s *memorySeries) preloadChunksForRange(
|
|||
firstChunkDescTime = s.chunkDescs[0].firstTime()
|
||||
}
|
||||
if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) {
|
||||
cds, err := mss.loadChunkDescs(fp, firstChunkDescTime)
|
||||
cds, err := mss.loadChunkDescs(fp, s.persistWatermark)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -589,7 +589,7 @@ func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m cl
|
|||
// end up with a series without any chunkDescs for a
|
||||
// while (which is confusing as it makes the series
|
||||
// appear as archived or purged).
|
||||
cds, err = s.loadChunkDescs(fp, clientmodel.Latest)
|
||||
cds, err = s.loadChunkDescs(fp, 0)
|
||||
if err != nil {
|
||||
log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
|
||||
}
|
||||
|
@ -979,7 +979,7 @@ func (s *memorySeriesStorage) maintainMemorySeries(
|
|||
return
|
||||
}
|
||||
// If we are here, the series is not archived, so check for chunkDesc
|
||||
// eviction next
|
||||
// eviction next.
|
||||
series.evictChunkDescs(iOldestNotEvicted)
|
||||
|
||||
return series.dirty && !seriesWasDirty
|
||||
|
@ -1107,8 +1107,8 @@ func (s *memorySeriesStorage) loadChunks(fp clientmodel.Fingerprint, indexes []i
|
|||
}
|
||||
|
||||
// See persistence.loadChunkDescs for detailed explanation.
|
||||
func (s *memorySeriesStorage) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) {
|
||||
return s.persistence.loadChunkDescs(fp, beforeTime)
|
||||
func (s *memorySeriesStorage) loadChunkDescs(fp clientmodel.Fingerprint, offsetFromEnd int) ([]*chunkDesc, error) {
|
||||
return s.persistence.loadChunkDescs(fp, offsetFromEnd)
|
||||
}
|
||||
|
||||
// getNumChunksToPersist returns numChunksToPersist in a goroutine-safe way.
|
||||
|
|
|
@ -1116,6 +1116,78 @@ func TestEvictAndPurgeSeriesChunkType1(t *testing.T) {
|
|||
testEvictAndPurgeSeries(t, 1)
|
||||
}
|
||||
|
||||
func testEvictAndLoadChunkDescs(t *testing.T, encoding chunkEncoding) {
|
||||
samples := make(clientmodel.Samples, 10000)
|
||||
for i := range samples {
|
||||
samples[i] = &clientmodel.Sample{
|
||||
Timestamp: clientmodel.Timestamp(2 * i),
|
||||
Value: clientmodel.SampleValue(float64(i * i)),
|
||||
}
|
||||
}
|
||||
// Give last sample a timestamp of now so that the head chunk will not
|
||||
// be closed (which would then archive the time series later as
|
||||
// everything will get evicted).
|
||||
samples[len(samples)-1] = &clientmodel.Sample{
|
||||
Timestamp: clientmodel.Now(),
|
||||
Value: clientmodel.SampleValue(3.14),
|
||||
}
|
||||
|
||||
s, closer := NewTestStorage(t, encoding)
|
||||
defer closer.Close()
|
||||
|
||||
// Adjust memory chunks to lower value to see evictions.
|
||||
s.maxMemoryChunks = 1
|
||||
|
||||
for _, sample := range samples {
|
||||
s.Append(sample)
|
||||
}
|
||||
s.WaitForIndexing()
|
||||
|
||||
fp := clientmodel.Metric{}.FastFingerprint()
|
||||
|
||||
series, ok := s.fpToSeries.get(fp)
|
||||
if !ok {
|
||||
t.Fatal("could not find series")
|
||||
}
|
||||
|
||||
oldLen := len(series.chunkDescs)
|
||||
// Maintain series without any dropped chunks.
|
||||
s.maintainMemorySeries(fp, 0)
|
||||
// Give the evict goroutine an opportunity to run.
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
// Maintain series again to trigger chunkDesc eviction
|
||||
s.maintainMemorySeries(fp, 0)
|
||||
|
||||
if oldLen <= len(series.chunkDescs) {
|
||||
t.Errorf("Expected number of chunkDescs to decrease, old number %d, current number %d.", oldLen, len(series.chunkDescs))
|
||||
}
|
||||
|
||||
// Load everything back.
|
||||
p := s.NewPreloader()
|
||||
p.PreloadRange(fp, 0, 100000, time.Hour)
|
||||
|
||||
if oldLen != len(series.chunkDescs) {
|
||||
t.Errorf("Expected number of chunkDescs to have reached old value again, old number %d, current number %d.", oldLen, len(series.chunkDescs))
|
||||
}
|
||||
|
||||
p.Close()
|
||||
|
||||
// Now maintain series with drops to make sure nothing crazy happens.
|
||||
s.maintainMemorySeries(fp, 100000)
|
||||
|
||||
if len(series.chunkDescs) != 1 {
|
||||
t.Errorf("Expected exactly one chunkDesc left, got %d.", len(series.chunkDescs))
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvictAndLoadChunkDescsType0(t *testing.T) {
|
||||
testEvictAndLoadChunkDescs(t, 0)
|
||||
}
|
||||
|
||||
func TestEvictAndLoadChunkDescsType1(t *testing.T) {
|
||||
testEvictAndLoadChunkDescs(t, 1)
|
||||
}
|
||||
|
||||
func benchmarkAppend(b *testing.B, encoding chunkEncoding) {
|
||||
samples := make(clientmodel.Samples, b.N)
|
||||
for i := range samples {
|
||||
|
@ -1437,7 +1509,7 @@ func TestAppendOutOfOrder(t *testing.T) {
|
|||
|
||||
err = pl.PreloadRange(fp, 0, 2, 5*time.Minute)
|
||||
if err != nil {
|
||||
t.Fatalf("error preloading chunks: %s", err)
|
||||
t.Fatalf("Error preloading chunks: %s", err)
|
||||
}
|
||||
|
||||
it := s.NewIterator(fp)
|
||||
|
|
Loading…
Reference in a new issue