mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Merge pull request #880 from prometheus/beorn7/fix
Fix the storage corruption bug.
This commit is contained in:
commit
c5d8730150
|
@@ -25,6 +25,7 @@ import (
|
||||||
"github.com/prometheus/prometheus/storage/metric"
|
"github.com/prometheus/prometheus/storage/metric"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// The DefaultChunkEncoding can be changed via a flag.
|
||||||
var DefaultChunkEncoding = doubleDelta
|
var DefaultChunkEncoding = doubleDelta
|
||||||
|
|
||||||
type chunkEncoding byte
|
type chunkEncoding byte
|
||||||
|
|
|
@@ -254,7 +254,7 @@ func (p *persistence) sanitizeSeries(
|
||||||
// disk. Treat this series as a freshly unarchived one
|
// disk. Treat this series as a freshly unarchived one
|
||||||
// by loading the chunkDescs and setting all parameters
|
// by loading the chunkDescs and setting all parameters
|
||||||
// based on the loaded chunkDescs.
|
// based on the loaded chunkDescs.
|
||||||
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
|
cds, err := p.loadChunkDescs(fp, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf(
|
log.Errorf(
|
||||||
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
|
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
|
||||||
|
@@ -286,8 +286,7 @@ func (p *persistence) sanitizeSeries(
|
||||||
// First, throw away the chunkDescs without chunks.
|
// First, throw away the chunkDescs without chunks.
|
||||||
s.chunkDescs = s.chunkDescs[s.persistWatermark:]
|
s.chunkDescs = s.chunkDescs[s.persistWatermark:]
|
||||||
numMemChunkDescs.Sub(float64(s.persistWatermark))
|
numMemChunkDescs.Sub(float64(s.persistWatermark))
|
||||||
// Load all the chunk descs.
|
cds, err := p.loadChunkDescs(fp, 0)
|
||||||
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf(
|
log.Errorf(
|
||||||
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
|
"Failed to load chunk descriptors for metric %v, fingerprint %v: %s",
|
||||||
|
@@ -407,7 +406,7 @@ func (p *persistence) cleanUpArchiveIndexes(
|
||||||
if _, err := p.archivedFingerprintToMetrics.Delete(fp); err != nil {
|
if _, err := p.archivedFingerprintToMetrics.Delete(fp); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), clientmodel.Latest)
|
cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@@ -33,6 +33,7 @@ const (
|
||||||
labelPairToFingerprintsDir = "labelpair_to_fingerprints"
|
labelPairToFingerprintsDir = "labelpair_to_fingerprints"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// LevelDB cache sizes, changeable via flags.
|
||||||
var (
|
var (
|
||||||
FingerprintMetricCacheSize = 10 * 1024 * 1024
|
FingerprintMetricCacheSize = 10 * 1024 * 1024
|
||||||
FingerprintTimeRangeCacheSize = 5 * 1024 * 1024
|
FingerprintTimeRangeCacheSize = 5 * 1024 * 1024
|
||||||
|
|
|
@@ -89,6 +89,8 @@ func init() {
|
||||||
var (
|
var (
|
||||||
// Global counter, also used internally, so not implemented as
|
// Global counter, also used internally, so not implemented as
|
||||||
// metrics. Collected in memorySeriesStorage.Collect.
|
// metrics. Collected in memorySeriesStorage.Collect.
|
||||||
|
// TODO(beorn7): As it is used internally, it is actually very bad style
|
||||||
|
// to have it as a global variable.
|
||||||
numMemChunks int64
|
numMemChunks int64
|
||||||
|
|
||||||
// Metric descriptors for the above.
|
// Metric descriptors for the above.
|
||||||
|
|
|
@@ -444,10 +444,11 @@ func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int, inde
|
||||||
return chunks, nil
|
return chunks, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadChunkDescs loads chunkDescs for a series up until a given time. It is
|
// loadChunkDescs loads the chunkDescs for a series from disk. offsetFromEnd is
|
||||||
// the caller's responsibility to not persist or drop anything for the same
|
// the number of chunkDescs to skip from the end of the series file. It is the
|
||||||
|
// caller's responsibility to not persist or drop anything for the same
|
||||||
// fingerprint concurrently.
|
// fingerprint concurrently.
|
||||||
func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) {
|
func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, offsetFromEnd int) ([]*chunkDesc, error) {
|
||||||
f, err := p.openChunkFileForReading(fp)
|
f, err := p.openChunkFileForReading(fp)
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
|
@@ -469,8 +470,8 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
numChunks := int(fi.Size()) / chunkLenWithHeader
|
numChunks := int(fi.Size())/chunkLenWithHeader - offsetFromEnd
|
||||||
cds := make([]*chunkDesc, 0, numChunks)
|
cds := make([]*chunkDesc, numChunks)
|
||||||
chunkTimesBuf := make([]byte, 16)
|
chunkTimesBuf := make([]byte, 16)
|
||||||
for i := 0; i < numChunks; i++ {
|
for i := 0; i < numChunks; i++ {
|
||||||
_, err := f.Seek(offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, os.SEEK_SET)
|
_, err := f.Seek(offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, os.SEEK_SET)
|
||||||
|
@@ -482,15 +483,10 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
cd := &chunkDesc{
|
cds[i] = &chunkDesc{
|
||||||
chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)),
|
chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)),
|
||||||
chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])),
|
chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])),
|
||||||
}
|
}
|
||||||
if !cd.chunkLastTime.Before(beforeTime) {
|
|
||||||
// From here on, we have chunkDescs in memory already.
|
|
||||||
break
|
|
||||||
}
|
|
||||||
cds = append(cds, cd)
|
|
||||||
}
|
}
|
||||||
chunkDescOps.WithLabelValues(load).Add(float64(len(cds)))
|
chunkDescOps.WithLabelValues(load).Add(float64(len(cds)))
|
||||||
numMemChunkDescs.Add(float64(len(cds)))
|
numMemChunkDescs.Add(float64(len(cds)))
|
||||||
|
|
|
@@ -122,7 +122,7 @@ func testPersistLoadDropChunks(t *testing.T, encoding chunkEncoding) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Load all chunk descs.
|
// Load all chunk descs.
|
||||||
actualChunkDescs, err := p.loadChunkDescs(fp, 10)
|
actualChunkDescs, err := p.loadChunkDescs(fp, 0)
|
||||||
if len(actualChunkDescs) != 10 {
|
if len(actualChunkDescs) != 10 {
|
||||||
t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 10)
|
t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 10)
|
||||||
}
|
}
|
||||||
|
@@ -974,7 +974,7 @@ func BenchmarkLoadChunkDescs(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
for _, s := range fpStrings {
|
for _, s := range fpStrings {
|
||||||
fp.LoadFromString(s)
|
fp.LoadFromString(s)
|
||||||
cds, err := p.loadChunkDescs(fp, clientmodel.Latest)
|
cds, err := p.loadChunkDescs(fp, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.Error(err)
|
b.Error(err)
|
||||||
}
|
}
|
||||||
|
|
|
@@ -384,7 +384,7 @@ func (s *memorySeries) preloadChunksForRange(
|
||||||
firstChunkDescTime = s.chunkDescs[0].firstTime()
|
firstChunkDescTime = s.chunkDescs[0].firstTime()
|
||||||
}
|
}
|
||||||
if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) {
|
if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) {
|
||||||
cds, err := mss.loadChunkDescs(fp, firstChunkDescTime)
|
cds, err := mss.loadChunkDescs(fp, s.persistWatermark)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@@ -589,7 +589,7 @@ func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m cl
|
||||||
// end up with a series without any chunkDescs for a
|
// end up with a series without any chunkDescs for a
|
||||||
// while (which is confusing as it makes the series
|
// while (which is confusing as it makes the series
|
||||||
// appear as archived or purged).
|
// appear as archived or purged).
|
||||||
cds, err = s.loadChunkDescs(fp, clientmodel.Latest)
|
cds, err = s.loadChunkDescs(fp, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
|
log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
|
||||||
}
|
}
|
||||||
|
@@ -979,7 +979,7 @@ func (s *memorySeriesStorage) maintainMemorySeries(
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// If we are here, the series is not archived, so check for chunkDesc
|
// If we are here, the series is not archived, so check for chunkDesc
|
||||||
// eviction next
|
// eviction next.
|
||||||
series.evictChunkDescs(iOldestNotEvicted)
|
series.evictChunkDescs(iOldestNotEvicted)
|
||||||
|
|
||||||
return series.dirty && !seriesWasDirty
|
return series.dirty && !seriesWasDirty
|
||||||
|
@@ -1107,8 +1107,8 @@ func (s *memorySeriesStorage) loadChunks(fp clientmodel.Fingerprint, indexes []i
|
||||||
}
|
}
|
||||||
|
|
||||||
// See persistence.loadChunkDescs for detailed explanation.
|
// See persistence.loadChunkDescs for detailed explanation.
|
||||||
func (s *memorySeriesStorage) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) {
|
func (s *memorySeriesStorage) loadChunkDescs(fp clientmodel.Fingerprint, offsetFromEnd int) ([]*chunkDesc, error) {
|
||||||
return s.persistence.loadChunkDescs(fp, beforeTime)
|
return s.persistence.loadChunkDescs(fp, offsetFromEnd)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getNumChunksToPersist returns numChunksToPersist in a goroutine-safe way.
|
// getNumChunksToPersist returns numChunksToPersist in a goroutine-safe way.
|
||||||
|
|
|
@@ -1116,6 +1116,78 @@ func TestEvictAndPurgeSeriesChunkType1(t *testing.T) {
|
||||||
testEvictAndPurgeSeries(t, 1)
|
testEvictAndPurgeSeries(t, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func testEvictAndLoadChunkDescs(t *testing.T, encoding chunkEncoding) {
|
||||||
|
samples := make(clientmodel.Samples, 10000)
|
||||||
|
for i := range samples {
|
||||||
|
samples[i] = &clientmodel.Sample{
|
||||||
|
Timestamp: clientmodel.Timestamp(2 * i),
|
||||||
|
Value: clientmodel.SampleValue(float64(i * i)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Give last sample a timestamp of now so that the head chunk will not
|
||||||
|
// be closed (which would then archive the time series later as
|
||||||
|
// everything will get evicted).
|
||||||
|
samples[len(samples)-1] = &clientmodel.Sample{
|
||||||
|
Timestamp: clientmodel.Now(),
|
||||||
|
Value: clientmodel.SampleValue(3.14),
|
||||||
|
}
|
||||||
|
|
||||||
|
s, closer := NewTestStorage(t, encoding)
|
||||||
|
defer closer.Close()
|
||||||
|
|
||||||
|
// Adjust memory chunks to lower value to see evictions.
|
||||||
|
s.maxMemoryChunks = 1
|
||||||
|
|
||||||
|
for _, sample := range samples {
|
||||||
|
s.Append(sample)
|
||||||
|
}
|
||||||
|
s.WaitForIndexing()
|
||||||
|
|
||||||
|
fp := clientmodel.Metric{}.FastFingerprint()
|
||||||
|
|
||||||
|
series, ok := s.fpToSeries.get(fp)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("could not find series")
|
||||||
|
}
|
||||||
|
|
||||||
|
oldLen := len(series.chunkDescs)
|
||||||
|
// Maintain series without any dropped chunks.
|
||||||
|
s.maintainMemorySeries(fp, 0)
|
||||||
|
// Give the evict goroutine an opportunity to run.
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
// Maintain series again to trigger chunkDesc eviction
|
||||||
|
s.maintainMemorySeries(fp, 0)
|
||||||
|
|
||||||
|
if oldLen <= len(series.chunkDescs) {
|
||||||
|
t.Errorf("Expected number of chunkDescs to decrease, old number %d, current number %d.", oldLen, len(series.chunkDescs))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load everything back.
|
||||||
|
p := s.NewPreloader()
|
||||||
|
p.PreloadRange(fp, 0, 100000, time.Hour)
|
||||||
|
|
||||||
|
if oldLen != len(series.chunkDescs) {
|
||||||
|
t.Errorf("Expected number of chunkDescs to have reached old value again, old number %d, current number %d.", oldLen, len(series.chunkDescs))
|
||||||
|
}
|
||||||
|
|
||||||
|
p.Close()
|
||||||
|
|
||||||
|
// Now maintain series with drops to make sure nothing crazy happens.
|
||||||
|
s.maintainMemorySeries(fp, 100000)
|
||||||
|
|
||||||
|
if len(series.chunkDescs) != 1 {
|
||||||
|
t.Errorf("Expected exactly one chunkDesc left, got %d.", len(series.chunkDescs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvictAndLoadChunkDescsType0(t *testing.T) {
|
||||||
|
testEvictAndLoadChunkDescs(t, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvictAndLoadChunkDescsType1(t *testing.T) {
|
||||||
|
testEvictAndLoadChunkDescs(t, 1)
|
||||||
|
}
|
||||||
|
|
||||||
func benchmarkAppend(b *testing.B, encoding chunkEncoding) {
|
func benchmarkAppend(b *testing.B, encoding chunkEncoding) {
|
||||||
samples := make(clientmodel.Samples, b.N)
|
samples := make(clientmodel.Samples, b.N)
|
||||||
for i := range samples {
|
for i := range samples {
|
||||||
|
@@ -1437,7 +1509,7 @@ func TestAppendOutOfOrder(t *testing.T) {
|
||||||
|
|
||||||
err = pl.PreloadRange(fp, 0, 2, 5*time.Minute)
|
err = pl.PreloadRange(fp, 0, 2, 5*time.Minute)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("error preloading chunks: %s", err)
|
t.Fatalf("Error preloading chunks: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
it := s.NewIterator(fp)
|
it := s.NewIterator(fp)
|
||||||
|
|
Loading…
Reference in a new issue