Fix the weird chunkDesc shuffling bug.

The root cause was that after chunkDesc eviction, the offset between
memory representation of chunk layout (via chunkDescs in memory) was
shiftet against chunks as layed out on disk. Keeping the offset up to
date is by no means trivial, so this commit is pretty involved.

Also, found a race that for some reason didn't bite us so far:
Persisting chunks was completel unlocked, so if chunks were purged on
disk at the same time, disaster would strike. However, locking the
persisting of chunk revealed interesting dead locks. Basically, never
queue under the fp lock.

Change-Id: I1ea9e4e71024cabbc1f9601b28e74db0c5c55db8
This commit is contained in:
Bjoern Rabenstein 2014-10-27 20:40:48 +01:00
parent a617269b12
commit d215e013b7
5 changed files with 193 additions and 123 deletions

View file

@ -58,8 +58,10 @@ const (
) )
const ( const (
flagChunkDescsLoaded byte = 1 << iota flagHeadChunkPersisted byte = 1 << iota
flagHeadChunkPersisted // Add more flags here like:
// flagFoo
// flagBar
) )
type indexingOpType byte type indexingOpType byte
@ -228,34 +230,53 @@ func (p *persistence) getLabelValuesForLabelName(ln clientmodel.LabelName) (clie
} }
// persistChunk persists a single chunk of a series. It is the caller's // persistChunk persists a single chunk of a series. It is the caller's
// responsibility to not modify chunk concurrently and to not persist or drop anything // responsibility to not modify chunk concurrently and to not persist or drop
// for the same fingerprint concurrently. // anything for the same fingerprint concurrently. It returns the (zero-based)
func (p *persistence) persistChunk(fp clientmodel.Fingerprint, c chunk) error { // index of the persisted chunk within the series file. In case of an error, the
// returned index is -1 (to avoid the misconception that the chunk was written
// at position 0).
func (p *persistence) persistChunk(fp clientmodel.Fingerprint, c chunk) (int, error) {
// 1. Open chunk file. // 1. Open chunk file.
f, err := p.openChunkFileForWriting(fp) f, err := p.openChunkFileForWriting(fp)
if err != nil { if err != nil {
return err return -1, err
} }
defer f.Close() defer f.Close()
b := bufio.NewWriterSize(f, chunkHeaderLen+p.chunkLen) b := bufio.NewWriterSize(f, chunkHeaderLen+p.chunkLen)
defer b.Flush()
// 2. Write the header (chunk type and first/last times). // 2. Write the header (chunk type and first/last times).
err = writeChunkHeader(b, c) err = writeChunkHeader(b, c)
if err != nil { if err != nil {
return err return -1, err
} }
// 3. Write chunk into file. // 3. Write chunk into file.
return c.marshal(b) err = c.marshal(b)
if err != nil {
return -1, err
}
// 4. Determine index within the file.
b.Flush()
offset, err := f.Seek(0, os.SEEK_CUR)
if err != nil {
return -1, err
}
index, err := p.chunkIndexForOffset(offset)
if err != nil {
return -1, err
}
return index - 1, err
} }
// loadChunks loads a group of chunks of a timeseries by their index. The chunk // loadChunks loads a group of chunks of a timeseries by their index. The chunk
// with the earliest time will have index 0, the following ones will have // with the earliest time will have index 0, the following ones will have
// incrementally larger indexes. It is the caller's responsibility to not // incrementally larger indexes. The indexOffset denotes the offset to be added to
// persist or drop anything for the same fingerprint concurrently. // each index in indexes. It is the caller's responsibility to not persist or
func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int) ([]chunk, error) { // drop anything for the same fingerprint concurrently.
func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int, indexOffset int) ([]chunk, error) {
// TODO: we need to verify at some point that file length is a multiple of // TODO: we need to verify at some point that file length is a multiple of
// the chunk size. When is the best time to do this, and where to remember // the chunk size. When is the best time to do this, and where to remember
// it? Right now, we only do it when loading chunkDescs. // it? Right now, we only do it when loading chunkDescs.
@ -268,7 +289,7 @@ func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int) ([]c
chunks := make([]chunk, 0, len(indexes)) chunks := make([]chunk, 0, len(indexes))
typeBuf := make([]byte, 1) typeBuf := make([]byte, 1)
for _, idx := range indexes { for _, idx := range indexes {
_, err := f.Seek(p.offsetForChunkIndex(idx), os.SEEK_SET) _, err := f.Seek(p.offsetForChunkIndex(idx+indexOffset), os.SEEK_SET)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -339,7 +360,7 @@ func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clie
chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)), chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)),
chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])), chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])),
} }
if !cd.firstTime().Before(beforeTime) { if cd.chunkLastTime.After(beforeTime) {
// From here on, we have chunkDescs in memory already. // From here on, we have chunkDescs in memory already.
break break
} }
@ -412,9 +433,6 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap
} }
realNumberOfSeries++ realNumberOfSeries++
var seriesFlags byte var seriesFlags byte
if m.series.chunkDescsLoaded {
seriesFlags |= flagChunkDescsLoaded
}
if m.series.headChunkPersisted { if m.series.headChunkPersisted {
seriesFlags |= flagHeadChunkPersisted seriesFlags |= flagHeadChunkPersisted
} }
@ -430,6 +448,9 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap
return return
} }
w.Write(buf) w.Write(buf)
if _, err = codable.EncodeVarint(w, int64(m.series.chunkDescsOffset)); err != nil {
return
}
if _, err = codable.EncodeVarint(w, int64(len(m.series.chunkDescs))); err != nil { if _, err = codable.EncodeVarint(w, int64(len(m.series.chunkDescs))); err != nil {
return return
} }
@ -523,6 +544,10 @@ func (p *persistence) loadSeriesMapAndHeads() (*seriesMap, error) {
if err := metric.UnmarshalFromReader(r); err != nil { if err := metric.UnmarshalFromReader(r); err != nil {
return nil, err return nil, err
} }
chunkDescsOffset, err := binary.ReadVarint(r)
if err != nil {
return nil, err
}
numChunkDescs, err := binary.ReadVarint(r) numChunkDescs, err := binary.ReadVarint(r)
if err != nil { if err != nil {
return nil, err return nil, err
@ -562,7 +587,7 @@ func (p *persistence) loadSeriesMapAndHeads() (*seriesMap, error) {
fingerprintToSeries[clientmodel.Fingerprint(fp)] = &memorySeries{ fingerprintToSeries[clientmodel.Fingerprint(fp)] = &memorySeries{
metric: clientmodel.Metric(metric), metric: clientmodel.Metric(metric),
chunkDescs: chunkDescs, chunkDescs: chunkDescs,
chunkDescsLoaded: seriesFlags&flagChunkDescsLoaded != 0, chunkDescsOffset: int(chunkDescsOffset),
headChunkPersisted: headChunkPersisted, headChunkPersisted: headChunkPersisted,
} }
} }
@ -572,24 +597,25 @@ func (p *persistence) loadSeriesMapAndHeads() (*seriesMap, error) {
} }
// dropChunks deletes all chunks from a series whose last sample time is before // dropChunks deletes all chunks from a series whose last sample time is before
// beforeTime. It returns true if all chunks of the series have been deleted. // beforeTime. It returns the number of deleted chunks and true if all chunks of
// It is the caller's responsibility to make sure nothing is persisted or loaded // the series have been deleted. It is the caller's responsibility to make sure
// for the same fingerprint concurrently. // nothing is persisted or loaded for the same fingerprint concurrently.
func (p *persistence) dropChunks(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) (bool, error) { func (p *persistence) dropChunks(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) (int, bool, error) {
f, err := p.openChunkFileForReading(fp) f, err := p.openChunkFileForReading(fp)
if os.IsNotExist(err) { if os.IsNotExist(err) {
return true, nil return 0, true, nil
} }
if err != nil { if err != nil {
return false, err return 0, false, err
} }
defer f.Close() defer f.Close()
// Find the first chunk that should be kept. // Find the first chunk that should be kept.
for i := 0; ; i++ { var i int
for ; ; i++ {
_, err := f.Seek(p.offsetForChunkIndex(i)+chunkHeaderLastTimeOffset, os.SEEK_SET) _, err := f.Seek(p.offsetForChunkIndex(i)+chunkHeaderLastTimeOffset, os.SEEK_SET)
if err != nil { if err != nil {
return false, err return 0, false, err
} }
lastTimeBuf := make([]byte, 8) lastTimeBuf := make([]byte, 8)
_, err = io.ReadAtLeast(f, lastTimeBuf, 8) _, err = io.ReadAtLeast(f, lastTimeBuf, 8)
@ -598,12 +624,12 @@ func (p *persistence) dropChunks(fp clientmodel.Fingerprint, beforeTime clientmo
// be kept. Remove the whole file. // be kept. Remove the whole file.
chunkOps.WithLabelValues(purge).Add(float64(i)) chunkOps.WithLabelValues(purge).Add(float64(i))
if err := os.Remove(f.Name()); err != nil { if err := os.Remove(f.Name()); err != nil {
return true, err return 0, true, err
} }
return true, nil return i, true, nil
} }
if err != nil { if err != nil {
return false, err return 0, false, err
} }
lastTime := clientmodel.Timestamp(binary.LittleEndian.Uint64(lastTimeBuf)) lastTime := clientmodel.Timestamp(binary.LittleEndian.Uint64(lastTimeBuf))
if !lastTime.Before(beforeTime) { if !lastTime.Before(beforeTime) {
@ -617,21 +643,23 @@ func (p *persistence) dropChunks(fp clientmodel.Fingerprint, beforeTime clientmo
// file. // file.
_, err = f.Seek(-(chunkHeaderLastTimeOffset + 8), os.SEEK_CUR) _, err = f.Seek(-(chunkHeaderLastTimeOffset + 8), os.SEEK_CUR)
if err != nil { if err != nil {
return false, err return 0, false, err
} }
temp, err := os.OpenFile(p.tempFileNameForFingerprint(fp), os.O_WRONLY|os.O_CREATE, 0640) temp, err := os.OpenFile(p.tempFileNameForFingerprint(fp), os.O_WRONLY|os.O_CREATE, 0640)
if err != nil { if err != nil {
return false, err return 0, false, err
} }
defer temp.Close() defer temp.Close()
if _, err := io.Copy(temp, f); err != nil { if _, err := io.Copy(temp, f); err != nil {
return false, err return 0, false, err
} }
os.Rename(p.tempFileNameForFingerprint(fp), p.fileNameForFingerprint(fp)) if err := os.Rename(p.tempFileNameForFingerprint(fp), p.fileNameForFingerprint(fp)); err != nil {
return false, nil return 0, false, err
}
return i, false, nil
} }
// indexMetric queues the given metric for addition to the indexes needed by // indexMetric queues the given metric for addition to the indexes needed by
@ -836,6 +864,16 @@ func (p *persistence) offsetForChunkIndex(i int) int64 {
return int64(i * (chunkHeaderLen + p.chunkLen)) return int64(i * (chunkHeaderLen + p.chunkLen))
} }
func (p *persistence) chunkIndexForOffset(offset int64) (int, error) {
if int(offset)%(chunkHeaderLen+p.chunkLen) != 0 {
return -1, fmt.Errorf(
"offset %d is not a multiple of on-disk chunk length %d",
offset, chunkHeaderLen+p.chunkLen,
)
}
return int(offset) / (chunkHeaderLen + p.chunkLen), nil
}
func (p *persistence) headsFileName() string { func (p *persistence) headsFileName() string {
return path.Join(p.basePath, headsFileName) return path.Join(p.basePath, headsFileName)
} }

View file

@ -82,10 +82,14 @@ func TestPersistChunk(t *testing.T) {
fpToChunks := buildTestChunks() fpToChunks := buildTestChunks()
for fp, chunks := range fpToChunks { for fp, chunks := range fpToChunks {
for _, c := range chunks { for i, c := range chunks {
if err := p.persistChunk(fp, c); err != nil { index, err := p.persistChunk(fp, c)
if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if i != index {
t.Errorf("Want chunk index %d, got %d.", i, index)
}
} }
} }
@ -94,7 +98,7 @@ func TestPersistChunk(t *testing.T) {
for i := range expectedChunks { for i := range expectedChunks {
indexes = append(indexes, i) indexes = append(indexes, i)
} }
actualChunks, err := p.loadChunks(fp, indexes) actualChunks, err := p.loadChunks(fp, indexes, 0)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -136,10 +136,16 @@ type memorySeries struct {
metric clientmodel.Metric metric clientmodel.Metric
// Sorted by start time, overlapping chunk ranges are forbidden. // Sorted by start time, overlapping chunk ranges are forbidden.
chunkDescs []*chunkDesc chunkDescs []*chunkDesc
// Whether chunkDescs for chunks on disk are all loaded. If false, some // The chunkDescs in memory might not have all the chunkDescs for the
// (or all) chunkDescs are only on disk. These chunks are all contiguous // chunks that are persisted to disk. The missing chunkDescs are all
// and at the tail end. // contiguous and at the tail end. chunkDescsOffset is the index of the
chunkDescsLoaded bool // chunk on disk that corresponds to the first chunkDesc in memory. If
// it is 0, the chunkDescs are all loaded. A value of -1 denotes a
// special case: There are chunks on disk, but the offset to the
// chunkDescs in memory is unknown. Also, there is no overlap between
// chunks on disk and chunks in memory (implying that upon first
// persiting of a chunk in memory, the offset has to be set).
chunkDescsOffset int
// Whether the current head chunk has already been scheduled to be // Whether the current head chunk has already been scheduled to be
// persisted. If true, the current head chunk must not be modified // persisted. If true, the current head chunk must not be modified
// anymore. // anymore.
@ -155,16 +161,20 @@ type memorySeries struct {
// or (if false) a series for a metric being unarchived, i.e. a series that // or (if false) a series for a metric being unarchived, i.e. a series that
// existed before but has been evicted from memory. // existed before but has been evicted from memory.
func newMemorySeries(m clientmodel.Metric, reallyNew bool) *memorySeries { func newMemorySeries(m clientmodel.Metric, reallyNew bool) *memorySeries {
return &memorySeries{ s := memorySeries{
metric: m, metric: m,
chunkDescsLoaded: reallyNew,
headChunkPersisted: !reallyNew, headChunkPersisted: !reallyNew,
} }
if !reallyNew {
s.chunkDescsOffset = -1
}
return &s
} }
// add adds a sample pair to the series. // add adds a sample pair to the series.
// It returns chunkDescs that must be queued to be persisted.
// The caller must have locked the fingerprint of the series. // The caller must have locked the fingerprint of the series.
func (s *memorySeries) add(fp clientmodel.Fingerprint, v *metric.SamplePair, persistQueue chan *persistRequest) { func (s *memorySeries) add(fp clientmodel.Fingerprint, v *metric.SamplePair) []*chunkDesc {
if len(s.chunkDescs) == 0 || s.headChunkPersisted { if len(s.chunkDescs) == 0 || s.headChunkPersisted {
newHead := newChunkDesc(newDeltaEncodedChunk(d1, d0, true)) newHead := newChunkDesc(newDeltaEncodedChunk(d1, d0, true))
s.chunkDescs = append(s.chunkDescs, newHead) s.chunkDescs = append(s.chunkDescs, newHead)
@ -187,33 +197,27 @@ func (s *memorySeries) add(fp clientmodel.Fingerprint, v *metric.SamplePair, per
} }
chunks := s.head().add(v) chunks := s.head().add(v)
s.head().chunk = chunks[0] s.head().chunk = chunks[0]
var chunkDescsToPersist []*chunkDesc
if len(chunks) > 1 { if len(chunks) > 1 {
queuePersist := func(cd *chunkDesc) { chunkDescsToPersist = append(chunkDescsToPersist, s.head())
persistQueue <- &persistRequest{
fingerprint: fp,
chunkDesc: cd,
}
}
queuePersist(s.head())
for i, c := range chunks[1:] { for i, c := range chunks[1:] {
cd := newChunkDesc(c) cd := newChunkDesc(c)
s.chunkDescs = append(s.chunkDescs, cd) s.chunkDescs = append(s.chunkDescs, cd)
// The last chunk is still growing. // The last chunk is still growing.
if i < len(chunks[1:])-1 { if i < len(chunks[1:])-1 {
queuePersist(cd) chunkDescsToPersist = append(chunkDescsToPersist, cd)
} }
} }
} }
return chunkDescsToPersist
} }
// evictOlderThan marks for eviction all chunks whose latest sample is older // evictOlderThan marks for eviction all chunks whose latest sample is older
// than the given timestamp. It returns true if all chunks in the series were // than the given timestamp. It returns allEvicted as true if all chunks in the
// immediately evicted (i.e. all chunks are older than the timestamp, and none // series were immediately evicted (i.e. all chunks are older than the
// of the chunks was pinned). // timestamp, and none of the chunks was pinned).
// //
// The method also evicts chunkDescs if there are chunkDescEvictionFactor times // The method also evicts chunkDescs if there are chunkDescEvictionFactor times
// more chunkDescs in the series than unevicted chunks. (The number of unevicted // more chunkDescs in the series than unevicted chunks. (The number of unevicted
@ -222,25 +226,22 @@ func (s *memorySeries) add(fp clientmodel.Fingerprint, v *metric.SamplePair, per
// series, even if chunks in between were evicted.) // series, even if chunks in between were evicted.)
// //
// Special considerations for the head chunk: If it has not been scheduled to be // Special considerations for the head chunk: If it has not been scheduled to be
// persisted yet but is old enough for eviction, the scheduling happens now. (To // persisted yet but is old enough for eviction, this method returns
// do that, the method neets the fingerprint and the persist queue.) It is // persistHeadChunk as true. The caller is then responsible for persisting the
// likely that the actual persisting will not happen soon enough to immediately // head chunk. The internal state of this memorySeries is already set
// evict the head chunk, though. Thus, calling evictOlderThan for a series with // accordingly by this method. Calling evictOlderThan for a series with a
// a non-persisted head chunk will most likely return false, even if no chunk is // non-persisted head chunk that is old enough for eviction will never evict all
// pinned for other reasons. A series old enough for archiving will usually // chunks immediately, even if no chunk is pinned for other reasons, because the
// head chunk is not persisted yet. A series old enough for archiving will
// require at least two eviction runs to become ready for archiving: In the // require at least two eviction runs to become ready for archiving: In the
// first run, its head chunk is scheduled to be persisted. The next call of // first run, its head chunk is requested to be persisted. The next call of
// evictOlderThan will then return true, provided that the series hasn't // evictOlderThan will then return true, provided that the series hasn't
// received new samples in the meantime, the head chunk has now been persisted, // received new samples in the meantime, the head chunk has now been persisted,
// and no chunk is pinned for other reasons. // and no chunk is pinned for other reasons.
// //
// The caller must have locked the fingerprint of the series. // The caller must have locked the fingerprint of the series.
func (s *memorySeries) evictOlderThan( func (s *memorySeries) evictOlderThan(t clientmodel.Timestamp) (allEvicted bool, persistHeadChunk bool) {
t clientmodel.Timestamp, allEvicted = true
fp clientmodel.Fingerprint,
persistQueue chan *persistRequest,
) bool {
allEvicted := true
iOldestNotEvicted := -1 iOldestNotEvicted := -1
defer func() { defer func() {
@ -250,8 +251,8 @@ func (s *memorySeries) evictOlderThan(
if iOldestNotEvicted != -1 { if iOldestNotEvicted != -1 {
lenToKeep := chunkDescEvictionFactor * (len(s.chunkDescs) - iOldestNotEvicted) lenToKeep := chunkDescEvictionFactor * (len(s.chunkDescs) - iOldestNotEvicted)
if lenToKeep < len(s.chunkDescs) { if lenToKeep < len(s.chunkDescs) {
s.chunkDescsLoaded = false
lenEvicted := len(s.chunkDescs) - lenToKeep lenEvicted := len(s.chunkDescs) - lenToKeep
s.chunkDescsOffset += lenEvicted
chunkDescOps.WithLabelValues(evict).Add(float64(lenEvicted)) chunkDescOps.WithLabelValues(evict).Add(float64(lenEvicted))
atomic.AddInt64(&numMemChunkDescs, -int64(lenEvicted)) atomic.AddInt64(&numMemChunkDescs, -int64(lenEvicted))
s.chunkDescs = append( s.chunkDescs = append(
@ -268,22 +269,19 @@ func (s *memorySeries) evictOlderThan(
if iOldestNotEvicted == -1 { if iOldestNotEvicted == -1 {
iOldestNotEvicted = i iOldestNotEvicted = i
} }
return false return false, false
} }
if cd.isEvicted() { if cd.isEvicted() {
continue continue
} }
if !s.headChunkPersisted && i == len(s.chunkDescs)-1 { if !s.headChunkPersisted && i == len(s.chunkDescs)-1 {
// This is a non-persisted head chunk that is old enough // This is a non-persisted head chunk that is old enough
// for eviction. Queue it to be persisted: // for eviction. Request it to be persisted:
persistHeadChunk = true
s.headChunkPersisted = true s.headChunkPersisted = true
// Since we cannot modify the head chunk from now on, we // Since we cannot modify the head chunk from now on, we
// don't need to bother with cloning anymore. // don't need to bother with cloning anymore.
s.headChunkUsedByIterator = false s.headChunkUsedByIterator = false
persistQueue <- &persistRequest{
fingerprint: fp,
chunkDesc: cd,
}
} }
if !cd.evictOnUnpin() { if !cd.evictOnUnpin() {
if iOldestNotEvicted == -1 { if iOldestNotEvicted == -1 {
@ -292,13 +290,15 @@ func (s *memorySeries) evictOlderThan(
allEvicted = false allEvicted = false
} }
} }
return allEvicted return allEvicted, persistHeadChunk
} }
// purgeOlderThan returns true if all chunks have been purged. // purgeOlderThan removes chunkDescs older than t. It also evicts the chunks of
// those chunkDescs (although that's probably not even necessary). It returns
// the number of purged chunkDescs and true if all chunkDescs have been purged.
// //
// The caller must have locked the fingerprint of the series. // The caller must have locked the fingerprint of the series.
func (s *memorySeries) purgeOlderThan(t clientmodel.Timestamp) bool { func (s *memorySeries) purgeOlderThan(t clientmodel.Timestamp) (int, bool) {
keepIdx := len(s.chunkDescs) keepIdx := len(s.chunkDescs)
for i, cd := range s.chunkDescs { for i, cd := range s.chunkDescs {
if !cd.lastTime().Before(t) { if !cd.lastTime().Before(t) {
@ -307,9 +307,11 @@ func (s *memorySeries) purgeOlderThan(t clientmodel.Timestamp) bool {
} }
s.chunkDescs[i].evictOnUnpin() s.chunkDescs[i].evictOnUnpin()
} }
s.chunkDescs = append(make([]*chunkDesc, 0, len(s.chunkDescs)-keepIdx), s.chunkDescs[keepIdx:]...) if keepIdx > 0 {
atomic.AddInt64(&numMemChunkDescs, -int64(keepIdx)) s.chunkDescs = append(make([]*chunkDesc, 0, len(s.chunkDescs)-keepIdx), s.chunkDescs[keepIdx:]...)
return len(s.chunkDescs) == 0 atomic.AddInt64(&numMemChunkDescs, -int64(keepIdx))
}
return keepIdx, len(s.chunkDescs) == 0
} }
// preloadChunks is an internal helper method. // preloadChunks is an internal helper method.
@ -327,8 +329,11 @@ func (s *memorySeries) preloadChunks(indexes []int, p *persistence) ([]*chunkDes
chunkOps.WithLabelValues(pin).Add(float64(len(pinnedChunkDescs))) chunkOps.WithLabelValues(pin).Add(float64(len(pinnedChunkDescs)))
if len(loadIndexes) > 0 { if len(loadIndexes) > 0 {
if s.chunkDescsOffset == -1 {
panic("requested loading chunks from persistence in a situation where we must not have persisted data for chunk descriptors in memory")
}
fp := s.metric.Fingerprint() fp := s.metric.Fingerprint()
chunks, err := p.loadChunks(fp, loadIndexes) chunks, err := p.loadChunks(fp, loadIndexes, s.chunkDescsOffset)
if err != nil { if err != nil {
// Unpin the chunks since we won't return them as pinned chunks now. // Unpin the chunks since we won't return them as pinned chunks now.
for _, cd := range pinnedChunkDescs { for _, cd := range pinnedChunkDescs {
@ -388,13 +393,13 @@ func (s *memorySeries) preloadChunksForRange(
if len(s.chunkDescs) > 0 { if len(s.chunkDescs) > 0 {
firstChunkDescTime = s.chunkDescs[0].firstTime() firstChunkDescTime = s.chunkDescs[0].firstTime()
} }
if !s.chunkDescsLoaded && from.Before(firstChunkDescTime) { if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) {
cds, err := p.loadChunkDescs(fp, firstChunkDescTime) cds, err := p.loadChunkDescs(fp, firstChunkDescTime)
if err != nil { if err != nil {
return nil, err return nil, err
} }
s.chunkDescs = append(cds, s.chunkDescs...) s.chunkDescs = append(cds, s.chunkDescs...)
s.chunkDescsLoaded = true s.chunkDescsOffset = 0
} }
if len(s.chunkDescs) == 0 { if len(s.chunkDescs) == 0 {

View file

@ -43,15 +43,15 @@ type persistRequest struct {
} }
type memorySeriesStorage struct { type memorySeriesStorage struct {
fpLocker *fingerprintLocker fpLocker *fingerprintLocker
fingerprintToSeries *seriesMap fpToSeries *seriesMap
loopStopping, loopStopped chan struct{} loopStopping, loopStopped chan struct{}
evictInterval, evictAfter time.Duration evictInterval, evictAfter time.Duration
purgeInterval, purgeAfter time.Duration purgeInterval, purgeAfter time.Duration
checkpointInterval time.Duration checkpointInterval time.Duration
persistQueue chan *persistRequest persistQueue chan persistRequest
persistStopped chan struct{} persistStopped chan struct{}
persistence *persistence persistence *persistence
@ -84,22 +84,22 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) (Storage, error) {
return nil, err return nil, err
} }
glog.Info("Loading series map and head chunks...") glog.Info("Loading series map and head chunks...")
fingerprintToSeries, err := p.loadSeriesMapAndHeads() fpToSeries, err := p.loadSeriesMapAndHeads()
if err != nil { if err != nil {
return nil, err return nil, err
} }
glog.Infof("%d series loaded.", fingerprintToSeries.length()) glog.Infof("%d series loaded.", fpToSeries.length())
numSeries := prometheus.NewGauge(prometheus.GaugeOpts{ numSeries := prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace, Namespace: namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "memory_series", Name: "memory_series",
Help: "The current number of series in memory.", Help: "The current number of series in memory.",
}) })
numSeries.Set(float64(fingerprintToSeries.length())) numSeries.Set(float64(fpToSeries.length()))
return &memorySeriesStorage{ return &memorySeriesStorage{
fpLocker: newFingerprintLocker(100), // TODO: Tweak value. fpLocker: newFingerprintLocker(100), // TODO: Tweak value.
fingerprintToSeries: fingerprintToSeries, fpToSeries: fpToSeries,
loopStopping: make(chan struct{}), loopStopping: make(chan struct{}),
loopStopped: make(chan struct{}), loopStopped: make(chan struct{}),
@ -109,7 +109,7 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) (Storage, error) {
purgeAfter: o.PersistenceRetentionPeriod, purgeAfter: o.PersistenceRetentionPeriod,
checkpointInterval: o.CheckpointInterval, checkpointInterval: o.CheckpointInterval,
persistQueue: make(chan *persistRequest, persistQueueCap), persistQueue: make(chan persistRequest, persistQueueCap),
persistStopped: make(chan struct{}), persistStopped: make(chan struct{}),
persistence: p, persistence: p,
@ -182,7 +182,7 @@ func (s *memorySeriesStorage) Stop() error {
<-s.persistStopped <-s.persistStopped
// One final checkpoint of the series map and the head chunks. // One final checkpoint of the series map and the head chunks.
if err := s.persistence.checkpointSeriesMapAndHeads(s.fingerprintToSeries, s.fpLocker); err != nil { if err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker); err != nil {
return err return err
} }
@ -202,7 +202,7 @@ func (s *memorySeriesStorage) NewIterator(fp clientmodel.Fingerprint) SeriesIter
s.fpLocker.Lock(fp) s.fpLocker.Lock(fp)
defer s.fpLocker.Unlock(fp) defer s.fpLocker.Unlock(fp)
series, ok := s.fingerprintToSeries.get(fp) series, ok := s.fpToSeries.get(fp)
if !ok { if !ok {
// Oops, no series for fp found. That happens if, after // Oops, no series for fp found. That happens if, after
// preloading is done, the whole series is identified as old // preloading is done, the whole series is identified as old
@ -301,7 +301,7 @@ func (s *memorySeriesStorage) GetMetricForFingerprint(fp clientmodel.Fingerprint
s.fpLocker.Lock(fp) s.fpLocker.Lock(fp)
defer s.fpLocker.Unlock(fp) defer s.fpLocker.Unlock(fp)
series, ok := s.fingerprintToSeries.get(fp) series, ok := s.fpToSeries.get(fp)
if ok { if ok {
// Copy required here because caller might mutate the returned // Copy required here because caller might mutate the returned
// metric. // metric.
@ -330,17 +330,21 @@ func (s *memorySeriesStorage) AppendSamples(samples clientmodel.Samples) {
func (s *memorySeriesStorage) appendSample(sample *clientmodel.Sample) { func (s *memorySeriesStorage) appendSample(sample *clientmodel.Sample) {
fp := sample.Metric.Fingerprint() fp := sample.Metric.Fingerprint()
s.fpLocker.Lock(fp) s.fpLocker.Lock(fp)
defer s.fpLocker.Unlock(fp)
series := s.getOrCreateSeries(fp, sample.Metric) series := s.getOrCreateSeries(fp, sample.Metric)
series.add(fp, &metric.SamplePair{ chunkDescsToPersist := series.add(fp, &metric.SamplePair{
Value: sample.Value, Value: sample.Value,
Timestamp: sample.Timestamp, Timestamp: sample.Timestamp,
}, s.persistQueue) })
s.fpLocker.Unlock(fp)
// Queue only outside of the locked area, processing the persistQueue
// requires the same lock!
for _, cd := range chunkDescsToPersist {
s.persistQueue <- persistRequest{fp, cd}
}
} }
func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m clientmodel.Metric) *memorySeries { func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m clientmodel.Metric) *memorySeries {
series, ok := s.fingerprintToSeries.get(fp) series, ok := s.fpToSeries.get(fp)
if !ok { if !ok {
unarchived, err := s.persistence.unarchiveMetric(fp) unarchived, err := s.persistence.unarchiveMetric(fp)
if err != nil { if err != nil {
@ -354,7 +358,7 @@ func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m cl
s.seriesOps.WithLabelValues(create).Inc() s.seriesOps.WithLabelValues(create).Inc()
} }
series = newMemorySeries(m, !unarchived) series = newMemorySeries(m, !unarchived)
s.fingerprintToSeries.put(fp, series) s.fpToSeries.put(fp, series)
s.numSeries.Inc() s.numSeries.Inc()
} }
return series return series
@ -362,7 +366,7 @@ func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m cl
/* /*
func (s *memorySeriesStorage) preloadChunksAtTime(fp clientmodel.Fingerprint, ts clientmodel.Timestamp) (chunkDescs, error) { func (s *memorySeriesStorage) preloadChunksAtTime(fp clientmodel.Fingerprint, ts clientmodel.Timestamp) (chunkDescs, error) {
series, ok := s.fingerprintToSeries.get(fp) series, ok := s.fpToSeries.get(fp)
if !ok { if !ok {
panic("requested preload for non-existent series") panic("requested preload for non-existent series")
} }
@ -378,7 +382,7 @@ func (s *memorySeriesStorage) preloadChunksForRange(
s.fpLocker.Lock(fp) s.fpLocker.Lock(fp)
defer s.fpLocker.Unlock(fp) defer s.fpLocker.Unlock(fp)
series, ok := s.fingerprintToSeries.get(fp) series, ok := s.fpToSeries.get(fp)
if !ok { if !ok {
has, first, last, err := s.persistence.hasArchivedMetric(fp) has, first, last, err := s.persistence.hasArchivedMetric(fp)
if err != nil { if err != nil {
@ -404,12 +408,21 @@ func (s *memorySeriesStorage) handlePersistQueue() {
for req := range s.persistQueue { for req := range s.persistQueue {
s.persistQueueLength.Set(float64(len(s.persistQueue))) s.persistQueueLength.Set(float64(len(s.persistQueue)))
start := time.Now() start := time.Now()
err := s.persistence.persistChunk(req.fingerprint, req.chunkDesc.chunk) s.fpLocker.Lock(req.fingerprint)
offset, err := s.persistence.persistChunk(req.fingerprint, req.chunkDesc.chunk)
if series, seriesInMemory := s.fpToSeries.get(req.fingerprint); err == nil && seriesInMemory && series.chunkDescsOffset == -1 {
// This is the first chunk persisted for a newly created
// series that had prior chunks on disk. Finally, we can
// set the chunkDescsOffset.
series.chunkDescsOffset = offset
}
s.fpLocker.Unlock(req.fingerprint)
s.persistLatency.Observe(float64(time.Since(start)) / float64(time.Microsecond)) s.persistLatency.Observe(float64(time.Since(start)) / float64(time.Microsecond))
if err != nil { if err != nil {
s.persistErrors.WithLabelValues(err.Error()).Inc() s.persistErrors.WithLabelValues(err.Error()).Inc()
glog.Error("Error persisting chunk, requeuing: ", err) glog.Error("Error persisting chunk: ", err)
s.persistQueue <- req glog.Error("The storage is now inconsistent. Prepare for disaster.")
// TODO: Remove respective chunkDesc to at least be consistent?
continue continue
} }
req.chunkDesc.unpin() req.chunkDesc.unpin()
@ -436,13 +449,13 @@ func (s *memorySeriesStorage) loop() {
case <-s.loopStopping: case <-s.loopStopping:
return return
case <-checkpointTicker.C: case <-checkpointTicker.C:
s.persistence.checkpointSeriesMapAndHeads(s.fingerprintToSeries, s.fpLocker) s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
case <-evictTicker.C: case <-evictTicker.C:
// TODO: Change this to be based on number of chunks in memory. // TODO: Change this to be based on number of chunks in memory.
glog.Info("Evicting chunks...") glog.Info("Evicting chunks...")
begin := time.Now() begin := time.Now()
for m := range s.fingerprintToSeries.iter() { for m := range s.fpToSeries.iter() {
select { select {
case <-s.loopStopping: case <-s.loopStopping:
glog.Info("Interrupted evicting chunks.") glog.Info("Interrupted evicting chunks.")
@ -451,11 +464,11 @@ func (s *memorySeriesStorage) loop() {
// Keep going. // Keep going.
} }
s.fpLocker.Lock(m.fp) s.fpLocker.Lock(m.fp)
if m.series.evictOlderThan( allEvicted, persistHeadChunk := m.series.evictOlderThan(
clientmodel.TimestampFromTime(time.Now()).Add(-1*s.evictAfter), clientmodel.TimestampFromTime(time.Now()).Add(-1 * s.evictAfter),
m.fp, s.persistQueue, )
) { if allEvicted {
s.fingerprintToSeries.del(m.fp) s.fpToSeries.del(m.fp)
s.numSeries.Dec() s.numSeries.Dec()
if err := s.persistence.archiveMetric( if err := s.persistence.archiveMetric(
m.fp, m.series.metric, m.series.firstTime(), m.series.lastTime(), m.fp, m.series.metric, m.series.firstTime(), m.series.lastTime(),
@ -466,6 +479,10 @@ func (s *memorySeriesStorage) loop() {
} }
} }
s.fpLocker.Unlock(m.fp) s.fpLocker.Unlock(m.fp)
// Queue outside of lock!
if persistHeadChunk {
s.persistQueue <- persistRequest{m.fp, m.series.head()}
}
} }
duration := time.Since(begin) duration := time.Since(begin)
@ -476,7 +493,7 @@ func (s *memorySeriesStorage) loop() {
ts := clientmodel.TimestampFromTime(time.Now()).Add(-1 * s.purgeAfter) ts := clientmodel.TimestampFromTime(time.Now()).Add(-1 * s.purgeAfter)
begin := time.Now() begin := time.Now()
for fp := range s.fingerprintToSeries.fpIter() { for fp := range s.fpToSeries.fpIter() {
select { select {
case <-s.loopStopping: case <-s.loopStopping:
glog.Info("Interrupted purging series.") glog.Info("Interrupted purging series.")
@ -515,18 +532,24 @@ func (s *memorySeriesStorage) purgeSeries(fp clientmodel.Fingerprint, beforeTime
defer s.fpLocker.Unlock(fp) defer s.fpLocker.Unlock(fp)
// First purge persisted chunks. We need to do that anyway. // First purge persisted chunks. We need to do that anyway.
allDropped, err := s.persistence.dropChunks(fp, beforeTime) numDropped, allDropped, err := s.persistence.dropChunks(fp, beforeTime)
if err != nil { if err != nil {
glog.Error("Error purging persisted chunks: ", err) glog.Error("Error purging persisted chunks: ", err)
} }
// Purge chunks from memory accordingly. // Purge chunks from memory accordingly.
if series, ok := s.fingerprintToSeries.get(fp); ok { if series, ok := s.fpToSeries.get(fp); ok {
if series.purgeOlderThan(beforeTime) && allDropped { numPurged, allPurged := series.purgeOlderThan(beforeTime)
s.fingerprintToSeries.del(fp) if allPurged && allDropped {
s.fpToSeries.del(fp)
s.numSeries.Dec() s.numSeries.Dec()
s.seriesOps.WithLabelValues(memoryPurge).Inc() s.seriesOps.WithLabelValues(memoryPurge).Inc()
s.persistence.unindexMetric(series.metric, fp) s.persistence.unindexMetric(series.metric, fp)
} else if series.chunkDescsOffset != -1 {
series.chunkDescsOffset += numPurged - numDropped
if series.chunkDescsOffset < 0 {
panic("dropped more chunks from persistence than from memory")
}
} }
return return
} }

View file

@ -37,7 +37,7 @@ func TestChunk(t *testing.T) {
s.AppendSamples(samples) s.AppendSamples(samples)
for m := range s.(*memorySeriesStorage).fingerprintToSeries.iter() { for m := range s.(*memorySeriesStorage).fpToSeries.iter() {
for i, v := range m.series.values() { for i, v := range m.series.values() {
if samples[i].Timestamp != v.Timestamp { if samples[i].Timestamp != v.Timestamp {
t.Fatalf("%d. Got %v; want %v", i, v.Timestamp, samples[i].Timestamp) t.Fatalf("%d. Got %v; want %v", i, v.Timestamp, samples[i].Timestamp)