mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-28 06:03:04 -08:00
Reorganize maintenance loop.
Change-Id: Iac10f988ba3e93ffb188f49c30f92e0b6adce5a3
This commit is contained in:
parent
a5f56639b8
commit
3f61d304ce
2
main.go
2
main.go
|
@ -55,7 +55,6 @@ var (
|
||||||
memoryEvictionInterval = flag.Duration("storage.memory.evictionInterval", 15*time.Minute, "The period at which old data is evicted from memory.")
|
memoryEvictionInterval = flag.Duration("storage.memory.evictionInterval", 15*time.Minute, "The period at which old data is evicted from memory.")
|
||||||
memoryRetentionPeriod = flag.Duration("storage.memory.retentionPeriod", time.Hour, "The period of time to retain in memory during evictions.")
|
memoryRetentionPeriod = flag.Duration("storage.memory.retentionPeriod", time.Hour, "The period of time to retain in memory during evictions.")
|
||||||
|
|
||||||
storagePurgeInterval = flag.Duration("storage.purgeInterval", time.Hour, "The period at which old data is deleted completely from storage.")
|
|
||||||
storageRetentionPeriod = flag.Duration("storage.retentionPeriod", 15*24*time.Hour, "The period of time to retain in storage.")
|
storageRetentionPeriod = flag.Duration("storage.retentionPeriod", 15*24*time.Hour, "The period of time to retain in storage.")
|
||||||
|
|
||||||
checkpointInterval = flag.Duration("storage.checkpointInterval", 5*time.Minute, "The period at which the in-memory index of time series is checkpointed.")
|
checkpointInterval = flag.Duration("storage.checkpointInterval", 5*time.Minute, "The period at which the in-memory index of time series is checkpointed.")
|
||||||
|
@ -119,7 +118,6 @@ func NewPrometheus() *prometheus {
|
||||||
MemoryEvictionInterval: *memoryEvictionInterval,
|
MemoryEvictionInterval: *memoryEvictionInterval,
|
||||||
MemoryRetentionPeriod: *memoryRetentionPeriod,
|
MemoryRetentionPeriod: *memoryRetentionPeriod,
|
||||||
PersistenceStoragePath: *metricsStoragePath,
|
PersistenceStoragePath: *metricsStoragePath,
|
||||||
PersistencePurgeInterval: *storagePurgeInterval,
|
|
||||||
PersistenceRetentionPeriod: *storageRetentionPeriod,
|
PersistenceRetentionPeriod: *storageRetentionPeriod,
|
||||||
CheckpointInterval: *checkpointInterval,
|
CheckpointInterval: *checkpointInterval,
|
||||||
Dirty: *storageDirty,
|
Dirty: *storageDirty,
|
||||||
|
|
|
@ -50,7 +50,7 @@ type memorySeriesStorage struct {
|
||||||
|
|
||||||
loopStopping, loopStopped chan struct{}
|
loopStopping, loopStopped chan struct{}
|
||||||
evictInterval, evictAfter time.Duration
|
evictInterval, evictAfter time.Duration
|
||||||
purgeInterval, purgeAfter time.Duration
|
purgeAfter time.Duration
|
||||||
checkpointInterval time.Duration
|
checkpointInterval time.Duration
|
||||||
|
|
||||||
persistQueue chan persistRequest
|
persistQueue chan persistRequest
|
||||||
|
@ -74,7 +74,6 @@ type MemorySeriesStorageOptions struct {
|
||||||
MemoryEvictionInterval time.Duration // How often to check for memory eviction.
|
MemoryEvictionInterval time.Duration // How often to check for memory eviction.
|
||||||
MemoryRetentionPeriod time.Duration // Chunks at least that old are evicted from memory.
|
MemoryRetentionPeriod time.Duration // Chunks at least that old are evicted from memory.
|
||||||
PersistenceStoragePath string // Location of persistence files.
|
PersistenceStoragePath string // Location of persistence files.
|
||||||
PersistencePurgeInterval time.Duration // How often to check for purging.
|
|
||||||
PersistenceRetentionPeriod time.Duration // Chunks at least that old are purged.
|
PersistenceRetentionPeriod time.Duration // Chunks at least that old are purged.
|
||||||
CheckpointInterval time.Duration // How often to checkpoint the series map and head chunks.
|
CheckpointInterval time.Duration // How often to checkpoint the series map and head chunks.
|
||||||
Dirty bool // Force the storage to consider itself dirty on startup.
|
Dirty bool // Force the storage to consider itself dirty on startup.
|
||||||
|
@ -109,7 +108,6 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) (Storage, error) {
|
||||||
loopStopped: make(chan struct{}),
|
loopStopped: make(chan struct{}),
|
||||||
evictInterval: o.MemoryEvictionInterval,
|
evictInterval: o.MemoryEvictionInterval,
|
||||||
evictAfter: o.MemoryRetentionPeriod,
|
evictAfter: o.MemoryRetentionPeriod,
|
||||||
purgeInterval: o.PersistencePurgeInterval,
|
|
||||||
purgeAfter: o.PersistenceRetentionPeriod,
|
purgeAfter: o.PersistenceRetentionPeriod,
|
||||||
checkpointInterval: o.CheckpointInterval,
|
checkpointInterval: o.CheckpointInterval,
|
||||||
|
|
||||||
|
@ -432,22 +430,111 @@ func (s *memorySeriesStorage) handlePersistQueue() {
|
||||||
close(s.persistStopped)
|
close(s.persistStopped)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// waitForNextFP waits an estimated duration, after which we want to process
|
||||||
|
// another fingerprint so that we will process all fingerprints in a tenth of
|
||||||
|
// s.purgeAfter, e.g. if we want to purge after 10d, we want to cycle through
|
||||||
|
// all fingerprints within 1d. However, this method will always wait for at
|
||||||
|
// least 10ms and never longer than 1m. If s.loopStopping is closed, it will
|
||||||
|
// return false immediately. The estimation is based on the total number of
|
||||||
|
// fingerprints as passed in.
|
||||||
|
func (s *memorySeriesStorage) waitForNextFP(numberOfFPs int) bool {
|
||||||
|
d := time.Minute
|
||||||
|
if numberOfFPs != 0 {
|
||||||
|
d = s.purgeAfter / time.Duration(numberOfFPs*10)
|
||||||
|
if d < 10*time.Millisecond {
|
||||||
|
d = 10 * time.Millisecond
|
||||||
|
}
|
||||||
|
if d > time.Minute {
|
||||||
|
d = time.Minute
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t := time.NewTimer(d)
|
||||||
|
select {
|
||||||
|
case <-t.C:
|
||||||
|
return true
|
||||||
|
case <-s.loopStopping:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *memorySeriesStorage) loop() {
|
func (s *memorySeriesStorage) loop() {
|
||||||
evictTicker := time.NewTicker(s.evictInterval)
|
evictTicker := time.NewTicker(s.evictInterval)
|
||||||
purgeTicker := time.NewTicker(s.purgeInterval)
|
|
||||||
checkpointTicker := time.NewTicker(s.checkpointInterval)
|
checkpointTicker := time.NewTicker(s.checkpointInterval)
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
evictTicker.Stop()
|
evictTicker.Stop()
|
||||||
purgeTicker.Stop()
|
|
||||||
checkpointTicker.Stop()
|
checkpointTicker.Stop()
|
||||||
glog.Info("Maintenance loop stopped.")
|
glog.Info("Maintenance loop stopped.")
|
||||||
close(s.loopStopped)
|
close(s.loopStopped)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
memoryFingerprints := make(chan clientmodel.Fingerprint)
|
||||||
|
go func() {
|
||||||
|
var fpIter <-chan clientmodel.Fingerprint
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if fpIter != nil {
|
||||||
|
for _ = range fpIter {
|
||||||
|
// Consume the iterator.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(memoryFingerprints)
|
||||||
|
}()
|
||||||
|
|
||||||
|
for {
|
||||||
|
// Initial wait, also important if there are no FPs yet.
|
||||||
|
if !s.waitForNextFP(s.fpToSeries.length()) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
begun := time.Now()
|
||||||
|
fpIter = s.fpToSeries.fpIter()
|
||||||
|
for fp := range fpIter {
|
||||||
|
select {
|
||||||
|
case memoryFingerprints <- fp:
|
||||||
|
case <-s.loopStopping:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.waitForNextFP(s.fpToSeries.length())
|
||||||
|
}
|
||||||
|
glog.Infof("Completed maintenance sweep through in-memory fingerprints in %v.", time.Since(begun))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
archivedFingerprints := make(chan clientmodel.Fingerprint)
|
||||||
|
go func() {
|
||||||
|
defer close(archivedFingerprints)
|
||||||
|
|
||||||
|
for {
|
||||||
|
archivedFPs, err := s.persistence.getFingerprintsModifiedBefore(
|
||||||
|
clientmodel.TimestampFromTime(time.Now()).Add(-1 * s.purgeAfter),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
glog.Error("Failed to lookup archived fingerprint ranges: ", err)
|
||||||
|
s.waitForNextFP(0)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Initial wait, also important if there are no FPs yet.
|
||||||
|
if !s.waitForNextFP(len(archivedFPs)) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
begun := time.Now()
|
||||||
|
for _, fp := range archivedFPs {
|
||||||
|
select {
|
||||||
|
case archivedFingerprints <- fp:
|
||||||
|
case <-s.loopStopping:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.waitForNextFP(len(archivedFPs))
|
||||||
|
}
|
||||||
|
glog.Infof("Completed maintenance sweep through archived fingerprints in %v.", time.Since(begun))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
loop:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-s.loopStopping:
|
case <-s.loopStopping:
|
||||||
return
|
break loop
|
||||||
case <-checkpointTicker.C:
|
case <-checkpointTicker.C:
|
||||||
s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
|
s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
|
||||||
case <-evictTicker.C:
|
case <-evictTicker.C:
|
||||||
|
@ -459,7 +546,7 @@ func (s *memorySeriesStorage) loop() {
|
||||||
select {
|
select {
|
||||||
case <-s.loopStopping:
|
case <-s.loopStopping:
|
||||||
glog.Info("Interrupted evicting chunks.")
|
glog.Info("Interrupted evicting chunks.")
|
||||||
return
|
break loop
|
||||||
default:
|
default:
|
||||||
// Keep going.
|
// Keep going.
|
||||||
}
|
}
|
||||||
|
@ -488,39 +575,17 @@ func (s *memorySeriesStorage) loop() {
|
||||||
duration := time.Since(begin)
|
duration := time.Since(begin)
|
||||||
s.evictDuration.Set(float64(duration) / float64(time.Millisecond))
|
s.evictDuration.Set(float64(duration) / float64(time.Millisecond))
|
||||||
glog.Infof("Done evicting chunks in %v.", duration)
|
glog.Infof("Done evicting chunks in %v.", duration)
|
||||||
case <-purgeTicker.C:
|
case fp := <-memoryFingerprints:
|
||||||
glog.Info("Purging old series data...")
|
s.purgeSeries(fp, clientmodel.TimestampFromTime(time.Now()).Add(-1*s.purgeAfter))
|
||||||
ts := clientmodel.TimestampFromTime(time.Now()).Add(-1 * s.purgeAfter)
|
// TODO: Move chunkdesc eviction and archiving here.
|
||||||
begin := time.Now()
|
case fp := <-archivedFingerprints:
|
||||||
|
s.purgeSeries(fp, clientmodel.TimestampFromTime(time.Now()).Add(-1*s.purgeAfter))
|
||||||
for fp := range s.fpToSeries.fpIter() {
|
|
||||||
select {
|
|
||||||
case <-s.loopStopping:
|
|
||||||
glog.Info("Interrupted purging series.")
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
s.purgeSeries(fp, ts)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Wait until both channels are closed.
|
||||||
persistedFPs, err := s.persistence.getFingerprintsModifiedBefore(ts)
|
for channelStillOpen := true; channelStillOpen; _, channelStillOpen = <-memoryFingerprints {
|
||||||
if err != nil {
|
|
||||||
glog.Error("Failed to lookup persisted fingerprint ranges: ", err)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
for _, fp := range persistedFPs {
|
|
||||||
select {
|
|
||||||
case <-s.loopStopping:
|
|
||||||
glog.Info("Interrupted purging series.")
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
s.purgeSeries(fp, ts)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
duration := time.Since(begin)
|
|
||||||
s.purgeDuration.Set(float64(duration) / float64(time.Millisecond))
|
|
||||||
glog.Infof("Done purging old series data in %v.", duration)
|
|
||||||
}
|
}
|
||||||
|
for channelStillOpen := true; channelStillOpen; _, channelStillOpen = <-archivedFingerprints {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,6 @@ func TestLoop(t *testing.T) {
|
||||||
o := &MemorySeriesStorageOptions{
|
o := &MemorySeriesStorageOptions{
|
||||||
MemoryEvictionInterval: 100 * time.Millisecond,
|
MemoryEvictionInterval: 100 * time.Millisecond,
|
||||||
MemoryRetentionPeriod: time.Hour,
|
MemoryRetentionPeriod: time.Hour,
|
||||||
PersistencePurgeInterval: 150 * time.Millisecond,
|
|
||||||
PersistenceRetentionPeriod: 24 * 7 * time.Hour,
|
PersistenceRetentionPeriod: 24 * 7 * time.Hour,
|
||||||
PersistenceStoragePath: directory.Path(),
|
PersistenceStoragePath: directory.Path(),
|
||||||
CheckpointInterval: 250 * time.Millisecond,
|
CheckpointInterval: 250 * time.Millisecond,
|
||||||
|
@ -492,7 +491,6 @@ func BenchmarkFuzz(b *testing.B) {
|
||||||
o := &MemorySeriesStorageOptions{
|
o := &MemorySeriesStorageOptions{
|
||||||
MemoryEvictionInterval: time.Second,
|
MemoryEvictionInterval: time.Second,
|
||||||
MemoryRetentionPeriod: 10 * time.Minute,
|
MemoryRetentionPeriod: 10 * time.Minute,
|
||||||
PersistencePurgeInterval: 10 * time.Second,
|
|
||||||
PersistenceRetentionPeriod: time.Hour,
|
PersistenceRetentionPeriod: time.Hour,
|
||||||
PersistenceStoragePath: directory.Path(),
|
PersistenceStoragePath: directory.Path(),
|
||||||
CheckpointInterval: 3 * time.Second,
|
CheckpointInterval: 3 * time.Second,
|
||||||
|
|
|
@ -38,7 +38,6 @@ func NewTestStorage(t testing.TB) (Storage, test.Closer) {
|
||||||
o := &MemorySeriesStorageOptions{
|
o := &MemorySeriesStorageOptions{
|
||||||
MemoryEvictionInterval: time.Minute,
|
MemoryEvictionInterval: time.Minute,
|
||||||
MemoryRetentionPeriod: time.Hour,
|
MemoryRetentionPeriod: time.Hour,
|
||||||
PersistencePurgeInterval: time.Hour,
|
|
||||||
PersistenceRetentionPeriod: 24 * 7 * time.Hour,
|
PersistenceRetentionPeriod: 24 * 7 * time.Hour,
|
||||||
PersistenceStoragePath: directory.Path(),
|
PersistenceStoragePath: directory.Path(),
|
||||||
CheckpointInterval: time.Hour,
|
CheckpointInterval: time.Hour,
|
||||||
|
|
Loading…
Reference in a new issue