2013-03-21 10:29:33 -07:00
|
|
|
// Copyright 2013 Prometheus Team
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package metric
|
|
|
|
|
|
|
|
import (
|
|
|
|
"code.google.com/p/goprotobuf/proto"
|
2013-04-05 09:03:45 -07:00
|
|
|
"fmt"
|
2013-03-21 10:29:33 -07:00
|
|
|
"github.com/prometheus/prometheus/coding"
|
|
|
|
"github.com/prometheus/prometheus/model"
|
|
|
|
dto "github.com/prometheus/prometheus/model/generated"
|
|
|
|
"github.com/prometheus/prometheus/storage"
|
|
|
|
"github.com/prometheus/prometheus/storage/raw"
|
2013-04-05 09:03:45 -07:00
|
|
|
"github.com/prometheus/prometheus/storage/raw/leveldb"
|
|
|
|
"strings"
|
2013-03-21 10:29:33 -07:00
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
2013-04-28 10:01:56 -07:00
|
|
|
// CurationState contains high-level curation state information for the
|
|
|
|
// heads-up-display.
|
|
|
|
type CurationState struct {
|
|
|
|
Active bool
|
|
|
|
Name string
|
|
|
|
Limit time.Duration
|
|
|
|
Fingerprint model.Fingerprint
|
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
// watermarkFilter determines whether to include or exclude candidate
|
|
|
|
// values from the curation process by virtue of how old the high watermark is.
|
|
|
|
type watermarkFilter struct {
|
|
|
|
// curationState is the data store for curation remarks.
|
|
|
|
curationState raw.Persistence
|
|
|
|
// ignoreYoungerThan conveys this filter's policy of not working on elements
|
|
|
|
// younger than a given relative time duration. This is persisted to the
|
|
|
|
// curation remark database (curationState) to indicate how far a given
|
|
|
|
// policy of this type has progressed.
|
|
|
|
ignoreYoungerThan time.Duration
|
|
|
|
// processor is the post-processor that performs whatever action is desired on
|
|
|
|
// the data that is deemed valid to be worked on.
|
2013-05-02 03:37:24 -07:00
|
|
|
processor Processor
|
2013-04-05 09:03:45 -07:00
|
|
|
// stop functions as the global stop channel for all future operations.
|
|
|
|
stop chan bool
|
|
|
|
// stopAt is used to determine the elegibility of series for compaction.
|
|
|
|
stopAt time.Time
|
2013-04-28 10:01:56 -07:00
|
|
|
// status is the outbound channel for notifying the status page of its state.
|
|
|
|
status chan CurationState
|
2013-04-05 09:03:45 -07:00
|
|
|
}
|
|
|
|
|
2013-03-21 10:29:33 -07:00
|
|
|
// curator is responsible for effectuating a given curation policy across the
|
|
|
|
// stored samples on-disk. This is useful to compact sparse sample values into
|
|
|
|
// single sample entities to reduce keyspace load on the datastore.
|
2013-05-02 03:37:24 -07:00
|
|
|
type Curator struct {
|
2013-05-02 03:49:13 -07:00
|
|
|
// Stop functions as a channel that when empty allows the curator to operate.
|
2013-03-21 10:29:33 -07:00
|
|
|
// The moment a value is ingested inside of it, the curator goes into drain
|
|
|
|
// mode.
|
2013-05-02 03:49:13 -07:00
|
|
|
Stop chan bool
|
2013-04-05 09:03:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// watermarkDecoder converts (dto.Fingerprint, dto.MetricHighWatermark) doubles
|
|
|
|
// into (model.Fingerprint, model.Watermark) doubles.
|
|
|
|
type watermarkDecoder struct{}
|
|
|
|
|
|
|
|
// watermarkOperator scans over the curator.samples table for metrics whose
|
|
|
|
// high watermark has been determined to be allowable for curation. This type
|
|
|
|
// is individually responsible for compaction.
|
|
|
|
//
|
|
|
|
// The scanning starts from CurationRemark.LastCompletionTimestamp and goes
|
|
|
|
// forward until the stop point or end of the series is reached.
|
|
|
|
type watermarkOperator struct {
|
|
|
|
// curationState is the data store for curation remarks.
|
2013-03-21 10:29:33 -07:00
|
|
|
curationState raw.Persistence
|
2013-04-05 09:03:45 -07:00
|
|
|
// diskFrontier models the available seekable ranges for the provided
|
|
|
|
// sampleIterator.
|
|
|
|
diskFrontier diskFrontier
|
|
|
|
// ignoreYoungerThan is passed into the curation remark for the given series.
|
|
|
|
ignoreYoungerThan time.Duration
|
|
|
|
// processor is responsible for executing a given stategy on the
|
|
|
|
// to-be-operated-on series.
|
2013-05-02 03:37:24 -07:00
|
|
|
processor Processor
|
2013-04-05 09:03:45 -07:00
|
|
|
// sampleIterator is a snapshotted iterator for the time series.
|
|
|
|
sampleIterator leveldb.Iterator
|
|
|
|
// samples
|
|
|
|
samples raw.Persistence
|
|
|
|
// stopAt is a cue for when to stop mutating a given series.
|
|
|
|
stopAt time.Time
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// run facilitates the curation lifecycle.
|
2013-04-05 09:03:45 -07:00
|
|
|
//
|
|
|
|
// recencyThreshold represents the most recent time up to which values will be
|
|
|
|
// curated.
|
|
|
|
// curationState is the on-disk store where the curation remarks are made for
|
|
|
|
// how much progress has been made.
|
2013-05-02 03:37:24 -07:00
|
|
|
func (c Curator) Run(ignoreYoungerThan time.Duration, instant time.Time, processor Processor, curationState, samples, watermarks *leveldb.LevelDBPersistence, status chan CurationState) (err error) {
|
2013-04-05 09:03:45 -07:00
|
|
|
defer func(t time.Time) {
|
2013-05-07 08:14:04 -07:00
|
|
|
duration := float64(time.Since(t) / time.Millisecond)
|
2013-04-05 09:03:45 -07:00
|
|
|
|
|
|
|
labels := map[string]string{
|
|
|
|
cutOff: fmt.Sprint(ignoreYoungerThan),
|
|
|
|
processorName: processor.Name(),
|
|
|
|
result: success,
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
labels[result] = failure
|
|
|
|
}
|
|
|
|
|
|
|
|
curationDuration.IncrementBy(labels, duration)
|
|
|
|
curationDurations.Add(labels, duration)
|
|
|
|
}(time.Now())
|
2013-04-28 10:01:56 -07:00
|
|
|
defer func() {
|
|
|
|
status <- CurationState{
|
|
|
|
Active: false,
|
|
|
|
}
|
|
|
|
}()
|
2013-04-05 09:03:45 -07:00
|
|
|
|
|
|
|
iterator := samples.NewIterator(true)
|
|
|
|
defer iterator.Close()
|
|
|
|
|
|
|
|
diskFrontier, err := newDiskFrontier(iterator)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if diskFrontier == nil {
|
|
|
|
// No sample database exists; no work to do!
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2013-04-05 04:07:13 -07:00
|
|
|
decoder := watermarkDecoder{}
|
2013-04-05 09:03:45 -07:00
|
|
|
|
2013-04-05 04:07:13 -07:00
|
|
|
filter := watermarkFilter{
|
2013-04-05 09:03:45 -07:00
|
|
|
curationState: curationState,
|
|
|
|
ignoreYoungerThan: ignoreYoungerThan,
|
2013-04-28 10:01:56 -07:00
|
|
|
processor: processor,
|
|
|
|
status: status,
|
2013-05-02 03:49:13 -07:00
|
|
|
stop: c.Stop,
|
2013-04-05 09:03:45 -07:00
|
|
|
stopAt: instant.Add(-1 * ignoreYoungerThan),
|
2013-04-05 04:07:13 -07:00
|
|
|
}
|
2013-04-05 09:03:45 -07:00
|
|
|
|
|
|
|
// Right now, the ability to stop a curation is limited to the beginning of
|
|
|
|
// each fingerprint cycle. It is impractical to cease the work once it has
|
|
|
|
// begun for a given series.
|
2013-04-05 04:07:13 -07:00
|
|
|
operator := watermarkOperator{
|
2013-04-05 09:03:45 -07:00
|
|
|
curationState: curationState,
|
|
|
|
diskFrontier: *diskFrontier,
|
|
|
|
processor: processor,
|
|
|
|
ignoreYoungerThan: ignoreYoungerThan,
|
|
|
|
sampleIterator: iterator,
|
|
|
|
samples: samples,
|
|
|
|
stopAt: instant.Add(-1 * ignoreYoungerThan),
|
2013-04-05 04:07:13 -07:00
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
_, err = watermarks.ForEach(decoder, filter, operator)
|
2013-03-21 10:29:33 -07:00
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// drain instructs the curator to stop at the next convenient moment as to not
|
|
|
|
// introduce data inconsistencies.
|
2013-05-02 03:37:24 -07:00
|
|
|
func (c Curator) Drain() {
|
2013-05-02 03:49:13 -07:00
|
|
|
if len(c.Stop) == 0 {
|
|
|
|
c.Stop <- true
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w watermarkDecoder) DecodeKey(in interface{}) (out interface{}, err error) {
|
2013-04-05 09:03:45 -07:00
|
|
|
key := &dto.Fingerprint{}
|
|
|
|
bytes := in.([]byte)
|
2013-03-21 10:29:33 -07:00
|
|
|
|
|
|
|
err = proto.Unmarshal(bytes, key)
|
|
|
|
if err != nil {
|
2013-04-05 09:03:45 -07:00
|
|
|
return
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
out = model.NewFingerprintFromDTO(key)
|
2013-03-21 10:29:33 -07:00
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w watermarkDecoder) DecodeValue(in interface{}) (out interface{}, err error) {
|
2013-04-05 09:03:45 -07:00
|
|
|
dto := &dto.MetricHighWatermark{}
|
|
|
|
bytes := in.([]byte)
|
2013-03-21 10:29:33 -07:00
|
|
|
|
|
|
|
err = proto.Unmarshal(bytes, dto)
|
|
|
|
if err != nil {
|
2013-04-05 09:03:45 -07:00
|
|
|
return
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
out = model.NewWatermarkFromHighWatermarkDTO(dto)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
func (w watermarkFilter) shouldStop() bool {
|
|
|
|
return len(w.stop) != 0
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-05-02 03:37:24 -07:00
|
|
|
func getCurationRemark(states raw.Persistence, processor Processor, ignoreYoungerThan time.Duration, fingerprint model.Fingerprint) (remark *model.CurationRemark, err error) {
|
2013-04-05 09:03:45 -07:00
|
|
|
rawSignature, err := processor.Signature()
|
|
|
|
if err != nil {
|
|
|
|
return
|
2013-04-05 04:07:13 -07:00
|
|
|
}
|
2013-04-05 09:03:45 -07:00
|
|
|
|
|
|
|
curationKey := model.CurationKey{
|
|
|
|
Fingerprint: fingerprint,
|
|
|
|
ProcessorMessageRaw: rawSignature,
|
|
|
|
ProcessorMessageTypeName: processor.Name(),
|
|
|
|
IgnoreYoungerThan: ignoreYoungerThan,
|
|
|
|
}.ToDTO()
|
2013-04-05 04:07:13 -07:00
|
|
|
curationValue := &dto.CurationValue{}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
rawKey := coding.NewProtocolBuffer(curationKey)
|
|
|
|
|
|
|
|
has, err := states.Has(rawKey)
|
2013-03-21 10:29:33 -07:00
|
|
|
if err != nil {
|
2013-04-05 09:03:45 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
if !has {
|
|
|
|
return
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
rawCurationValue, err := states.Get(rawKey)
|
2013-03-21 10:29:33 -07:00
|
|
|
if err != nil {
|
2013-04-05 09:03:45 -07:00
|
|
|
return
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
err = proto.Unmarshal(rawCurationValue, curationValue)
|
|
|
|
if err != nil {
|
|
|
|
return
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
baseRemark := model.NewCurationRemarkFromDTO(curationValue)
|
|
|
|
remark = &baseRemark
|
|
|
|
|
2013-03-21 10:29:33 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
func (w watermarkFilter) Filter(key, value interface{}) (r storage.FilterResult) {
|
2013-04-28 10:01:56 -07:00
|
|
|
fingerprint := key.(model.Fingerprint)
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
defer func() {
|
|
|
|
labels := map[string]string{
|
|
|
|
cutOff: fmt.Sprint(w.ignoreYoungerThan),
|
|
|
|
result: strings.ToLower(r.String()),
|
|
|
|
processorName: w.processor.Name(),
|
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
curationFilterOperations.Increment(labels)
|
|
|
|
}()
|
|
|
|
|
2013-04-28 10:01:56 -07:00
|
|
|
defer func() {
|
|
|
|
w.status <- CurationState{
|
|
|
|
Active: true,
|
|
|
|
Name: w.processor.Name(),
|
|
|
|
Limit: w.ignoreYoungerThan,
|
|
|
|
Fingerprint: fingerprint,
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
if w.shouldStop() {
|
|
|
|
return storage.STOP
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
curationRemark, err := getCurationRemark(w.curationState, w.processor, w.ignoreYoungerThan, fingerprint)
|
|
|
|
if err != nil {
|
2013-03-21 10:29:33 -07:00
|
|
|
return
|
|
|
|
}
|
2013-04-05 09:03:45 -07:00
|
|
|
if curationRemark == nil {
|
|
|
|
r = storage.ACCEPT
|
2013-03-21 10:29:33 -07:00
|
|
|
return
|
|
|
|
}
|
2013-04-05 09:03:45 -07:00
|
|
|
if !curationRemark.OlderThan(w.stopAt) {
|
|
|
|
return storage.SKIP
|
|
|
|
}
|
|
|
|
watermark := value.(model.Watermark)
|
|
|
|
if !curationRemark.OlderThan(watermark.Time) {
|
|
|
|
return storage.SKIP
|
|
|
|
}
|
|
|
|
curationConsistent, err := w.curationConsistent(fingerprint, watermark)
|
|
|
|
if err != nil {
|
2013-03-21 10:29:33 -07:00
|
|
|
return
|
|
|
|
}
|
2013-04-05 09:03:45 -07:00
|
|
|
if curationConsistent {
|
|
|
|
return storage.SKIP
|
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
return storage.ACCEPT
|
|
|
|
}
|
|
|
|
|
|
|
|
// curationConsistent determines whether the given metric is in a dirty state
|
|
|
|
// and needs curation.
|
|
|
|
func (w watermarkFilter) curationConsistent(f model.Fingerprint, watermark model.Watermark) (consistent bool, err error) {
|
|
|
|
curationRemark, err := getCurationRemark(w.curationState, w.processor, w.ignoreYoungerThan, f)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if !curationRemark.OlderThan(watermark.Time) {
|
|
|
|
consistent = true
|
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
func (w watermarkOperator) Operate(key, _ interface{}) (oErr *storage.OperatorError) {
|
|
|
|
fingerprint := key.(model.Fingerprint)
|
|
|
|
|
|
|
|
seriesFrontier, err := newSeriesFrontier(fingerprint, w.diskFrontier, w.sampleIterator)
|
|
|
|
if err != nil || seriesFrontier == nil {
|
|
|
|
// An anomaly with the series frontier is severe in the sense that some sort
|
|
|
|
// of an illegal state condition exists in the storage layer, which would
|
|
|
|
// probably signify an illegal disk frontier.
|
2013-04-28 11:09:30 -07:00
|
|
|
return &storage.OperatorError{error: err, Continuable: false}
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
curationState, err := getCurationRemark(w.curationState, w.processor, w.ignoreYoungerThan, fingerprint)
|
|
|
|
if err != nil {
|
|
|
|
// An anomaly with the curation remark is likely not fatal in the sense that
|
|
|
|
// there was a decoding error with the entity and shouldn't be cause to stop
|
|
|
|
// work. The process will simply start from a pessimistic work time and
|
|
|
|
// work forward. With an idempotent processor, this is safe.
|
2013-04-28 11:09:30 -07:00
|
|
|
return &storage.OperatorError{error: err, Continuable: true}
|
2013-04-05 09:03:45 -07:00
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
startKey := model.SampleKey{
|
|
|
|
Fingerprint: fingerprint,
|
|
|
|
FirstTimestamp: seriesFrontier.optimalStartTime(curationState),
|
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
prospectiveKey, err := coding.NewProtocolBuffer(startKey.ToDTO()).Encode()
|
|
|
|
if err != nil {
|
|
|
|
// An encoding failure of a key is no reason to stop.
|
2013-04-28 11:09:30 -07:00
|
|
|
return &storage.OperatorError{error: err, Continuable: true}
|
2013-04-05 09:03:45 -07:00
|
|
|
}
|
|
|
|
if !w.sampleIterator.Seek(prospectiveKey) {
|
|
|
|
// LevelDB is picky about the seek ranges. If an iterator was invalidated,
|
|
|
|
// no work may occur, and the iterator cannot be recovered.
|
2013-04-28 11:09:30 -07:00
|
|
|
return &storage.OperatorError{error: fmt.Errorf("Illegal Condition: Iterator invalidated due to seek range."), Continuable: false}
|
2013-04-05 09:03:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
newestAllowedSample := w.stopAt
|
|
|
|
if !newestAllowedSample.Before(seriesFrontier.lastSupertime) {
|
|
|
|
newestAllowedSample = seriesFrontier.lastSupertime
|
|
|
|
}
|
2013-03-21 10:29:33 -07:00
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
lastTime, err := w.processor.Apply(w.sampleIterator, w.samples, newestAllowedSample, fingerprint)
|
2013-03-21 10:29:33 -07:00
|
|
|
if err != nil {
|
2013-04-05 09:03:45 -07:00
|
|
|
// We can't divine the severity of a processor error without refactoring the
|
|
|
|
// interface.
|
2013-04-28 11:09:30 -07:00
|
|
|
return &storage.OperatorError{error: err, Continuable: false}
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
err = w.refreshCurationRemark(fingerprint, lastTime)
|
2013-03-21 10:29:33 -07:00
|
|
|
if err != nil {
|
2013-04-05 09:03:45 -07:00
|
|
|
// Under the assumption that the processors are idempotent, they can be
|
|
|
|
// re-run; thusly, the commitment of the curation remark is no cause
|
|
|
|
// to cease further progress.
|
2013-04-28 11:09:30 -07:00
|
|
|
return &storage.OperatorError{error: err, Continuable: true}
|
2013-03-21 10:29:33 -07:00
|
|
|
}
|
|
|
|
|
2013-04-05 09:03:45 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w watermarkOperator) refreshCurationRemark(f model.Fingerprint, finished time.Time) (err error) {
|
|
|
|
signature, err := w.processor.Signature()
|
|
|
|
if err != nil {
|
2013-03-21 10:29:33 -07:00
|
|
|
return
|
|
|
|
}
|
2013-04-05 09:03:45 -07:00
|
|
|
curationKey := model.CurationKey{
|
|
|
|
Fingerprint: f,
|
|
|
|
ProcessorMessageRaw: signature,
|
|
|
|
ProcessorMessageTypeName: w.processor.Name(),
|
|
|
|
IgnoreYoungerThan: w.ignoreYoungerThan,
|
|
|
|
}.ToDTO()
|
|
|
|
curationValue := model.CurationRemark{
|
|
|
|
LastCompletionTimestamp: finished,
|
|
|
|
}.ToDTO()
|
|
|
|
|
|
|
|
err = w.curationState.Put(coding.NewProtocolBuffer(curationKey), coding.NewProtocolBuffer(curationValue))
|
2013-03-21 10:29:33 -07:00
|
|
|
|
|
|
|
return
|
|
|
|
}
|