Merge pull request #1591 from prometheus/beorn7/logging

Bring back logging of discarded samples
Björn Rabenstein 2016-04-26 01:47:24 +02:00
commit 95a7a58eb5
3 changed files with 68 additions and 18 deletions


@@ -444,20 +444,31 @@ func (sl *scrapeLoop) stop() {
 }
 
 func (sl *scrapeLoop) append(samples model.Samples) {
-    numOutOfOrder := 0
+    var (
+        numOutOfOrder = 0
+        numDuplicates = 0
+    )
 
     for _, s := range samples {
         if err := sl.appender.Append(s); err != nil {
-            if err == local.ErrOutOfOrderSample {
+            switch err {
+            case local.ErrOutOfOrderSample:
                 numOutOfOrder++
-            } else {
-                log.Warnf("Error inserting sample: %s", err)
+                log.With("sample", s).With("error", err).Debug("Sample discarded")
+            case local.ErrDuplicateSampleForTimestamp:
+                numDuplicates++
+                log.With("sample", s).With("error", err).Debug("Sample discarded")
+            default:
+                log.With("sample", s).With("error", err).Warn("Sample discarded")
             }
         }
     }
     if numOutOfOrder > 0 {
         log.With("numDropped", numOutOfOrder).Warn("Error on ingesting out-of-order samples")
     }
+    if numDuplicates > 0 {
+        log.With("numDropped", numDuplicates).Warn("Error on ingesting samples with different value but same timestamp")
+    }
 }
 
 func (sl *scrapeLoop) report(start time.Time, duration time.Duration, err error) {
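The switch above logs each discarded sample only at debug level and emits a single aggregated warning per scrape, which keeps log volume bounded when a target misbehaves. A minimal, self-contained sketch of that aggregate-then-warn pattern (the error values and the appendAll function below are illustrative stand-ins, not code from this change):

package main

import (
    "errors"
    "log"
)

// Stand-ins for the sentinel errors exported by the local storage package.
var (
    errOutOfOrder = errors.New("sample timestamp out of order")
    errDuplicate  = errors.New("sample with repeated timestamp but different value")
)

// appendAll counts discards per reason and logs one summary line per batch,
// mirroring the aggregation in scrapeLoop.append above.
func appendAll(results []error) {
    var numOutOfOrder, numDuplicates int
    for _, err := range results {
        switch err {
        case nil:
            // sample appended successfully
        case errOutOfOrder:
            numOutOfOrder++ // individual samples would only be logged at debug level
        case errDuplicate:
            numDuplicates++
        default:
            log.Printf("sample discarded: %v", err)
        }
    }
    if numOutOfOrder > 0 {
        log.Printf("dropped %d out-of-order samples", numOutOfOrder)
    }
    if numDuplicates > 0 {
        log.Printf("dropped %d samples with repeated timestamp but different value", numDuplicates)
    }
}

func main() {
    appendAll([]error{nil, errOutOfOrder, errDuplicate, errOutOfOrder})
}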


@@ -82,6 +82,12 @@ const (
     // Maintenance types for maintainSeriesDuration.
     maintainInMemory = "memory"
     maintainArchived = "archived"
+
+    discardReasonLabel = "reason"
+
+    // Reasons to discard samples.
+    outOfOrderTimestamp = "timestamp_out_of_order"
+    duplicateSample     = "multiple_values_for_timestamp"
 )
 
 func init() {


@@ -163,7 +163,7 @@ type memorySeriesStorage struct {
     numSeries                     prometheus.Gauge
     seriesOps                     *prometheus.CounterVec
     ingestedSamplesCount          prometheus.Counter
-    outOfOrderSamplesCount        prometheus.Counter
+    discardedSamplesCount         *prometheus.CounterVec
     nonExistentSeriesMatchesCount prometheus.Counter
     maintainSeriesDuration        *prometheus.SummaryVec
     persistenceUrgencyScore       prometheus.Gauge
@@ -242,12 +242,15 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) Storage {
             Name:      "ingested_samples_total",
             Help:      "The total number of samples ingested.",
         }),
-        outOfOrderSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{
-            Namespace: namespace,
-            Subsystem: subsystem,
-            Name:      "out_of_order_samples_total",
-            Help:      "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.",
-        }),
+        discardedSamplesCount: prometheus.NewCounterVec(
+            prometheus.CounterOpts{
+                Namespace: namespace,
+                Subsystem: subsystem,
+                Name:      "out_of_order_samples_total",
+                Help:      "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.",
+            },
+            []string{discardReasonLabel},
+        ),
         nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{
             Namespace: namespace,
             Subsystem: subsystem,
@@ -276,6 +279,25 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) Storage {
             Help: "1 if the storage is in rushed mode, 0 otherwise. In rushed mode, the system behaves as if the persistence_urgency_score is 1.",
         }),
     }
 
+    // Initialize metric vectors.
+    // TODO(beorn7): Rework once we have a utility function for it in client_golang.
+    s.discardedSamplesCount.WithLabelValues(outOfOrderTimestamp)
+    s.discardedSamplesCount.WithLabelValues(duplicateSample)
+    s.maintainSeriesDuration.WithLabelValues(maintainInMemory)
+    s.maintainSeriesDuration.WithLabelValues(maintainArchived)
+    s.seriesOps.WithLabelValues(create)
+    s.seriesOps.WithLabelValues(archive)
+    s.seriesOps.WithLabelValues(unarchive)
+    s.seriesOps.WithLabelValues(memoryPurge)
+    s.seriesOps.WithLabelValues(archivePurge)
+    s.seriesOps.WithLabelValues(requestedPurge)
+    s.seriesOps.WithLabelValues(memoryMaintenance)
+    s.seriesOps.WithLabelValues(archiveMaintenance)
+    s.seriesOps.WithLabelValues(completedQurantine)
+    s.seriesOps.WithLabelValues(droppedQuarantine)
+    s.seriesOps.WithLabelValues(failedQuarantine)
+
     return s
 }
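The WithLabelValues calls after construction pre-create the labelled children of the metric vectors so that they are exported with an initial value of 0 rather than appearing only after the first increment. A small stand-alone sketch of that client_golang pattern (the metric name and registration below are illustrative, not the storage's real metric):

package main

import "github.com/prometheus/client_golang/prometheus"

func main() {
    // Illustrative counter vector keyed by a "reason" label.
    discarded := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "example_discarded_samples_total",
            Help: "Samples discarded, partitioned by reason (example only).",
        },
        []string{"reason"},
    )
    prometheus.MustRegister(discarded)

    // Touching each known label combination up front creates the child
    // series at 0, so scrapers see it before anything is ever discarded.
    discarded.WithLabelValues("timestamp_out_of_order")
    discarded.WithLabelValues("multiple_values_for_timestamp")

    // The hot path later only increments the matching child.
    discarded.WithLabelValues("timestamp_out_of_order").Inc()
}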
@@ -577,9 +599,16 @@ func (s *memorySeriesStorage) DropMetricsForFingerprints(fps ...model.Fingerprint) {
     }
 }
 
-// ErrOutOfOrderSample is returned if a sample has a timestamp before the latest
-// timestamp in the series it is appended to.
-var ErrOutOfOrderSample = fmt.Errorf("sample timestamp out of order")
+var (
+    // ErrOutOfOrderSample is returned if a sample has a timestamp before the latest
+    // timestamp in the series it is appended to.
+    ErrOutOfOrderSample = fmt.Errorf("sample timestamp out of order")
+    // ErrDuplicateSampleForTimestamp is returned if a sample has the same
+    // timestamp as the latest sample in the series it is appended to but a
+    // different value. (Appending an identical sample is a no-op and does
+    // not cause an error.)
+    ErrDuplicateSampleForTimestamp = fmt.Errorf("sample with repeated timestamp but different value")
+)
 
 // Append implements Storage.
 func (s *memorySeriesStorage) Append(sample *model.Sample) error {
@@ -604,7 +633,7 @@ func (s *memorySeriesStorage) Append(sample *model.Sample) error {
         return err // getOrCreateSeries took care of quarantining already.
     }
 
-    if sample.Timestamp <= series.lastTime {
+    if sample.Timestamp == series.lastTime {
         // Don't report "no-op appends", i.e. where timestamp and sample
         // value are the same as for the last append, as they are a
         // common occurrence when using client-side timestamps
@@ -614,7 +643,11 @@ func (s *memorySeriesStorage) Append(sample *model.Sample) error {
             sample.Value == series.lastSampleValue {
             return nil
         }
-        s.outOfOrderSamplesCount.Inc()
+        s.discardedSamplesCount.WithLabelValues(duplicateSample).Inc()
+        return ErrDuplicateSampleForTimestamp // Caused by the caller.
+    }
+    if sample.Timestamp < series.lastTime {
+        s.discardedSamplesCount.WithLabelValues(outOfOrderTimestamp).Inc()
         return ErrOutOfOrderSample // Caused by the caller.
     }
     completedChunksCount, err := series.add(model.SamplePair{
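Because the earlier check now compares with == rather than <=, duplicate timestamps and genuinely out-of-order timestamps end up in separate branches with separate errors and counter labels. The resulting decision order can be summarised by this small illustrative helper (simplified, not code from the change):

package main

import "fmt"

// appendOutcome mirrors, in simplified form, the checks Append performs
// against the most recent sample of a series. Purely illustrative.
func appendOutcome(lastTime, ts int64, lastValue, v float64) string {
    switch {
    case ts == lastTime && v == lastValue:
        return "no-op: identical sample, silently accepted"
    case ts == lastTime:
        return "ErrDuplicateSampleForTimestamp"
    case ts < lastTime:
        return "ErrOutOfOrderSample"
    default:
        return "appended"
    }
}

func main() {
    fmt.Println(appendOutcome(100, 100, 1, 1)) // no-op
    fmt.Println(appendOutcome(100, 100, 1, 2)) // same timestamp, different value
    fmt.Println(appendOutcome(100, 99, 1, 1))  // out of order
    fmt.Println(appendOutcome(100, 101, 1, 1)) // appended
}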
@@ -1494,7 +1527,7 @@ func (s *memorySeriesStorage) Describe(ch chan<- *prometheus.Desc) {
     ch <- s.numSeries.Desc()
     s.seriesOps.Describe(ch)
     ch <- s.ingestedSamplesCount.Desc()
-    ch <- s.outOfOrderSamplesCount.Desc()
+    s.discardedSamplesCount.Describe(ch)
     ch <- s.nonExistentSeriesMatchesCount.Desc()
     ch <- numMemChunksDesc
     s.maintainSeriesDuration.Describe(ch)
@@ -1521,7 +1554,7 @@ func (s *memorySeriesStorage) Collect(ch chan<- prometheus.Metric) {
     ch <- s.numSeries
     s.seriesOps.Collect(ch)
     ch <- s.ingestedSamplesCount
-    ch <- s.outOfOrderSamplesCount
+    s.discardedSamplesCount.Collect(ch)
     ch <- s.nonExistentSeriesMatchesCount
     ch <- prometheus.MustNewConstMetric(
         numMemChunksDesc,
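Unlike a plain Counter, a *prometheus.CounterVec is itself a Collector, so Describe and Collect hand it the whole channel instead of sending a single Desc or Metric, as the two hunks above do. A brief sketch of that pattern (the storageCollector type and metric names are made up for illustration and are not taken from the Prometheus code base):

package main

import "github.com/prometheus/client_golang/prometheus"

// storageCollector sketches the pattern used above: scalar metrics are sent
// directly, while vectors delegate to their own Describe/Collect.
type storageCollector struct {
    ingested  prometheus.Counter
    discarded *prometheus.CounterVec
}

func (c *storageCollector) Describe(ch chan<- *prometheus.Desc) {
    ch <- c.ingested.Desc()
    c.discarded.Describe(ch) // a vector describes all of its children
}

func (c *storageCollector) Collect(ch chan<- prometheus.Metric) {
    ch <- c.ingested
    c.discarded.Collect(ch) // and collects one metric per label combination
}

func main() {
    c := &storageCollector{
        ingested: prometheus.NewCounter(prometheus.CounterOpts{
            Name: "example_ingested_samples_total",
            Help: "Example counter.",
        }),
        discarded: prometheus.NewCounterVec(prometheus.CounterOpts{
            Name: "example_discarded_samples_total",
            Help: "Example counter vector.",
        }, []string{"reason"}),
    }
    prometheus.MustRegister(c)
}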