Bring back logging of discarded samples

But only on DEBUG level.

Also, count and report two cases separately: out-of-order timestamps
on the one hand, and same timestamp but different value on the other
hand.
This commit is contained in:
beorn7 2016-04-25 16:43:52 +02:00
parent 25775b3583
commit d566808d40
3 changed files with 49 additions and 18 deletions

View file

@ -444,20 +444,31 @@ func (sl *scrapeLoop) stop() {
} }
func (sl *scrapeLoop) append(samples model.Samples) { func (sl *scrapeLoop) append(samples model.Samples) {
numOutOfOrder := 0 var (
numOutOfOrder = 0
numDuplicates = 0
)
for _, s := range samples { for _, s := range samples {
if err := sl.appender.Append(s); err != nil { if err := sl.appender.Append(s); err != nil {
if err == local.ErrOutOfOrderSample { switch err {
case local.ErrOutOfOrderSample:
numOutOfOrder++ numOutOfOrder++
} else { log.With("sample", s).With("error", err).Debug("Sample discarded")
log.Warnf("Error inserting sample: %s", err) case local.ErrDuplicateSampleForTimestamp:
numDuplicates++
log.With("sample", s).With("error", err).Debug("Sample discarded")
default:
log.With("sample", s).With("error", err).Warn("Sample discarded")
} }
} }
} }
if numOutOfOrder > 0 { if numOutOfOrder > 0 {
log.With("numDropped", numOutOfOrder).Warn("Error on ingesting out-of-order samples") log.With("numDropped", numOutOfOrder).Warn("Error on ingesting out-of-order samples")
} }
if numDuplicates > 0 {
log.With("numDropped", numDuplicates).Warn("Error on ingesting samples with different value but same timestamp")
}
} }
func (sl *scrapeLoop) report(start time.Time, duration time.Duration, err error) { func (sl *scrapeLoop) report(start time.Time, duration time.Duration, err error) {

View file

@ -82,6 +82,12 @@ const (
// Maintenance types for maintainSeriesDuration. // Maintenance types for maintainSeriesDuration.
maintainInMemory = "memory" maintainInMemory = "memory"
maintainArchived = "archived" maintainArchived = "archived"
discardReasonLabel = "reason"
// Reasons to discard samples.
outOfOrderTimestamp = "timestamp_out_of_order"
duplicateSample = "multiple_values_for_timestamp"
) )
func init() { func init() {

View file

@ -163,7 +163,7 @@ type memorySeriesStorage struct {
numSeries prometheus.Gauge numSeries prometheus.Gauge
seriesOps *prometheus.CounterVec seriesOps *prometheus.CounterVec
ingestedSamplesCount prometheus.Counter ingestedSamplesCount prometheus.Counter
outOfOrderSamplesCount prometheus.Counter discardedSamplesCount *prometheus.CounterVec
nonExistentSeriesMatchesCount prometheus.Counter nonExistentSeriesMatchesCount prometheus.Counter
maintainSeriesDuration *prometheus.SummaryVec maintainSeriesDuration *prometheus.SummaryVec
persistenceUrgencyScore prometheus.Gauge persistenceUrgencyScore prometheus.Gauge
@ -242,12 +242,15 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) Storage {
Name: "ingested_samples_total", Name: "ingested_samples_total",
Help: "The total number of samples ingested.", Help: "The total number of samples ingested.",
}), }),
outOfOrderSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{ discardedSamplesCount: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "out_of_order_samples_total", Name: "out_of_order_samples_total",
Help: "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.", Help: "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.",
}), },
[]string{discardReasonLabel},
),
nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{ nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Subsystem: subsystem, Subsystem: subsystem,
@ -577,9 +580,16 @@ func (s *memorySeriesStorage) DropMetricsForFingerprints(fps ...model.Fingerprin
} }
} }
var (
// ErrOutOfOrderSample is returned if a sample has a timestamp before the latest // ErrOutOfOrderSample is returned if a sample has a timestamp before the latest
// timestamp in the series it is appended to. // timestamp in the series it is appended to.
var ErrOutOfOrderSample = fmt.Errorf("sample timestamp out of order") ErrOutOfOrderSample = fmt.Errorf("sample timestamp out of order")
// ErrDuplicateSampleForTimestamp is returned if a sample has the same
// timestamp as the latest sample in the series it is appended to but a
// different value. (Appending an identical sample is a no-op and does not
// cause an error.)
ErrDuplicateSampleForTimestamp = fmt.Errorf("sample with repeated timestamp but different value")
)
// Append implements Storage. // Append implements Storage.
func (s *memorySeriesStorage) Append(sample *model.Sample) error { func (s *memorySeriesStorage) Append(sample *model.Sample) error {
@ -604,7 +614,7 @@ func (s *memorySeriesStorage) Append(sample *model.Sample) error {
return err // getOrCreateSeries took care of quarantining already. return err // getOrCreateSeries took care of quarantining already.
} }
if sample.Timestamp <= series.lastTime { if sample.Timestamp == series.lastTime {
// Don't report "no-op appends", i.e. where timestamp and sample // Don't report "no-op appends", i.e. where timestamp and sample
// value are the same as for the last append, as they are a // value are the same as for the last append, as they are a
// common occurrence when using client-side timestamps // common occurrence when using client-side timestamps
@ -614,7 +624,11 @@ func (s *memorySeriesStorage) Append(sample *model.Sample) error {
sample.Value == series.lastSampleValue { sample.Value == series.lastSampleValue {
return nil return nil
} }
s.outOfOrderSamplesCount.Inc() s.discardedSamplesCount.WithLabelValues(duplicateSample).Inc()
return ErrDuplicateSampleForTimestamp // Caused by the caller.
}
if sample.Timestamp < series.lastTime {
s.discardedSamplesCount.WithLabelValues(outOfOrderTimestamp).Inc()
return ErrOutOfOrderSample // Caused by the caller. return ErrOutOfOrderSample // Caused by the caller.
} }
completedChunksCount, err := series.add(model.SamplePair{ completedChunksCount, err := series.add(model.SamplePair{
@ -1494,7 +1508,7 @@ func (s *memorySeriesStorage) Describe(ch chan<- *prometheus.Desc) {
ch <- s.numSeries.Desc() ch <- s.numSeries.Desc()
s.seriesOps.Describe(ch) s.seriesOps.Describe(ch)
ch <- s.ingestedSamplesCount.Desc() ch <- s.ingestedSamplesCount.Desc()
ch <- s.outOfOrderSamplesCount.Desc() s.discardedSamplesCount.Describe(ch)
ch <- s.nonExistentSeriesMatchesCount.Desc() ch <- s.nonExistentSeriesMatchesCount.Desc()
ch <- numMemChunksDesc ch <- numMemChunksDesc
s.maintainSeriesDuration.Describe(ch) s.maintainSeriesDuration.Describe(ch)
@ -1521,7 +1535,7 @@ func (s *memorySeriesStorage) Collect(ch chan<- prometheus.Metric) {
ch <- s.numSeries ch <- s.numSeries
s.seriesOps.Collect(ch) s.seriesOps.Collect(ch)
ch <- s.ingestedSamplesCount ch <- s.ingestedSamplesCount
ch <- s.outOfOrderSamplesCount s.discardedSamplesCount.Collect(ch)
ch <- s.nonExistentSeriesMatchesCount ch <- s.nonExistentSeriesMatchesCount
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
numMemChunksDesc, numMemChunksDesc,