mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-09 23:24:05 -08:00
Consider a series stale after 4.1 intervals with no data.
To cover the cases where stale markers may not be available, we need to infer the interval and mark series stale based on that. As we're lacking stale markers this is less accurate; however, it should be good enough for these cases. We need 4 intervals because, say, we had data at t=0 and t=10 coming via federation: the next data point should be at t=20, but it could take up to t=30 for it to actually be ingested, t=40 for it to be scraped via federation, and t=50 for it to be ingested by the federating server. We then add 10% on top of that for slack, as we do elsewhere.
This commit is contained in:
parent
c02c25d5ba
commit
220e78b9c3
|
@ -751,8 +751,10 @@ func (ev *evaluator) vectorSelector(node *VectorSelector) Vector {
|
|||
}
|
||||
t, v := it.Values()
|
||||
|
||||
peek := 1
|
||||
if !ok || t > refTime {
|
||||
t, v, ok = it.PeekBack(1)
|
||||
t, v, ok = it.PeekBack(peek)
|
||||
peek += 1
|
||||
if !ok || t < refTime-durationMilliseconds(StalenessDelta) {
|
||||
continue
|
||||
}
|
||||
|
@ -760,6 +762,21 @@ func (ev *evaluator) vectorSelector(node *VectorSelector) Vector {
|
|||
if value.IsStaleNaN(v) {
|
||||
continue
|
||||
}
|
||||
// Find timestamp before this point, within the staleness delta.
|
||||
prevT, _, ok := it.PeekBack(peek)
|
||||
if ok && prevT >= refTime-durationMilliseconds(StalenessDelta) {
|
||||
interval := t - prevT
|
||||
if interval*4+interval/10 < refTime-t {
|
||||
// It is more than 4 (+10% for safety) intervals
|
||||
// since the last data point, skip as stale.
|
||||
//
|
||||
// We need 4 to allow for federation, as with a 10s interval an eval
|
||||
// started at t=10 could be ingested at t=20, scraped for federation at
|
||||
// t=30 and only ingested by federation at t=40.
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
vec = append(vec, Sample{
|
||||
Metric: node.series[i].Labels(),
|
||||
Point: Point{V: v, T: t},
|
||||
|
|
27
promql/testdata/staleness.test
vendored
27
promql/testdata/staleness.test
vendored
|
@ -10,6 +10,15 @@ eval instant at 20s metric
|
|||
eval instant at 30s metric
|
||||
{__name__="metric"} 2
|
||||
|
||||
eval instant at 40s metric
|
||||
{__name__="metric"} 2
|
||||
|
||||
# It goes stale 4 intervals + 10% after the last sample.
|
||||
eval instant at 71s metric
|
||||
{__name__="metric"} 2
|
||||
|
||||
eval instant at 72s metric
|
||||
|
||||
|
||||
# Range vector ignores stale sample.
|
||||
eval instant at 30s count_over_time(metric[1m])
|
||||
|
@ -22,3 +31,21 @@ eval instant at 20s count_over_time(metric[1s])
|
|||
|
||||
eval instant at 20s count_over_time(metric[10s])
|
||||
{} 1
|
||||
|
||||
|
||||
clear
|
||||
|
||||
load 10s
|
||||
metric 0
|
||||
|
||||
# Series with single point goes stale after 5 minutes.
|
||||
eval instant at 0s metric
|
||||
{__name__="metric"} 0
|
||||
|
||||
eval instant at 150s metric
|
||||
{__name__="metric"} 0
|
||||
|
||||
eval instant at 300s metric
|
||||
{__name__="metric"} 0
|
||||
|
||||
eval instant at 301s metric
|
||||
|
|
|
@ -94,7 +94,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) {
|
|||
if ok {
|
||||
t, v = it.Values()
|
||||
} else {
|
||||
t, v, ok = it.PeekBack(0)
|
||||
t, v, ok = it.PeekBack(1)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue