mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Track staleness per rule rather than per group.
This commit is contained in:
parent
0451d6d31b
commit
9bc68db7e6
|
@ -129,8 +129,8 @@ type Group struct {
|
||||||
name string
|
name string
|
||||||
interval time.Duration
|
interval time.Duration
|
||||||
rules []Rule
|
rules []Rule
|
||||||
|
seriesInPreviousEval []map[string]labels.Labels // One per Rule.
|
||||||
opts *ManagerOptions
|
opts *ManagerOptions
|
||||||
seriesInPreviousEval map[string]labels.Labels
|
|
||||||
|
|
||||||
done chan struct{}
|
done chan struct{}
|
||||||
terminated chan struct{}
|
terminated chan struct{}
|
||||||
|
@ -143,7 +143,7 @@ func NewGroup(name string, interval time.Duration, rules []Rule, opts *ManagerOp
|
||||||
interval: interval,
|
interval: interval,
|
||||||
rules: rules,
|
rules: rules,
|
||||||
opts: opts,
|
opts: opts,
|
||||||
seriesInPreviousEval: map[string]labels.Labels{},
|
seriesInPreviousEval: make([]map[string]labels.Labels, len(rules)),
|
||||||
done: make(chan struct{}),
|
done: make(chan struct{}),
|
||||||
terminated: make(chan struct{}),
|
terminated: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
@ -259,17 +259,15 @@ func typeForRule(r Rule) ruleType {
|
||||||
// rule dependency.
|
// rule dependency.
|
||||||
func (g *Group) Eval(ts time.Time) {
|
func (g *Group) Eval(ts time.Time) {
|
||||||
var (
|
var (
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
mu sync.Mutex
|
|
||||||
seriesReturned = make(map[string]labels.Labels, len(g.seriesInPreviousEval))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for _, rule := range g.rules {
|
for i, rule := range g.rules {
|
||||||
rtyp := string(typeForRule(rule))
|
rtyp := string(typeForRule(rule))
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
// BUG(julius): Look at fixing thundering herd.
|
// BUG(julius): Look at fixing thundering herd.
|
||||||
go func(rule Rule) {
|
go func(i int, rule Rule) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
defer func(t time.Time) {
|
defer func(t time.Time) {
|
||||||
|
@ -303,6 +301,7 @@ func (g *Group) Eval(ts time.Time) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i]))
|
||||||
for _, s := range vector {
|
for _, s := range vector {
|
||||||
if _, err := app.Add(s.Metric, s.T, s.V); err != nil {
|
if _, err := app.Add(s.Metric, s.T, s.V); err != nil {
|
||||||
switch err {
|
switch err {
|
||||||
|
@ -316,9 +315,7 @@ func (g *Group) Eval(ts time.Time) {
|
||||||
log.With("sample", s).With("err", err).Warn("Rule evaluation result discarded")
|
log.With("sample", s).With("err", err).Warn("Rule evaluation result discarded")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
mu.Lock()
|
|
||||||
seriesReturned[s.Metric.String()] = s.Metric
|
seriesReturned[s.Metric.String()] = s.Metric
|
||||||
mu.Unlock()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if numOutOfOrder > 0 {
|
if numOutOfOrder > 0 {
|
||||||
|
@ -327,41 +324,29 @@ func (g *Group) Eval(ts time.Time) {
|
||||||
if numDuplicates > 0 {
|
if numDuplicates > 0 {
|
||||||
log.With("numDropped", numDuplicates).Warn("Error on ingesting results from rule evaluation with different value but same timestamp")
|
log.With("numDropped", numDuplicates).Warn("Error on ingesting results from rule evaluation with different value but same timestamp")
|
||||||
}
|
}
|
||||||
if err := app.Commit(); err != nil {
|
|
||||||
log.With("err", err).Warn("rule sample appending failed")
|
|
||||||
}
|
|
||||||
}(rule)
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
|
|
||||||
// TODO(bbrazil): This should apply per-rule.
|
for metric, lset := range g.seriesInPreviousEval[i] {
|
||||||
app, err := g.opts.Appendable.Appender()
|
if _, ok := seriesReturned[metric]; !ok {
|
||||||
if err != nil {
|
// Series no longer exposed, mark it stale.
|
||||||
log.With("err", err).Warn("creating appender failed")
|
_, err = app.Add(lset, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN))
|
||||||
return
|
switch err {
|
||||||
}
|
case nil:
|
||||||
for metric, lset := range g.seriesInPreviousEval {
|
case storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp:
|
||||||
if _, ok := seriesReturned[metric]; !ok {
|
// Do not count these in logging, as this is expected if the series
|
||||||
// Series no longer exposed, mark it stale.
|
// is exposed from a different rule.
|
||||||
_, err = app.Add(lset, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN))
|
default:
|
||||||
switch err {
|
log.With("sample", metric).With("err", err).Warn("adding stale sample failed")
|
||||||
case nil:
|
}
|
||||||
case storage.ErrOutOfOrderSample, storage.ErrDuplicateSampleForTimestamp:
|
|
||||||
// Do not count these in logging, as this is expected if the series
|
|
||||||
// is exposed from a different group.
|
|
||||||
continue
|
|
||||||
default:
|
|
||||||
log.With("sample", metric).With("err", err).Warn("adding stale sample failed")
|
|
||||||
if err := app.Rollback(); err != nil {
|
|
||||||
log.With("err", err).Warn("rule stale sample rollback failed")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if err := app.Commit(); err != nil {
|
||||||
|
log.With("err", err).Warn("rule sample appending failed")
|
||||||
|
} else {
|
||||||
|
g.seriesInPreviousEval[i] = seriesReturned
|
||||||
|
}
|
||||||
|
}(i, rule)
|
||||||
}
|
}
|
||||||
if err := app.Commit(); err != nil {
|
wg.Wait()
|
||||||
log.With("err", err).Warn("rule stale sample appending failed")
|
|
||||||
}
|
|
||||||
g.seriesInPreviousEval = seriesReturned
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// sendAlerts sends alert notifications for the given rule.
|
// sendAlerts sends alert notifications for the given rule.
|
||||||
|
|
Loading…
Reference in a new issue