Check that rules don't contain metrics with the same labelset (#6469)

Closes #5529

Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
Julien Pivotto 2019-12-18 13:29:35 +01:00 committed by Brian Brazil
parent f9bf8e4e4c
commit 2d7c8069d0
4 changed files with 96 additions and 7 deletions

View file

@ -311,6 +311,7 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc,
resultFPs := map[uint64]struct{}{} resultFPs := map[uint64]struct{}{}
var vec promql.Vector var vec promql.Vector
var alerts = make(map[uint64]*Alert, len(res))
for _, smpl := range res { for _, smpl := range res {
// Provide the alert information to the template. // Provide the alert information to the template.
l := make(map[string]string, len(smpl.Metric)) l := make(map[string]string, len(smpl.Metric))
@ -361,15 +362,16 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc,
h := lbs.Hash() h := lbs.Hash()
resultFPs[h] = struct{}{} resultFPs[h] = struct{}{}
// Check whether we already have alerting state for the identifying label set. if _, ok := alerts[h]; ok {
// Update the last value and annotations if so, create a new alert entry otherwise. err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels")
if alert, ok := r.active[h]; ok && alert.State != StateInactive { // We have already acquired the lock above hence using SetHealth and
alert.Value = smpl.V // SetLastError will deadlock.
alert.Annotations = annotations r.health = HealthBad
continue r.lastError = err
return nil, err
} }
r.active[h] = &Alert{ alerts[h] = &Alert{
Labels: lbs, Labels: lbs,
Annotations: annotations, Annotations: annotations,
ActiveAt: ts, ActiveAt: ts,
@ -378,6 +380,18 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc,
} }
} }
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
if alert, ok := r.active[h]; ok && alert.State != StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
continue
}
r.active[h] = a
}
// Check if any pending alerts should be removed or fire now. Write out alert timeseries. // Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp, a := range r.active { for fp, a := range r.active {
if _, ok := resultFPs[fp]; !ok { if _, ok := resultFPs[fp]; !ok {

View file

@ -14,6 +14,8 @@
package rules package rules
import ( import (
"context"
"fmt"
"testing" "testing"
"time" "time"
@ -21,6 +23,7 @@ import (
"github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/pkg/timestamp"
"github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil" "github.com/prometheus/prometheus/util/testutil"
) )
@ -289,3 +292,37 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) {
} }
testutil.Equals(t, result, filteredRes) testutil.Equals(t, result, filteredRes)
} }
// TestAlertingRuleDuplicate verifies that evaluating an alerting rule whose
// query yields two samples that collapse to the same labelset — after the
// rule's extra labels are applied — returns an explicit error instead of
// silently keeping only one alert. Regression test for #5529.
func TestAlertingRuleDuplicate(t *testing.T) {
	storage := teststorage.New(t)
	defer storage.Close()

	opts := promql.EngineOpts{
		Logger:        nil,
		Reg:           nil,
		MaxConcurrent: 10,
		MaxSamples:    10,
		Timeout:       10 * time.Second,
	}

	engine := promql.NewEngine(opts)
	ctx, cancelCtx := context.WithCancel(context.Background())
	defer cancelCtx()

	now := time.Now()

	// Both sides of the "or" become identical once the rule's "test" label
	// overwrites the label added by label_replace.
	expr, err := promql.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`)
	// Fail fast on a parse error instead of handing a nil expression to the rule.
	testutil.Ok(t, err)
	rule := NewAlertingRule(
		"foo",
		expr,
		time.Minute,
		labels.FromStrings("test", "test"),
		nil,
		nil,
		true, log.NewNopLogger(),
	)
	_, err = rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil)
	testutil.NotOk(t, err)
	e := fmt.Errorf("vector contains metrics with the same labelset after applying alert labels")
	testutil.ErrorEqual(t, e, err)
}

View file

@ -93,6 +93,16 @@ func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFu
sample.Metric = lb.Labels() sample.Metric = lb.Labels()
} }
// Check that the rule does not produce identical metrics after applying
// labels.
if vector.ContainsSameLabelset() {
err = fmt.Errorf("vector contains metrics with the same labelset after applying rule labels")
rule.SetHealth(HealthBad)
rule.SetLastError(err)
return nil, err
}
rule.SetHealth(HealthGood) rule.SetHealth(HealthGood)
rule.SetLastError(err) rule.SetLastError(err)
return vector, nil return vector, nil

View file

@ -15,6 +15,7 @@ package rules
import ( import (
"context" "context"
"fmt"
"testing" "testing"
"time" "time"
@ -91,3 +92,30 @@ labels:
got := rule.HTMLSnippet("/test/prefix") got := rule.HTMLSnippet("/test/prefix")
testutil.Assert(t, want == got, "incorrect HTML snippet; want:\n\n%s\n\ngot:\n\n%s", want, got) testutil.Assert(t, want == got, "incorrect HTML snippet; want:\n\n%s\n\ngot:\n\n%s", want, got)
} }
// TestRuleEvalDuplicate verifies that evaluating a recording rule whose query
// yields two samples that collapse to the same labelset — after the rule's
// extra labels are applied — returns an explicit error instead of recording
// conflicting series. Regression test for #5529.
func TestRuleEvalDuplicate(t *testing.T) {
	storage := teststorage.New(t)
	defer storage.Close()

	opts := promql.EngineOpts{
		Logger:        nil,
		Reg:           nil,
		MaxConcurrent: 10,
		MaxSamples:    10,
		Timeout:       10 * time.Second,
	}

	engine := promql.NewEngine(opts)
	ctx, cancelCtx := context.WithCancel(context.Background())
	defer cancelCtx()

	now := time.Now()

	// Both sides of the "or" become identical once the rule's "test" label
	// overwrites the label added by label_replace.
	expr, err := promql.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`)
	// Fail fast on a parse error instead of handing a nil expression to the rule.
	testutil.Ok(t, err)
	rule := NewRecordingRule("foo", expr, labels.FromStrings("test", "test"))
	_, err = rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil)
	testutil.NotOk(t, err)
	e := fmt.Errorf("vector contains metrics with the same labelset after applying rule labels")
	testutil.ErrorEqual(t, e, err)
}