diff --git a/rules/alerting.go b/rules/alerting.go index e9f553935e..1a1f4b7964 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -311,6 +311,7 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, resultFPs := map[uint64]struct{}{} var vec promql.Vector + var alerts = make(map[uint64]*Alert, len(res)) for _, smpl := range res { // Provide the alert information to the template. l := make(map[string]string, len(smpl.Metric)) @@ -361,15 +362,16 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, h := lbs.Hash() resultFPs[h] = struct{}{} - // Check whether we already have alerting state for the identifying label set. - // Update the last value and annotations if so, create a new alert entry otherwise. - if alert, ok := r.active[h]; ok && alert.State != StateInactive { - alert.Value = smpl.V - alert.Annotations = annotations - continue + if _, ok := alerts[h]; ok { + err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") + // We have already acquired the lock above hence using SetHealth and + // SetLastError will deadlock. + r.health = HealthBad + r.lastError = err + return nil, err } - r.active[h] = &Alert{ + alerts[h] = &Alert{ Labels: lbs, Annotations: annotations, ActiveAt: ts, @@ -378,6 +380,18 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, } } + for h, a := range alerts { + // Check whether we already have alerting state for the identifying label set. + // Update the last value and annotations if so, create a new alert entry otherwise. + if alert, ok := r.active[h]; ok && alert.State != StateInactive { + alert.Value = a.Value + alert.Annotations = a.Annotations + continue + } + + r.active[h] = a + } + // Check if any pending alerts should be removed or fire now. Write out alert timeseries. for fp, a := range r.active { if _, ok := resultFPs[fp]; !ok { diff --git a/rules/alerting_test.go b/rules/alerting_test.go index ee95dd47e4..6e65f6c229 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -14,6 +14,8 @@ package rules import ( + "context" + "fmt" "testing" "time" @@ -21,6 +23,7 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -289,3 +292,37 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { } testutil.Equals(t, result, filteredRes) } + +func TestAlertingRuleDuplicate(t *testing.T) { + storage := teststorage.New(t) + defer storage.Close() + + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxConcurrent: 10, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + + engine := promql.NewEngine(opts) + ctx, cancelCtx := context.WithCancel(context.Background()) + defer cancelCtx() + + now := time.Now() + + expr, _ := promql.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`) + rule := NewAlertingRule( + "foo", + expr, + time.Minute, + labels.FromStrings("test", "test"), + nil, + nil, + true, log.NewNopLogger(), + ) + _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil) + testutil.NotOk(t, err) + e := fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") + testutil.ErrorEqual(t, e, err) +} diff --git a/rules/recording.go b/rules/recording.go index 4e3ec635b6..6c29d3217a 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -93,6 +93,16 @@ func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFu sample.Metric = lb.Labels() } + + // Check that the rule does not produce identical metrics after applying + // labels. + if vector.ContainsSameLabelset() { + err = fmt.Errorf("vector contains metrics with the same labelset after applying rule labels") + rule.SetHealth(HealthBad) + rule.SetLastError(err) + return nil, err + } + rule.SetHealth(HealthGood) rule.SetLastError(err) return vector, nil diff --git a/rules/recording_test.go b/rules/recording_test.go index 8ca607a148..4f007b4051 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -15,6 +15,7 @@ package rules import ( "context" + "fmt" "testing" "time" @@ -91,3 +92,30 @@ labels: got := rule.HTMLSnippet("/test/prefix") testutil.Assert(t, want == got, "incorrect HTML snippet; want:\n\n%s\n\ngot:\n\n%s", want, got) } + +// TestRuleEvalDuplicate tests for duplicate labels in recorded metrics, see #5529. +func TestRuleEvalDuplicate(t *testing.T) { + storage := teststorage.New(t) + defer storage.Close() + + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxConcurrent: 10, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + + engine := promql.NewEngine(opts) + ctx, cancelCtx := context.WithCancel(context.Background()) + defer cancelCtx() + + now := time.Now() + + expr, _ := promql.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`) + rule := NewRecordingRule("foo", expr, labels.FromStrings("test", "test")) + _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil) + testutil.NotOk(t, err) + e := fmt.Errorf("vector contains metrics with the same labelset after applying rule labels") + testutil.ErrorEqual(t, e, err) +}