From 276201598c18b0ddb62defb9badebadc000c4e4a Mon Sep 17 00:00:00 2001 From: gotjosh Date: Wed, 24 Apr 2024 12:27:35 +0100 Subject: [PATCH] Fix tests and a bug with the series lookup logic. Signed-off-by: gotjosh --- rules/alerting_test.go | 26 ++++--- rules/group.go | 17 ++--- rules/manager_test.go | 162 +++++++++++++++++++++-------------------- 3 files changed, 105 insertions(+), 100 deletions(-) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index ddfe345ef..366297b4a 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -710,19 +710,21 @@ func TestQueryForStateSeries(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) - alert := &Alert{ - State: 0, - Labels: labels.EmptyLabels(), - Annotations: labels.EmptyLabels(), - Value: 0, - ActiveAt: time.Time{}, - FiredAt: time.Time{}, - ResolvedAt: time.Time{}, - LastSentAt: time.Time{}, - ValidUntil: time.Time{}, - } + sample := rule.forStateSample(nil, time.Time{}, 0) + var matchersCount int + sample.Metric.Range(func(l labels.Label) { + matchersCount++ + }) - series, err := rule.QueryforStateSeries(context.Background(), alert, querier) + seriesSet, err := rule.QueryforStateSeries(context.Background(), querier) + + var series storage.Series + for seriesSet.Next() { + if seriesSet.At().Labels().Len() == matchersCount { + series = seriesSet.At() + break + } + } require.Equal(t, tst.expectedSeries, series) require.Equal(t, tst.expectedError, err) diff --git a/rules/group.go b/rules/group.go index 7afeaf96e..411ca68e2 100644 --- a/rules/group.go +++ b/rules/group.go @@ -681,21 +681,18 @@ func (g *Group) RestoreForState(ts time.Time) { continue } + result := map[uint64]storage.Series{} + for sset.Next() { + result[sset.At().Labels().DropMetricName().Hash()] = sset.At() + } + alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series - // Find the series for the given alert from the set. - for sset.Next() { - if sset.At().Labels().Hash() == a.Labels.Hash() { - s = sset.At() - break - } - } - - if s == nil { + s, ok := result[a.Labels.Hash()] + if !ok { return } - // Series found for the 'for' state. var t int64 var v float64 diff --git a/rules/manager_test.go b/rules/manager_test.go index 50ab6b861..a3bd335d1 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -407,6 +407,7 @@ func TestForStateRestore(t *testing.T) { // Prometheus goes down here. We create new rules and groups. type testInput struct { + name string restoreDuration time.Duration alerts []*Alert @@ -414,105 +415,110 @@ func TestForStateRestore(t *testing.T) { noRestore bool gracePeriod bool downDuration time.Duration + before func() } tests := []testInput{ { - // Normal restore (alerts were not firing). + name: "normal restore (alerts were not firing)", restoreDuration: 15 * time.Minute, alerts: rule.ActiveAlerts(), downDuration: 10 * time.Minute, }, { - // Testing Outage Tolerance. + name: "outage tolerance", restoreDuration: 40 * time.Minute, noRestore: true, num: 2, }, { - // No active alerts. + name: "no active alerts", restoreDuration: 50 * time.Minute, alerts: []*Alert{}, }, + { + name: "test the grace period", + restoreDuration: 25 * time.Minute, + alerts: []*Alert{}, + gracePeriod: true, + before: func() { + for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) + } + }, + num: 2, + }, } - testFunc := func(tst testInput) { - newRule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - alertForDuration, - 0, - labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, - ) - newGroup := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{newRule}, - ShouldRestore: true, - Opts: opts, + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.before != nil { + tt.before() + } + + newRule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + alertForDuration, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + ) + newGroup := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{newRule}, + ShouldRestore: true, + Opts: opts, + }) + + newGroups := make(map[string]*Group) + newGroups["default;"] = newGroup + + restoreTime := baseTime.Add(tt.restoreDuration) + // First eval before restoration. + newGroup.Eval(context.TODO(), restoreTime) + // Restore happens here. + newGroup.RestoreForState(restoreTime) + + got := newRule.ActiveAlerts() + for _, aa := range got { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + sort.Slice(got, func(i, j int) bool { + return labels.Compare(got[i].Labels, got[j].Labels) < 0 + }) + + // Checking if we have restored it correctly. + switch { + case tt.noRestore: + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, e.ActiveAt, restoreTime) + } + case tt.gracePeriod: + + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) + } + default: + exp := tt.alerts + require.Equal(t, len(exp), len(got)) + sortAlerts(exp) + sortAlerts(got) + for i, e := range exp { + require.Equal(t, e.Labels, got[i].Labels) + + // Difference in time should be within 1e6 ns, i.e. 1ms + // (due to conversion between ns & ms, float64 & int64). + activeAtDiff := float64(e.ActiveAt.Unix() + int64(tt.downDuration/time.Second) - got[i].ActiveAt.Unix()) + require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") + } + } }) - - newGroups := make(map[string]*Group) - newGroups["default;"] = newGroup - - restoreTime := baseTime.Add(tst.restoreDuration) - // First eval before restoration. - newGroup.Eval(context.TODO(), restoreTime) - // Restore happens here. - newGroup.RestoreForState(restoreTime) - - got := newRule.ActiveAlerts() - for _, aa := range got { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } - sort.Slice(got, func(i, j int) bool { - return labels.Compare(got[i].Labels, got[j].Labels) < 0 - }) - - // Checking if we have restored it correctly. - switch { - case tst.noRestore: - require.Len(t, got, tst.num) - for _, e := range got { - require.Equal(t, e.ActiveAt, restoreTime) - } - case tst.gracePeriod: - require.Len(t, got, tst.num) - for _, e := range got { - require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) - } - default: - exp := tst.alerts - require.Equal(t, len(exp), len(got)) - sortAlerts(exp) - sortAlerts(got) - for i, e := range exp { - require.Equal(t, e.Labels, got[i].Labels) - - // Difference in time should be within 1e6 ns, i.e. 1ms - // (due to conversion between ns & ms, float64 & int64). - activeAtDiff := float64(e.ActiveAt.Unix() + int64(tst.downDuration/time.Second) - got[i].ActiveAt.Unix()) - require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") - } - } } - - for _, tst := range tests { - testFunc(tst) - } - - // Testing the grace period. - for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - testFunc(testInput{ - restoreDuration: 25 * time.Minute, - alerts: []*Alert{}, - gracePeriod: true, - num: 2, - }) } func TestStaleness(t *testing.T) {