Fix tests and a bug with the series lookup logic.

Signed-off-by: gotjosh <josue.abreu@gmail.com>
This commit is contained in:
gotjosh 2024-04-24 12:27:35 +01:00
parent e6dcbd2e26
commit 276201598c
No known key found for this signature in database
GPG key ID: A6E1DDE38FF3C74E
3 changed files with 105 additions and 100 deletions

View file

@ -710,19 +710,21 @@ func TestQueryForStateSeries(t *testing.T) {
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
)
alert := &Alert{
State: 0,
Labels: labels.EmptyLabels(),
Annotations: labels.EmptyLabels(),
Value: 0,
ActiveAt: time.Time{},
FiredAt: time.Time{},
ResolvedAt: time.Time{},
LastSentAt: time.Time{},
ValidUntil: time.Time{},
}
sample := rule.forStateSample(nil, time.Time{}, 0)
var matchersCount int
sample.Metric.Range(func(l labels.Label) {
matchersCount++
})
series, err := rule.QueryforStateSeries(context.Background(), alert, querier)
seriesSet, err := rule.QueryforStateSeries(context.Background(), querier)
var series storage.Series
for seriesSet.Next() {
if seriesSet.At().Labels().Len() == matchersCount {
series = seriesSet.At()
break
}
}
require.Equal(t, tst.expectedSeries, series)
require.Equal(t, tst.expectedError, err)

View file

@ -681,21 +681,18 @@ func (g *Group) RestoreForState(ts time.Time) {
continue
}
result := map[uint64]storage.Series{}
for sset.Next() {
result[sset.At().Labels().DropMetricName().Hash()] = sset.At()
}
alertRule.ForEachActiveAlert(func(a *Alert) {
var s storage.Series
// Find the series for the given alert from the set.
for sset.Next() {
if sset.At().Labels().Hash() == a.Labels.Hash() {
s = sset.At()
break
}
}
if s == nil {
s, ok := result[a.Labels.Hash()]
if !ok {
return
}
// Series found for the 'for' state.
var t int64
var v float64

View file

@ -407,6 +407,7 @@ func TestForStateRestore(t *testing.T) {
// Prometheus goes down here. We create new rules and groups.
type testInput struct {
name string
restoreDuration time.Duration
alerts []*Alert
@ -414,105 +415,110 @@ func TestForStateRestore(t *testing.T) {
noRestore bool
gracePeriod bool
downDuration time.Duration
before func()
}
tests := []testInput{
{
// Normal restore (alerts were not firing).
name: "normal restore (alerts were not firing)",
restoreDuration: 15 * time.Minute,
alerts: rule.ActiveAlerts(),
downDuration: 10 * time.Minute,
},
{
// Testing Outage Tolerance.
name: "outage tolerance",
restoreDuration: 40 * time.Minute,
noRestore: true,
num: 2,
},
{
// No active alerts.
name: "no active alerts",
restoreDuration: 50 * time.Minute,
alerts: []*Alert{},
},
{
name: "test the grace period",
restoreDuration: 25 * time.Minute,
alerts: []*Alert{},
gracePeriod: true,
before: func() {
for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} {
evalTime := baseTime.Add(duration)
group.Eval(context.TODO(), evalTime)
}
},
num: 2,
},
}
testFunc := func(tst testInput) {
newRule := NewAlertingRule(
"HTTPRequestRateLow",
expr,
alertForDuration,
0,
labels.FromStrings("severity", "critical"),
labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil,
)
newGroup := NewGroup(GroupOptions{
Name: "default",
Interval: time.Second,
Rules: []Rule{newRule},
ShouldRestore: true,
Opts: opts,
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.before != nil {
tt.before()
}
newRule := NewAlertingRule(
"HTTPRequestRateLow",
expr,
alertForDuration,
0,
labels.FromStrings("severity", "critical"),
labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil,
)
newGroup := NewGroup(GroupOptions{
Name: "default",
Interval: time.Second,
Rules: []Rule{newRule},
ShouldRestore: true,
Opts: opts,
})
newGroups := make(map[string]*Group)
newGroups["default;"] = newGroup
restoreTime := baseTime.Add(tt.restoreDuration)
// First eval before restoration.
newGroup.Eval(context.TODO(), restoreTime)
// Restore happens here.
newGroup.RestoreForState(restoreTime)
got := newRule.ActiveAlerts()
for _, aa := range got {
require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels)
}
sort.Slice(got, func(i, j int) bool {
return labels.Compare(got[i].Labels, got[j].Labels) < 0
})
// Checking if we have restored it correctly.
switch {
case tt.noRestore:
require.Len(t, got, tt.num)
for _, e := range got {
require.Equal(t, e.ActiveAt, restoreTime)
}
case tt.gracePeriod:
require.Len(t, got, tt.num)
for _, e := range got {
require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime))
}
default:
exp := tt.alerts
require.Equal(t, len(exp), len(got))
sortAlerts(exp)
sortAlerts(got)
for i, e := range exp {
require.Equal(t, e.Labels, got[i].Labels)
// Difference in time should be within 1e6 ns, i.e. 1ms
// (due to conversion between ns & ms, float64 & int64).
activeAtDiff := float64(e.ActiveAt.Unix() + int64(tt.downDuration/time.Second) - got[i].ActiveAt.Unix())
require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong")
}
}
})
newGroups := make(map[string]*Group)
newGroups["default;"] = newGroup
restoreTime := baseTime.Add(tst.restoreDuration)
// First eval before restoration.
newGroup.Eval(context.TODO(), restoreTime)
// Restore happens here.
newGroup.RestoreForState(restoreTime)
got := newRule.ActiveAlerts()
for _, aa := range got {
require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels)
}
sort.Slice(got, func(i, j int) bool {
return labels.Compare(got[i].Labels, got[j].Labels) < 0
})
// Checking if we have restored it correctly.
switch {
case tst.noRestore:
require.Len(t, got, tst.num)
for _, e := range got {
require.Equal(t, e.ActiveAt, restoreTime)
}
case tst.gracePeriod:
require.Len(t, got, tst.num)
for _, e := range got {
require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime))
}
default:
exp := tst.alerts
require.Equal(t, len(exp), len(got))
sortAlerts(exp)
sortAlerts(got)
for i, e := range exp {
require.Equal(t, e.Labels, got[i].Labels)
// Difference in time should be within 1e6 ns, i.e. 1ms
// (due to conversion between ns & ms, float64 & int64).
activeAtDiff := float64(e.ActiveAt.Unix() + int64(tst.downDuration/time.Second) - got[i].ActiveAt.Unix())
require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong")
}
}
}
for _, tst := range tests {
testFunc(tst)
}
// Testing the grace period.
for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} {
evalTime := baseTime.Add(duration)
group.Eval(context.TODO(), evalTime)
}
testFunc(testInput{
restoreDuration: 25 * time.Minute,
alerts: []*Alert{},
gracePeriod: true,
num: 2,
})
}
func TestStaleness(t *testing.T) {