rules: cleanup alerting test

Repository: https://github.com/prometheus/prometheus.git
commit f06cf664e1
parent 9bd4f6d017
The commit factors the alert-state carry-over out of ApplyConfig into a new transferAlertState method on the rule manager, and rewrites the alerting test on top of the promql test suite, adding a dedicated TestTransferAlertState.

Changes to the rule manager:

@@ -21,6 +21,8 @@ import (
 	"sync"
 	"time"
 
+	html_template "html/template"
+
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/log"
 
@@ -84,7 +86,7 @@ type Rule interface {
 	String() string
 	// HTMLSnippet returns a human-readable string representation of the rule,
 	// decorated with HTML elements for use the web frontend.
-	HTMLSnippet(pathPrefix string) template.HTML
+	HTMLSnippet(pathPrefix string) html_template.HTML
 }
 
 // The Manager manages recording and alerting rules.
@@ -285,14 +287,9 @@ func (m *Manager) runIteration() {
 	wg.Wait()
 }
 
-// ApplyConfig updates the rule manager's state as the config requires. If
-// loading the new rules failed the old rule set is restored. Returns true on success.
-func (m *Manager) ApplyConfig(conf *config.Config) bool {
-	m.Lock()
-	defer m.Unlock()
-
-	success := true
-	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)
-
+// transferAlertState makes a copy of the state of alerting rules and returns a function
+// that restores them in the current state.
+func (m *Manager) transferAlertState() func() {
+
 	alertingRules := map[string]*AlertingRule{}
 	for _, r := range m.rules {
@@ -301,6 +298,31 @@ func (m *Manager) ApplyConfig(conf *config.Config) bool {
 		}
 	}
 
+	return func() {
+		// Restore alerting rule state.
+		for _, r := range m.rules {
+			ar, ok := r.(*AlertingRule)
+			if !ok {
+				continue
+			}
+			if old, ok := alertingRules[ar.name]; ok {
+				ar.activeAlerts = old.activeAlerts
+			}
+		}
+	}
+}
+
+// ApplyConfig updates the rule manager's state as the config requires. If
+// loading the new rules failed the old rule set is restored. Returns true on success.
+func (m *Manager) ApplyConfig(conf *config.Config) bool {
+	m.Lock()
+	defer m.Unlock()
+
+	defer m.transferAlertState()()
+
+	success := true
+	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)
+
 	rulesSnapshot := make([]Rule, len(m.rules))
 	copy(rulesSnapshot, m.rules)
 	m.rules = m.rules[:0]
@@ -321,16 +343,6 @@ func (m *Manager) ApplyConfig(conf *config.Config) bool {
 	log.Errorf("Error loading rules, previous rule set restored: %s", err)
 	success = false
 	}
-	// Restore alerting rule state.
-	for _, r := range m.rules {
-		ar, ok := r.(*AlertingRule)
-		if !ok {
-			continue
-		}
-		if old, ok := alertingRules[ar.name]; ok {
-			ar.activeAlerts = old.activeAlerts
-		}
-	}
 
 	return success
 }
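The `defer m.transferAlertState()()` call added to `ApplyConfig` leans on how `defer` treats a function that returns a function: the outer call runs immediately, so the alerting state is copied while the old rule set is still loaded, and only the returned closure is deferred, so the restore runs once `ApplyConfig` returns with the new rules in place. A minimal sketch of that idiom, independent of the Prometheus types (the `snapshot` and `reload` names below are illustrative, not from the codebase):

```go
package main

import "fmt"

// snapshot copies the current state and returns a closure that puts it back,
// mirroring the shape of Manager.transferAlertState above.
func snapshot(state *map[string]int) func() {
	saved := map[string]int{}
	for k, v := range *state {
		saved[k] = v
	}
	return func() { *state = saved }
}

func reload(state *map[string]int) {
	// defer f()(): f() runs now (take the copy), the returned func runs on return (restore).
	defer snapshot(state)()

	// Simulate a config reload that wipes the state.
	*state = map[string]int{}
}

func main() {
	state := map[string]int{"HTTPRequestRateLow": 2}
	reload(&state)
	fmt.Println(state["HTTPRequestRateLow"]) // 2: restored by the deferred closure
}
```

In the manager itself the saved state is keyed by rule name, so whichever freshly loaded rules share a name with an old alerting rule pick up its `activeAlerts` map.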
Changes to the rules tests:

@@ -15,6 +15,7 @@ package rules
 
 import (
 	"fmt"
+	"reflect"
 	"strings"
 	"testing"
 	"time"
@@ -22,67 +23,112 @@ import (
 	clientmodel "github.com/prometheus/client_golang/model"
 
 	"github.com/prometheus/prometheus/promql"
-	"github.com/prometheus/prometheus/storage/local"
-	"github.com/prometheus/prometheus/storage/metric"
 )
 
-var (
-	testSampleInterval = time.Duration(5) * time.Minute
-	testStartTime      = clientmodel.Timestamp(0)
-)
-
-func getTestValueStream(startVal clientmodel.SampleValue, endVal clientmodel.SampleValue, stepVal clientmodel.SampleValue, startTime clientmodel.Timestamp) (resultValues metric.Values) {
-	currentTime := startTime
-	for currentVal := startVal; currentVal <= endVal; currentVal += stepVal {
-		sample := metric.SamplePair{
-			Value:     currentVal,
-			Timestamp: currentTime,
-		}
-		resultValues = append(resultValues, sample)
-		currentTime = currentTime.Add(testSampleInterval)
-	}
-	return resultValues
-}
-
-func getTestVectorFromTestMatrix(matrix promql.Matrix) promql.Vector {
-	vector := promql.Vector{}
-	for _, sampleStream := range matrix {
-		lastSample := sampleStream.Values[len(sampleStream.Values)-1]
-		vector = append(vector, &promql.Sample{
-			Metric:    sampleStream.Metric,
-			Value:     lastSample.Value,
-			Timestamp: lastSample.Timestamp,
-		})
-	}
-	return vector
-}
-
-func storeMatrix(storage local.Storage, matrix promql.Matrix) {
-	pendingSamples := clientmodel.Samples{}
-	for _, sampleStream := range matrix {
-		for _, sample := range sampleStream.Values {
-			pendingSamples = append(pendingSamples, &clientmodel.Sample{
-				Metric:    sampleStream.Metric.Metric,
-				Value:     sample.Value,
-				Timestamp: sample.Timestamp,
-			})
-		}
-	}
-	for _, s := range pendingSamples {
-		storage.Append(s)
-	}
-	storage.WaitForIndexing()
-}
-
-func vectorComparisonString(expected []string, actual []string) string {
-	separator := "\n--------------\n"
-	return fmt.Sprintf("Expected:%v%v%v\nActual:%v%v%v ",
-		separator,
-		strings.Join(expected, "\n"),
-		separator,
-		separator,
-		strings.Join(actual, "\n"),
-		separator)
-}
+func TestAlertingRule(t *testing.T) {
+	suite, err := promql.NewTest(t, `
+		load 5m
+			http_requests{job="api-server", instance="0", group="production"}	0+10x10
+			http_requests{job="api-server", instance="1", group="production"}	0+20x10
+			http_requests{job="api-server", instance="0", group="canary"}		0+30x10
+			http_requests{job="api-server", instance="1", group="canary"}		0+40x10
+			http_requests{job="app-server", instance="0", group="production"}	0+50x10
+			http_requests{job="app-server", instance="1", group="production"}	0+60x10
+			http_requests{job="app-server", instance="0", group="canary"}		0+70x10
+			http_requests{job="app-server", instance="1", group="canary"}		0+80x10
+	`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer suite.Close()
+
+	if err := suite.Run(); err != nil {
+		t.Fatal(err)
+	}
+
+	expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`)
+	if err != nil {
+		t.Fatalf("Unable to parse alert expression: %s", err)
+	}
+
+	rule := NewAlertingRule(
+		"HTTPRequestRateLow",
+		expr,
+		time.Minute,
+		clientmodel.LabelSet{"severity": "critical"},
+		"summary", "description", "runbook",
+	)
+
+	var tests = []struct {
+		time   time.Duration
+		result []string
+	}{
+		{
+			time: 0,
+			result: []string{
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
+			},
+		}, {
+			time: 5 * time.Minute,
+			result: []string{
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
+			},
+		}, {
+			time: 10 * time.Minute,
+			result: []string{
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
+			},
+		},
+		{
+			time:   15 * time.Minute,
+			result: nil,
+		},
+		{
+			time:   20 * time.Minute,
+			result: nil,
+		},
+	}
+
+	for i, test := range tests {
+		evalTime := clientmodel.Timestamp(0).Add(test.time)
+
+		res, err := rule.eval(evalTime, suite.QueryEngine())
+		if err != nil {
+			t.Fatalf("Error during alerting rule evaluation: %s", err)
+		}
+
+		actual := strings.Split(res.String(), "\n")
+		expected := annotateWithTime(test.result, evalTime)
+		if actual[0] == "" {
+			actual = []string{}
+		}
+
+		if len(actual) != len(expected) {
+			t.Errorf("%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(expected), len(actual))
+		}
+
+		for j, expectedSample := range expected {
+			found := false
+			for _, actualSample := range actual {
+				if actualSample == expectedSample {
+					found = true
+				}
+			}
+			if !found {
+				t.Errorf("%d.%d. Couldn't find expected sample in output: '%v'", i, j, expectedSample)
+			}
+		}
+
+		if t.Failed() {
+			t.Errorf("%d. Expected and actual outputs don't match:", i)
+			t.Fatalf("Expected:\n%v\n----\nActual:\n%v", strings.Join(expected, "\n"), strings.Join(actual, "\n"))
+		}
+	}
+}
 
 func annotateWithTime(lines []string, timestamp clientmodel.Timestamp) []string {
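The expected outputs in the test table follow directly from the data loaded into the promql test suite. Reading the series notation as a start value plus a fixed increment per 5m load step (my reading of `0+70x10` and `0+80x10`; the exact expansion is defined by the promql test language, not restated here), the two app-server canary series are below the `< 100` threshold at 0m and 5m and above it by 10m, which is why the alerts show up as pending, then firing, then drop to 0. A small sketch of that arithmetic, with an illustrative `valueAt` helper:

```go
package main

import (
	"fmt"
	"time"
)

// valueAt models the series notation "start+stepxN" as: start plus step per
// elapsed load interval (5m in the test above). This is an illustration of
// how the loaded values line up with the alert threshold, not promql API.
func valueAt(start, step float64, loadInterval, at time.Duration) float64 {
	return start + step*float64(at/loadInterval)
}

func main() {
	load := 5 * time.Minute
	series := []struct {
		labels      string
		start, step float64
	}{
		{`{job="app-server", instance="0", group="canary"}`, 0, 70}, // 0+70x10
		{`{job="app-server", instance="1", group="canary"}`, 0, 80}, // 0+80x10
	}
	for _, at := range []time.Duration{0, 5 * time.Minute, 10 * time.Minute} {
		for _, s := range series {
			v := valueAt(s.start, s.step, load, at)
			// The rule's condition is http_requests{...} < 100, with a 1m "for" clause.
			fmt.Printf("t=%-4v %s = %v (below threshold: %t)\n", at, s.labels, v, v < 100)
		}
	}
}
```

At 0m both series are at 0 (condition true, alerts pending), at 5m they are at 70 and 80 (still true, and the one-minute `for` duration has passed, so the alerts fire), and at 10m they are at 140 and 160, so the firing series go to 0 in the expected output.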
@@ -93,131 +139,45 @@ func annotateWithTime(lines []string, timestamp clientmodel.Timestamp) []string
 	return annotatedLines
 }
 
-var testMatrix = promql.Matrix{
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "api-server",
-				"instance":                  "0",
-				"group":                     "canary",
-			},
-		},
-		Values: getTestValueStream(0, 300, 30, testStartTime),
-	},
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "api-server",
-				"instance":                  "1",
-				"group":                     "canary",
-			},
-		},
-		Values: getTestValueStream(0, 400, 40, testStartTime),
-	},
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "app-server",
-				"instance":                  "0",
-				"group":                     "canary",
-			},
-		},
-		Values: getTestValueStream(0, 700, 70, testStartTime),
-	},
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "app-server",
-				"instance":                  "1",
-				"group":                     "canary",
-			},
-		},
-		Values: getTestValueStream(0, 800, 80, testStartTime),
-	},
-}
-
-func TestAlertingRule(t *testing.T) {
-	// Labels in expected output need to be alphabetically sorted.
-	var evalOutputs = [][]string{
-		{
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
-		},
-		{
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
-		},
-		{
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
-		},
-		{
-			/* empty */
-		},
-		{
-			/* empty */
-		},
-	}
-
-	storage, closer := local.NewTestStorage(t, 1)
-	defer closer.Close()
-
-	storeMatrix(storage, testMatrix)
-
-	engine := promql.NewEngine(storage, nil)
-	defer engine.Stop()
-
-	expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`)
-	if err != nil {
-		t.Fatalf("Unable to parse alert expression: %s", err)
-	}
-
-	alertLabels := clientmodel.LabelSet{
-		"severity": "critical",
-	}
-	rule := NewAlertingRule("HttpRequestRateLow", expr, time.Minute, alertLabels, "summary", "description", "runbook")
-
-	for i, expectedLines := range evalOutputs {
-		evalTime := testStartTime.Add(testSampleInterval * time.Duration(i))
-
-		res, err := rule.eval(evalTime, engine)
-		if err != nil {
-			t.Fatalf("Error during alerting rule evaluation: %s", err)
-		}
-
-		actualLines := strings.Split(res.String(), "\n")
-		expectedLines := annotateWithTime(expectedLines, evalTime)
-		if actualLines[0] == "" {
-			actualLines = []string{}
-		}
-
-		failed := false
-		if len(actualLines) != len(expectedLines) {
-			t.Errorf("%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(expectedLines), len(actualLines))
-			failed = true
-		}
-
-		for j, expectedSample := range expectedLines {
-			found := false
-			for _, actualSample := range actualLines {
-				if actualSample == expectedSample {
-					found = true
-				}
-			}
-			if !found {
-				t.Errorf("%d.%d. Couldn't find expected sample in output: '%v'", i, j, expectedSample)
-				failed = true
-			}
-		}
-
-		if failed {
-			t.Fatalf("%d. Expected and actual outputs don't match:\n%v", i, vectorComparisonString(expectedLines, actualLines))
-		}
-	}
-}
+func TestTransferAlertState(t *testing.T) {
+	m := NewManager(&ManagerOptions{})
+
+	alert := &Alert{
+		Name:  "testalert",
+		State: StateFiring,
+	}
+
+	arule := AlertingRule{
+		name:         "test",
+		activeAlerts: map[clientmodel.Fingerprint]*Alert{},
+	}
+	aruleCopy := arule
+
+	m.rules = append(m.rules, &arule)
+
+	// Set an alert.
+	arule.activeAlerts[0] = alert
+
+	// Save state and get the restore function.
+	restore := m.transferAlertState()
+
+	// Remove arule from the rule list and add an unrelated rule and the
+	// stateless copy of arule.
+	m.rules = []Rule{
+		&AlertingRule{
+			name:         "test_other",
+			activeAlerts: map[clientmodel.Fingerprint]*Alert{},
+		},
+		&aruleCopy,
+	}
+
+	// Apply the restore function.
+	restore()
+
+	if ar := m.rules[0].(*AlertingRule); len(ar.activeAlerts) != 0 {
+		t.Fatalf("unexpected alert for unrelated alerting rule")
+	}
+	if ar := m.rules[1].(*AlertingRule); !reflect.DeepEqual(ar.activeAlerts[0], alert) {
+		t.Fatalf("alert state was not restored")
+	}
+}
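What `TestTransferAlertState` pins down is that active alerts are carried across a rule-set swap by rule name rather than by object identity: the rules placed in `m.rules` after the swap are not the ones `transferAlertState` saw when it took its snapshot, yet after `restore()` the rule named `test` carries the alert and the unrelated `test_other` rule stays empty. A stripped-down sketch of that name-keyed hand-over, using illustrative `rule` and `swapRules` names rather than the real `Manager` API:

```go
package main

import "fmt"

type rule struct {
	name   string
	active map[int]string // stands in for activeAlerts
}

// swapRules replaces the rule set but lets same-named rules inherit the old
// state, in the spirit of Manager.transferAlertState in the diff above.
func swapRules(old, next []*rule) []*rule {
	saved := map[string]map[int]string{}
	for _, r := range old {
		saved[r.name] = r.active
	}
	for _, r := range next {
		if s, ok := saved[r.name]; ok {
			r.active = s
		}
	}
	return next
}

func main() {
	rules := []*rule{{name: "HTTPRequestRateLow", active: map[int]string{0: "firing"}}}

	// Reload with brand-new rule objects; only the name matches.
	rules = swapRules(rules, []*rule{
		{name: "SomethingElse", active: map[int]string{}},
		{name: "HTTPRequestRateLow", active: map[int]string{}},
	})

	fmt.Println(rules[1].active[0]) // "firing": the state followed the name across the swap
}
```

Keying the saved state by name is what lets a configuration reload replace every rule object without dropping alerts that are already pending or firing.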