From 0509b0f2db5d6afac2fab110755ceeb15e367982 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Tue, 12 Jul 2016 17:11:31 +0100 Subject: [PATCH] Expand alert templates at eval time. Fixes #1678 #1677 --- rules/alerting.go | 78 ++++++++++++++++++++++++++++++++--------- rules/manager.go | 54 +++------------------------- rules/manager_test.go | 8 ++--- rules/recording.go | 2 +- rules/recording_test.go | 2 +- 5 files changed, 72 insertions(+), 72 deletions(-) diff --git a/rules/alerting.go b/rules/alerting.go index bb8dade09..2930ec937 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -15,13 +15,16 @@ package rules import ( "fmt" - "html/template" "sync" "time" + html_template "html/template" + + "github.com/prometheus/common/log" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/template" "github.com/prometheus/prometheus/util/strutil" ) @@ -63,8 +66,9 @@ func (s AlertState) String() string { // Alert is the user-level representation of a single instance of an alerting rule. type Alert struct { - State AlertState - Labels model.LabelSet + State AlertState + Labels model.LabelSet + Annotations model.LabelSet // The value at the last evaluation of the alerting expression. Value model.SampleValue // The interval during which the condition of this alert held true. @@ -142,7 +146,7 @@ const resolvedRetention = 15 * time.Minute // eval evaluates the rule expression and then creates pending alerts and fires // or removes previously pending alerts accordingly. 
-func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector, error) { +func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine, externalURLPath string) (model.Vector, error) { query, err := engine.NewInstantQuery(r.vector.String(), ts) if err != nil { return nil, err @@ -160,6 +164,53 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector, resultFPs := map[model.Fingerprint]struct{}{} for _, smpl := range res { + // Provide the alert information to the template. + l := make(map[string]string, len(smpl.Metric)) + for k, v := range smpl.Metric { + l[string(k)] = string(v) + } + + tmplData := struct { + Labels map[string]string + Value float64 + }{ + Labels: l, + Value: float64(smpl.Value), + } + // Inject some convenience variables that are easier to remember for users + // who are not used to Go's templating system. + defs := "{{$labels := .Labels}}{{$value := .Value}}" + + expand := func(text model.LabelValue) model.LabelValue { + tmpl := template.NewTemplateExpander( + defs+string(text), + "__alert_"+r.Name(), + tmplData, + ts, + engine, + externalURLPath, + ) + result, err := tmpl.Expand() + if err != nil { + result = fmt.Sprintf("<error expanding alert template: %s>", err) + log.Warnf("Error expanding alert template %v with data '%v': %s", r.Name(), tmplData, err) + } + return model.LabelValue(result) + } + + labels := make(model.LabelSet, len(smpl.Metric)+len(r.labels)+1) + for ln, lv := range smpl.Metric { + labels[ln] = lv + } + for ln, lv := range r.labels { + labels[ln] = expand(lv) + } + labels[model.AlertNameLabel] = model.LabelValue(r.Name()) + + annotations := make(model.LabelSet, len(r.annotations)) + for an, av := range r.annotations { + annotations[an] = expand(av) + } fp := smpl.Metric.Fingerprint() resultFPs[fp] = struct{}{} @@ -171,10 +222,11 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector, delete(smpl.Metric, model.MetricNameLabel) r.active[fp] = &Alert{ - Labels: 
model.LabelSet(smpl.Metric), - ActiveAt: ts, - State: StatePending, - Value: smpl.Value, + Labels: labels, + Annotations: annotations, + ActiveAt: ts, + State: StatePending, + Value: smpl.Value, } } @@ -243,13 +295,7 @@ func (r *AlertingRule) currentAlerts() []*Alert { alerts := make([]*Alert, 0, len(r.active)) for _, a := range r.active { - labels := r.labels.Clone() - for ln, lv := range a.Labels { - labels[ln] = lv - } anew := *a - anew.Labels = labels - alerts = append(alerts, &anew) } return alerts @@ -273,7 +319,7 @@ func (r *AlertingRule) String() string { // HTMLSnippet returns an HTML snippet representing this alerting rule. The // resulting snippet is expected to be presented in a
 <pre> element, so that
 // line breaks and other returned whitespace is respected.
-func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
+func (r *AlertingRule) HTMLSnippet(pathPrefix string) html_template.HTML {
 	alertMetric := model.Metric{
 		model.MetricNameLabel: alertMetricName,
 		alertNameLabel:        model.LabelValue(r.name),
@@ -289,5 +335,5 @@ func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
 	if len(r.annotations) > 0 {
 		s += fmt.Sprintf("\n  ANNOTATIONS %s", r.annotations)
 	}
-	return template.HTML(s)
+	return html_template.HTML(s)
 }
diff --git a/rules/manager.go b/rules/manager.go
index 025c53d7e..75977dd1e 100644
--- a/rules/manager.go
+++ b/rules/manager.go
@@ -32,7 +32,6 @@ import (
 	"github.com/prometheus/prometheus/promql"
 	"github.com/prometheus/prometheus/storage"
 	"github.com/prometheus/prometheus/storage/local"
-	"github.com/prometheus/prometheus/template"
 	"github.com/prometheus/prometheus/util/strutil"
 )
 
@@ -106,7 +105,7 @@ const (
 type Rule interface {
 	Name() string
 	// eval evaluates the rule, including any associated recording or alerting actions.
-	eval(model.Time, *promql.Engine) (model.Vector, error)
+	eval(model.Time, *promql.Engine, string) (model.Vector, error)
 	// String returns a human-readable string representation of the rule.
 	String() string
 	// HTMLSnippet returns a human-readable string representation of the rule,
@@ -257,7 +256,7 @@ func (g *Group) eval() {
 
 			evalTotal.WithLabelValues(rtyp).Inc()
 
-			vector, err := rule.eval(now, g.opts.QueryEngine)
+			vector, err := rule.eval(now, g.opts.QueryEngine, g.opts.ExternalURL.Path)
 			if err != nil {
 				// Canceled queries are intentional termination of queries. This normally
 				// happens on shutdown and thus we skip logging of any errors here.
@@ -310,55 +309,10 @@ func (g *Group) sendAlerts(rule *AlertingRule, timestamp model.Time) error {
 			continue
 		}
 
-		// Provide the alert information to the template.
-		l := make(map[string]string, len(alert.Labels))
-		for k, v := range alert.Labels {
-			l[string(k)] = string(v)
-		}
-
-		tmplData := struct {
-			Labels map[string]string
-			Value  float64
-		}{
-			Labels: l,
-			Value:  float64(alert.Value),
-		}
-		// Inject some convenience variables that are easier to remember for users
-		// who are not used to Go's templating system.
-		defs := "{{$labels := .Labels}}{{$value := .Value}}"
-
-		expand := func(text model.LabelValue) model.LabelValue {
-			tmpl := template.NewTemplateExpander(
-				defs+string(text),
-				"__alert_"+rule.Name(),
-				tmplData,
-				timestamp,
-				g.opts.QueryEngine,
-				g.opts.ExternalURL.Path,
-			)
-			result, err := tmpl.Expand()
-			if err != nil {
-				result = fmt.Sprintf("<error expanding alert template: %s>", err)
-				log.Warnf("Error expanding alert template %v with data '%v': %s", rule.Name(), tmplData, err)
-			}
-			return model.LabelValue(result)
-		}
-
-		labels := make(model.LabelSet, len(alert.Labels)+1)
-		for ln, lv := range alert.Labels {
-			labels[ln] = expand(lv)
-		}
-		labels[model.AlertNameLabel] = model.LabelValue(rule.Name())
-
-		annotations := make(model.LabelSet, len(rule.annotations))
-		for an, av := range rule.annotations {
-			annotations[an] = expand(av)
-		}
-
 		a := &model.Alert{
 			StartsAt:     alert.ActiveAt.Add(rule.holdDuration).Time(),
-			Labels:       labels,
-			Annotations:  annotations,
+			Labels:       alert.Labels,
+			Annotations:  alert.Annotations,
 			GeneratorURL: g.opts.ExternalURL.String() + strutil.GraphLinkForExpression(rule.vector.String()),
 		}
 		if alert.ResolvedAt != 0 {
diff --git a/rules/manager_test.go b/rules/manager_test.go
index 40e57203a..9c18ff53e 100644
--- a/rules/manager_test.go
+++ b/rules/manager_test.go
@@ -27,8 +27,8 @@ import (
 func TestAlertingRule(t *testing.T) {
 	suite, err := promql.NewTest(t, `
 		load 5m
-			http_requests{job="app-server", instance="0", group="canary"}	75 85  95 105 105  95  85
-			http_requests{job="app-server", instance="1", group="canary"}	80 90 100 110 120 130 140
+			http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"}	75 85  95 105 105  95  85
+			http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"}	80 90 100 110 120 130 140
 	`)
 	if err != nil {
 		t.Fatal(err)
@@ -48,7 +48,7 @@ func TestAlertingRule(t *testing.T) {
 		"HTTPRequestRateLow",
 		expr,
 		time.Minute,
-		model.LabelSet{"severity": "critical"},
+		model.LabelSet{"severity": "{{\"c\"}}ritical"},
 		model.LabelSet{},
 	)
 
@@ -105,7 +105,7 @@ func TestAlertingRule(t *testing.T) {
 	for i, test := range tests {
 		evalTime := model.Time(0).Add(test.time)
 
-		res, err := rule.eval(evalTime, suite.QueryEngine())
+		res, err := rule.eval(evalTime, suite.QueryEngine(), "")
 		if err != nil {
 			t.Fatalf("Error during alerting rule evaluation: %s", err)
 		}
diff --git a/rules/recording.go b/rules/recording.go
index 698a08323..258faf4b1 100644
--- a/rules/recording.go
+++ b/rules/recording.go
@@ -45,7 +45,7 @@ func (rule RecordingRule) Name() string {
 }
 
 // eval evaluates the rule and then overrides the metric names and labels accordingly.
-func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine) (model.Vector, error) {
+func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine, _ string) (model.Vector, error) {
 	query, err := engine.NewInstantQuery(rule.vector.String(), timestamp)
 	if err != nil {
 		return nil, err
diff --git a/rules/recording_test.go b/rules/recording_test.go
index 310df2fe4..7dd32392c 100644
--- a/rules/recording_test.go
+++ b/rules/recording_test.go
@@ -59,7 +59,7 @@ func TestRuleEval(t *testing.T) {
 
 	for _, test := range suite {
 		rule := NewRecordingRule(test.name, test.expr, test.labels)
-		result, err := rule.eval(now, engine)
+		result, err := rule.eval(now, engine, "")
 		if err != nil {
 			t.Fatalf("Error evaluating %s", test.name)
 		}