diff --git a/rules/alerting.go b/rules/alerting.go index d7cf5545e..54514db9b 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -16,7 +16,6 @@ package rules import ( "fmt" "html/template" - "reflect" "sync" "time" @@ -28,12 +27,12 @@ import ( const ( // AlertMetricName is the metric name for synthetic alert timeseries. - AlertMetricName clientmodel.LabelValue = "ALERTS" + alertMetricName clientmodel.LabelValue = "ALERTS" // AlertNameLabel is the label name indicating the name of an alert. - AlertNameLabel clientmodel.LabelName = "alertname" + alertNameLabel clientmodel.LabelName = "alertname" // AlertStateLabel is the label name indicating the state of an alert. - AlertStateLabel clientmodel.LabelName = "alertstate" + alertStateLabel clientmodel.LabelName = "alertstate" ) // AlertState denotes the state of an active alert. @@ -41,11 +40,11 @@ type AlertState int func (s AlertState) String() string { switch s { - case Inactive: + case StateInactive: return "inactive" - case Pending: + case StatePending: return "pending" - case Firing: + case StateFiring: return "firing" default: panic("undefined") @@ -54,13 +53,13 @@ func (s AlertState) String() string { const ( // Inactive alerts are neither firing nor pending. - Inactive AlertState = iota + StateInactive AlertState = iota // Pending alerts have been active for less than the configured // threshold duration. - Pending + StatePending // Firing alerts have been active for longer than the configured // threshold duration. - Firing + StateFiring ) // Alert is used to track active (pending/firing) alerts over time. 
@@ -84,9 +83,9 @@ func (a Alert) sample(timestamp clientmodel.Timestamp, value clientmodel.SampleV recordedMetric[label] = value } - recordedMetric[clientmodel.MetricNameLabel] = AlertMetricName - recordedMetric[AlertNameLabel] = clientmodel.LabelValue(a.Name) - recordedMetric[AlertStateLabel] = clientmodel.LabelValue(a.State.String()) + recordedMetric[clientmodel.MetricNameLabel] = alertMetricName + recordedMetric[alertNameLabel] = clientmodel.LabelValue(a.Name) + recordedMetric[alertStateLabel] = clientmodel.LabelValue(a.State.String()) return &promql.Sample{ Metric: clientmodel.COWMetric{ @@ -103,16 +102,16 @@ type AlertingRule struct { // The name of the alert. name string // The vector expression from which to generate alerts. - Vector promql.Expr + vector promql.Expr // The duration for which a labelset needs to persist in the expression // output vector before an alert transitions from Pending to Firing state. holdDuration time.Duration // Extra labels to attach to the resulting alert sample vectors. - Labels clientmodel.LabelSet + labels clientmodel.LabelSet // Short alert summary, suitable for email subjects. - Summary string + summary string // More detailed alert description. - Description string + description string // Protects the below. mutex sync.Mutex @@ -121,24 +120,40 @@ type AlertingRule struct { activeAlerts map[clientmodel.Fingerprint]*Alert } +// NewAlertingRule constructs a new AlertingRule. +func NewAlertingRule( + name string, + vector promql.Expr, + holdDuration time.Duration, + labels clientmodel.LabelSet, + summary string, + description string, +) *AlertingRule { + return &AlertingRule{ + name: name, + vector: vector, + holdDuration: holdDuration, + labels: labels, + summary: summary, + description: description, + + activeAlerts: map[clientmodel.Fingerprint]*Alert{}, + } +} + // Name returns the name of the alert. 
func (rule *AlertingRule) Name() string { return rule.name } -// EvalRaw returns the raw value of the rule expression, without creating alerts. -func (rule *AlertingRule) EvalRaw(timestamp clientmodel.Timestamp, engine *promql.Engine) (promql.Vector, error) { - query, err := engine.NewInstantQuery(rule.Vector.String(), timestamp) +// eval evaluates the rule expression and then creates pending alerts and fires +// or removes previously pending alerts accordingly. +func (rule *AlertingRule) eval(timestamp clientmodel.Timestamp, engine *promql.Engine) (promql.Vector, error) { + query, err := engine.NewInstantQuery(rule.vector.String(), timestamp) if err != nil { return nil, err } - return query.Exec().Vector() -} - -// Eval evaluates the rule expression and then creates pending alerts and fires -// or removes previously pending alerts accordingly. -func (rule *AlertingRule) Eval(timestamp clientmodel.Timestamp, engine *promql.Engine) (promql.Vector, error) { - exprResult, err := rule.EvalRaw(timestamp, engine) + exprResult, err := query.Exec().Vector() if err != nil { return nil, err } @@ -156,14 +171,14 @@ func (rule *AlertingRule) Eval(timestamp clientmodel.Timestamp, engine *promql.E if alert, ok := rule.activeAlerts[fp]; !ok { labels := clientmodel.LabelSet{} labels.MergeFromMetric(sample.Metric.Metric) - labels = labels.Merge(rule.Labels) + labels = labels.Merge(rule.labels) if _, ok := labels[clientmodel.MetricNameLabel]; ok { delete(labels, clientmodel.MetricNameLabel) } rule.activeAlerts[fp] = &Alert{ Name: rule.name, Labels: labels, - State: Pending, + State: StatePending, ActiveSince: timestamp, Value: sample.Value, } @@ -182,9 +197,9 @@ func (rule *AlertingRule) Eval(timestamp clientmodel.Timestamp, engine *promql.E continue } - if activeAlert.State == Pending && timestamp.Sub(activeAlert.ActiveSince) >= rule.holdDuration { + if activeAlert.State == StatePending && timestamp.Sub(activeAlert.ActiveSince) >= rule.holdDuration { vector = append(vector, 
activeAlert.sample(timestamp, 0)) - activeAlert.State = Firing + activeAlert.State = StateFiring } vector = append(vector, activeAlert.sample(timestamp, 1)) @@ -193,39 +208,24 @@ func (rule *AlertingRule) Eval(timestamp clientmodel.Timestamp, engine *promql.E return vector, nil } -// DotGraph returns the text representation of a dot graph. -func (rule *AlertingRule) DotGraph() string { - graph := fmt.Sprintf( - `digraph "Rules" { - %#p[shape="box",label="ALERT %s IF FOR %s"]; - %#p -> %x; - %s - }`, - &rule, rule.name, strutil.DurationToString(rule.holdDuration), - &rule, reflect.ValueOf(rule.Vector).Pointer(), - rule.Vector.DotGraph(), - ) - return graph -} - func (rule *AlertingRule) String() string { - return fmt.Sprintf("ALERT %s IF %s FOR %s WITH %s", rule.name, rule.Vector, strutil.DurationToString(rule.holdDuration), rule.Labels) + return fmt.Sprintf("ALERT %s IF %s FOR %s WITH %s", rule.name, rule.vector, strutil.DurationToString(rule.holdDuration), rule.labels) } // HTMLSnippet returns an HTML snippet representing this alerting rule. func (rule *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML { alertMetric := clientmodel.Metric{ - clientmodel.MetricNameLabel: AlertMetricName, - AlertNameLabel: clientmodel.LabelValue(rule.name), + clientmodel.MetricNameLabel: alertMetricName, + alertNameLabel: clientmodel.LabelValue(rule.name), } return template.HTML(fmt.Sprintf( `ALERT <a href="%s">%s</a> IF <a href="%s">%s</a> FOR %s WITH %s`, pathPrefix+strutil.GraphLinkForExpression(alertMetric.String()), rule.name, - pathPrefix+strutil.GraphLinkForExpression(rule.Vector.String()), - rule.Vector, + pathPrefix+strutil.GraphLinkForExpression(rule.vector.String()), + rule.vector, strutil.DurationToString(rule.holdDuration), - rule.Labels)) + rule.labels)) } // State returns the "maximum" state: firing > pending > inactive. 
@@ -233,7 +233,7 @@ func (rule *AlertingRule) State() AlertState { rule.mutex.Lock() defer rule.mutex.Unlock() - maxState := Inactive + maxState := StateInactive for _, activeAlert := range rule.activeAlerts { if activeAlert.State > maxState { maxState = activeAlert.State @@ -253,17 +253,3 @@ func (rule *AlertingRule) ActiveAlerts() []Alert { } return alerts } - -// NewAlertingRule constructs a new AlertingRule. -func NewAlertingRule(name string, vector promql.Expr, holdDuration time.Duration, labels clientmodel.LabelSet, summary string, description string) *AlertingRule { - return &AlertingRule{ - name: name, - Vector: vector, - holdDuration: holdDuration, - Labels: labels, - Summary: summary, - Description: description, - - activeAlerts: map[clientmodel.Fingerprint]*Alert{}, - } -} diff --git a/rules/manager.go b/rules/manager.go index 13b137cee..521fa16b0 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -39,8 +39,8 @@ const ( namespace = "prometheus" ruleTypeLabel = "rule_type" - alertingRuleType = "alerting" - recordingRuleType = "recording" + ruleTypeAlerting = "alerting" + ruleTypeRecording = "recording" ) var ( @@ -173,7 +173,7 @@ func (m *Manager) queueAlertNotifications(rule *AlertingRule, timestamp clientmo notifications := make(notification.NotificationReqs, 0, len(activeAlerts)) for _, aa := range activeAlerts { - if aa.State != Firing { + if aa.State != StateFiring { // BUG: In the future, make AlertManager support pending alerts? 
continue } @@ -205,15 +205,15 @@ func (m *Manager) queueAlertNotifications(rule *AlertingRule, timestamp clientmo } notifications = append(notifications, &notification.NotificationReq{ - Summary: expand(rule.Summary), - Description: expand(rule.Description), + Summary: expand(rule.summary), + Description: expand(rule.description), Labels: aa.Labels.Merge(clientmodel.LabelSet{ - AlertNameLabel: clientmodel.LabelValue(rule.Name()), + alertNameLabel: clientmodel.LabelValue(rule.Name()), }), Value: aa.Value, ActiveSince: aa.ActiveSince.Time(), RuleString: rule.String(), - GeneratorURL: m.prometheusURL + strings.TrimLeft(strutil.GraphLinkForExpression(rule.Vector.String()), "/"), + GeneratorURL: m.prometheusURL + strings.TrimLeft(strutil.GraphLinkForExpression(rule.vector.String()), "/"), }) } m.notificationHandler.SubmitReqs(notifications) @@ -235,7 +235,7 @@ func (m *Manager) runIteration() { defer wg.Done() start := time.Now() - vector, err := rule.Eval(now, m.queryEngine) + vector, err := rule.eval(now, m.queryEngine) duration := time.Since(start) if err != nil { @@ -247,11 +247,11 @@ func (m *Manager) runIteration() { switch r := rule.(type) { case *AlertingRule: m.queueAlertNotifications(r, now) - evalDuration.WithLabelValues(alertingRuleType).Observe( + evalDuration.WithLabelValues(ruleTypeAlerting).Observe( float64(duration / time.Millisecond), ) case *RecordingRule: - evalDuration.WithLabelValues(recordingRuleType).Observe( + evalDuration.WithLabelValues(ruleTypeRecording).Observe( float64(duration / time.Millisecond), ) default: @@ -319,7 +319,7 @@ func (m *Manager) loadRuleFiles(filenames ...string) error { rule := NewAlertingRule(r.Name, r.Expr, r.Duration, r.Labels, r.Summary, r.Description) m.rules = append(m.rules, rule) case *promql.RecordStmt: - rule := &RecordingRule{r.Name, r.Expr, r.Labels} + rule := NewRecordingRule(r.Name, r.Expr, r.Labels) m.rules = append(m.rules, rule) default: panic("retrieval.Manager.LoadRuleFiles: unknown statement type") diff 
--git a/rules/recording.go b/rules/recording.go index 95bb3d262..4624e2941 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -16,7 +16,6 @@ package rules import ( "fmt" "html/template" - "reflect" clientmodel "github.com/prometheus/client_golang/model" @@ -31,21 +30,25 @@ type RecordingRule struct { labels clientmodel.LabelSet } +// NewRecordingRule returns a new recording rule. +func NewRecordingRule(name string, vector promql.Expr, labels clientmodel.LabelSet) *RecordingRule { + return &RecordingRule{ + name: name, + vector: vector, + labels: labels, + } +} + // Name returns the rule name. func (rule RecordingRule) Name() string { return rule.name } -// EvalRaw returns the raw value of the rule expression. -func (rule RecordingRule) EvalRaw(timestamp clientmodel.Timestamp, engine *promql.Engine) (promql.Vector, error) { +// eval evaluates the rule and then overrides the metric names and labels accordingly. +func (rule RecordingRule) eval(timestamp clientmodel.Timestamp, engine *promql.Engine) (promql.Vector, error) { query, err := engine.NewInstantQuery(rule.vector.String(), timestamp) if err != nil { return nil, err } - return query.Exec().Vector() -} - -// Eval evaluates the rule and then overrides the metric names and labels accordingly. -func (rule RecordingRule) Eval(timestamp clientmodel.Timestamp, engine *promql.Engine) (promql.Vector, error) { - vector, err := rule.EvalRaw(timestamp, engine) + vector, err := query.Exec().Vector() if err != nil { return nil, err } @@ -65,21 +68,6 @@ func (rule RecordingRule) Eval(timestamp clientmodel.Timestamp, engine *promql.E return vector, nil } -// DotGraph returns the text representation of a dot graph. 
-func (rule RecordingRule) DotGraph() string { - graph := fmt.Sprintf( - `digraph "Rules" { - %#p[shape="box",label="%s = "]; - %#p -> %x; - %s - }`, - &rule, rule.name, - &rule, reflect.ValueOf(rule.vector).Pointer(), - rule.vector.DotGraph(), - ) - return graph -} - func (rule RecordingRule) String() string { return fmt.Sprintf("%s%s = %s\n", rule.name, rule.labels, rule.vector) } diff --git a/rules/rules.go b/rules/rules.go index 2af1d5c61..5d3a9cc6d 100644 --- a/rules/rules.go +++ b/rules/rules.go @@ -26,13 +26,8 @@ import ( type Rule interface { // Name returns the name of the rule. Name() string - // EvalRaw evaluates the rule's vector expression without triggering any - // other actions, like recording or alerting. - EvalRaw(clientmodel.Timestamp, *promql.Engine) (promql.Vector, error) // Eval evaluates the rule, including any associated recording or alerting actions. - Eval(clientmodel.Timestamp, *promql.Engine) (promql.Vector, error) - // DotGraph returns a Graphviz dot graph of the rule. - DotGraph() string + eval(clientmodel.Timestamp, *promql.Engine) (promql.Vector, error) // String returns a human-readable string representation of the rule. 
String() string // HTMLSnippet returns a human-readable string representation of the rule, diff --git a/rules/rules_test.go b/rules/rules_test.go index 9511f9682..6869b1651 100644 --- a/rules/rules_test.go +++ b/rules/rules_test.go @@ -186,7 +186,7 @@ func TestAlertingRule(t *testing.T) { for i, expectedLines := range evalOutputs { evalTime := testStartTime.Add(testSampleInterval * time.Duration(i)) - res, err := rule.Eval(evalTime, engine) + res, err := rule.eval(evalTime, engine) if err != nil { t.Fatalf("Error during alerting rule evaluation: %s", err) } diff --git a/web/alerts.go b/web/alerts.go index 0e02965f2..1c9dfa4d4 100644 --- a/web/alerts.go +++ b/web/alerts.go @@ -63,9 +63,9 @@ func (h *AlertsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { alertStatus := AlertStatus{ AlertingRules: alertsSorter.alerts, AlertStateToRowClass: map[rules.AlertState]string{ - rules.Inactive: "success", - rules.Pending: "warning", - rules.Firing: "danger", + rules.StateInactive: "success", + rules.StatePending: "warning", + rules.StateFiring: "danger", }, } executeTemplate(w, "alerts", alertStatus, h.PathPrefix)