mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-24 21:24:05 -08:00
parent
f8bb0ee91f
commit
0509b0f2db
|
@ -15,13 +15,16 @@ package rules
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"html/template"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
html_template "html/template"
|
||||||
|
|
||||||
|
"github.com/prometheus/common/log"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/promql"
|
"github.com/prometheus/prometheus/promql"
|
||||||
|
"github.com/prometheus/prometheus/template"
|
||||||
"github.com/prometheus/prometheus/util/strutil"
|
"github.com/prometheus/prometheus/util/strutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -63,8 +66,9 @@ func (s AlertState) String() string {
|
||||||
|
|
||||||
// Alert is the user-level representation of a single instance of an alerting rule.
|
// Alert is the user-level representation of a single instance of an alerting rule.
|
||||||
type Alert struct {
|
type Alert struct {
|
||||||
State AlertState
|
State AlertState
|
||||||
Labels model.LabelSet
|
Labels model.LabelSet
|
||||||
|
Annotations model.LabelSet
|
||||||
// The value at the last evaluation of the alerting expression.
|
// The value at the last evaluation of the alerting expression.
|
||||||
Value model.SampleValue
|
Value model.SampleValue
|
||||||
// The interval during which the condition of this alert held true.
|
// The interval during which the condition of this alert held true.
|
||||||
|
@ -142,7 +146,7 @@ const resolvedRetention = 15 * time.Minute
|
||||||
|
|
||||||
// eval evaluates the rule expression and then creates pending alerts and fires
|
// eval evaluates the rule expression and then creates pending alerts and fires
|
||||||
// or removes previously pending alerts accordingly.
|
// or removes previously pending alerts accordingly.
|
||||||
func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector, error) {
|
func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine, externalURLPath string) (model.Vector, error) {
|
||||||
query, err := engine.NewInstantQuery(r.vector.String(), ts)
|
query, err := engine.NewInstantQuery(r.vector.String(), ts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -160,6 +164,53 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
|
||||||
resultFPs := map[model.Fingerprint]struct{}{}
|
resultFPs := map[model.Fingerprint]struct{}{}
|
||||||
|
|
||||||
for _, smpl := range res {
|
for _, smpl := range res {
|
||||||
|
// Provide the alert information to the template.
|
||||||
|
l := make(map[string]string, len(smpl.Metric))
|
||||||
|
for k, v := range smpl.Metric {
|
||||||
|
l[string(k)] = string(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
tmplData := struct {
|
||||||
|
Labels map[string]string
|
||||||
|
Value float64
|
||||||
|
}{
|
||||||
|
Labels: l,
|
||||||
|
Value: float64(smpl.Value),
|
||||||
|
}
|
||||||
|
// Inject some convenience variables that are easier to remember for users
|
||||||
|
// who are not used to Go's templating system.
|
||||||
|
defs := "{{$labels := .Labels}}{{$value := .Value}}"
|
||||||
|
|
||||||
|
expand := func(text model.LabelValue) model.LabelValue {
|
||||||
|
tmpl := template.NewTemplateExpander(
|
||||||
|
defs+string(text),
|
||||||
|
"__alert_"+r.Name(),
|
||||||
|
tmplData,
|
||||||
|
ts,
|
||||||
|
engine,
|
||||||
|
externalURLPath,
|
||||||
|
)
|
||||||
|
result, err := tmpl.Expand()
|
||||||
|
if err != nil {
|
||||||
|
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||||
|
log.Warnf("Error expanding alert template %v with data '%v': %s", r.Name(), tmplData, err)
|
||||||
|
}
|
||||||
|
return model.LabelValue(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
labels := make(model.LabelSet, len(smpl.Metric)+len(r.labels)+1)
|
||||||
|
for ln, lv := range smpl.Metric {
|
||||||
|
labels[ln] = lv
|
||||||
|
}
|
||||||
|
for ln, lv := range r.labels {
|
||||||
|
labels[ln] = expand(lv)
|
||||||
|
}
|
||||||
|
labels[model.AlertNameLabel] = model.LabelValue(r.Name())
|
||||||
|
|
||||||
|
annotations := make(model.LabelSet, len(r.annotations))
|
||||||
|
for an, av := range r.annotations {
|
||||||
|
annotations[an] = expand(av)
|
||||||
|
}
|
||||||
fp := smpl.Metric.Fingerprint()
|
fp := smpl.Metric.Fingerprint()
|
||||||
resultFPs[fp] = struct{}{}
|
resultFPs[fp] = struct{}{}
|
||||||
|
|
||||||
|
@ -171,10 +222,11 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
|
||||||
delete(smpl.Metric, model.MetricNameLabel)
|
delete(smpl.Metric, model.MetricNameLabel)
|
||||||
|
|
||||||
r.active[fp] = &Alert{
|
r.active[fp] = &Alert{
|
||||||
Labels: model.LabelSet(smpl.Metric),
|
Labels: labels,
|
||||||
ActiveAt: ts,
|
Annotations: annotations,
|
||||||
State: StatePending,
|
ActiveAt: ts,
|
||||||
Value: smpl.Value,
|
State: StatePending,
|
||||||
|
Value: smpl.Value,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,13 +295,7 @@ func (r *AlertingRule) currentAlerts() []*Alert {
|
||||||
alerts := make([]*Alert, 0, len(r.active))
|
alerts := make([]*Alert, 0, len(r.active))
|
||||||
|
|
||||||
for _, a := range r.active {
|
for _, a := range r.active {
|
||||||
labels := r.labels.Clone()
|
|
||||||
for ln, lv := range a.Labels {
|
|
||||||
labels[ln] = lv
|
|
||||||
}
|
|
||||||
anew := *a
|
anew := *a
|
||||||
anew.Labels = labels
|
|
||||||
|
|
||||||
alerts = append(alerts, &anew)
|
alerts = append(alerts, &anew)
|
||||||
}
|
}
|
||||||
return alerts
|
return alerts
|
||||||
|
@ -273,7 +319,7 @@ func (r *AlertingRule) String() string {
|
||||||
// HTMLSnippet returns an HTML snippet representing this alerting rule. The
|
// HTMLSnippet returns an HTML snippet representing this alerting rule. The
|
||||||
// resulting snippet is expected to be presented in a <pre> element, so that
|
// resulting snippet is expected to be presented in a <pre> element, so that
|
||||||
// line breaks and other returned whitespace is respected.
|
// line breaks and other returned whitespace is respected.
|
||||||
func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
|
func (r *AlertingRule) HTMLSnippet(pathPrefix string) html_template.HTML {
|
||||||
alertMetric := model.Metric{
|
alertMetric := model.Metric{
|
||||||
model.MetricNameLabel: alertMetricName,
|
model.MetricNameLabel: alertMetricName,
|
||||||
alertNameLabel: model.LabelValue(r.name),
|
alertNameLabel: model.LabelValue(r.name),
|
||||||
|
@ -289,5 +335,5 @@ func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
|
||||||
if len(r.annotations) > 0 {
|
if len(r.annotations) > 0 {
|
||||||
s += fmt.Sprintf("\n ANNOTATIONS %s", r.annotations)
|
s += fmt.Sprintf("\n ANNOTATIONS %s", r.annotations)
|
||||||
}
|
}
|
||||||
return template.HTML(s)
|
return html_template.HTML(s)
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,6 @@ import (
|
||||||
"github.com/prometheus/prometheus/promql"
|
"github.com/prometheus/prometheus/promql"
|
||||||
"github.com/prometheus/prometheus/storage"
|
"github.com/prometheus/prometheus/storage"
|
||||||
"github.com/prometheus/prometheus/storage/local"
|
"github.com/prometheus/prometheus/storage/local"
|
||||||
"github.com/prometheus/prometheus/template"
|
|
||||||
"github.com/prometheus/prometheus/util/strutil"
|
"github.com/prometheus/prometheus/util/strutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -106,7 +105,7 @@ const (
|
||||||
type Rule interface {
|
type Rule interface {
|
||||||
Name() string
|
Name() string
|
||||||
// eval evaluates the rule, including any associated recording or alerting actions.
|
// eval evaluates the rule, including any associated recording or alerting actions.
|
||||||
eval(model.Time, *promql.Engine) (model.Vector, error)
|
eval(model.Time, *promql.Engine, string) (model.Vector, error)
|
||||||
// String returns a human-readable string representation of the rule.
|
// String returns a human-readable string representation of the rule.
|
||||||
String() string
|
String() string
|
||||||
// HTMLSnippet returns a human-readable string representation of the rule,
|
// HTMLSnippet returns a human-readable string representation of the rule,
|
||||||
|
@ -257,7 +256,7 @@ func (g *Group) eval() {
|
||||||
|
|
||||||
evalTotal.WithLabelValues(rtyp).Inc()
|
evalTotal.WithLabelValues(rtyp).Inc()
|
||||||
|
|
||||||
vector, err := rule.eval(now, g.opts.QueryEngine)
|
vector, err := rule.eval(now, g.opts.QueryEngine, g.opts.ExternalURL.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Canceled queries are intentional termination of queries. This normally
|
// Canceled queries are intentional termination of queries. This normally
|
||||||
// happens on shutdown and thus we skip logging of any errors here.
|
// happens on shutdown and thus we skip logging of any errors here.
|
||||||
|
@ -310,55 +309,10 @@ func (g *Group) sendAlerts(rule *AlertingRule, timestamp model.Time) error {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Provide the alert information to the template.
|
|
||||||
l := make(map[string]string, len(alert.Labels))
|
|
||||||
for k, v := range alert.Labels {
|
|
||||||
l[string(k)] = string(v)
|
|
||||||
}
|
|
||||||
|
|
||||||
tmplData := struct {
|
|
||||||
Labels map[string]string
|
|
||||||
Value float64
|
|
||||||
}{
|
|
||||||
Labels: l,
|
|
||||||
Value: float64(alert.Value),
|
|
||||||
}
|
|
||||||
// Inject some convenience variables that are easier to remember for users
|
|
||||||
// who are not used to Go's templating system.
|
|
||||||
defs := "{{$labels := .Labels}}{{$value := .Value}}"
|
|
||||||
|
|
||||||
expand := func(text model.LabelValue) model.LabelValue {
|
|
||||||
tmpl := template.NewTemplateExpander(
|
|
||||||
defs+string(text),
|
|
||||||
"__alert_"+rule.Name(),
|
|
||||||
tmplData,
|
|
||||||
timestamp,
|
|
||||||
g.opts.QueryEngine,
|
|
||||||
g.opts.ExternalURL.Path,
|
|
||||||
)
|
|
||||||
result, err := tmpl.Expand()
|
|
||||||
if err != nil {
|
|
||||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
|
||||||
log.Warnf("Error expanding alert template %v with data '%v': %s", rule.Name(), tmplData, err)
|
|
||||||
}
|
|
||||||
return model.LabelValue(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
labels := make(model.LabelSet, len(alert.Labels)+1)
|
|
||||||
for ln, lv := range alert.Labels {
|
|
||||||
labels[ln] = expand(lv)
|
|
||||||
}
|
|
||||||
labels[model.AlertNameLabel] = model.LabelValue(rule.Name())
|
|
||||||
|
|
||||||
annotations := make(model.LabelSet, len(rule.annotations))
|
|
||||||
for an, av := range rule.annotations {
|
|
||||||
annotations[an] = expand(av)
|
|
||||||
}
|
|
||||||
|
|
||||||
a := &model.Alert{
|
a := &model.Alert{
|
||||||
StartsAt: alert.ActiveAt.Add(rule.holdDuration).Time(),
|
StartsAt: alert.ActiveAt.Add(rule.holdDuration).Time(),
|
||||||
Labels: labels,
|
Labels: alert.Labels,
|
||||||
Annotations: annotations,
|
Annotations: alert.Annotations,
|
||||||
GeneratorURL: g.opts.ExternalURL.String() + strutil.GraphLinkForExpression(rule.vector.String()),
|
GeneratorURL: g.opts.ExternalURL.String() + strutil.GraphLinkForExpression(rule.vector.String()),
|
||||||
}
|
}
|
||||||
if alert.ResolvedAt != 0 {
|
if alert.ResolvedAt != 0 {
|
||||||
|
|
|
@ -27,8 +27,8 @@ import (
|
||||||
func TestAlertingRule(t *testing.T) {
|
func TestAlertingRule(t *testing.T) {
|
||||||
suite, err := promql.NewTest(t, `
|
suite, err := promql.NewTest(t, `
|
||||||
load 5m
|
load 5m
|
||||||
http_requests{job="app-server", instance="0", group="canary"} 75 85 95 105 105 95 85
|
http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85
|
||||||
http_requests{job="app-server", instance="1", group="canary"} 80 90 100 110 120 130 140
|
http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140
|
||||||
`)
|
`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
@ -48,7 +48,7 @@ func TestAlertingRule(t *testing.T) {
|
||||||
"HTTPRequestRateLow",
|
"HTTPRequestRateLow",
|
||||||
expr,
|
expr,
|
||||||
time.Minute,
|
time.Minute,
|
||||||
model.LabelSet{"severity": "critical"},
|
model.LabelSet{"severity": "{{\"c\"}}ritical"},
|
||||||
model.LabelSet{},
|
model.LabelSet{},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@ func TestAlertingRule(t *testing.T) {
|
||||||
for i, test := range tests {
|
for i, test := range tests {
|
||||||
evalTime := model.Time(0).Add(test.time)
|
evalTime := model.Time(0).Add(test.time)
|
||||||
|
|
||||||
res, err := rule.eval(evalTime, suite.QueryEngine())
|
res, err := rule.eval(evalTime, suite.QueryEngine(), "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error during alerting rule evaluation: %s", err)
|
t.Fatalf("Error during alerting rule evaluation: %s", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,7 +45,7 @@ func (rule RecordingRule) Name() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
// eval evaluates the rule and then overrides the metric names and labels accordingly.
|
// eval evaluates the rule and then overrides the metric names and labels accordingly.
|
||||||
func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine) (model.Vector, error) {
|
func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine, _ string) (model.Vector, error) {
|
||||||
query, err := engine.NewInstantQuery(rule.vector.String(), timestamp)
|
query, err := engine.NewInstantQuery(rule.vector.String(), timestamp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
|
@ -59,7 +59,7 @@ func TestRuleEval(t *testing.T) {
|
||||||
|
|
||||||
for _, test := range suite {
|
for _, test := range suite {
|
||||||
rule := NewRecordingRule(test.name, test.expr, test.labels)
|
rule := NewRecordingRule(test.name, test.expr, test.labels)
|
||||||
result, err := rule.eval(now, engine)
|
result, err := rule.eval(now, engine, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error evaluating %s", test.name)
|
t.Fatalf("Error evaluating %s", test.name)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue