mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-13 00:54:04 -08:00
Merge pull request #355 from prometheus/feature/interpolation
Add variable interpolation to notification messages.
This commit is contained in:
commit
1800b2f39e
4
main.go
4
main.go
|
@ -81,7 +81,7 @@ type prometheus struct {
|
||||||
unwrittenSamples chan *extraction.Result
|
unwrittenSamples chan *extraction.Result
|
||||||
|
|
||||||
ruleManager rules.RuleManager
|
ruleManager rules.RuleManager
|
||||||
notifications chan rules.NotificationReqs
|
notifications chan notification.NotificationReqs
|
||||||
storage *metric.TieredStorage
|
storage *metric.TieredStorage
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,7 +192,7 @@ func main() {
|
||||||
targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance)
|
targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance)
|
||||||
targetManager.AddTargetsFromConfig(conf)
|
targetManager.AddTargetsFromConfig(conf)
|
||||||
|
|
||||||
notifications := make(chan rules.NotificationReqs, *notificationQueueCapacity)
|
notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity)
|
||||||
|
|
||||||
// Queue depth will need to be exposed
|
// Queue depth will need to be exposed
|
||||||
ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts)
|
ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts)
|
||||||
|
|
|
@ -17,14 +17,15 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
clientmodel "github.com/prometheus/client_golang/model"
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/rules"
|
|
||||||
"github.com/prometheus/prometheus/utility"
|
"github.com/prometheus/prometheus/utility"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -37,6 +38,29 @@ var (
|
||||||
deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.")
|
deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A request for sending a notification to the alert manager for a single alert
|
||||||
|
// vector element.
|
||||||
|
type NotificationReq struct {
|
||||||
|
// Short-form alert summary. May contain text/template-style interpolations.
|
||||||
|
Summary string
|
||||||
|
// Longer alert description. May contain text/template-style interpolations.
|
||||||
|
Description string
|
||||||
|
// Labels associated with this alert notification, including alert name.
|
||||||
|
Labels clientmodel.LabelSet
|
||||||
|
// Current value of alert
|
||||||
|
Value clientmodel.SampleValue
|
||||||
|
// Since when this alert has been active (pending or firing).
|
||||||
|
ActiveSince time.Time
|
||||||
|
// A textual representation of the rule that triggered the alert.
|
||||||
|
RuleString string
|
||||||
|
}
|
||||||
|
|
||||||
|
type NotificationReqs []*NotificationReq
|
||||||
|
|
||||||
|
type httpPoster interface {
|
||||||
|
Post(url string, bodyType string, body io.Reader) (*http.Response, error)
|
||||||
|
}
|
||||||
|
|
||||||
// NotificationHandler is responsible for dispatching alert notifications to an
|
// NotificationHandler is responsible for dispatching alert notifications to an
|
||||||
// alert manager service.
|
// alert manager service.
|
||||||
type NotificationHandler struct {
|
type NotificationHandler struct {
|
||||||
|
@ -45,13 +69,13 @@ type NotificationHandler struct {
|
||||||
// The URL of this Prometheus instance to include in notifications.
|
// The URL of this Prometheus instance to include in notifications.
|
||||||
prometheusUrl string
|
prometheusUrl string
|
||||||
// Buffer of notifications that have not yet been sent.
|
// Buffer of notifications that have not yet been sent.
|
||||||
pendingNotifications <-chan rules.NotificationReqs
|
pendingNotifications <-chan NotificationReqs
|
||||||
// HTTP client with custom timeout settings.
|
// HTTP client with custom timeout settings.
|
||||||
httpClient http.Client
|
httpClient httpPoster
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct a new NotificationHandler.
|
// Construct a new NotificationHandler.
|
||||||
func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan rules.NotificationReqs) *NotificationHandler {
|
func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan NotificationReqs) *NotificationHandler {
|
||||||
return &NotificationHandler{
|
return &NotificationHandler{
|
||||||
alertmanagerUrl: alertmanagerUrl,
|
alertmanagerUrl: alertmanagerUrl,
|
||||||
pendingNotifications: notificationReqs,
|
pendingNotifications: notificationReqs,
|
||||||
|
@ -60,21 +84,55 @@ func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notifi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Interpolate alert information into summary/description templates.
|
||||||
|
func interpolateMessage(msg string, labels clientmodel.LabelSet, value clientmodel.SampleValue) string {
|
||||||
|
t := template.New("message")
|
||||||
|
|
||||||
|
// Inject some convenience variables that are easier to remember for users
|
||||||
|
// who are not used to Go's templating system.
|
||||||
|
defs :=
|
||||||
|
"{{$labels := .Labels}}" +
|
||||||
|
"{{$value := .Value}}"
|
||||||
|
|
||||||
|
if _, err := t.Parse(defs + msg); err != nil {
|
||||||
|
log.Println("Error parsing template:", err)
|
||||||
|
return msg
|
||||||
|
}
|
||||||
|
|
||||||
|
l := map[string]string{}
|
||||||
|
for k, v := range labels {
|
||||||
|
l[string(k)] = string(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
tmplData := struct {
|
||||||
|
Labels map[string]string
|
||||||
|
Value clientmodel.SampleValue
|
||||||
|
}{
|
||||||
|
Labels: l,
|
||||||
|
Value: value,
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
if err := t.Execute(&buf, &tmplData); err != nil {
|
||||||
|
log.Println("Error executing template:", err)
|
||||||
|
return msg
|
||||||
|
}
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
// Send a list of notifications to the configured alert manager.
|
// Send a list of notifications to the configured alert manager.
|
||||||
func (n *NotificationHandler) sendNotifications(reqs rules.NotificationReqs) error {
|
func (n *NotificationHandler) sendNotifications(reqs NotificationReqs) error {
|
||||||
alerts := make([]map[string]interface{}, 0, len(reqs))
|
alerts := make([]map[string]interface{}, 0, len(reqs))
|
||||||
for _, req := range reqs {
|
for _, req := range reqs {
|
||||||
alerts = append(alerts, map[string]interface{}{
|
alerts = append(alerts, map[string]interface{}{
|
||||||
"Summary": req.Rule.Summary,
|
"Summary": interpolateMessage(req.Summary, req.Labels, req.Value),
|
||||||
"Description": req.Rule.Description,
|
"Description": interpolateMessage(req.Description, req.Labels, req.Value),
|
||||||
"Labels": req.ActiveAlert.Labels.Merge(clientmodel.LabelSet{
|
"Labels": req.Labels,
|
||||||
rules.AlertNameLabel: clientmodel.LabelValue(req.Rule.Name()),
|
|
||||||
}),
|
|
||||||
"Payload": map[string]interface{}{
|
"Payload": map[string]interface{}{
|
||||||
"Value": req.ActiveAlert.Value,
|
"Value": req.Value,
|
||||||
"ActiveSince": req.ActiveAlert.ActiveSince,
|
"ActiveSince": req.ActiveSince,
|
||||||
"GeneratorUrl": n.prometheusUrl,
|
"GeneratorUrl": n.prometheusUrl,
|
||||||
"AlertingRule": req.Rule.String(),
|
"AlertingRule": req.RuleString,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
109
notification/notification_test.go
Normal file
109
notification/notification_test.go
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
// Copyright 2013 Prometheus Team
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package notification
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
type testHttpPoster struct {
|
||||||
|
message string
|
||||||
|
receivedPost chan<- bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *testHttpPoster) Post(url string, bodyType string, body io.Reader) (*http.Response, error) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
buf.ReadFrom(body)
|
||||||
|
p.message = buf.String()
|
||||||
|
p.receivedPost <- true
|
||||||
|
return &http.Response{
|
||||||
|
Body: ioutil.NopCloser(&bytes.Buffer{}),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type testNotificationScenario struct {
|
||||||
|
description string
|
||||||
|
summary string
|
||||||
|
message string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *testNotificationScenario) test(i int, t *testing.T) {
|
||||||
|
notifications := make(chan NotificationReqs)
|
||||||
|
defer close(notifications)
|
||||||
|
h := NewNotificationHandler("alertmanager_url", "prometheus_url", notifications)
|
||||||
|
|
||||||
|
receivedPost := make(chan bool, 1)
|
||||||
|
poster := testHttpPoster{receivedPost: receivedPost}
|
||||||
|
h.httpClient = &poster
|
||||||
|
|
||||||
|
go h.Run()
|
||||||
|
|
||||||
|
notifications <- NotificationReqs{
|
||||||
|
{
|
||||||
|
Summary: s.summary,
|
||||||
|
Description: s.description,
|
||||||
|
Labels: clientmodel.LabelSet{
|
||||||
|
clientmodel.LabelName("instance"): clientmodel.LabelValue("testinstance"),
|
||||||
|
},
|
||||||
|
Value: clientmodel.SampleValue(1.0 / 3.0),
|
||||||
|
ActiveSince: time.Time{},
|
||||||
|
RuleString: "Test rule string",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
<-receivedPost
|
||||||
|
if poster.message != s.message {
|
||||||
|
t.Fatalf("%d. Expected '%s', received '%s'", i, s.message, poster.message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotificationHandler(t *testing.T) {
|
||||||
|
scenarios := []testNotificationScenario{
|
||||||
|
{
|
||||||
|
// Correct message.
|
||||||
|
summary: "{{$labels.instance}} = {{$value}}",
|
||||||
|
description: "The alert value for {{$labels.instance}} is {{$value}}",
|
||||||
|
message: `[{"Description":"The alert value for testinstance is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"testinstance = 0.3333333333333333"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Bad message referring to unknown label.
|
||||||
|
summary: "{{$labels.badlabel}} = {{$value}}",
|
||||||
|
description: "The alert value for {{$labels.badlabel}} is {{$value}}",
|
||||||
|
message: `[{"Description":"The alert value for \u003cno value\u003e is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"\u003cno value\u003e = 0.3333333333333333"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Bad message referring to unknown variable.
|
||||||
|
summary: "{{$labels.instance}} = {{$badvar}}",
|
||||||
|
description: "The alert value for {{$labels.instance}} is {{$badvar}}",
|
||||||
|
message: `[{"Description":"The alert value for {{$labels.instance}} is {{$badvar}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{$badvar}}"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Bad message referring to unknown struct field.
|
||||||
|
summary: "{{$labels.instance}} = {{.Val}}",
|
||||||
|
description: "The alert value for {{$labels.instance}} is {{.Val}}",
|
||||||
|
message: `[{"Description":"The alert value for {{$labels.instance}} is {{.Val}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{.Val}}"}]`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, s := range scenarios {
|
||||||
|
s.test(i, t)
|
||||||
|
}
|
||||||
|
}
|
|
@ -136,7 +136,7 @@ type target struct {
|
||||||
// Any base labels that are added to this target and its metrics.
|
// Any base labels that are added to this target and its metrics.
|
||||||
baseLabels clientmodel.LabelSet
|
baseLabels clientmodel.LabelSet
|
||||||
// The HTTP client used to scrape the target's endpoint.
|
// The HTTP client used to scrape the target's endpoint.
|
||||||
client http.Client
|
httpClient *http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
// Furnish a reasonably configured target for querying.
|
// Furnish a reasonably configured target for querying.
|
||||||
|
@ -145,7 +145,7 @@ func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.La
|
||||||
address: address,
|
address: address,
|
||||||
Deadline: deadline,
|
Deadline: deadline,
|
||||||
baseLabels: baseLabels,
|
baseLabels: baseLabels,
|
||||||
client: utility.NewDeadlineClient(deadline),
|
httpClient: utility.NewDeadlineClient(deadline),
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduler := &healthScheduler{
|
scheduler := &healthScheduler{
|
||||||
|
@ -220,7 +220,7 @@ func (t *target) scrape(timestamp time.Time, results chan<- *extraction.Result)
|
||||||
}
|
}
|
||||||
req.Header.Add("Accept", acceptHeader)
|
req.Header.Add("Accept", acceptHeader)
|
||||||
|
|
||||||
resp, err := t.client.Do(req)
|
resp, err := t.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,8 @@ import (
|
||||||
clientmodel "github.com/prometheus/client_golang/model"
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/extraction"
|
"github.com/prometheus/client_golang/extraction"
|
||||||
|
|
||||||
|
"github.com/prometheus/prometheus/utility"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestTargetScrapeUpdatesState(t *testing.T) {
|
func TestTargetScrapeUpdatesState(t *testing.T) {
|
||||||
|
@ -29,6 +31,7 @@ func TestTargetScrapeUpdatesState(t *testing.T) {
|
||||||
scheduler: literalScheduler{},
|
scheduler: literalScheduler{},
|
||||||
state: UNKNOWN,
|
state: UNKNOWN,
|
||||||
address: "bad schema",
|
address: "bad schema",
|
||||||
|
httpClient: utility.NewDeadlineClient(0),
|
||||||
}
|
}
|
||||||
testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2))
|
testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2))
|
||||||
if testTarget.state != UNREACHABLE {
|
if testTarget.state != UNREACHABLE {
|
||||||
|
@ -41,6 +44,7 @@ func TestTargetRecordScrapeHealth(t *testing.T) {
|
||||||
scheduler: literalScheduler{},
|
scheduler: literalScheduler{},
|
||||||
address: "http://example.url",
|
address: "http://example.url",
|
||||||
baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"},
|
baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"},
|
||||||
|
httpClient: utility.NewDeadlineClient(0),
|
||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
|
|
|
@ -22,6 +22,7 @@ import (
|
||||||
clientmodel "github.com/prometheus/client_golang/model"
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/config"
|
"github.com/prometheus/prometheus/config"
|
||||||
|
"github.com/prometheus/prometheus/notification"
|
||||||
"github.com/prometheus/prometheus/storage/metric"
|
"github.com/prometheus/prometheus/storage/metric"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -38,29 +39,19 @@ type RuleManager interface {
|
||||||
AlertingRules() []*AlertingRule
|
AlertingRules() []*AlertingRule
|
||||||
}
|
}
|
||||||
|
|
||||||
// A request for sending an alert notification to the alert manager. This needs
|
|
||||||
// to be defined in this package to prevent a circular import between
|
|
||||||
// rules<->notification.
|
|
||||||
type NotificationReq struct {
|
|
||||||
Rule *AlertingRule
|
|
||||||
ActiveAlert Alert
|
|
||||||
}
|
|
||||||
|
|
||||||
type NotificationReqs []*NotificationReq
|
|
||||||
|
|
||||||
type ruleManager struct {
|
type ruleManager struct {
|
||||||
// Protects the rules list.
|
// Protects the rules list.
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
rules []Rule
|
rules []Rule
|
||||||
|
|
||||||
results chan<- *extraction.Result
|
results chan<- *extraction.Result
|
||||||
notifications chan<- NotificationReqs
|
notifications chan<- notification.NotificationReqs
|
||||||
done chan bool
|
done chan bool
|
||||||
interval time.Duration
|
interval time.Duration
|
||||||
storage *metric.TieredStorage
|
storage *metric.TieredStorage
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewRuleManager(results chan<- *extraction.Result, notifications chan<- NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager {
|
func NewRuleManager(results chan<- *extraction.Result, notifications chan<- notification.NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager {
|
||||||
manager := &ruleManager{
|
manager := &ruleManager{
|
||||||
results: results,
|
results: results,
|
||||||
notifications: notifications,
|
notifications: notifications,
|
||||||
|
@ -102,16 +93,22 @@ func (m *ruleManager) queueAlertNotifications(rule *AlertingRule) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
notifications := make(NotificationReqs, 0, len(activeAlerts))
|
notifications := make(notification.NotificationReqs, 0, len(activeAlerts))
|
||||||
for _, aa := range activeAlerts {
|
for _, aa := range activeAlerts {
|
||||||
if aa.State != FIRING {
|
if aa.State != FIRING {
|
||||||
// BUG: In the future, make AlertManager support pending alerts?
|
// BUG: In the future, make AlertManager support pending alerts?
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
notifications = append(notifications, &NotificationReq{
|
notifications = append(notifications, &notification.NotificationReq{
|
||||||
Rule: rule,
|
Summary: rule.Summary,
|
||||||
ActiveAlert: aa,
|
Description: rule.Description,
|
||||||
|
Labels: aa.Labels.Merge(clientmodel.LabelSet{
|
||||||
|
AlertNameLabel: clientmodel.LabelValue(rule.Name()),
|
||||||
|
}),
|
||||||
|
Value: aa.Value,
|
||||||
|
ActiveSince: aa.ActiveSince,
|
||||||
|
RuleString: rule.String(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
m.notifications <- notifications
|
m.notifications <- notifications
|
||||||
|
|
|
@ -21,8 +21,8 @@ import (
|
||||||
|
|
||||||
// NewDeadlineClient returns a new http.Client which will time out long running
|
// NewDeadlineClient returns a new http.Client which will time out long running
|
||||||
// requests.
|
// requests.
|
||||||
func NewDeadlineClient(timeout time.Duration) http.Client {
|
func NewDeadlineClient(timeout time.Duration) *http.Client {
|
||||||
return http.Client{
|
return &http.Client{
|
||||||
Transport: &http.Transport{
|
Transport: &http.Transport{
|
||||||
// We need to disable keepalive, because we set a deadline on the
|
// We need to disable keepalive, because we set a deadline on the
|
||||||
// underlying connection.
|
// underlying connection.
|
||||||
|
|
Loading…
Reference in a new issue