// Copyright 2013 Prometheus Team // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package notification import ( "bytes" "encoding/json" "flag" "io" "io/ioutil" "net/http" "time" "github.com/golang/glog" clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/utility" ) const ( alertmanagerApiEventsPath = "/api/alerts" contentTypeJson = "application/json" ) // String constants for instrumentation. const ( result = "result" success = "success" failure = "failure" dropped = "dropped" facet = "facet" occupancy = "occupancy" capacity = "capacity" ) var ( deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.") ) // A request for sending a notification to the alert manager for a single alert // vector element. type NotificationReq struct { // Short-form alert summary. May contain text/template-style interpolations. Summary string // Longer alert description. May contain text/template-style interpolations. Description string // Labels associated with this alert notification, including alert name. Labels clientmodel.LabelSet // Current value of alert Value clientmodel.SampleValue // Since when this alert has been active (pending or firing). ActiveSince time.Time // A textual representation of the rule that triggered the alert. RuleString string // Prometheus console link to alert expression. GeneratorUrl string } type NotificationReqs []*NotificationReq type httpPoster interface { Post(url string, bodyType string, body io.Reader) (*http.Response, error) } // NotificationHandler is responsible for dispatching alert notifications to an // alert manager service. type NotificationHandler struct { // The URL of the alert manager to send notifications to. alertmanagerUrl string // Buffer of notifications that have not yet been sent. pendingNotifications <-chan NotificationReqs // HTTP client with custom timeout settings. httpClient httpPoster notificationLatency *prometheus.SummaryVec notificationsQueueSize *prometheus.GaugeVec } // Construct a new NotificationHandler. func NewNotificationHandler(alertmanagerUrl string, notificationReqs <-chan NotificationReqs) *NotificationHandler { return &NotificationHandler{ alertmanagerUrl: alertmanagerUrl, pendingNotifications: notificationReqs, httpClient: utility.NewDeadlineClient(*deadline), notificationLatency: prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "prometheus_notifications_latency_ms", Help: "Latency quantiles for sending alert notifications in milliseconds.", }, []string{result}, ), notificationsQueueSize: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "prometheus_notifications_queue_size_total", Help: "The size and capacity of the alert notification queue.", }, []string{facet}, ), } } // Send a list of notifications to the configured alert manager. func (n *NotificationHandler) sendNotifications(reqs NotificationReqs) error { alerts := make([]map[string]interface{}, 0, len(reqs)) for _, req := range reqs { alerts = append(alerts, map[string]interface{}{ "Summary": req.Summary, "Description": req.Description, "Labels": req.Labels, "Payload": map[string]interface{}{ "Value": req.Value, "ActiveSince": req.ActiveSince, "GeneratorUrl": req.GeneratorUrl, "AlertingRule": req.RuleString, }, }) } buf, err := json.Marshal(alerts) if err != nil { return err } glog.V(1).Infoln("Sending notifications to alertmanager:", string(buf)) resp, err := n.httpClient.Post( n.alertmanagerUrl+alertmanagerApiEventsPath, contentTypeJson, bytes.NewBuffer(buf), ) if err != nil { return err } defer resp.Body.Close() _, err = ioutil.ReadAll(resp.Body) if err != nil { return err } // BUG: Do we need to check the response code? return nil } // Continuously dispatch notifications. func (n *NotificationHandler) Run() { for reqs := range n.pendingNotifications { if n.alertmanagerUrl == "" { glog.Warning("No alert manager configured, not dispatching notification") n.notificationLatency.WithLabelValues(dropped).Observe(0) continue } begin := time.Now() err := n.sendNotifications(reqs) labelValue := success if err != nil { glog.Error("Error sending notification: ", err) labelValue = failure } n.notificationLatency.WithLabelValues(labelValue).Observe( float64(time.Since(begin) / time.Millisecond), ) } } // Describe implements prometheus.Collector. func (n *NotificationHandler) Describe(ch chan<- *prometheus.Desc) { n.notificationLatency.Describe(ch) n.notificationsQueueSize.Describe(ch) } // Collect implements prometheus.Collector. func (n *NotificationHandler) Collect(ch chan<- prometheus.Metric) { n.notificationLatency.Collect(ch) n.notificationsQueueSize.WithLabelValues(occupancy).Set(float64(len(n.pendingNotifications))) n.notificationsQueueSize.WithLabelValues(capacity).Set(float64(cap(n.pendingNotifications))) n.notificationsQueueSize.Collect(ch) }