Merge pull request #301 from prometheus/feature/alerts-dashboard

Implement alerts dashboard and expression console links.
This commit is contained in:
juliusv 2013-06-13 13:38:45 -07:00
commit f4af4cb741
20 changed files with 321 additions and 39 deletions

View file

@ -230,7 +230,7 @@ func main() {
PrometheusStatus: &web.PrometheusStatus{ PrometheusStatus: &web.PrometheusStatus{
BuildInfo: BuildInfo, BuildInfo: BuildInfo,
Config: conf.String(), Config: conf.String(),
Rules: ruleManager.Rules(), RuleManager: ruleManager,
TargetPools: targetManager.Pools(), TargetPools: targetManager.Pools(),
Flags: flags, Flags: flags,
Birth: time.Now(), Birth: time.Now(),
@ -238,6 +238,10 @@ func main() {
CurationState: curationState, CurationState: curationState,
} }
alertsHandler := &web.AlertsHandler{
RuleManager: ruleManager,
}
databasesHandler := &web.DatabasesHandler{ databasesHandler := &web.DatabasesHandler{
Incoming: databaseStates, Incoming: databaseStates,
} }
@ -252,6 +256,7 @@ func main() {
StatusHandler: statusHandler, StatusHandler: statusHandler,
MetricsHandler: metricsService, MetricsHandler: metricsService,
DatabasesHandler: databasesHandler, DatabasesHandler: databasesHandler,
AlertsHandler: alertsHandler,
} }
prometheus := prometheus{ prometheus := prometheus{

View file

@ -94,6 +94,14 @@ func (l LabelSet) ToMetric() Metric {
return metric return metric
} }
// ToLabelSet returns a new LabelSet holding a copy of every label/value pair
// of the Metric. The result does not alias m: adding to or deleting from the
// returned set leaves the Metric untouched. A nil or empty Metric yields an
// empty, non-nil LabelSet.
func (m Metric) ToLabelSet() LabelSet {
	// The final number of entries is known, so size the map once up front.
	labels := make(LabelSet, len(m))
	for label, value := range m {
		labels[label] = value
	}
	return labels
}
// A Metric is similar to a LabelSet, but the key difference is that a Metric is // A Metric is similar to a LabelSet, but the key difference is that a Metric is
// a singleton and refers to one and only one stream of samples. // a singleton and refers to one and only one stream of samples.
type Metric map[LabelName]LabelValue type Metric map[LabelName]LabelValue

View file

@ -15,19 +15,24 @@ package rules
import ( import (
"fmt" "fmt"
"html/template"
"sync"
"time"
"github.com/prometheus/prometheus/model" "github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats" "github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric" "github.com/prometheus/prometheus/storage/metric"
"github.com/prometheus/prometheus/utility" "github.com/prometheus/prometheus/utility"
"time"
) )
// States that active alerts can be in. // States that active alerts can be in.
type alertState int type AlertState int
func (s alertState) String() string { func (s AlertState) String() string {
switch s { switch s {
case INACTIVE:
return "inactive"
case PENDING: case PENDING:
return "pending" return "pending"
case FIRING: case FIRING:
@ -38,32 +43,35 @@ func (s alertState) String() string {
} }
const ( const (
PENDING alertState = iota INACTIVE AlertState = iota
PENDING
FIRING FIRING
) )
// alert is used to track active (pending/firing) alerts over time. // Alert is used to track active (pending/firing) alerts over time.
type alert struct { type Alert struct {
// The name of the alert. // The name of the alert.
name string Name string
// The vector element labelset triggering this alert. // The vector element labelset triggering this alert.
metric model.Metric Labels model.LabelSet
// The state of the alert (PENDING or FIRING). // The state of the alert (PENDING or FIRING).
state alertState State AlertState
// The time when the alert first transitioned into PENDING state. // The time when the alert first transitioned into PENDING state.
activeSince time.Time ActiveSince time.Time
// The value of the alert expression for this vector element.
Value model.SampleValue
} }
// sample returns a Sample suitable for recording the alert. // sample returns a Sample suitable for recording the alert.
func (a alert) sample(timestamp time.Time, value model.SampleValue) model.Sample { func (a Alert) sample(timestamp time.Time, value model.SampleValue) model.Sample {
recordedMetric := model.Metric{} recordedMetric := model.Metric{}
for label, value := range a.metric { for label, value := range a.Labels {
recordedMetric[label] = value recordedMetric[label] = value
} }
recordedMetric[model.MetricNameLabel] = model.AlertMetricName recordedMetric[model.MetricNameLabel] = model.AlertMetricName
recordedMetric[model.AlertNameLabel] = model.LabelValue(a.name) recordedMetric[model.AlertNameLabel] = model.LabelValue(a.Name)
recordedMetric[model.AlertStateLabel] = model.LabelValue(a.state.String()) recordedMetric[model.AlertStateLabel] = model.LabelValue(a.State.String())
return model.Sample{ return model.Sample{
Metric: recordedMetric, Metric: recordedMetric,
@ -83,37 +91,51 @@ type AlertingRule struct {
holdDuration time.Duration holdDuration time.Duration
// Extra labels to attach to the resulting alert sample vectors. // Extra labels to attach to the resulting alert sample vectors.
labels model.LabelSet labels model.LabelSet
// Protects the below.
mutex sync.Mutex
// A map of alerts which are currently active (PENDING or FIRING), keyed by // A map of alerts which are currently active (PENDING or FIRING), keyed by
// the fingerprint of the labelset they correspond to. // the fingerprint of the labelset they correspond to.
activeAlerts map[model.Fingerprint]*alert activeAlerts map[model.Fingerprint]*Alert
} }
func (rule AlertingRule) Name() string { return rule.name } func (rule *AlertingRule) Name() string { return rule.name }
func (rule AlertingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) { func (rule *AlertingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
return ast.EvalVectorInstant(rule.vector, timestamp, storage, stats.NewTimerGroup()) return ast.EvalVectorInstant(rule.vector, timestamp, storage, stats.NewTimerGroup())
} }
func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) { func (rule *AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
// Get the raw value of the rule expression. // Get the raw value of the rule expression.
exprResult, err := rule.EvalRaw(timestamp, storage) exprResult, err := rule.EvalRaw(timestamp, storage)
if err != nil { if err != nil {
return nil, err return nil, err
} }
rule.mutex.Lock()
defer rule.mutex.Unlock()
// Create pending alerts for any new vector elements in the alert expression. // Create pending alerts for any new vector elements in the alert expression.
resultFingerprints := utility.Set{} resultFingerprints := utility.Set{}
for _, sample := range exprResult { for _, sample := range exprResult {
fp := *model.NewFingerprintFromMetric(sample.Metric) fp := *model.NewFingerprintFromMetric(sample.Metric)
resultFingerprints.Add(fp) resultFingerprints.Add(fp)
if _, ok := rule.activeAlerts[fp]; !ok { alert, ok := rule.activeAlerts[fp]
rule.activeAlerts[fp] = &alert{ if !ok {
name: rule.name, labels := sample.Metric.ToLabelSet()
metric: sample.Metric, if _, ok := labels[model.MetricNameLabel]; ok {
state: PENDING, delete(labels, model.MetricNameLabel)
activeSince: timestamp,
} }
rule.activeAlerts[fp] = &Alert{
Name: rule.name,
Labels: sample.Metric.ToLabelSet(),
State: PENDING,
ActiveSince: timestamp,
Value: sample.Value,
}
} else {
alert.Value = sample.Value
} }
} }
@ -127,9 +149,9 @@ func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage
continue continue
} }
if activeAlert.state == PENDING && timestamp.Sub(activeAlert.activeSince) >= rule.holdDuration { if activeAlert.State == PENDING && timestamp.Sub(activeAlert.ActiveSince) >= rule.holdDuration {
vector = append(vector, activeAlert.sample(timestamp, 0)) vector = append(vector, activeAlert.sample(timestamp, 0))
activeAlert.state = FIRING activeAlert.State = FIRING
} }
vector = append(vector, activeAlert.sample(timestamp, 1)) vector = append(vector, activeAlert.sample(timestamp, 1))
@ -138,7 +160,7 @@ func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage
return vector, nil return vector, nil
} }
func (rule AlertingRule) ToDotGraph() string { func (rule *AlertingRule) ToDotGraph() string {
graph := fmt.Sprintf(`digraph "Rules" { graph := fmt.Sprintf(`digraph "Rules" {
%#p[shape="box",label="ALERT %s IF FOR %s"]; %#p[shape="box",label="ALERT %s IF FOR %s"];
%#p -> %#p; %#p -> %#p;
@ -147,8 +169,47 @@ func (rule AlertingRule) ToDotGraph() string {
return graph return graph
} }
func (rule AlertingRule) String() string { func (rule *AlertingRule) String() string {
return fmt.Sprintf("ALERT %s IF %s FOR %s WITH %s\n", rule.name, rule.vector, utility.DurationToString(rule.holdDuration), rule.labels) return fmt.Sprintf("ALERT %s IF %s FOR %s WITH %s", rule.name, rule.vector, utility.DurationToString(rule.holdDuration), rule.labels)
}
// HTMLSnippet returns the rule definition as an HTML fragment for the web
// frontend. The alert name links to a console query for the recorded alert
// timeseries of this rule, and the expression links to a console query for the
// expression itself.
//
// The result is typed template.HTML, which html/template emits without further
// escaping, so the rule name, expression and labels are HTML-escaped here
// before being interpolated. The link targets are already escaped by
// ConsoleLinkForExpression.
func (rule *AlertingRule) HTMLSnippet() template.HTML {
	alertMetric := model.Metric{
		model.MetricNameLabel: model.AlertMetricName,
		model.AlertNameLabel:  model.LabelValue(rule.name),
	}

	// escape renders v exactly as the %s verb would and then HTML-escapes the
	// resulting text, so the visible output is unchanged for plain text.
	escape := func(v interface{}) string {
		return template.HTMLEscapeString(fmt.Sprintf("%s", v))
	}

	return template.HTML(fmt.Sprintf(
		`ALERT <a href="%s">%s</a> IF <a href="%s">%s</a> FOR %s WITH %s`,
		ConsoleLinkForExpression(alertMetric.String()),
		escape(rule.name),
		ConsoleLinkForExpression(rule.vector.String()),
		escape(rule.vector),
		utility.DurationToString(rule.holdDuration),
		escape(rule.labels)))
}
// State returns the highest AlertState found among the rule's currently
// active alerts, or INACTIVE when there are none. The rule's mutex is held
// while the active alerts are inspected.
func (rule *AlertingRule) State() AlertState {
	rule.mutex.Lock()
	defer rule.mutex.Unlock()

	highest := INACTIVE
	for fp := range rule.activeAlerts {
		if current := rule.activeAlerts[fp].State; highest < current {
			highest = current
		}
	}
	return highest
}
func (rule *AlertingRule) ActiveAlerts() []Alert {
rule.mutex.Lock()
defer rule.mutex.Unlock()
alerts := make([]Alert, 0, len(rule.activeAlerts))
for _, alert := range rule.activeAlerts {
alerts = append(alerts, *alert)
}
return alerts
} }
// Construct a new AlertingRule. // Construct a new AlertingRule.
@ -158,6 +219,6 @@ func NewAlertingRule(name string, vector ast.VectorNode, holdDuration time.Durat
vector: vector, vector: vector,
holdDuration: holdDuration, holdDuration: holdDuration,
labels: labels, labels: labels,
activeAlerts: map[model.Fingerprint]*alert{}, activeAlerts: map[model.Fingerprint]*Alert{},
} }
} }

View file

@ -321,7 +321,12 @@ func (node *VectorFunctionCall) String() string {
} }
func (node *VectorAggregation) String() string { func (node *VectorAggregation) String() string {
return fmt.Sprintf("%s(%s) BY (%s)", node.aggrType, node.vector, node.groupBy) aggrString := fmt.Sprintf("%s(%s)", node.aggrType, node.vector)
if len(node.groupBy) > 0 {
return fmt.Sprintf("%s BY (%s)", aggrString, node.groupBy)
} else {
return aggrString
}
} }
func (node *VectorArithExpr) String() string { func (node *VectorArithExpr) String() string {

View file

@ -15,6 +15,8 @@ package rules
import ( import (
"fmt" "fmt"
"html"
"github.com/prometheus/prometheus/model" "github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/utility" "github.com/prometheus/prometheus/utility"
@ -111,3 +113,7 @@ func NewMatrix(vector ast.Node, intervalStr string) (ast.MatrixNode, error) {
vectorLiteral := vector.(*ast.VectorLiteral) vectorLiteral := vector.(*ast.VectorLiteral)
return ast.NewMatrixLiteral(vectorLiteral, interval), nil return ast.NewMatrixLiteral(vectorLiteral, interval), nil
} }
// ConsoleLinkForExpression builds the relative link to the graph page that
// opens the given expression. The expression is embedded as a quoted string in
// the URL fragment and the whole link is HTML-escaped, so it can be placed
// directly inside an href attribute.
func ConsoleLinkForExpression(expr string) string {
	const linkFormat = `graph#[{"expr":%q,"tab":1}]`

	rawLink := fmt.Sprintf(linkFormat, expr)
	return html.EscapeString(rawLink)
}

View file

@ -28,9 +28,16 @@ type Result struct {
} }
type RuleManager interface { type RuleManager interface {
// Load and add rules from rule files specified in the configuration.
AddRulesFromConfig(config config.Config) error AddRulesFromConfig(config config.Config) error
// Start the rule manager's periodic rule evaluation.
Run() Run()
// Stop the rule manager's rule evaluation cycles.
Stop()
// Return all rules.
Rules() []Rule Rules() []Rule
// Return all alerting rules.
AlertingRules() []*AlertingRule
} }
type ruleManager struct { type ruleManager struct {
@ -127,3 +134,16 @@ func (m *ruleManager) Rules() []Rule {
copy(rules, m.rules) copy(rules, m.rules)
return rules return rules
} }
// AlertingRules returns the subset of the manager's rules whose concrete type
// is *AlertingRule, in the order they are held by the manager. The manager is
// locked while its rules are read. The result is never nil.
func (m *ruleManager) AlertingRules() []*AlertingRule {
	m.Lock()
	defer m.Unlock()

	alertingRules := []*AlertingRule{}
	for i := range m.rules {
		alertingRule, isAlerting := m.rules[i].(*AlertingRule)
		if !isAlerting {
			continue
		}
		alertingRules = append(alertingRules, alertingRule)
	}
	return alertingRules
}

View file

@ -15,11 +15,13 @@ package rules
import ( import (
"fmt" "fmt"
"html/template"
"time"
"github.com/prometheus/prometheus/model" "github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats" "github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric" "github.com/prometheus/prometheus/storage/metric"
"time"
) )
// A RecordingRule records its vector expression into new timeseries. // A RecordingRule records its vector expression into new timeseries.
@ -71,6 +73,17 @@ func (rule RecordingRule) String() string {
return fmt.Sprintf("%s%s = %s\n", rule.name, rule.labels, rule.vector) return fmt.Sprintf("%s%s = %s\n", rule.name, rule.labels, rule.vector)
} }
// HTMLSnippet returns the rule definition as an HTML fragment for the web
// frontend. The rule name links to a console query for the recorded series and
// the expression links to a console query for the expression itself.
//
// The result is typed template.HTML, which html/template emits without further
// escaping, so the rule name, labels and expression are HTML-escaped here
// before being interpolated. The link targets are already escaped by
// ConsoleLinkForExpression.
func (rule RecordingRule) HTMLSnippet() template.HTML {
	ruleExpr := rule.vector.String()
	// Render the labels exactly as the %s verb did before, then escape them.
	ruleLabels := fmt.Sprintf("%s", rule.labels)

	return template.HTML(fmt.Sprintf(
		`<a href="%s">%s</a>%s = <a href="%s">%s</a>`,
		ConsoleLinkForExpression(rule.name),
		template.HTMLEscapeString(rule.name),
		template.HTMLEscapeString(ruleLabels),
		ConsoleLinkForExpression(ruleExpr),
		template.HTMLEscapeString(ruleExpr)))
}
// Construct a new RecordingRule. // Construct a new RecordingRule.
func NewRecordingRule(name string, labels model.LabelSet, vector ast.VectorNode, permanent bool) *RecordingRule { func NewRecordingRule(name string, labels model.LabelSet, vector ast.VectorNode, permanent bool) *RecordingRule {
return &RecordingRule{ return &RecordingRule{

View file

@ -14,9 +14,11 @@
package rules package rules
import ( import (
"html/template"
"time"
"github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/storage/metric" "github.com/prometheus/prometheus/storage/metric"
"time"
) )
// A Rule encapsulates a vector expression which is evaluated at a specified // A Rule encapsulates a vector expression which is evaluated at a specified
@ -33,4 +35,7 @@ type Rule interface {
ToDotGraph() string ToDotGraph() string
// String returns a human-readable string representation of the rule. // String returns a human-readable string representation of the rule.
String() string String() string
// HTMLSnippet returns a human-readable string representation of the rule,
// decorated with HTML elements for use in the web frontend.
HTMLSnippet() template.HTML
} }

View file

@ -13,7 +13,7 @@
all: blob-stamp all: blob-stamp
blob-stamp: static/generated/protocol_buffer.descriptor blob-stamp: static/generated/protocol_buffer.descriptor templates/*
$(MAKE) -C blob $(MAKE) -C blob
touch $@ touch $@

61
web/alerts.go Normal file
View file

@ -0,0 +1,61 @@
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package web
import (
"github.com/prometheus/prometheus/rules"
"net/http"
"sort"
"sync"
)
// AlertStatus is the data handed to the "alerts" template by
// AlertsHandler.ServeHTTP.
type AlertStatus struct {
// AlertingRules are the alerting rules to render, in display order.
AlertingRules []*rules.AlertingRule
}
// AlertsHandler is the http.Handler that renders the alerts page from the
// alerting rules known to its RuleManager.
type AlertsHandler struct {
// RuleManager supplies the alerting rules shown on the page.
RuleManager rules.RuleManager
// mutex is held for the whole of ServeHTTP, so requests are served one at a
// time.
mutex sync.Mutex
}
// byAlertStateSorter implements sort.Interface over alerting rules, ordering
// them by descending state so that rules with a higher AlertState come first.
//
// The state of every rule is captured once, when the sorter is built, instead
// of being re-read inside Less. AlertingRule.State takes the rule's mutex and
// walks its active alerts, and rule evaluation may change a state while the
// sort is in progress; comparing against a fixed snapshot keeps the ordering
// consistent for the whole sort and costs a single State call per rule.
type byAlertStateSorter struct {
	alerts []*rules.AlertingRule
	// states[i] is the state observed for alerts[i]; Swap keeps both slices
	// aligned.
	states []rules.AlertState
}

// newByAlertStateSorter snapshots the current state of each rule and returns a
// sorter over alerts. The alerts slice is sorted in place by sort.Sort.
func newByAlertStateSorter(alerts []*rules.AlertingRule) byAlertStateSorter {
	states := make([]rules.AlertState, len(alerts))
	for i, rule := range alerts {
		states[i] = rule.State()
	}
	return byAlertStateSorter{alerts: alerts, states: states}
}

// Len returns the number of rules being sorted.
func (s byAlertStateSorter) Len() int {
	return len(s.alerts)
}

// Less reports whether the rule at index i had a higher state than the rule
// at index j when the sorter was built.
func (s byAlertStateSorter) Less(i, j int) bool {
	return s.states[i] > s.states[j]
}

// Swap exchanges the rules at indexes i and j together with their snapshotted
// states.
func (s byAlertStateSorter) Swap(i, j int) {
	s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i]
	s.states[i], s.states[j] = s.states[j], s.states[i]
}

// ServeHTTP renders the "alerts" template with all alerting rules of the rule
// manager, sorted by descending state. Requests are serialized by the
// handler's mutex.
func (h *AlertsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	h.mutex.Lock()
	defer h.mutex.Unlock()

	alertsSorter := newByAlertStateSorter(h.RuleManager.AlertingRules())
	sort.Sort(alertsSorter)

	alertStatus := AlertStatus{
		AlertingRules: alertsSorter.alerts,
	}
	executeTemplate(w, "alerts", alertStatus)
}

48
web/static/css/alerts.css Normal file
View file

@ -0,0 +1,48 @@
/* Styles for the alerts page (templates/alerts.html). */

/* Container for a single alerting rule: header plus collapsible content. */
.alert_wrapper {
padding: 2px;
}
/* Title bar of a rule; the pointer cursor marks it as clickable. */
.alert_header {
padding: 3px;
cursor: pointer;
}
/* Rule details; hidden until the header is clicked (see js/alerts.js). */
.alert_content {
padding: 3px;
display: none;
}
/* Header colors, selected by the state class the template adds to the header. */
.alert_header.firing {
background-color: #ff7673;
}
.alert_header.pending {
background-color: #ffcf40;
}
.alert_header.inactive {
background-color: #92ed6b;
}
/* The "Rule:" line showing the rule definition. */
.alert_description {
margin-left: 3px;
padding: 8px 0 8px 0;
}
/* Table listing the active alerts of a rule. */
.alert_active_elements {
border: 1px solid #dddddd;
}
.alert_active_elements th {
background-color: #dddddd;
padding: 0 5px 0 5px;
}
.alert_active_elements td {
background-color: #eebbbb;
padding: 0 5px 0 5px;
}
/* Highlight the table row under the mouse pointer. */
.alert_active_elements tr:hover td {
background-color: #ffcf40;
}

5
web/static/js/alerts.js Normal file
View file

@ -0,0 +1,5 @@
// Makes every alert header toggle the visibility of the element that directly
// follows it when clicked.
function init() {
  var toggleFollowingElement = function() {
    $(this).next().toggle();
  };
  $(".alert_header").click(toggleFollowingElement);
}

// Run init through jQuery's ready hook.
$(init);

View file

@ -519,6 +519,7 @@ function parseGraphOptionsFromUrl() {
return options; return options;
} }
// NOTE: This needs to be kept in sync with rules/helpers.go:ConsoleLinkForExpression!
function storeGraphOptionsInUrl() { function storeGraphOptionsInUrl() {
var allGraphsOptions = []; var allGraphsOptions = [];
for (var i = 0; i < graphs.length; i++) { for (var i = 0; i < graphs.length; i++) {
@ -559,4 +560,5 @@ function init() {
} }
}) })
} }
$(init); $(init);

View file

@ -27,7 +27,7 @@ type PrometheusStatus struct {
Config string Config string
Curation metric.CurationState Curation metric.CurationState
Flags map[string]string Flags map[string]string
Rules []rules.Rule RuleManager rules.RuleManager
TargetPools map[string]*retrieval.TargetPool TargetPools map[string]*retrieval.TargetPool
Birth time.Time Birth time.Time

View file

@ -13,6 +13,7 @@
<a href="/graph">Graph &amp; Console</a> <a href="/graph">Graph &amp; Console</a>
<a href="/">Status</a> <a href="/">Status</a>
<a href="/databases">Databases</a> <a href="/databases">Databases</a>
<a href="/alerts">Alerts</a>
{{ define "user_dashboard_link" }}{{ end }} {{ define "user_dashboard_link" }}{{ end }}
{{ template "user_dashboard_link" .}} {{ template "user_dashboard_link" .}}

41
web/templates/alerts.html Normal file
View file

@ -0,0 +1,41 @@
{{define "head"}}
<!-- Page-specific stylesheet and script for the alerts page. -->
<link type="text/css" rel="stylesheet" href="/static/css/alerts.css">
<script src="/static/js/alerts.js"></script>
{{end}}
{{define "content"}}
<h2>Alerts</h2>
<div class="grouping_box">
<!-- One wrapper per alerting rule, in the order supplied by the handler. -->
{{range .AlertingRules}}
<!-- The active alerts are fetched once so the count and the table rows come from the same call. -->
{{$activeAlerts := .ActiveAlerts}}
<div class="alert_wrapper">
<!-- The rule state doubles as a CSS class that colors the header. -->
<div class="alert_header {{.State}}">
{{.Name}} ({{len $activeAlerts}} active)
</div>
<!-- Collapsed by default; alerts.js toggles it when the header is clicked. -->
<div class="alert_content">
<div class="alert_description">
<b>Rule:</b> {{.HTMLSnippet}}
</div>
<!-- The table is only rendered when the rule has active alerts. -->
{{if $activeAlerts}}
<table class="alert_active_elements">
<tr>
<th>Labels</th>
<th>State</th>
<th>Active Since</th>
<th>Value</th>
</tr>
{{range $activeAlerts}}
<tr>
<td>{{.Labels}}</td>
<td>{{.State}}</td>
<td>{{.ActiveSince}}</td>
<td>{{.Value}}</td>
</tr>
{{end}}
</table>
{{end}}
</div>
</div>
{{end}}
</div>
{{end}}

View file

@ -1,8 +1,8 @@
{{define "head"}}<!-- nix -->{{end}} {{define "head"}}<!-- nix -->{{end}}
{{define "content"}} {{define "content"}}
<div class="grouping_box">
<h2>Database Information</h2> <h2>Database Information</h2>
<div class="grouping_box">
{{range .States}} {{range .States}}
<div class="grouping_box"> <div class="grouping_box">
<h3>{{.Name}}</h3> <h3>{{.Name}}</h3>

View file

@ -2,7 +2,6 @@
<script src="/static/vendor/jqueryui/jquery-ui.min.js"></script> <script src="/static/vendor/jqueryui/jquery-ui.min.js"></script>
<link rel="stylesheet" href="/static/vendor/jqueryui/themes/base/jquery-ui.css" /> <link rel="stylesheet" href="/static/vendor/jqueryui/themes/base/jquery-ui.css" />
<link type="text/css" rel="stylesheet" href="/static/css/prometheus.css">
<link type="text/css" rel="stylesheet" href="/static/css/graph.css"> <link type="text/css" rel="stylesheet" href="/static/css/graph.css">
<script src="/static/vendor/jquery-simple-datetimepicker/jquery.simple-dtpicker.js"></script> <script src="/static/vendor/jquery-simple-datetimepicker/jquery.simple-dtpicker.js"></script>

View file

@ -35,7 +35,7 @@
<h2>Rules</h2> <h2>Rules</h2>
<div class="grouping_box"> <div class="grouping_box">
<pre>{{range .Rules}}{{.String}}{{end}}</pre> <pre>{{range .RuleManager.Rules}}{{.HTMLSnippet}}<br/>{{end}}</pre>
</div> </div>
<h2>Targets</h2> <h2>Targets</h2>

View file

@ -38,6 +38,7 @@ type WebService struct {
StatusHandler *StatusHandler StatusHandler *StatusHandler
DatabasesHandler *DatabasesHandler DatabasesHandler *DatabasesHandler
MetricsHandler *api.MetricsService MetricsHandler *api.MetricsService
AlertsHandler *AlertsHandler
} }
func (w WebService) ServeForever() error { func (w WebService) ServeForever() error {
@ -56,6 +57,7 @@ func (w WebService) ServeForever() error {
exp.Handle("/", w.StatusHandler) exp.Handle("/", w.StatusHandler)
exp.Handle("/databases", w.DatabasesHandler) exp.Handle("/databases", w.DatabasesHandler)
exp.Handle("/alerts", w.AlertsHandler)
exp.HandleFunc("/graph", graphHandler) exp.HandleFunc("/graph", graphHandler)
exp.Handle("/api/", gorest.Handle()) exp.Handle("/api/", gorest.Handle())