mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-11 22:07:27 -08:00
Unit testing for rules (#4350)
* Unit testing for rules * Specifying order of group evaluation in unit tests Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
This commit is contained in:
parent
b75ec7e6ef
commit
5790d23fd8
|
@ -94,6 +94,13 @@ func main() {
|
|||
queryLabelsServer := queryLabelsCmd.Arg("server", "Prometheus server to query.").Required().URL()
|
||||
queryLabelsName := queryLabelsCmd.Arg("name", "Label name to provide label values for.").Required().String()
|
||||
|
||||
testCmd := app.Command("test", "Unit testing.")
|
||||
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
|
||||
testRulesFiles := testRulesCmd.Arg(
|
||||
"test-rule-file",
|
||||
"The unit test file.",
|
||||
).Required().ExistingFiles()
|
||||
|
||||
switch kingpin.MustParse(app.Parse(os.Args[1:])) {
|
||||
case checkConfigCmd.FullCommand():
|
||||
os.Exit(CheckConfig(*configFiles...))
|
||||
|
@ -127,6 +134,9 @@ func main() {
|
|||
|
||||
case queryLabelsCmd.FullCommand():
|
||||
os.Exit(QueryLabels(*queryLabelsServer, *queryLabelsName))
|
||||
|
||||
case testRulesCmd.FullCommand():
|
||||
os.Exit(RulesUnitTest(*testRulesFiles...))
|
||||
}
|
||||
|
||||
}
|
||||
|
|
476
cmd/promtool/unittest.go
Normal file
476
cmd/promtool/unittest.go
Normal file
|
@ -0,0 +1,476 @@
|
|||
// Copyright 2018 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/rules"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
)
|
||||
|
||||
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
|
||||
// More info about the file format can be found in the docs.
|
||||
func RulesUnitTest(files ...string) int {
|
||||
failed := false
|
||||
|
||||
for _, f := range files {
|
||||
if errs := ruleUnitTest(f); errs != nil {
|
||||
fmt.Fprintln(os.Stderr, " FAILED:")
|
||||
for _, e := range errs {
|
||||
fmt.Fprintln(os.Stderr, e.Error())
|
||||
}
|
||||
failed = true
|
||||
} else {
|
||||
fmt.Println(" SUCCESS")
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
if failed {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func ruleUnitTest(filename string) []error {
|
||||
fmt.Println("Unit Testing: ", filename)
|
||||
|
||||
b, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
return []error{err}
|
||||
}
|
||||
|
||||
var unitTestInp unitTestFile
|
||||
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
|
||||
return []error{err}
|
||||
}
|
||||
|
||||
if unitTestInp.EvaluationInterval == 0 {
|
||||
unitTestInp.EvaluationInterval = 1 * time.Minute
|
||||
}
|
||||
|
||||
// Bounds for evaluating the rules.
|
||||
mint := time.Unix(0, 0)
|
||||
maxd := unitTestInp.maxEvalTime()
|
||||
maxt := mint.Add(maxd)
|
||||
// Rounding off to nearest Eval time (> maxt).
|
||||
maxt = maxt.Add(unitTestInp.EvaluationInterval / 2).Round(unitTestInp.EvaluationInterval)
|
||||
|
||||
// Giving number for groups mentioned in the file for ordering.
|
||||
// Lower number group should be evaluated before higher number group.
|
||||
groupOrderMap := make(map[string]int)
|
||||
for i, gn := range unitTestInp.GroupEvalOrder {
|
||||
if _, ok := groupOrderMap[gn]; ok {
|
||||
return []error{fmt.Errorf("Group name repeated in evaluation order: %s", gn)}
|
||||
}
|
||||
groupOrderMap[gn] = i
|
||||
}
|
||||
|
||||
// Testing.
|
||||
var errs []error
|
||||
for _, t := range unitTestInp.Tests {
|
||||
ers := t.test(mint, maxt, unitTestInp.EvaluationInterval, groupOrderMap,
|
||||
unitTestInp.RuleFiles...)
|
||||
if ers != nil {
|
||||
errs = append(errs, ers...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errs
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// unitTestFile holds the contents of a single unit test file.
type unitTestFile struct {
	// RuleFiles lists the rule files that are loaded for every test group.
	RuleFiles []string `yaml:"rule_files"`
	// EvaluationInterval is the step between two rule evaluations;
	// ruleUnitTest replaces a zero value with one minute.
	EvaluationInterval time.Duration `yaml:"evaluation_interval,omitempty"`
	// GroupEvalOrder lists rule group names in the order in which they should
	// be evaluated; a repeated name is rejected by ruleUnitTest.
	GroupEvalOrder []string `yaml:"group_eval_order"`
	// Tests are the test groups to run.
	Tests []testGroup `yaml:"tests"`
}
|
||||
|
||||
func (utf *unitTestFile) maxEvalTime() time.Duration {
|
||||
var maxd time.Duration
|
||||
for _, t := range utf.Tests {
|
||||
d := t.maxEvalTime()
|
||||
if d > maxd {
|
||||
maxd = d
|
||||
}
|
||||
}
|
||||
return maxd
|
||||
}
|
||||
|
||||
// testGroup is a group of input series and tests associated with it.
type testGroup struct {
	// Interval is the sample spacing used in the "load" statement built from
	// InputSeries; it is also passed to the rule manager when loading groups.
	Interval time.Duration `yaml:"interval"`
	// InputSeries are the series loaded into the test storage.
	InputSeries []series `yaml:"input_series"`
	// AlertRuleTests are the expectations on firing alerts.
	AlertRuleTests []alertTestCase `yaml:"alert_rule_test,omitempty"`
	// PromqlExprTests are the expectations on PromQL expression results.
	PromqlExprTests []promqlTestCase `yaml:"promql_expr_test,omitempty"`
}
|
||||
|
||||
// test performs the unit tests.
|
||||
func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, groupOrderMap map[string]int, ruleFiles ...string) []error {
|
||||
// Setup testing suite.
|
||||
suite, err := promql.NewTest(nil, tg.seriesLoadingString())
|
||||
if err != nil {
|
||||
return []error{err}
|
||||
}
|
||||
defer suite.Close()
|
||||
|
||||
err = suite.Run()
|
||||
if err != nil {
|
||||
return []error{err}
|
||||
}
|
||||
|
||||
// Load the rule files.
|
||||
opts := &rules.ManagerOptions{
|
||||
QueryFunc: rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()),
|
||||
Appendable: suite.Storage(),
|
||||
Context: context.Background(),
|
||||
NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {},
|
||||
Logger: &dummyLogger{},
|
||||
}
|
||||
m := rules.NewManager(opts)
|
||||
groupsMap, ers := m.LoadGroups(tg.Interval, ruleFiles...)
|
||||
if ers != nil {
|
||||
return ers
|
||||
}
|
||||
groups := orderedGroups(groupsMap, groupOrderMap)
|
||||
|
||||
// Pre-processing some data for testing alerts.
|
||||
// All this preparation is so that we can test alerts as we evaluate the rules.
|
||||
// This avoids storing them in memory, as the number of evals might be high.
|
||||
|
||||
// All the `eval_time` for which we have unit tests.
|
||||
var alertEvalTimes []time.Duration
|
||||
// Map of all the eval_time+alertname combination present in the unit tests.
|
||||
alertsInTest := make(map[time.Duration]map[string]struct{})
|
||||
// Map of all the unit tests for given eval_time.
|
||||
alertTests := make(map[time.Duration][]alertTestCase)
|
||||
for _, alert := range tg.AlertRuleTests {
|
||||
alertEvalTimes = append(alertEvalTimes, alert.EvalTime)
|
||||
|
||||
if _, ok := alertsInTest[alert.EvalTime]; !ok {
|
||||
alertsInTest[alert.EvalTime] = make(map[string]struct{})
|
||||
}
|
||||
alertsInTest[alert.EvalTime][alert.Alertname] = struct{}{}
|
||||
|
||||
alertTests[alert.EvalTime] = append(alertTests[alert.EvalTime], alert)
|
||||
}
|
||||
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||
})
|
||||
|
||||
// Current index in alertEvalTimes what we are looking at.
|
||||
curr := 0
|
||||
|
||||
var errs []error
|
||||
for ts := mint; ts.Before(maxt); ts = ts.Add(evalInterval) {
|
||||
// Collects the alerts asked for unit testing.
|
||||
for _, g := range groups {
|
||||
g.Eval(suite.Context(), ts)
|
||||
}
|
||||
|
||||
for {
|
||||
if !(curr < len(alertEvalTimes) && ts.Sub(mint) <= alertEvalTimes[curr] &&
|
||||
alertEvalTimes[curr] < ts.Add(evalInterval).Sub(mint)) {
|
||||
break
|
||||
}
|
||||
|
||||
// We need to check alerts for this time.
|
||||
// If 'ts <= `eval_time=alertEvalTimes[curr]` < ts+evalInterval'
|
||||
// then we compare alerts with the Eval at `ts`.
|
||||
t := alertEvalTimes[curr]
|
||||
|
||||
presentAlerts := alertsInTest[t]
|
||||
got := make(map[string]labelsAndAnnotations)
|
||||
|
||||
// Same Alert name can be present in multiple groups.
|
||||
// Hence we collect them all to check against expected alerts.
|
||||
for _, g := range groups {
|
||||
grules := g.Rules()
|
||||
for _, r := range grules {
|
||||
ar, ok := r.(*rules.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := presentAlerts[ar.Name()]; !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
var alerts labelsAndAnnotations
|
||||
for _, a := range ar.ActiveAlerts() {
|
||||
if a.State == rules.StateFiring {
|
||||
alerts = append(alerts, labelAndAnnotation{
|
||||
Labels: append(labels.Labels{}, a.Labels...),
|
||||
Annotations: append(labels.Labels{}, a.Annotations...),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
got[ar.Name()] = append(got[ar.Name()], alerts...)
|
||||
}
|
||||
}
|
||||
|
||||
for _, testcase := range alertTests[t] {
|
||||
// Checking alerts.
|
||||
gotAlerts := got[testcase.Alertname]
|
||||
|
||||
var expAlerts labelsAndAnnotations
|
||||
for _, a := range testcase.ExpAlerts {
|
||||
// User gives only the labels from alerting rule, which doesn't
|
||||
// include this label (added by Prometheus during Eval).
|
||||
a.ExpLabels[labels.AlertName] = testcase.Alertname
|
||||
|
||||
expAlerts = append(expAlerts, labelAndAnnotation{
|
||||
Labels: labels.FromMap(a.ExpLabels),
|
||||
Annotations: labels.FromMap(a.ExpAnnotations),
|
||||
})
|
||||
}
|
||||
|
||||
if gotAlerts.Len() != expAlerts.Len() {
|
||||
errs = append(errs, fmt.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v",
|
||||
testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String()))
|
||||
} else {
|
||||
sort.Sort(gotAlerts)
|
||||
sort.Sort(expAlerts)
|
||||
|
||||
if !reflect.DeepEqual(expAlerts, gotAlerts) {
|
||||
errs = append(errs, fmt.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v",
|
||||
testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
curr++
|
||||
}
|
||||
}
|
||||
|
||||
// Checking promql expressions.
|
||||
Outer:
|
||||
for _, testCase := range tg.PromqlExprTests {
|
||||
got, err := query(suite.Context(), testCase.Expr, mint.Add(testCase.EvalTime),
|
||||
suite.QueryEngine(), suite.Queryable())
|
||||
if err != nil {
|
||||
errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr,
|
||||
testCase.EvalTime.String(), err.Error()))
|
||||
continue
|
||||
}
|
||||
|
||||
var gotSamples []parsedSample
|
||||
for _, s := range got {
|
||||
gotSamples = append(gotSamples, parsedSample{
|
||||
Labels: s.Metric.Copy(),
|
||||
Value: s.V,
|
||||
})
|
||||
}
|
||||
|
||||
var expSamples []parsedSample
|
||||
for _, s := range testCase.ExpSamples {
|
||||
lb, err := promql.ParseMetric(s.Labels)
|
||||
if err != nil {
|
||||
errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr,
|
||||
testCase.EvalTime.String(), err.Error()))
|
||||
continue Outer
|
||||
}
|
||||
expSamples = append(expSamples, parsedSample{
|
||||
Labels: lb,
|
||||
Value: s.Value,
|
||||
})
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expSamples, gotSamples) {
|
||||
errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, \n exp:%#v, \n got:%#v", testCase.Expr,
|
||||
testCase.EvalTime.String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errs
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// seriesLoadingString returns the input series in PromQL notation.
|
||||
func (tg *testGroup) seriesLoadingString() string {
|
||||
result := ""
|
||||
result += "load " + shortDuration(tg.Interval) + "\n"
|
||||
for _, is := range tg.InputSeries {
|
||||
result += " " + is.Series + " " + is.Values + "\n"
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// shortDuration formats d like time.Duration.String, but drops a trailing
// zero-seconds component and afterwards a trailing zero-minutes component,
// e.g. "5m0s" becomes "5m" and "1h0m0s" becomes "1h".
func shortDuration(d time.Duration) string {
	text := d.String()
	// Order matters: stripping "0s" from "1h0m0s" exposes the "h0m" suffix.
	for _, suffix := range []string{"m0s", "h0m"} {
		if strings.HasSuffix(text, suffix) {
			text = text[:len(text)-2]
		}
	}
	return text
}
|
||||
|
||||
// orderedGroups returns a slice of `*rules.Group` from `groupsMap` which follows the order
|
||||
// mentioned by `groupOrderMap`. NOTE: This is partial ordering.
|
||||
func orderedGroups(groupsMap map[string]*rules.Group, groupOrderMap map[string]int) []*rules.Group {
|
||||
groups := make([]*rules.Group, 0, len(groupsMap))
|
||||
for _, g := range groupsMap {
|
||||
groups = append(groups, g)
|
||||
}
|
||||
sort.Slice(groups, func(i, j int) bool {
|
||||
return groupOrderMap[groups[i].Name()] < groupOrderMap[groups[j].Name()]
|
||||
})
|
||||
return groups
|
||||
}
|
||||
|
||||
// maxEvalTime returns the max eval time among all alert and promql unit tests.
|
||||
func (tg *testGroup) maxEvalTime() time.Duration {
|
||||
var maxd time.Duration
|
||||
for _, alert := range tg.AlertRuleTests {
|
||||
if alert.EvalTime > maxd {
|
||||
maxd = alert.EvalTime
|
||||
}
|
||||
}
|
||||
for _, pet := range tg.PromqlExprTests {
|
||||
if pet.EvalTime > maxd {
|
||||
maxd = pet.EvalTime
|
||||
}
|
||||
}
|
||||
return maxd
|
||||
}
|
||||
|
||||
func query(ctx context.Context, qs string, t time.Time, engine *promql.Engine, qu storage.Queryable) (promql.Vector, error) {
|
||||
q, err := engine.NewInstantQuery(qu, qs, t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res := q.Exec(ctx)
|
||||
if res.Err != nil {
|
||||
return nil, res.Err
|
||||
}
|
||||
switch v := res.Value.(type) {
|
||||
case promql.Vector:
|
||||
return v, nil
|
||||
case promql.Scalar:
|
||||
return promql.Vector{promql.Sample{
|
||||
Point: promql.Point(v),
|
||||
Metric: labels.Labels{},
|
||||
}}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("rule result is not a vector or scalar")
|
||||
}
|
||||
}
|
||||
|
||||
type labelsAndAnnotations []labelAndAnnotation
|
||||
|
||||
func (la labelsAndAnnotations) Len() int { return len(la) }
|
||||
func (la labelsAndAnnotations) Swap(i, j int) { la[i], la[j] = la[j], la[i] }
|
||||
func (la labelsAndAnnotations) Less(i, j int) bool {
|
||||
diff := labels.Compare(la[i].Labels, la[j].Labels)
|
||||
if diff != 0 {
|
||||
return diff < 0
|
||||
}
|
||||
return labels.Compare(la[i].Annotations, la[j].Annotations) < 0
|
||||
}
|
||||
|
||||
func (la labelsAndAnnotations) String() string {
|
||||
if len(la) == 0 {
|
||||
return "[]"
|
||||
}
|
||||
s := "[" + la[0].String()
|
||||
for _, l := range la[1:] {
|
||||
s += ", " + l.String()
|
||||
}
|
||||
s += "]"
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
type labelAndAnnotation struct {
|
||||
Labels labels.Labels
|
||||
Annotations labels.Labels
|
||||
}
|
||||
|
||||
func (la *labelAndAnnotation) String() string {
|
||||
return "Labels:" + la.Labels.String() + " Annotations:" + la.Annotations.String()
|
||||
}
|
||||
|
||||
// series is one input series of a test group. Both fields are inserted
// verbatim into the "load" statement built by seriesLoadingString.
type series struct {
	// Series is the series selector text, i.e. metric name and labels.
	Series string `yaml:"series"`
	// Values is the text describing the sample values of the series.
	Values string `yaml:"values"`
}
|
||||
|
||||
// alertTestCase describes the alerts expected to be firing for one alert name
// at one point in time.
type alertTestCase struct {
	// EvalTime is the offset from the start of the test at which the alerts are checked.
	EvalTime time.Duration `yaml:"eval_time"`
	// Alertname is the name of the alerting rule(s) under test.
	Alertname string `yaml:"alertname"`
	// ExpAlerts are the alerts expected to be firing; empty means none.
	ExpAlerts []alert `yaml:"exp_alerts"`
}
|
||||
|
||||
// alert is a single expected alert of an alertTestCase.
type alert struct {
	// ExpLabels are the expected labels; the alert name label is added by the
	// test code and is not listed here.
	ExpLabels map[string]string `yaml:"exp_labels"`
	// ExpAnnotations are the expected annotations.
	ExpAnnotations map[string]string `yaml:"exp_annotations"`
}
|
||||
|
||||
// promqlTestCase describes the samples a PromQL expression is expected to
// return at one point in time.
type promqlTestCase struct {
	// Expr is the PromQL expression to evaluate.
	Expr string `yaml:"expr"`
	// EvalTime is the offset from the start of the test at which Expr is evaluated.
	EvalTime time.Duration `yaml:"eval_time"`
	// ExpSamples are the samples the evaluation is expected to return.
	ExpSamples []sample `yaml:"exp_samples"`
}
|
||||
|
||||
// sample is an expected sample as written in the test file.
type sample struct {
	// Labels is the series notation of the sample; it is parsed with
	// promql.ParseMetric before the comparison.
	Labels string `yaml:"labels"`
	// Value is the expected sample value.
	Value float64 `yaml:"value"`
}
|
||||
|
||||
// parsedSample is a sample with parsed Labels.
type parsedSample struct {
	// Labels is the label set of the sample.
	Labels labels.Labels
	// Value is the sample value.
	Value float64
}
|
||||
|
||||
func parsedSamplesString(pss []parsedSample) string {
|
||||
if len(pss) == 0 {
|
||||
return "nil"
|
||||
}
|
||||
s := pss[0].String()
|
||||
for _, ps := range pss[0:] {
|
||||
s += ", " + ps.String()
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (ps *parsedSample) String() string {
|
||||
return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
|
||||
}
|
||||
|
||||
// dummyLogger is a logger that discards everything it is given.
type dummyLogger struct{}

// Log ignores its arguments and always returns nil.
func (*dummyLogger) Log(...interface{}) error {
	return nil
}
|
217
docs/configuration/unit_testing_rules.md
Normal file
217
docs/configuration/unit_testing_rules.md
Normal file
|
@ -0,0 +1,217 @@
|
|||
---
|
||||
title: Unit Testing for Rules
|
||||
sort_rank: 6
|
||||
---
|
||||
|
||||
# Unit Testing for Rules
|
||||
|
||||
You can use `promtool` to test your rules.
|
||||
|
||||
```shell
|
||||
# For a single test file.
|
||||
./promtool test rules test.yml
|
||||
|
||||
# If you have multiple test files, say test1.yml, test2.yml, test3.yml
|
||||
./promtool test rules test1.yml test2.yml test3.yml
|
||||
```
|
||||
|
||||
## Test file format
|
||||
|
||||
```yaml
|
||||
# This is a list of rule files to consider for testing.
|
||||
rule_files:
|
||||
[ - <file_name> ]
|
||||
|
||||
# optional, default = 1m
|
||||
evaluation_interval: <duration>
|
||||
|
||||
# The order in which group names are listed below will be the order of evaluation of
|
||||
# rule groups (at a given evaluation time). The order is guaranteed only for the groups mentioned below.
|
||||
# All the groups need not be mentioned below.
|
||||
group_eval_order:
|
||||
[ - <group_name> ]
|
||||
|
||||
# All the tests are listed here.
|
||||
tests:
|
||||
[ - <test_group> ]
|
||||
```
|
||||
|
||||
### `<test_group>`
|
||||
|
||||
``` yaml
|
||||
# Series data
|
||||
interval: <duration>
|
||||
input_series:
|
||||
[ - <series> ]
|
||||
|
||||
# Unit tests for the above data.
|
||||
|
||||
# Unit tests for alerting rules. We consider the alerting rules from the input file.
|
||||
alert_rule_test:
|
||||
[ - <alert_test_case> ]
|
||||
|
||||
# Unit tests for PromQL expressions.
|
||||
promql_expr_test:
|
||||
[ - <promql_test_case> ]
|
||||
```
|
||||
|
||||
### `<series>`
|
||||
|
||||
```yaml
|
||||
# This follows the usual series notation '<metric name>{<label name>=<label value>, ...}'
|
||||
# Examples:
|
||||
# series_name{label1="value1", label2="value2"}
|
||||
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||
series: <string>
|
||||
|
||||
# This uses expanding notation.
|
||||
# Expanding notation:
|
||||
# 'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
|
||||
# 'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
|
||||
# Examples:
|
||||
# 1. '-2+4x3' becomes '-2 2 6 10'
|
||||
# 2. ' 1-2x4' becomes '1 -1 -3 -5 -7'
|
||||
values: <string>
|
||||
```
|
||||
|
||||
### `<alert_test_case>`
|
||||
|
||||
Prometheus allows you to have the same alertname for different alerting rules. Hence, in this unit testing, you have to list the union of all the firing alerts for the alertname under a single `<alert_test_case>`.
|
||||
|
||||
``` yaml
|
||||
# It's the time elapsed from time=0s when the alerts have to be checked.
|
||||
eval_time: <duration>
|
||||
|
||||
# Name of the alert to be tested.
|
||||
alertname: <string>
|
||||
|
||||
# List of expected alerts which are firing under the given alertname at
|
||||
# given evaluation time. If you want to test if an alerting rule should
|
||||
# not be firing, then you can mention the above fields and leave 'exp_alerts' empty.
|
||||
exp_alerts:
|
||||
[ - <alert> ]
|
||||
```
|
||||
|
||||
### `<alert>`
|
||||
|
||||
``` yaml
|
||||
# These are the expanded labels and annotations of the expected alert.
|
||||
# Note: labels also include the labels of the sample associated with the
|
||||
# alert (same as what you see in `/alerts`, without series `__name__` and `alertname`)
|
||||
exp_labels:
|
||||
[ <labelname>: <string> ]
|
||||
exp_annotations:
|
||||
[ <labelname>: <string> ]
|
||||
```
|
||||
|
||||
### `<promql_test_case>`
|
||||
|
||||
```yaml
|
||||
# Expression to evaluate
|
||||
expr: <string>
|
||||
|
||||
# It's the time elapsed from time=0s when the expression has to be evaluated.
|
||||
eval_time: <duration>
|
||||
|
||||
# Expected samples at the given evaluation time.
|
||||
exp_samples:
|
||||
[ - <sample> ]
|
||||
```
|
||||
|
||||
### `<sample>`
|
||||
|
||||
```yaml
|
||||
# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
|
||||
# Examples:
|
||||
# series_name{label1="value1", label2="value2"}
|
||||
# go_goroutines{job="prometheus", instance="localhost:9090"}
|
||||
labels: <string>
|
||||
|
||||
# The expected value of the promql expression.
|
||||
value: <number>
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
These are example input files for unit testing which pass the test. `alerts.yml` contains the alerting rules, `test.yml` is the test file which follows the syntax above.
|
||||
|
||||
### `alerts.yml`
|
||||
|
||||
```yaml
|
||||
# This is the rules file.
|
||||
|
||||
groups:
|
||||
- name: example
|
||||
rules:
|
||||
|
||||
- alert: InstanceDown
|
||||
expr: up == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Instance {{ $labels.instance }} down"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||
|
||||
- alert: AnotherInstanceDown
|
||||
expr: up == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Instance {{ $labels.instance }} down"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
|
||||
```
|
||||
|
||||
### `test.yml`
|
||||
|
||||
```yaml
|
||||
# This is the main input for unit testing.
|
||||
# Only this file is passed as command line argument.
|
||||
|
||||
rule_files:
|
||||
- alerts.yml
|
||||
|
||||
evaluation_interval: 1m
|
||||
|
||||
tests:
|
||||
# Test 1.
|
||||
- interval: 1m
|
||||
# Series data.
|
||||
input_series:
|
||||
- series: 'up{job="prometheus", instance="localhost:9090"}'
|
||||
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
|
||||
- series: 'up{job="node_exporter", instance="localhost:9100"}'
|
||||
values: '1+0x6 0 0 0 0 0 0 0 0' # 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0
|
||||
- series: 'go_goroutines{job="prometheus", instance="localhost:9090"}'
|
||||
values: '10+10x2 30+20x5' # 10 20 30 30 50 70 90 110 130
|
||||
- series: 'go_goroutines{job="node_exporter", instance="localhost:9100"}'
|
||||
values: '10+10x7 10+30x4' # 10 20 30 40 50 60 70 80 10 40 70 100 130
|
||||
|
||||
# Unit test for alerting rules.
|
||||
alert_rule_test:
|
||||
# Unit test 1.
|
||||
- eval_time: 10m
|
||||
alertname: InstanceDown
|
||||
exp_alerts:
|
||||
# Alert 1.
|
||||
- exp_labels:
|
||||
severity: page
|
||||
instance: localhost:9090
|
||||
job: prometheus
|
||||
exp_annotations:
|
||||
summary: "Instance localhost:9090 down"
|
||||
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
|
||||
# Unit tests for promql expressions.
|
||||
promql_expr_test:
|
||||
# Unit test 1.
|
||||
- expr: go_goroutines > 5
|
||||
eval_time: 4m
|
||||
exp_samples:
|
||||
# Sample 1.
|
||||
- labels: 'go_goroutines{job="prometheus",instance="localhost:9090"}'
|
||||
value: 50
|
||||
# Sample 2.
|
||||
- labels: 'go_goroutines{job="node_exporter",instance="localhost:9100"}'
|
||||
value: 50
|
||||
```
|
|
@ -650,7 +650,7 @@ func (m *Manager) Update(interval time.Duration, files []string) error {
|
|||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
|
||||
groups, errs := m.loadGroups(interval, files...)
|
||||
groups, errs := m.LoadGroups(interval, files...)
|
||||
if errs != nil {
|
||||
for _, e := range errs {
|
||||
level.Error(m.logger).Log("msg", "loading groups failed", "err", e)
|
||||
|
@ -697,8 +697,8 @@ func (m *Manager) Update(interval time.Duration, files []string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// loadGroups reads groups from a list of files.
|
||||
func (m *Manager) loadGroups(interval time.Duration, filenames ...string) (map[string]*Group, []error) {
|
||||
// LoadGroups reads groups from a list of files.
|
||||
func (m *Manager) LoadGroups(interval time.Duration, filenames ...string) (map[string]*Group, []error) {
|
||||
groups := make(map[string]*Group)
|
||||
|
||||
shouldRestore := !m.restored
|
||||
|
|
Loading…
Reference in a new issue