// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package rules
import (
2017-05-18 09:47:00 -07:00
"context"
2019-12-19 02:41:11 -08:00
"fmt"
"io/ioutil"
2017-05-18 09:47:00 -07:00
"math"
2019-12-19 02:41:11 -08:00
"os"
2017-11-23 04:04:54 -08:00
"sort"
2013-01-07 14:24:26 -08:00
"testing"
2013-03-21 10:06:15 -07:00
"time"
2013-06-25 05:02:27 -07:00
2017-08-11 11:45:52 -07:00
"github.com/go-kit/kit/log"
2015-08-20 08:18:46 -07:00
"github.com/prometheus/common/model"
2019-12-19 02:41:11 -08:00
yaml "gopkg.in/yaml.v2"
2013-06-25 05:02:27 -07:00
2016-12-29 08:31:14 -08:00
"github.com/prometheus/prometheus/pkg/labels"
2019-12-19 02:41:11 -08:00
"github.com/prometheus/prometheus/pkg/rulefmt"
2016-12-29 08:31:14 -08:00
"github.com/prometheus/prometheus/pkg/timestamp"
2017-05-18 09:47:00 -07:00
"github.com/prometheus/prometheus/pkg/value"
2015-03-30 10:43:19 -07:00
"github.com/prometheus/prometheus/promql"
2017-05-18 09:47:00 -07:00
"github.com/prometheus/prometheus/storage"
2019-08-08 18:35:39 -07:00
"github.com/prometheus/prometheus/util/teststorage"
2017-05-18 09:47:00 -07:00
"github.com/prometheus/prometheus/util/testutil"
2013-01-07 14:24:26 -08:00
)
2015-06-30 02:51:05 -07:00
func TestAlertingRule ( t * testing . T ) {
suite , err := promql . NewTest ( t , `
load 5 m
2016-07-12 09:11:31 -07:00
http_requests { job = "app-server" , instance = "0" , group = "canary" , severity = "overwrite-me" } 75 85 95 105 105 95 85
http_requests { job = "app-server" , instance = "1" , group = "canary" , severity = "overwrite-me" } 80 90 100 110 120 130 140
2015-06-30 02:51:05 -07:00
` )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2015-06-30 02:51:05 -07:00
defer suite . Close ( )
2015-03-30 10:43:19 -07:00
2017-11-11 02:29:47 -08:00
err = suite . Run ( )
testutil . Ok ( t , err )
2013-01-07 14:24:26 -08:00
2015-06-30 02:51:05 -07:00
expr , err := promql . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2013-04-22 15:26:59 -07:00
2015-06-30 02:51:05 -07:00
rule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
time . Minute ,
2016-12-29 08:31:14 -08:00
labels . FromStrings ( "severity" , "{{\"c\"}}ritical" ) ,
2019-04-15 09:52:58 -07:00
nil , nil , true , nil ,
2015-06-30 02:51:05 -07:00
)
2017-11-23 04:04:54 -08:00
result := promql . Vector {
{
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "pending" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
{
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "pending" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
{
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "firing" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
{
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "firing" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
}
2015-06-30 02:51:05 -07:00
2016-12-29 08:31:14 -08:00
baseTime := time . Unix ( 0 , 0 )
2015-06-30 02:51:05 -07:00
var tests = [ ] struct {
time time . Duration
2017-11-23 04:04:54 -08:00
result promql . Vector
2015-06-30 02:51:05 -07:00
} {
{
2017-11-23 04:04:54 -08:00
time : 0 ,
result : result [ : 2 ] ,
2015-06-30 02:51:05 -07:00
} , {
2017-11-23 04:04:54 -08:00
time : 5 * time . Minute ,
result : result [ 2 : ] ,
2015-06-30 02:51:05 -07:00
} , {
2017-11-23 04:04:54 -08:00
time : 10 * time . Minute ,
result : result [ 2 : 3 ] ,
2015-03-30 10:43:19 -07:00
} ,
2013-04-24 02:51:40 -07:00
{
2017-05-19 09:02:25 -07:00
time : 15 * time . Minute ,
2017-11-23 04:04:54 -08:00
result : nil ,
2013-04-24 02:51:40 -07:00
} ,
{
2015-06-30 02:51:05 -07:00
time : 20 * time . Minute ,
2017-11-23 04:04:54 -08:00
result : nil ,
2016-02-04 20:42:55 -08:00
} ,
{
2017-11-23 04:04:54 -08:00
time : 25 * time . Minute ,
result : result [ : 1 ] ,
2016-02-04 20:42:55 -08:00
} ,
{
2017-11-23 04:04:54 -08:00
time : 30 * time . Minute ,
result : result [ 2 : 3 ] ,
2013-04-24 02:51:40 -07:00
} ,
}
2015-03-30 10:43:19 -07:00
2015-06-30 02:51:05 -07:00
for i , test := range tests {
2017-11-23 04:04:54 -08:00
t . Logf ( "case %d" , i )
2016-12-29 08:31:14 -08:00
evalTime := baseTime . Add ( test . time )
2015-03-30 10:43:19 -07:00
2018-01-09 08:44:23 -08:00
res , err := rule . Eval ( suite . Context ( ) , evalTime , EngineQueryFunc ( suite . QueryEngine ( ) , suite . Storage ( ) ) , nil )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2015-03-30 10:43:19 -07:00
2018-08-02 03:18:24 -07:00
var filteredRes promql . Vector // After removing 'ALERTS_FOR_STATE' samples.
for _ , smpl := range res {
smplName := smpl . Metric . Get ( "__name__" )
if smplName == "ALERTS" {
filteredRes = append ( filteredRes , smpl )
} else {
// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'.
testutil . Equals ( t , smplName , "ALERTS_FOR_STATE" )
}
}
2017-11-23 04:04:54 -08:00
for i := range test . result {
test . result [ i ] . T = timestamp . FromTime ( evalTime )
2013-04-24 02:51:40 -07:00
}
2018-08-02 03:18:24 -07:00
testutil . Assert ( t , len ( test . result ) == len ( filteredRes ) , "%d. Number of samples in expected and actual output don't match (%d vs. %d)" , i , len ( test . result ) , len ( res ) )
2017-11-23 04:04:54 -08:00
2018-08-02 03:18:24 -07:00
sort . Slice ( filteredRes , func ( i , j int ) bool {
return labels . Compare ( filteredRes [ i ] . Metric , filteredRes [ j ] . Metric ) < 0
2017-11-23 04:04:54 -08:00
} )
2018-08-02 03:18:24 -07:00
testutil . Equals ( t , test . result , filteredRes )
2016-08-01 15:32:01 -07:00
for _ , aa := range rule . ActiveAlerts ( ) {
2017-11-11 02:29:47 -08:00
testutil . Assert ( t , aa . Labels . Get ( model . MetricNameLabel ) == "" , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
2016-08-01 15:32:01 -07:00
}
2013-04-24 02:51:40 -07:00
}
}
2015-06-30 02:51:05 -07:00
2018-08-02 03:18:24 -07:00
func TestForStateAddSamples ( t * testing . T ) {
suite , err := promql . NewTest ( t , `
load 5 m
http_requests { job = "app-server" , instance = "0" , group = "canary" , severity = "overwrite-me" } 75 85 95 105 105 95 85
http_requests { job = "app-server" , instance = "1" , group = "canary" , severity = "overwrite-me" } 80 90 100 110 120 130 140
` )
testutil . Ok ( t , err )
defer suite . Close ( )
err = suite . Run ( )
testutil . Ok ( t , err )
expr , err := promql . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
testutil . Ok ( t , err )
rule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
time . Minute ,
labels . FromStrings ( "severity" , "{{\"c\"}}ritical" ) ,
2019-04-15 09:52:58 -07:00
nil , nil , true , nil ,
2018-08-02 03:18:24 -07:00
)
result := promql . Vector {
{
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
{
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
{
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
{
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
Point : promql . Point { V : 1 } ,
} ,
}
baseTime := time . Unix ( 0 , 0 )
var tests = [ ] struct {
time time . Duration
result promql . Vector
persistThisTime bool // If true, it means this 'time' is persisted for 'for'.
} {
{
time : 0 ,
result : append ( promql . Vector { } , result [ : 2 ] ... ) ,
persistThisTime : true ,
} ,
{
time : 5 * time . Minute ,
result : append ( promql . Vector { } , result [ 2 : ] ... ) ,
} ,
{
time : 10 * time . Minute ,
result : append ( promql . Vector { } , result [ 2 : 3 ] ... ) ,
} ,
{
time : 15 * time . Minute ,
result : nil ,
} ,
{
time : 20 * time . Minute ,
result : nil ,
} ,
{
time : 25 * time . Minute ,
result : append ( promql . Vector { } , result [ : 1 ] ... ) ,
persistThisTime : true ,
} ,
{
time : 30 * time . Minute ,
result : append ( promql . Vector { } , result [ 2 : 3 ] ... ) ,
} ,
}
var forState float64
for i , test := range tests {
t . Logf ( "case %d" , i )
evalTime := baseTime . Add ( test . time )
if test . persistThisTime {
forState = float64 ( evalTime . Unix ( ) )
}
if test . result == nil {
forState = float64 ( value . StaleNaN )
}
res , err := rule . Eval ( suite . Context ( ) , evalTime , EngineQueryFunc ( suite . QueryEngine ( ) , suite . Storage ( ) ) , nil )
testutil . Ok ( t , err )
var filteredRes promql . Vector // After removing 'ALERTS' samples.
for _ , smpl := range res {
smplName := smpl . Metric . Get ( "__name__" )
if smplName == "ALERTS_FOR_STATE" {
filteredRes = append ( filteredRes , smpl )
} else {
// If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'.
testutil . Equals ( t , smplName , "ALERTS" )
}
}
for i := range test . result {
test . result [ i ] . T = timestamp . FromTime ( evalTime )
// Updating the expected 'for' state.
if test . result [ i ] . V >= 0 {
test . result [ i ] . V = forState
}
}
testutil . Assert ( t , len ( test . result ) == len ( filteredRes ) , "%d. Number of samples in expected and actual output don't match (%d vs. %d)" , i , len ( test . result ) , len ( res ) )
sort . Slice ( filteredRes , func ( i , j int ) bool {
return labels . Compare ( filteredRes [ i ] . Metric , filteredRes [ j ] . Metric ) < 0
} )
testutil . Equals ( t , test . result , filteredRes )
for _ , aa := range rule . ActiveAlerts ( ) {
testutil . Assert ( t , aa . Labels . Get ( model . MetricNameLabel ) == "" , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
}
}
}
2018-08-16 10:26:15 -07:00
// sortAlerts sorts `[]*Alert` in place w.r.t. the Labels.
//
// The comparator must be a strict "less than": sort.Slice requires that
// less(i, j) is false for elements that compare equal.
func sortAlerts(items []*Alert) {
	sort.Slice(items, func(i, j int) bool {
		return labels.Compare(items[i].Labels, items[j].Labels) < 0
	})
}
2018-08-02 03:18:24 -07:00
2018-08-16 10:26:15 -07:00
func TestForStateRestore ( t * testing . T ) {
2018-08-02 03:18:24 -07:00
suite , err := promql . NewTest ( t , `
load 5 m
http_requests { job = "app-server" , instance = "0" , group = "canary" , severity = "overwrite-me" } 75 85 50 0 0 25 0 0 40 0 120
http_requests { job = "app-server" , instance = "1" , group = "canary" , severity = "overwrite-me" } 125 90 60 0 0 25 0 0 40 0 130
` )
testutil . Ok ( t , err )
defer suite . Close ( )
err = suite . Run ( )
testutil . Ok ( t , err )
expr , err := promql . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
testutil . Ok ( t , err )
opts := & ManagerOptions {
2018-08-04 12:31:12 -07:00
QueryFunc : EngineQueryFunc ( suite . QueryEngine ( ) , suite . Storage ( ) ) ,
Appendable : suite . Storage ( ) ,
TSDB : suite . Storage ( ) ,
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
NotifyFunc : func ( ctx context . Context , expr string , alerts ... * Alert ) { } ,
2018-08-02 03:18:24 -07:00
OutageTolerance : 30 * time . Minute ,
ForGracePeriod : 10 * time . Minute ,
}
alertForDuration := 25 * time . Minute
// Initial run before prometheus goes down.
rule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
alertForDuration ,
labels . FromStrings ( "severity" , "critical" ) ,
2019-04-15 09:52:58 -07:00
nil , nil , true , nil ,
2018-08-02 03:18:24 -07:00
)
group := NewGroup ( "default" , "" , time . Second , [ ] Rule { rule } , true , opts )
groups := make ( map [ string ] * Group )
groups [ "default;" ] = group
initialRuns := [ ] time . Duration { 0 , 5 * time . Minute }
baseTime := time . Unix ( 0 , 0 )
for _ , duration := range initialRuns {
evalTime := baseTime . Add ( duration )
group . Eval ( suite . Context ( ) , evalTime )
}
exp := rule . ActiveAlerts ( )
for _ , aa := range exp {
testutil . Assert ( t , aa . Labels . Get ( model . MetricNameLabel ) == "" , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
}
sort . Slice ( exp , func ( i , j int ) bool {
return labels . Compare ( exp [ i ] . Labels , exp [ j ] . Labels ) < 0
} )
// Prometheus goes down here. We create new rules and groups.
type testInput struct {
restoreDuration time . Duration
alerts [ ] * Alert
num int
noRestore bool
gracePeriod bool
downDuration time . Duration
}
tests := [ ] testInput {
{
// Normal restore (alerts were not firing).
2018-08-16 10:26:15 -07:00
restoreDuration : 15 * time . Minute ,
2018-08-02 03:18:24 -07:00
alerts : rule . ActiveAlerts ( ) ,
2018-08-16 10:26:15 -07:00
downDuration : 10 * time . Minute ,
2018-08-02 03:18:24 -07:00
} ,
{
// Testing Outage Tolerance.
restoreDuration : 40 * time . Minute ,
noRestore : true ,
num : 2 ,
} ,
{
// No active alerts.
restoreDuration : 50 * time . Minute ,
alerts : [ ] * Alert { } ,
} ,
}
testFunc := func ( tst testInput ) {
newRule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
alertForDuration ,
labels . FromStrings ( "severity" , "critical" ) ,
2019-04-15 09:52:58 -07:00
nil , nil , false , nil ,
2018-08-02 03:18:24 -07:00
)
newGroup := NewGroup ( "default" , "" , time . Second , [ ] Rule { newRule } , true , opts )
newGroups := make ( map [ string ] * Group )
newGroups [ "default;" ] = newGroup
restoreTime := baseTime . Add ( tst . restoreDuration )
// First eval before restoration.
newGroup . Eval ( suite . Context ( ) , restoreTime )
// Restore happens here.
newGroup . RestoreForState ( restoreTime )
got := newRule . ActiveAlerts ( )
for _ , aa := range got {
testutil . Assert ( t , aa . Labels . Get ( model . MetricNameLabel ) == "" , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
}
sort . Slice ( got , func ( i , j int ) bool {
return labels . Compare ( got [ i ] . Labels , got [ j ] . Labels ) < 0
} )
// Checking if we have restored it correctly.
if tst . noRestore {
testutil . Equals ( t , tst . num , len ( got ) )
for _ , e := range got {
testutil . Equals ( t , e . ActiveAt , restoreTime )
}
} else if tst . gracePeriod {
testutil . Equals ( t , tst . num , len ( got ) )
for _ , e := range got {
testutil . Equals ( t , opts . ForGracePeriod , e . ActiveAt . Add ( alertForDuration ) . Sub ( restoreTime ) )
}
} else {
exp := tst . alerts
testutil . Equals ( t , len ( exp ) , len ( got ) )
2018-08-16 10:26:15 -07:00
sortAlerts ( exp )
sortAlerts ( got )
2018-08-02 03:18:24 -07:00
for i , e := range exp {
testutil . Equals ( t , e . Labels , got [ i ] . Labels )
// Difference in time should be within 1e6 ns, i.e. 1ms
// (due to conversion between ns & ms, float64 & int64).
activeAtDiff := float64 ( e . ActiveAt . Unix ( ) + int64 ( tst . downDuration / time . Second ) - got [ i ] . ActiveAt . Unix ( ) )
testutil . Assert ( t , math . Abs ( activeAtDiff ) == 0 , "'for' state restored time is wrong" )
}
}
}
for _ , tst := range tests {
testFunc ( tst )
}
// Testing the grace period.
for _ , duration := range [ ] time . Duration { 10 * time . Minute , 15 * time . Minute , 20 * time . Minute } {
evalTime := baseTime . Add ( duration )
group . Eval ( suite . Context ( ) , evalTime )
}
testFunc ( testInput {
restoreDuration : 25 * time . Minute ,
alerts : [ ] * Alert { } ,
gracePeriod : true ,
num : 2 ,
} )
}
2017-05-18 09:47:00 -07:00
func TestStaleness ( t * testing . T ) {
2019-08-08 18:35:39 -07:00
storage := teststorage . New ( t )
2017-05-18 09:47:00 -07:00
defer storage . Close ( )
2018-10-02 04:59:19 -07:00
engineOpts := promql . EngineOpts {
Logger : nil ,
Reg : nil ,
MaxConcurrent : 10 ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
}
engine := promql . NewEngine ( engineOpts )
2017-05-18 09:47:00 -07:00
opts := & ManagerOptions {
2018-01-09 08:44:23 -08:00
QueryFunc : EngineQueryFunc ( engine , storage ) ,
2017-11-23 04:04:54 -08:00
Appendable : storage ,
2018-08-02 03:18:24 -07:00
TSDB : storage ,
2017-11-23 04:04:54 -08:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
2017-05-18 09:47:00 -07:00
}
expr , err := promql . ParseExpr ( "a + 1" )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2017-05-18 09:47:00 -07:00
rule := NewRecordingRule ( "a_plus_one" , expr , labels . Labels { } )
2018-08-02 03:18:24 -07:00
group := NewGroup ( "default" , "" , time . Second , [ ] Rule { rule } , true , opts )
2017-05-18 09:47:00 -07:00
// A time series that has two samples and then goes stale.
app , _ := storage . Appender ( )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 0 , 1 )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 1000 , 2 )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 2000 , math . Float64frombits ( value . StaleNaN ) )
2017-11-11 02:29:47 -08:00
err = app . Commit ( )
testutil . Ok ( t , err )
2017-05-18 09:47:00 -07:00
2017-11-23 23:59:05 -08:00
ctx := context . Background ( )
2017-05-18 09:47:00 -07:00
// Execute 3 times, 1 second apart.
2017-11-23 23:59:05 -08:00
group . Eval ( ctx , time . Unix ( 0 , 0 ) )
group . Eval ( ctx , time . Unix ( 1 , 0 ) )
group . Eval ( ctx , time . Unix ( 2 , 0 ) )
2017-05-18 09:47:00 -07:00
2017-10-04 12:04:15 -07:00
querier , err := storage . Querier ( context . Background ( ) , 0 , 2000 )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2017-10-09 09:03:33 -07:00
defer querier . Close ( )
2017-11-23 04:50:06 -08:00
matcher , err := labels . NewMatcher ( labels . MatchEqual , model . MetricNameLabel , "a_plus_one" )
testutil . Ok ( t , err )
2019-01-02 03:10:13 -08:00
set , _ , err := querier . Select ( nil , matcher )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2017-11-23 04:50:06 -08:00
samples , err := readSeriesSet ( set )
testutil . Ok ( t , err )
2017-05-18 09:47:00 -07:00
metric := labels . FromStrings ( model . MetricNameLabel , "a_plus_one" ) . String ( )
metricSample , ok := samples [ metric ]
2017-11-11 02:29:47 -08:00
testutil . Assert ( t , ok , "Series %s not returned." , metric )
testutil . Assert ( t , value . IsStaleNaN ( metricSample [ 2 ] . V ) , "Appended second sample not as expected. Wanted: stale NaN Got: %x" , math . Float64bits ( metricSample [ 2 ] . V ) )
2017-05-18 09:47:00 -07:00
metricSample [ 2 ] . V = 42 // reflect.DeepEqual cannot handle NaN.
want := map [ string ] [ ] promql . Point {
2019-05-03 06:11:28 -07:00
metric : { { T : 0 , V : 2 } , { T : 1000 , V : 3 } , { T : 2000 , V : 42 } } ,
2017-05-18 09:47:00 -07:00
}
2017-11-11 02:29:47 -08:00
testutil . Equals ( t , want , samples )
2017-05-18 09:47:00 -07:00
}
2018-11-27 08:44:29 -08:00
// Convert a SeriesSet into a form usable with reflect.DeepEqual.
2017-05-18 09:47:00 -07:00
func readSeriesSet ( ss storage . SeriesSet ) ( map [ string ] [ ] promql . Point , error ) {
result := map [ string ] [ ] promql . Point { }
for ss . Next ( ) {
series := ss . At ( )
points := [ ] promql . Point { }
it := series . Iterator ( )
for it . Next ( ) {
t , v := it . At ( )
points = append ( points , promql . Point { T : t , V : v } )
}
name := series . Labels ( ) . String ( )
result [ name ] = points
}
2017-05-19 08:43:59 -07:00
return result , ss . Err ( )
}
func TestCopyState ( t * testing . T ) {
oldGroup := & Group {
rules : [ ] Rule {
2019-04-15 09:52:58 -07:00
NewAlertingRule ( "alert" , nil , 0 , nil , nil , nil , true , nil ) ,
2017-05-19 08:43:59 -07:00
NewRecordingRule ( "rule1" , nil , nil ) ,
NewRecordingRule ( "rule2" , nil , nil ) ,
2019-03-15 08:23:36 -07:00
NewRecordingRule ( "rule3" , nil , labels . Labels { { Name : "l1" , Value : "v1" } } ) ,
NewRecordingRule ( "rule3" , nil , labels . Labels { { Name : "l1" , Value : "v2" } } ) ,
2019-08-07 08:11:05 -07:00
NewRecordingRule ( "rule3" , nil , labels . Labels { { Name : "l1" , Value : "v3" } } ) ,
2019-04-15 09:52:58 -07:00
NewAlertingRule ( "alert2" , nil , 0 , labels . Labels { { Name : "l2" , Value : "v1" } } , nil , nil , true , nil ) ,
2017-05-19 08:43:59 -07:00
} ,
seriesInPreviousEval : [ ] map [ string ] labels . Labels {
2019-08-07 08:11:05 -07:00
{ } ,
{ } ,
{ } ,
2019-03-15 08:23:36 -07:00
{ "r3a" : labels . Labels { { Name : "l1" , Value : "v1" } } } ,
{ "r3b" : labels . Labels { { Name : "l1" , Value : "v2" } } } ,
2019-08-07 08:11:05 -07:00
{ "r3c" : labels . Labels { { Name : "l1" , Value : "v3" } } } ,
2019-03-15 08:23:36 -07:00
{ "a2" : labels . Labels { { Name : "l2" , Value : "v1" } } } ,
2017-05-19 08:43:59 -07:00
} ,
2018-07-17 20:54:33 -07:00
evaluationDuration : time . Second ,
2017-05-19 08:43:59 -07:00
}
oldGroup . rules [ 0 ] . ( * AlertingRule ) . active [ 42 ] = nil
newGroup := & Group {
rules : [ ] Rule {
2019-03-15 08:23:36 -07:00
NewRecordingRule ( "rule3" , nil , labels . Labels { { Name : "l1" , Value : "v0" } } ) ,
NewRecordingRule ( "rule3" , nil , labels . Labels { { Name : "l1" , Value : "v1" } } ) ,
NewRecordingRule ( "rule3" , nil , labels . Labels { { Name : "l1" , Value : "v2" } } ) ,
2019-04-15 09:52:58 -07:00
NewAlertingRule ( "alert" , nil , 0 , nil , nil , nil , true , nil ) ,
2017-05-19 08:43:59 -07:00
NewRecordingRule ( "rule1" , nil , nil ) ,
2019-04-15 09:52:58 -07:00
NewAlertingRule ( "alert2" , nil , 0 , labels . Labels { { Name : "l2" , Value : "v0" } } , nil , nil , true , nil ) ,
NewAlertingRule ( "alert2" , nil , 0 , labels . Labels { { Name : "l2" , Value : "v1" } } , nil , nil , true , nil ) ,
2017-05-19 08:43:59 -07:00
NewRecordingRule ( "rule4" , nil , nil ) ,
} ,
2019-03-15 08:23:36 -07:00
seriesInPreviousEval : make ( [ ] map [ string ] labels . Labels , 8 ) ,
2017-05-19 08:43:59 -07:00
}
2018-07-18 06:14:38 -07:00
newGroup . CopyState ( oldGroup )
2017-05-19 08:43:59 -07:00
want := [ ] map [ string ] labels . Labels {
nil ,
2019-03-15 08:23:36 -07:00
{ "r3a" : labels . Labels { { Name : "l1" , Value : "v1" } } } ,
{ "r3b" : labels . Labels { { Name : "l1" , Value : "v2" } } } ,
2019-08-07 08:11:05 -07:00
{ } ,
{ } ,
2017-05-19 08:43:59 -07:00
nil ,
2019-03-15 08:23:36 -07:00
{ "a2" : labels . Labels { { Name : "l2" , Value : "v1" } } } ,
nil ,
2017-05-19 08:43:59 -07:00
}
2017-11-11 02:29:47 -08:00
testutil . Equals ( t , want , newGroup . seriesInPreviousEval )
testutil . Equals ( t , oldGroup . rules [ 0 ] , newGroup . rules [ 3 ] )
2018-07-17 20:54:33 -07:00
testutil . Equals ( t , oldGroup . evaluationDuration , newGroup . evaluationDuration )
2019-08-07 08:11:05 -07:00
testutil . Equals ( t , [ ] labels . Labels { labels . Labels { { Name : "l1" , Value : "v3" } } } , newGroup . staleSeries )
}
func TestDeletedRuleMarkedStale ( t * testing . T ) {
2019-08-08 18:35:39 -07:00
storage := teststorage . New ( t )
2019-08-07 08:11:05 -07:00
defer storage . Close ( )
oldGroup := & Group {
rules : [ ] Rule {
NewRecordingRule ( "rule1" , nil , labels . Labels { { Name : "l1" , Value : "v1" } } ) ,
} ,
seriesInPreviousEval : [ ] map [ string ] labels . Labels {
{ "r1" : labels . Labels { { Name : "l1" , Value : "v1" } } } ,
} ,
}
newGroup := & Group {
rules : [ ] Rule { } ,
seriesInPreviousEval : [ ] map [ string ] labels . Labels { } ,
opts : & ManagerOptions {
Appendable : storage ,
} ,
}
newGroup . CopyState ( oldGroup )
newGroup . Eval ( context . Background ( ) , time . Unix ( 0 , 0 ) )
querier , err := storage . Querier ( context . Background ( ) , 0 , 2000 )
testutil . Ok ( t , err )
defer querier . Close ( )
matcher , err := labels . NewMatcher ( labels . MatchEqual , "l1" , "v1" )
testutil . Ok ( t , err )
set , _ , err := querier . Select ( nil , matcher )
testutil . Ok ( t , err )
samples , err := readSeriesSet ( set )
testutil . Ok ( t , err )
metric := labels . FromStrings ( "l1" , "v1" ) . String ( )
metricSample , ok := samples [ metric ]
testutil . Assert ( t , ok , "Series %s not returned." , metric )
testutil . Assert ( t , value . IsStaleNaN ( metricSample [ 0 ] . V ) , "Appended sample not as expected. Wanted: stale NaN Got: %x" , math . Float64bits ( metricSample [ 0 ] . V ) )
2017-05-18 09:47:00 -07:00
}
2017-11-01 04:58:00 -07:00
2017-11-23 06:48:14 -08:00
func TestUpdate ( t * testing . T ) {
2018-06-22 07:21:04 -07:00
files := [ ] string { "fixtures/rules.yaml" }
2017-11-01 04:58:00 -07:00
expected := map [ string ] labels . Labels {
2017-11-23 06:48:14 -08:00
"test" : labels . FromStrings ( "name" , "value" ) ,
2017-11-01 04:58:00 -07:00
}
2019-08-08 18:35:39 -07:00
storage := teststorage . New ( t )
2018-08-20 05:51:05 -07:00
defer storage . Close ( )
2018-10-02 04:59:19 -07:00
opts := promql . EngineOpts {
Logger : nil ,
Reg : nil ,
MaxConcurrent : 10 ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
}
engine := promql . NewEngine ( opts )
2017-11-01 04:58:00 -07:00
ruleManager := NewManager ( & ManagerOptions {
2018-08-20 05:51:05 -07:00
Appendable : storage ,
TSDB : storage ,
QueryFunc : EngineQueryFunc ( engine , storage ) ,
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
2017-11-01 04:58:00 -07:00
} )
ruleManager . Run ( )
2018-08-20 05:51:05 -07:00
defer ruleManager . Stop ( )
2017-11-01 04:58:00 -07:00
2019-04-15 09:52:58 -07:00
err := ruleManager . Update ( 10 * time . Second , files , nil )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2018-06-22 07:21:04 -07:00
testutil . Assert ( t , len ( ruleManager . groups ) > 0 , "expected non-empty rule groups" )
2019-12-19 02:41:11 -08:00
ogs := map [ string ] * Group { }
for h , g := range ruleManager . groups {
2017-11-01 04:58:00 -07:00
g . seriesInPreviousEval = [ ] map [ string ] labels . Labels {
expected ,
}
2019-12-19 02:41:11 -08:00
ogs [ h ] = g
2017-11-01 04:58:00 -07:00
}
2019-04-15 09:52:58 -07:00
err = ruleManager . Update ( 10 * time . Second , files , nil )
2017-11-11 02:29:47 -08:00
testutil . Ok ( t , err )
2019-12-19 02:41:11 -08:00
for h , g := range ruleManager . groups {
2017-11-01 04:58:00 -07:00
for _ , actual := range g . seriesInPreviousEval {
2017-11-11 02:29:47 -08:00
testutil . Equals ( t , expected , actual )
2017-11-01 04:58:00 -07:00
}
2019-12-19 02:41:11 -08:00
// Groups are the same because of no updates.
testutil . Equals ( t , ogs [ h ] , g )
}
// Groups will be recreated if updated.
rgs , errs := rulefmt . ParseFile ( "fixtures/rules.yaml" )
testutil . Assert ( t , len ( errs ) == 0 , "file parsing failures" )
tmpFile , err := ioutil . TempFile ( "" , "rules.test.*.yaml" )
testutil . Ok ( t , err )
defer os . Remove ( tmpFile . Name ( ) )
defer tmpFile . Close ( )
err = ruleManager . Update ( 10 * time . Second , [ ] string { tmpFile . Name ( ) } , nil )
testutil . Ok ( t , err )
for h , g := range ruleManager . groups {
ogs [ h ] = g
}
2019-12-19 02:46:22 -08:00
// Update interval and reload.
2019-12-19 02:41:11 -08:00
for i , g := range rgs . Groups {
if g . Interval != 0 {
rgs . Groups [ i ] . Interval = g . Interval * 2
} else {
rgs . Groups [ i ] . Interval = model . Duration ( 10 )
}
}
reloadAndValidate ( rgs , t , tmpFile , ruleManager , expected , ogs )
2019-12-19 02:46:22 -08:00
// Change group rules and reload.
2019-12-19 02:41:11 -08:00
for i , g := range rgs . Groups {
for j , r := range g . Rules {
rgs . Groups [ i ] . Rules [ j ] . Expr = fmt . Sprintf ( "%s * 0" , r . Expr )
}
}
reloadAndValidate ( rgs , t , tmpFile , ruleManager , expected , ogs )
}
func reloadAndValidate ( rgs * rulefmt . RuleGroups , t * testing . T , tmpFile * os . File , ruleManager * Manager , expected map [ string ] labels . Labels , ogs map [ string ] * Group ) {
bs , err := yaml . Marshal ( rgs )
testutil . Ok ( t , err )
tmpFile . Seek ( 0 , 0 )
_ , err = tmpFile . Write ( bs )
testutil . Ok ( t , err )
err = ruleManager . Update ( 10 * time . Second , [ ] string { tmpFile . Name ( ) } , nil )
testutil . Ok ( t , err )
for h , g := range ruleManager . groups {
if ogs [ h ] == g {
t . Fail ( )
}
ogs [ h ] = g
2017-11-01 04:58:00 -07:00
}
}
2018-08-27 09:41:42 -07:00
func TestNotify ( t * testing . T ) {
2019-08-08 18:35:39 -07:00
storage := teststorage . New ( t )
2018-08-27 09:41:42 -07:00
defer storage . Close ( )
2018-10-02 04:59:19 -07:00
engineOpts := promql . EngineOpts {
Logger : nil ,
Reg : nil ,
MaxConcurrent : 10 ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
}
engine := promql . NewEngine ( engineOpts )
2018-08-27 09:41:42 -07:00
var lastNotified [ ] * Alert
notifyFunc := func ( ctx context . Context , expr string , alerts ... * Alert ) {
lastNotified = alerts
}
opts := & ManagerOptions {
QueryFunc : EngineQueryFunc ( engine , storage ) ,
Appendable : storage ,
TSDB : storage ,
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
NotifyFunc : notifyFunc ,
ResendDelay : 2 * time . Second ,
}
expr , err := promql . ParseExpr ( "a > 1" )
testutil . Ok ( t , err )
2019-04-15 09:52:58 -07:00
rule := NewAlertingRule ( "aTooHigh" , expr , 0 , labels . Labels { } , labels . Labels { } , nil , true , log . NewNopLogger ( ) )
2018-08-27 09:41:42 -07:00
group := NewGroup ( "alert" , "" , time . Second , [ ] Rule { rule } , true , opts )
app , _ := storage . Appender ( )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 1000 , 2 )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 2000 , 3 )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 5000 , 3 )
app . Add ( labels . FromStrings ( model . MetricNameLabel , "a" ) , 6000 , 0 )
err = app . Commit ( )
testutil . Ok ( t , err )
ctx := context . Background ( )
// Alert sent right away
group . Eval ( ctx , time . Unix ( 1 , 0 ) )
testutil . Equals ( t , 1 , len ( lastNotified ) )
2018-08-28 08:05:00 -07:00
testutil . Assert ( t , ! lastNotified [ 0 ] . ValidUntil . IsZero ( ) , "ValidUntil should not be zero" )
2018-08-27 09:41:42 -07:00
// Alert is not sent 1s later
group . Eval ( ctx , time . Unix ( 2 , 0 ) )
testutil . Equals ( t , 0 , len ( lastNotified ) )
// Alert is resent at t=5s
group . Eval ( ctx , time . Unix ( 5 , 0 ) )
testutil . Equals ( t , 1 , len ( lastNotified ) )
// Resolution alert sent right away
group . Eval ( ctx , time . Unix ( 6 , 0 ) )
testutil . Equals ( t , 1 , len ( lastNotified ) )
}