Fix regression of alert rules state loss on config reload. (#3382)

* an incorrect map key for the group prevented copying state from existing alert rules on config reload

* applyConfig test

* few nits

* nits 2
This commit is contained in:
Krasi Georgiev 2017-11-01 11:58:00 +00:00 committed by Tobias Schmidt
parent 3382f39046
commit e86d82ad2d
3 changed files with 64 additions and 4 deletions

10
config/testdata/first.rules vendored Normal file
View file

@ -0,0 +1,10 @@
# Rules fixture under config/testdata: a single group (my-group-name)
# containing one alerting rule (InstanceDown).
groups:
- name: my-group-name
rules:
- alert: InstanceDown
expr: up == 0
for: 1m
labels:
severity: critical
annotations:
description: "stuff's happening with {{ $labels.service }}"

View file

@ -494,8 +494,9 @@ func (m *Manager) ApplyConfig(conf *config.Config) error {
// If there is an old group with the same identifier, stop it and wait for
// it to finish the current iteration. Then copy it into the new group.
oldg, ok := m.groups[newg.name]
delete(m.groups, newg.name)
gn := groupKey(newg.name, newg.file)
oldg, ok := m.groups[gn]
delete(m.groups, gn)
go func(newg *Group) {
if ok {
@ -567,14 +568,18 @@ func (m *Manager) loadGroups(interval time.Duration, filenames ...string) (map[s
))
}
// Group names need not be unique across filenames.
groups[rg.Name+";"+fn] = NewGroup(rg.Name, fn, itv, rules, m.opts)
groups[groupKey(rg.Name, fn)] = NewGroup(rg.Name, fn, itv, rules, m.opts)
}
}
return groups, nil
}
// groupKey builds the map key under which a rule group is stored: the group
// name followed by ";" and the file the group belongs to. Group names need
// not be unique across filenames, so the name alone is not a usable key.
func groupKey(name, file string) string {
	const sep = ";"
	key := name + sep
	return key + file
}
// RuleGroups returns the list of manager's rule groups.
func (m *Manager) RuleGroups() []*Group {
m.mtx.RLock()

View file

@ -25,6 +25,7 @@ import (
"github.com/go-kit/kit/log"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/timestamp"
"github.com/prometheus/prometheus/pkg/value"
@ -286,3 +287,47 @@ func TestCopyState(t *testing.T) {
t.Fatalf("Active alerts not as expected. Wanted: %+v Got: %+v", oldGroup.rules[0], oldGroup.rules[3])
}
}
// TestApplyConfig checks that rule group state survives a config reload.
// It applies the same config twice, seeds seriesInPreviousEval on every
// group in between, and expects the seeded value to still be present on the
// groups that exist after the second ApplyConfig call.
func TestApplyConfig(t *testing.T) {
	expected := map[string]labels.Labels{
		"test": labels.Labels{
			labels.Label{
				Name:  "name",
				Value: "value",
			},
		},
	}
	conf, err := config.LoadFile("../config/testdata/conf.good.yml")
	if err != nil {
		// t.Fatal, not t.Fatalf(err.Error()): the error text must never be
		// interpreted as a format string.
		t.Fatal(err)
	}
	ruleManager := NewManager(&ManagerOptions{
		Appendable:  nil,
		Notifier:    nil,
		QueryEngine: nil,
		Context:     context.Background(),
		Logger:      log.NewNopLogger(),
	})
	ruleManager.Run()

	if err := ruleManager.ApplyConfig(conf); err != nil {
		t.Fatal(err)
	}
	// Without any loaded groups the loops below would not run and the test
	// would pass without verifying anything.
	if len(ruleManager.groups) == 0 {
		t.Fatal("Expected non-empty rule groups after applying the config")
	}
	// Seed state on the currently loaded groups so that a reload has
	// something to carry over.
	for _, g := range ruleManager.groups {
		g.seriesInPreviousEval = []map[string]labels.Labels{
			expected,
		}
	}

	if err := ruleManager.ApplyConfig(conf); err != nil {
		t.Fatal(err)
	}
	if len(ruleManager.groups) == 0 {
		t.Fatal("Expected non-empty rule groups after config reload")
	}
	for _, g := range ruleManager.groups {
		for _, actual := range g.seriesInPreviousEval {
			if !reflect.DeepEqual(expected, actual) {
				t.Fatalf("Rule groups state lost after config reload. Expected: %+v Got: %+v", expected, actual)
			}
		}
	}
}