mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
fix(notifier): stop dropping known alertmanagers on each ApplyConfig and waiting on SD to update them.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
This commit is contained in:
parent
3dc623d30b
commit
83ee57343a
|
@ -16,6 +16,8 @@ package notifier
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/md5"
|
||||||
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
@ -35,6 +37,7 @@ import (
|
||||||
"github.com/prometheus/common/sigv4"
|
"github.com/prometheus/common/sigv4"
|
||||||
"github.com/prometheus/common/version"
|
"github.com/prometheus/common/version"
|
||||||
"go.uber.org/atomic"
|
"go.uber.org/atomic"
|
||||||
|
"gopkg.in/yaml.v2"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/config"
|
"github.com/prometheus/prometheus/config"
|
||||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||||
|
@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
|
||||||
n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs
|
n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs
|
||||||
|
|
||||||
amSets := make(map[string]*alertmanagerSet)
|
amSets := make(map[string]*alertmanagerSet)
|
||||||
|
// configToAlertmanagers maps alertmanager sets for each unique AlertmanagerConfig,
|
||||||
|
// helping to avoid dropping known alertmanagers and re-use them without waiting for SD updates when applying the config.
|
||||||
|
configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers))
|
||||||
|
for _, oldAmSet := range n.alertmanagers {
|
||||||
|
hash, err := oldAmSet.configHash()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
configToAlertmanagers[hash] = oldAmSet
|
||||||
|
}
|
||||||
|
|
||||||
for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() {
|
for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() {
|
||||||
ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics)
|
ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics)
|
||||||
|
@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hash, err := ams.configHash()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if oldAmSet, ok := configToAlertmanagers[hash]; ok {
|
||||||
|
ams.ams = oldAmSet.ams
|
||||||
|
ams.droppedAms = oldAmSet.droppedAms
|
||||||
|
}
|
||||||
|
|
||||||
amSets[k] = ams
|
amSets[k] = ams
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -803,6 +826,15 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// configHash returns a hex-encoded MD5 digest of the YAML serialization of
// s.cfg. ApplyConfig uses the digest as a map key to detect that a new
// alertmanagerSet has the same configuration as an existing one.
// It returns an empty string and the marshaling error if s.cfg cannot be
// serialized to YAML.
func (s *alertmanagerSet) configHash() (string, error) {
|
||||||
|
b, err := yaml.Marshal(s.cfg)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// The digest is only a fingerprint for comparing configurations.
// NOTE(review): presumably not relied on for security; confirm before reusing elsewhere.
hash := md5.Sum(b)
|
||||||
|
return hex.EncodeToString(hash[:]), nil
|
||||||
|
}
|
||||||
|
|
||||||
func postPath(pre string, v config.AlertmanagerAPIVersion) string {
|
func postPath(pre string, v config.AlertmanagerAPIVersion) string {
|
||||||
alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v))
|
alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v))
|
||||||
return path.Join("/", pre, alertPushEndpoint)
|
return path.Join("/", pre, alertPushEndpoint)
|
||||||
|
|
|
@ -1019,7 +1019,7 @@ func TestStop_DrainingEnabled(t *testing.T) {
|
||||||
require.Equal(t, int64(2), alertsReceived.Load())
|
require.Equal(t, int64(2), alertsReceived.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertmanagersNotDroppedDuringApplyConfig(t *testing.T) {
|
func TestApplyConfig(t *testing.T) {
|
||||||
targetURL := "alertmanager:9093"
|
targetURL := "alertmanager:9093"
|
||||||
targetGroup := &targetgroup.Group{
|
targetGroup := &targetgroup.Group{
|
||||||
Targets: []model.LabelSet{
|
Targets: []model.LabelSet{
|
||||||
|
@ -1039,27 +1039,86 @@ alerting:
|
||||||
- files:
|
- files:
|
||||||
- foo.json
|
- foo.json
|
||||||
`
|
`
|
||||||
// TODO: add order change test
|
// 1. Ensure known alertmanagers are not dropped during ApplyConfig.
|
||||||
// TODO: add entry removed with DS manager
|
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
|
||||||
|
|
||||||
err := yaml.UnmarshalStrict([]byte(s), cfg)
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1)
|
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1)
|
||||||
|
|
||||||
yaml.Marshal(cfg.AlertingConfig.AlertmanagerConfigs)
|
// First, apply the config and reload.
|
||||||
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
// First apply config and reload.
|
|
||||||
err = n.ApplyConfig(cfg)
|
|
||||||
require.NoError(t, err)
|
|
||||||
tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}}
|
tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}}
|
||||||
n.reload(tgs)
|
n.reload(tgs)
|
||||||
require.Len(t, n.Alertmanagers(), 1)
|
require.Len(t, n.Alertmanagers(), 1)
|
||||||
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
|
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
|
||||||
|
|
||||||
// Reapply the config.
|
// Reapply the config.
|
||||||
err = n.ApplyConfig(cfg)
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
require.NoError(t, err)
|
// Ensure the known alertmanagers are not dropped.
|
||||||
// The already known alertmanagers shouldn't get dropped.
|
|
||||||
require.Len(t, n.Alertmanagers(), 1)
|
require.Len(t, n.Alertmanagers(), 1)
|
||||||
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
|
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
|
||||||
|
|
||||||
|
// 2. Ensure known alertmanagers are not dropped during ApplyConfig even when
|
||||||
|
// the config order changes.
|
||||||
|
s = `
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- file_sd_configs:
|
||||||
|
- files:
|
||||||
|
- foo.json
|
||||||
|
`
|
||||||
|
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
|
||||||
|
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
|
||||||
|
|
||||||
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
|
require.Len(t, n.Alertmanagers(), 1)
|
||||||
|
// Ensure no unnecessary alertmanagers are injected.
|
||||||
|
require.Empty(t, n.alertmanagers["config-0"].ams)
|
||||||
|
// Ensure the config order is taken into account.
|
||||||
|
ams := n.alertmanagers["config-1"].ams
|
||||||
|
require.Len(t, ams, 1)
|
||||||
|
require.Equal(t, alertmanagerURL, ams[0].url().String())
|
||||||
|
|
||||||
|
// 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig.
|
||||||
|
s = `
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- file_sd_configs:
|
||||||
|
- files:
|
||||||
|
- foo.json
|
||||||
|
- file_sd_configs:
|
||||||
|
- files:
|
||||||
|
- foo.json
|
||||||
|
`
|
||||||
|
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
|
||||||
|
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
|
||||||
|
|
||||||
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
|
require.Len(t, n.Alertmanagers(), 2)
|
||||||
|
for cfgIdx := range 2 {
|
||||||
|
ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams
|
||||||
|
require.Len(t, ams, 1)
|
||||||
|
require.Equal(t, alertmanagerURL, ams[0].url().String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig.
|
||||||
|
s = `
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- file_sd_configs:
|
||||||
|
- files:
|
||||||
|
- foo.json
|
||||||
|
path_prefix: /bar
|
||||||
|
- file_sd_configs:
|
||||||
|
- files:
|
||||||
|
- foo.json
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: ['__address__']
|
||||||
|
regex: 'doesntmatter:1234'
|
||||||
|
action: drop
|
||||||
|
`
|
||||||
|
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
|
||||||
|
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
|
||||||
|
|
||||||
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
|
require.Empty(t, n.Alertmanagers())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue