diff --git a/notifier/notifier.go b/notifier/notifier.go index b7f719ea58..482d2fdaab 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -16,6 +16,8 @@ package notifier import ( "bytes" "context" + "crypto/md5" + "encoding/hex" "encoding/json" "fmt" "io" @@ -35,6 +37,7 @@ import ( "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" "go.uber.org/atomic" + "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs amSets := make(map[string]*alertmanagerSet) + // configToAlertmanagers maps alertmanager sets for each unique AlertmanagerConfig, + // helping to avoid dropping known alertmanagers and re-use them without waiting for SD updates when applying the config. + configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers)) + for _, oldAmSet := range n.alertmanagers { + hash, err := oldAmSet.configHash() + if err != nil { + return err + } + configToAlertmanagers[hash] = oldAmSet + } for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() { ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics) @@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { return err } + hash, err := ams.configHash() + if err != nil { + return err + } + + if oldAmSet, ok := configToAlertmanagers[hash]; ok { + ams.ams = oldAmSet.ams + ams.droppedAms = oldAmSet.droppedAms + } + amSets[k] = ams } @@ -803,6 +826,15 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { } } +func (s *alertmanagerSet) configHash() (string, error) { + b, err := yaml.Marshal(s.cfg) + if err != nil { + return "", err + } + hash := md5.Sum(b) + return hex.EncodeToString(hash[:]), nil +} + func postPath(pre string, v config.AlertmanagerAPIVersion) string { alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v)) return path.Join("/", pre, alertPushEndpoint) diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index ac722eb23c..83eaf8168b 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/config" + _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" @@ -1017,3 +1018,107 @@ func TestStop_DrainingEnabled(t *testing.T) { require.Equal(t, int64(2), alertsReceived.Load()) } + +func TestApplyConfig(t *testing.T) { + targetURL := "alertmanager:9093" + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue(targetURL), + }, + }, + } + alertmanagerURL := fmt.Sprintf("http://%s/api/v2/alerts", targetURL) + + n := NewManager(&Options{}, nil) + cfg := &config.Config{} + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json +` + // 1. Ensure known alertmanagers are not dropped during ApplyConfig. + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1) + + // First, apply the config and reload. + require.NoError(t, n.ApplyConfig(cfg)) + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} + n.reload(tgs) + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // Reapply the config. + require.NoError(t, n.ApplyConfig(cfg)) + // Ensure the known alertmanagers are not dropped. + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // 2. Ensure known alertmanagers are not dropped during ApplyConfig even when + // the config order changes. + s = ` +alerting: + alertmanagers: + - static_configs: + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 1) + // Ensure no unnecessary alertmanagers are injected. + require.Empty(t, n.alertmanagers["config-0"].ams) + // Ensure the config order is taken into account. + ams := n.alertmanagers["config-1"].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + + // 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 2) + for cfgIdx := range 2 { + ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + } + + // 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + path_prefix: /bar + - file_sd_configs: + - files: + - foo.json + relabel_configs: + - source_labels: ['__address__'] + regex: 'doesntmatter:1234' + action: drop +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Empty(t, n.Alertmanagers()) +}