From cccd5428912c6380eef2e024f66018960d68d0d1 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Thu, 12 Dec 2019 17:00:19 +0100 Subject: [PATCH] *: avoid missed Alertmanager targets (#6455) This change makes sure that nearly-identical Alertmanager configurations aren't merged together. The config's identifier was the MD5 hash of the configuration serialized to JSON but because `relabel.Regexp` has no public field and doesn't implement the JSON.Marshaler interface, it was always serialized to "{}". In practice, the identifier can be based on the index of the configuration in the list. Signed-off-by: Simon Pasquier --- cmd/prometheus/main.go | 11 ++--------- config/config.go | 16 ++++++++++++++-- notifier/notifier.go | 23 ++++++++--------------- notifier/notifier_test.go | 31 ++++++++++++++----------------- 4 files changed, 38 insertions(+), 43 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 00ec8eee6..f4e6b84f4 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -16,8 +16,6 @@ package main import ( "context" - "crypto/md5" - "encoding/json" "fmt" "net" "net/http" @@ -435,13 +433,8 @@ func main() { notifierManager.ApplyConfig, func(cfg *config.Config) error { c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.AlertingConfig.AlertmanagerConfigs { - // AlertmanagerConfigs doesn't hold an unique identifier so we use the config hash as the identifier. - b, err := json.Marshal(v) - if err != nil { - return err - } - c[fmt.Sprintf("%x", md5.Sum(b))] = v.ServiceDiscoveryConfig + for k, v := range cfg.AlertingConfig.AlertmanagerConfigs.ToMap() { + c[k] = v.ServiceDiscoveryConfig } return discoveryManagerNotify.ApplyConfig(c) }, diff --git a/config/config.go b/config/config.go index 391d51e31..e162655be 100644 --- a/config/config.go +++ b/config/config.go @@ -432,8 +432,8 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // AlertingConfig configures alerting and alertmanager related configs. type AlertingConfig struct { - AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"` - AlertmanagerConfigs []*AlertmanagerConfig `yaml:"alertmanagers,omitempty"` + AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"` + AlertmanagerConfigs AlertmanagerConfigs `yaml:"alertmanagers,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -454,6 +454,18 @@ func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error return nil } +// AlertmanagerConfigs is a slice of *AlertmanagerConfig. +type AlertmanagerConfigs []*AlertmanagerConfig + +// ToMap converts a slice of *AlertmanagerConfig to a map. +func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig { + ret := make(map[string]*AlertmanagerConfig) + for i := range a { + ret[fmt.Sprintf("config-%d", i)] = a[i] + } + return ret +} + // AlertmanagerAPIVersion represents a version of the // github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. type AlertmanagerAPIVersion string diff --git a/notifier/notifier.go b/notifier/notifier.go index 300dc781f..b0c1409aa 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -16,7 +16,6 @@ package notifier import ( "bytes" "context" - "crypto/md5" "encoding/json" "fmt" "io" @@ -262,20 +261,13 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { amSets := make(map[string]*alertmanagerSet) - for _, cfg := range conf.AlertingConfig.AlertmanagerConfigs { - ams, err := newAlertmanagerSet(cfg, n.logger) + for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() { + ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics) if err != nil { return err } - ams.metrics = n.metrics - - // The config hash is used for the map lookup identifier. - b, err := json.Marshal(cfg) - if err != nil { - return err - } - amSets[fmt.Sprintf("%x", md5.Sum(b))] = ams + amSets[k] = ams } n.alertmanagers = amSets @@ -638,15 +630,16 @@ type alertmanagerSet struct { logger log.Logger } -func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger) (*alertmanagerSet, error) { +func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "alertmanager", false) if err != nil { return nil, err } s := &alertmanagerSet{ - client: client, - cfg: cfg, - logger: logger, + client: client, + cfg: cfg, + logger: logger, + metrics: metrics, } return s, nil } diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index 7f949584c..b95b1edd3 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -16,7 +16,6 @@ package notifier import ( "bytes" "context" - "crypto/md5" "encoding/json" "fmt" "io/ioutil" @@ -467,6 +466,7 @@ alerting: if err := yaml.UnmarshalStrict([]byte(s), cfg); err != nil { t.Fatalf("Unable to load YAML config: %s", err) } + testutil.Equals(t, 1, len(cfg.AlertingConfig.AlertmanagerConfigs)) if err := n.ApplyConfig(cfg); err != nil { t.Fatalf("Error Applying the config:%v", err) @@ -474,18 +474,16 @@ alerting: tgs := make(map[string][]*targetgroup.Group) for _, tt := range tests { - - b, err := json.Marshal(cfg.AlertingConfig.AlertmanagerConfigs[0]) - if err != nil { - t.Fatalf("Error creating config hash:%v", err) - } - tgs[fmt.Sprintf("%x", md5.Sum(b))] = []*targetgroup.Group{ - tt.in, + for k := range cfg.AlertingConfig.AlertmanagerConfigs.ToMap() { + tgs[k] = []*targetgroup.Group{ + tt.in, + } + break } n.reload(tgs) res := n.Alertmanagers()[0].String() - testutil.Equals(t, res, tt.out) + testutil.Equals(t, tt.out, res) } } @@ -522,6 +520,7 @@ alerting: if err := yaml.UnmarshalStrict([]byte(s), cfg); err != nil { t.Fatalf("Unable to load YAML config: %s", err) } + testutil.Equals(t, 1, len(cfg.AlertingConfig.AlertmanagerConfigs)) if err := n.ApplyConfig(cfg); err != nil { t.Fatalf("Error Applying the config:%v", err) @@ -529,20 +528,18 @@ alerting: tgs := make(map[string][]*targetgroup.Group) for _, tt := range tests { + for k := range cfg.AlertingConfig.AlertmanagerConfigs.ToMap() { + tgs[k] = []*targetgroup.Group{ + tt.in, + } + break + } - b, err := json.Marshal(cfg.AlertingConfig.AlertmanagerConfigs[0]) - if err != nil { - t.Fatalf("Error creating config hash:%v", err) - } - tgs[fmt.Sprintf("%x", md5.Sum(b))] = []*targetgroup.Group{ - tt.in, - } n.reload(tgs) res := n.DroppedAlertmanagers()[0].String() testutil.Equals(t, res, tt.out) } - } func makeInputTargetGroup() *targetgroup.Group {