From 0b20119f7b9b3e5ea85c0701ca9748a2e4e331f5 Mon Sep 17 00:00:00 2001 From: machine424 Date: Tue, 1 Oct 2024 15:38:26 +0200 Subject: [PATCH] fix(notifier): wip Signed-off-by: machine424 --- notifier/notifier.go | 32 +++++++++++---- notifier/notifier_test.go | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 8 deletions(-) diff --git a/notifier/notifier.go b/notifier/notifier.go index 5374e73d6..82558eb21 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -196,6 +196,7 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag m.queueCapacity.Set(float64(queueCap)) if r != nil { + r.MustRegister( m.latency, m.errors, @@ -267,6 +268,15 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { amSets[k] = ams } + // Drop series of the previous alertmanagers. + // TODO: This may result in unneeded + resets? (if nothing changes?) + for _, ams := range n.alertmanagers { + for _, am := range ams.ams { + ams.dropMetrics(am.url().String()) + } + } + n.alertmanagers = amSets return nil @@ -752,6 +762,18 @@ func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metri return s, nil } +func (s *alertmanagerSet) initializeMetrics(lvs ...string) { + // This will initialize the Counters for the AM to 0. + s.metrics.sent.WithLabelValues(lvs...) + s.metrics.errors.WithLabelValues(lvs...) +} + +func (s *alertmanagerSet) dropMetrics(lvs ...string) { + s.metrics.latency.DeleteLabelValues(lvs...) + s.metrics.sent.DeleteLabelValues(lvs...) + s.metrics.errors.DeleteLabelValues(lvs...) +} + // sync extracts a deduplicated set of Alertmanager endpoints from a list // of target groups definitions. func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { @@ -782,11 +804,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { if _, ok := seen[us]; ok { continue } - - // This will initialize the Counters for the AM to 0. 
- s.metrics.sent.WithLabelValues(us) - s.metrics.errors.WithLabelValues(us) - + s.initializeMetrics(us) seen[us] = struct{}{} s.ams = append(s.ams, am) } @@ -796,9 +814,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { if _, ok := seen[us]; ok { continue } - s.metrics.latency.DeleteLabelValues(us) - s.metrics.sent.DeleteLabelValues(us) - s.metrics.errors.DeleteLabelValues(us) + s.dropMetrics(us) seen[us] = struct{}{} } } diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index 68dd44581..a32d21246 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -1017,3 +1017,85 @@ func TestStop_DrainingEnabled(t *testing.T) { require.Equal(t, int64(2), alertsReceived.Load()) } + +func metricsWithStringAsLabelValue(g prometheus.Gatherer, s string) ([]string, error) { + families, err := g.Gather() + if err != nil { + return nil, err + } + + metrics := []string{} + for _, f := range families { + for _, m := range f.GetMetric() { + for _, v := range m.GetLabel() { + if v.GetValue() == s { + metrics = append(metrics, f.GetName()) + } + } + } + } + return metrics, nil +} + +func TestAlertMetrics(t *testing.T) { + targetGroup := func(s string) *targetgroup.Group { + return &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue(s), + }, + }, + } + } + alertmanagerURL := func(s string) string { + return fmt.Sprintf("http://%s/api/v2/alerts", s) + } + + reg := prometheus.NewRegistry() + n := NewManager(&Options{Registerer: reg}, nil) + cfg := &config.Config{} + s := ` +alerting: + alertmanagers: + - static_configs: +` + + targetURL1 := "alertmanager:9093" + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1) + + require.NoError(t, n.ApplyConfig(cfg)) + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup(targetURL1)}} + n.reload(tgs) + + metrics, err := metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL1)) + 
require.NoError(t, err) + // Corresponds to: + // metrics.sent + // metrics.errors + require.Len(t, metrics, 2) + + // The alertmanager target gets changed. + targetURL2 := "alertmanager:9094" + tgs = map[string][]*targetgroup.Group{"config-0": {targetGroup(targetURL2)}} + n.reload(tgs) + + // targetURL1 related series were dropped. + metrics, err = metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL1)) + require.NoError(t, err) + require.Len(t, metrics, 0) + + s = ` +alerting: + alertmanagers: +` + // Drop the config. + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 0) + + require.NoError(t, n.ApplyConfig(cfg)) + // targetURL2 related series were dropped. + metrics, err = metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL2)) + require.NoError(t, err) + require.Len(t, metrics, 0) +} \ No newline at end of file