fix(notifier): wip

Signed-off-by: machine424 <ayoubmrini424@gmail.com>
This commit is contained in:
machine424 2024-10-01 15:38:26 +02:00
parent c5c2566b8a
commit 0b20119f7b
No known key found for this signature in database
GPG key ID: A4B001A4FDEE017D
2 changed files with 106 additions and 8 deletions

View file

@ -196,6 +196,7 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag
m.queueCapacity.Set(float64(queueCap))
if r != nil {
r.MustRegister(
m.latency,
m.errors,
@ -267,6 +268,15 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
amSets[k] = ams
}
// Drop series of the previous alertmanagers.
// TODO: This may result in unneeded resets? (if nothing changes?)
for _, ams := range n.alertmanagers {
for _, am := range ams.ams {
ams.dropMetrics(am.url().String())
}
}
n.alertmanagers = amSets
return nil
@ -752,6 +762,18 @@ func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metri
return s, nil
}
// initializeMetrics looks up the children of the sent and errors vectors for
// the given label values and discards them. The lookup itself is the point:
// it makes the Counters for the Alertmanager start out at 0.
//
// lvs is forwarded unchanged to both vectors.
func (s *alertmanagerSet) initializeMetrics(lvs ...string) {
	_ = s.metrics.sent.WithLabelValues(lvs...)
	_ = s.metrics.errors.WithLabelValues(lvs...)
}
// dropMetrics deletes the entries matching the given label values from the
// latency, sent and errors vectors, in that order. The outcome of each
// deletion is deliberately ignored.
//
// lvs is forwarded unchanged to all three vectors.
func (s *alertmanagerSet) dropMetrics(lvs ...string) {
	_ = s.metrics.latency.DeleteLabelValues(lvs...)
	_ = s.metrics.sent.DeleteLabelValues(lvs...)
	_ = s.metrics.errors.DeleteLabelValues(lvs...)
}
// sync extracts a deduplicated set of Alertmanager endpoints from a list
// of target groups definitions.
func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
@ -782,11 +804,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
if _, ok := seen[us]; ok {
continue
}
// This will initialize the Counters for the AM to 0.
s.metrics.sent.WithLabelValues(us)
s.metrics.errors.WithLabelValues(us)
s.initializeMetrics(us)
seen[us] = struct{}{}
s.ams = append(s.ams, am)
}
@ -796,9 +814,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
if _, ok := seen[us]; ok {
continue
}
s.metrics.latency.DeleteLabelValues(us)
s.metrics.sent.DeleteLabelValues(us)
s.metrics.errors.DeleteLabelValues(us)
s.dropMetrics(us)
seen[us] = struct{}{}
}
}

View file

@ -1017,3 +1017,85 @@ func TestStop_DrainingEnabled(t *testing.T) {
require.Equal(t, int64(2), alertsReceived.Load())
}
// metricsWithStringAsLabelValue gathers all metric families from g and
// collects the family name once for every label whose value is exactly s.
//
// A family name can therefore appear several times in the result: once per
// matching label of each of its metrics. When nothing matches, the result is
// an empty, non-nil slice. An error from Gather is returned as-is with a nil
// slice.
func metricsWithStringAsLabelValue(g prometheus.Gatherer, s string) ([]string, error) {
	gathered, err := g.Gather()
	if err != nil {
		return nil, err
	}

	names := make([]string, 0)
	for _, family := range gathered {
		for _, metric := range family.GetMetric() {
			for _, label := range metric.GetLabel() {
				if label.GetValue() != s {
					continue
				}
				names = append(names, family.GetName())
			}
		}
	}
	return names, nil
}
// TestAlertMetrics checks, through a dedicated registry, which metric families
// carry an Alertmanager URL as a label value at three points: after a target is
// loaded, after the target is replaced by another one, and after the
// Alertmanager config is removed.
func TestAlertMetrics(t *testing.T) {
// targetGroup builds a target group with a single target whose __address__
// label is s.
targetGroup := func(s string) *targetgroup.Group {
return &targetgroup.Group{
Targets: []model.LabelSet{
{
"__address__": model.LabelValue(s),
},
},
}
}
// alertmanagerURL returns the URL string that is looked up as a label value
// for the address s.
alertmanagerURL := func(s string) string {
return fmt.Sprintf("http://%s/api/v2/alerts", s)
}
// A fresh registry keeps the gathered families limited to this Manager.
reg := prometheus.NewRegistry()
n := NewManager(&Options{Registerer: reg}, nil)
cfg := &config.Config{}
// NOTE(review): whitespace inside the raw string is part of the YAML;
// confirm the nesting matches the intended alerting config.
s := `
alerting:
alertmanagers:
- static_configs:
`
targetURL1 := "alertmanager:9093"
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1)
require.NoError(t, n.ApplyConfig(cfg))
// Hand the first target to the manager under the "config-0" key.
tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup(targetURL1)}}
n.reload(tgs)
metrics, err := metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL1))
require.NoError(t, err)
// Corresponds to:
// metrics.sent
// metrics.errors
require.Len(t, metrics, 2)
// The alertmanager target gets changed.
targetURL2 := "alertmanager:9094"
tgs = map[string][]*targetgroup.Group{"config-0": {targetGroup(targetURL2)}}
n.reload(tgs)
// targetURL1 related series were dropped.
metrics, err = metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL1))
require.NoError(t, err)
require.Len(t, metrics, 0)
s = `
alerting:
alertmanagers:
`
// Drop the config.
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 0)
require.NoError(t, n.ApplyConfig(cfg))
// targetURL2 related series were dropped.
metrics, err = metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL2))
require.NoError(t, err)
require.Len(t, metrics, 0)
}