mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
fix(notifier): wip
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
This commit is contained in:
parent
c5c2566b8a
commit
0b20119f7b
|
@ -196,6 +196,7 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag
|
||||||
m.queueCapacity.Set(float64(queueCap))
|
m.queueCapacity.Set(float64(queueCap))
|
||||||
|
|
||||||
if r != nil {
|
if r != nil {
|
||||||
|
|
||||||
r.MustRegister(
|
r.MustRegister(
|
||||||
m.latency,
|
m.latency,
|
||||||
m.errors,
|
m.errors,
|
||||||
|
@ -267,6 +268,15 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
|
||||||
amSets[k] = ams
|
amSets[k] = ams
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Drop series of the previous alertmanagers.
|
||||||
|
// TODO: This may result in unneeded resets (e.g. if nothing changes).
|
||||||
|
for _, ams := range n.alertmanagers {
|
||||||
|
for _, am := range ams.ams {
|
||||||
|
ams.dropMetrics(am.url().String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
n.alertmanagers = amSets
|
n.alertmanagers = amSets
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -752,6 +762,18 @@ func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metri
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *alertmanagerSet) initializeMetrics(lvs ...string) {
|
||||||
|
// This will initialize the Counters for the AM to 0.
|
||||||
|
s.metrics.sent.WithLabelValues(lvs...)
|
||||||
|
s.metrics.errors.WithLabelValues(lvs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *alertmanagerSet) dropMetrics(lvs ...string) {
|
||||||
|
s.metrics.latency.DeleteLabelValues(lvs...)
|
||||||
|
s.metrics.sent.DeleteLabelValues(lvs...)
|
||||||
|
s.metrics.errors.DeleteLabelValues(lvs...)
|
||||||
|
}
|
||||||
|
|
||||||
// sync extracts a deduplicated set of Alertmanager endpoints from a list
|
// sync extracts a deduplicated set of Alertmanager endpoints from a list
|
||||||
// of target groups definitions.
|
// of target groups definitions.
|
||||||
func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||||
|
@ -782,11 +804,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||||
if _, ok := seen[us]; ok {
|
if _, ok := seen[us]; ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
s.initializeMetrics(us)
|
||||||
// This will initialize the Counters for the AM to 0.
|
|
||||||
s.metrics.sent.WithLabelValues(us)
|
|
||||||
s.metrics.errors.WithLabelValues(us)
|
|
||||||
|
|
||||||
seen[us] = struct{}{}
|
seen[us] = struct{}{}
|
||||||
s.ams = append(s.ams, am)
|
s.ams = append(s.ams, am)
|
||||||
}
|
}
|
||||||
|
@ -796,9 +814,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||||
if _, ok := seen[us]; ok {
|
if _, ok := seen[us]; ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
s.metrics.latency.DeleteLabelValues(us)
|
s.dropMetrics(us)
|
||||||
s.metrics.sent.DeleteLabelValues(us)
|
|
||||||
s.metrics.errors.DeleteLabelValues(us)
|
|
||||||
seen[us] = struct{}{}
|
seen[us] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1017,3 +1017,85 @@ func TestStop_DrainingEnabled(t *testing.T) {
|
||||||
|
|
||||||
require.Equal(t, int64(2), alertsReceived.Load())
|
require.Equal(t, int64(2), alertsReceived.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func metricsWithStringAsLabelValue(g prometheus.Gatherer, s string) ([]string, error) {
|
||||||
|
families, err := g.Gather()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics := []string{}
|
||||||
|
for _, f := range families {
|
||||||
|
for _, m := range f.GetMetric() {
|
||||||
|
for _, v := range m.GetLabel() {
|
||||||
|
if v.GetValue() == s {
|
||||||
|
metrics = append(metrics, f.GetName())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return metrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAlertMetrics(t *testing.T) {
|
||||||
|
targetGroup := func(s string) *targetgroup.Group {
|
||||||
|
return &targetgroup.Group{
|
||||||
|
Targets: []model.LabelSet{
|
||||||
|
{
|
||||||
|
"__address__": model.LabelValue(s),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
alertmanagerURL := func(s string) string {
|
||||||
|
return fmt.Sprintf("http://%s/api/v2/alerts", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
reg := prometheus.NewRegistry()
|
||||||
|
n := NewManager(&Options{Registerer: reg}, nil)
|
||||||
|
cfg := &config.Config{}
|
||||||
|
s := `
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
`
|
||||||
|
|
||||||
|
targetURL1 := "alertmanager:9093"
|
||||||
|
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
|
||||||
|
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1)
|
||||||
|
|
||||||
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
|
tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup(targetURL1)}}
|
||||||
|
n.reload(tgs)
|
||||||
|
|
||||||
|
metrics, err := metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL1))
|
||||||
|
require.NoError(t, err)
|
||||||
|
// Corresponds to:
|
||||||
|
// metrics.sent
|
||||||
|
// metrics.errors
|
||||||
|
require.Len(t, metrics, 2)
|
||||||
|
|
||||||
|
// The alertmanager targer gets changed.
|
||||||
|
targetURL2 := "alertmanager:9094"
|
||||||
|
tgs = map[string][]*targetgroup.Group{"config-0": {targetGroup(targetURL2)}}
|
||||||
|
n.reload(tgs)
|
||||||
|
|
||||||
|
// targetURL1 related series were dropped.
|
||||||
|
metrics, err = metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL1))
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, metrics, 0)
|
||||||
|
|
||||||
|
s = `
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
`
|
||||||
|
// Drop the config.
|
||||||
|
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
|
||||||
|
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 0)
|
||||||
|
|
||||||
|
require.NoError(t, n.ApplyConfig(cfg))
|
||||||
|
// targetURL2 related series were dropped.
|
||||||
|
metrics, err = metricsWithStringAsLabelValue(reg, alertmanagerURL(targetURL2))
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, metrics, 0)
|
||||||
|
}
|
Loading…
Reference in a new issue