From 1fd20fc95472b381ec2fe8b3e7ff3ca8ff148a18 Mon Sep 17 00:00:00 2001 From: Conor Broderick Date: Wed, 21 Feb 2018 09:00:07 +0000 Subject: [PATCH] Add dropped alertmanagers to alertmanagers API (#3865) --- docs/querying/api.md | 10 ++++--- notifier/notifier.go | 48 +++++++++++++++++++++++++-------- notifier/notifier_test.go | 57 ++++++++++++++++++++++++++++++++++++++- web/api/v1/api.go | 12 ++++++--- web/api/v1/api_test.go | 35 +++++++++++++++++------- 5 files changed, 132 insertions(+), 30 deletions(-) diff --git a/docs/querying/api.md b/docs/querying/api.md index 502658f378..8548cfebec 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -356,9 +356,6 @@ $ curl http://localhost:9090/api/v1/targets ## Alertmanagers -> This API is experimental as it is intended to be extended with Alertmanagers -> dropped due to relabelling in the future. - The following endpoint returns an overview of the current state of the Prometheus alertmanager discovery: @@ -366,7 +363,7 @@ Prometheus alertmanager discovery: GET /api/v1/alertmanagers ``` -Currently only the active Alertmanagers are part of the response. +Both the active and dropped Alertmanagers are part of the response. ```json $ curl http://localhost:9090/api/v1/alertmanagers @@ -377,6 +374,11 @@ $ curl http://localhost:9090/api/v1/alertmanagers { "url": "http://127.0.0.1:9090/api/v1/alerts" } + ], + "droppedAlertmanagers": [ + { + "url": "http://127.0.0.1:9093/api/v1/alerts" + } ] } } diff --git a/notifier/notifier.go b/notifier/notifier.go index 1f1dd7cfef..92ecde1b4f 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -419,6 +419,25 @@ func (n *Manager) Alertmanagers() []*url.URL { return res } +// DroppedAlertmanagers returns the URLs of all Alertmanagers that were dropped during relabelling.
+func (n *Manager) DroppedAlertmanagers() []*url.URL { + n.mtx.RLock() + amSets := n.alertmanagers + n.mtx.RUnlock() + + var res []*url.URL + + for _, ams := range amSets { + ams.mtx.RLock() + for _, dam := range ams.droppedAms { + res = append(res, dam.url()) + } + ams.mtx.RUnlock() + } + + return res +} + // sendAll sends the alerts to all configured Alertmanagers concurrently. // It returns true if the alerts could be sent successfully to at least one Alertmanager. func (n *Manager) sendAll(alerts ...*Alert) bool { @@ -519,9 +538,10 @@ type alertmanagerSet struct { metrics *alertMetrics - mtx sync.RWMutex - ams []alertmanager - logger log.Logger + mtx sync.RWMutex + ams []alertmanager + droppedAms []alertmanager + logger log.Logger } func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger) (*alertmanagerSet, error) { @@ -540,24 +560,28 @@ func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger) (*ale // sync extracts a deduplicated set of Alertmanager endpoints from a list // of target groups definitions. func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { - all := []alertmanager{} + allAms := []alertmanager{} + allDroppedAms := []alertmanager{} for _, tg := range tgs { - ams, err := alertmanagerFromGroup(tg, s.cfg) + ams, droppedAms, err := alertmanagerFromGroup(tg, s.cfg) if err != nil { level.Error(s.logger).Log("msg", "Creating discovered Alertmanagers failed", "err", err) continue } - all = append(all, ams...) + allAms = append(allAms, ams...) + allDroppedAms = append(allDroppedAms, droppedAms...) } s.mtx.Lock() defer s.mtx.Unlock() // Set new Alertmanagers and deduplicate them along their unique URL. s.ams = []alertmanager{} + s.droppedAms = []alertmanager{} + s.droppedAms = append(s.droppedAms, allDroppedAms...) 
seen := map[string]struct{}{} - for _, am := range all { + for _, am := range allAms { us := am.url().String() if _, ok := seen[us]; ok { continue @@ -578,8 +602,9 @@ func postPath(pre string) string { // alertmanagersFromGroup extracts a list of alertmanagers from a target group and an associcated // AlertmanagerConfig. -func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig) ([]alertmanager, error) { +func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig) ([]alertmanager, []alertmanager, error) { var res []alertmanager + var droppedAlertManagers []alertmanager for _, tlset := range tg.Targets { lbls := make([]labels.Label, 0, len(tlset)+2+len(tg.Labels)) @@ -600,6 +625,7 @@ func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig lset := relabel.Process(labels.New(lbls...), cfg.RelabelConfigs...) if lset == nil { + droppedAlertManagers = append(droppedAlertManagers, alertmanagerLabels{lbls}) continue } @@ -627,13 +653,13 @@ func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig case "https": addr = addr + ":443" default: - return nil, fmt.Errorf("invalid scheme: %q", cfg.Scheme) + return nil, nil, fmt.Errorf("invalid scheme: %q", cfg.Scheme) } lb.Set(model.AddressLabel, addr) } if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil { - return nil, err + return nil, nil, err } // Meta labels are deleted after relabelling. 
Other internal labels propagate to @@ -646,5 +672,5 @@ func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig res = append(res, alertmanagerLabels{lset}) } - return res, nil + return res, droppedAlertManagers, nil } diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index 80bffaf225..5fab5cab6a 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -442,7 +442,7 @@ func (a alertmanagerMock) url() *url.URL { func TestLabelSetNotReused(t *testing.T) { tg := makeInputTargetGroup() - _, err := alertmanagerFromGroup(tg, &config.AlertmanagerConfig{}) + _, _, err := alertmanagerFromGroup(tg, &config.AlertmanagerConfig{}) if err != nil { t.Fatal(err) } @@ -503,6 +503,61 @@ alerting: } +func TestDroppedAlertmanagers(t *testing.T) { + var tests = []struct { + in *targetgroup.Group + out string + }{ + { + in: &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": "alertmanager:9093", + }, + }, + }, + out: "http://alertmanager:9093/api/v1/alerts", + }, + } + + n := NewManager(&Options{}, nil) + + cfg := &config.Config{} + s := ` +alerting: + alertmanagers: + - static_configs: + relabel_configs: + - source_labels: ['__address__'] + regex: 'alertmanager:9093' + action: drop +` + if err := yaml.Unmarshal([]byte(s), cfg); err != nil { + t.Fatalf("Unable to load YAML config: %s", err) + } + + if err := n.ApplyConfig(cfg); err != nil { + t.Fatalf("Error Applying the config:%v", err) + } + + tgs := make(map[string][]*targetgroup.Group) + for _, tt := range tests { + + b, err := json.Marshal(cfg.AlertingConfig.AlertmanagerConfigs[0]) + if err != nil { + t.Fatalf("Error creating config hash:%v", err) + } + tgs[fmt.Sprintf("%x", md5.Sum(b))] = []*targetgroup.Group{ + tt.in, + } + n.reload(tgs) + res := n.DroppedAlertmanagers()[0].String() + + testutil.Equals(t, res, tt.out) + } + +} + func makeInputTargetGroup() *targetgroup.Group { return &targetgroup.Group{ Targets: []model.LabelSet{ diff --git 
a/web/api/v1/api.go b/web/api/v1/api.go index 4f9eb2bfed..5d3fb2cb97 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -87,6 +87,7 @@ type targetRetriever interface { type alertmanagerRetriever interface { Alertmanagers() []*url.URL + DroppedAlertmanagers() []*url.URL } type response struct { @@ -468,7 +469,8 @@ func (api *API) targets(r *http.Request) (interface{}, *apiError) { // AlertmanagerDiscovery has all the active Alertmanagers. type AlertmanagerDiscovery struct { - ActiveAlertmanagers []*AlertmanagerTarget `json:"activeAlertmanagers"` + ActiveAlertmanagers []*AlertmanagerTarget `json:"activeAlertmanagers"` + DroppedAlertmanagers []*AlertmanagerTarget `json:"droppedAlertmanagers"` } // AlertmanagerTarget has info on one AM. @@ -478,12 +480,14 @@ type AlertmanagerTarget struct { func (api *API) alertmanagers(r *http.Request) (interface{}, *apiError) { urls := api.alertmanagerRetriever.Alertmanagers() - ams := &AlertmanagerDiscovery{ActiveAlertmanagers: make([]*AlertmanagerTarget, len(urls))} - + droppedURLS := api.alertmanagerRetriever.DroppedAlertmanagers() + ams := &AlertmanagerDiscovery{ActiveAlertmanagers: make([]*AlertmanagerTarget, len(urls)), DroppedAlertmanagers: make([]*AlertmanagerTarget, len(droppedURLS))} for i, url := range urls { ams.ActiveAlertmanagers[i] = &AlertmanagerTarget{URL: url.String()} } - + for i, url := range droppedURLS { + ams.DroppedAlertmanagers[i] = &AlertmanagerTarget{URL: url.String()} + } return ams, nil } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 6716f94a21..f919188528 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -48,10 +48,26 @@ func (f targetRetrieverFunc) Targets() []*scrape.Target { return f() } -type alertmanagerRetrieverFunc func() []*url.URL +type testAlertmanagerRetriever struct{} -func (f alertmanagerRetrieverFunc) Alertmanagers() []*url.URL { - return f() +func (t testAlertmanagerRetriever) Alertmanagers() []*url.URL { + return []*url.URL{ + { + Scheme: 
"http", + Host: "alertmanager.example.com:8080", + Path: "/api/v1/alerts", + }, + } +} + +func (t testAlertmanagerRetriever) DroppedAlertmanagers() []*url.URL { + return []*url.URL{ + { + Scheme: "http", + Host: "dropped.alertmanager.example.com:8080", + Path: "/api/v1/alerts", + }, + } } var samplePrometheusCfg = config.Config{ @@ -100,13 +116,7 @@ func TestEndpoints(t *testing.T) { } }) - ar := alertmanagerRetrieverFunc(func() []*url.URL { - return []*url.URL{{ - Scheme: "http", - Host: "alertmanager.example.com:8080", - Path: "/api/v1/alerts", - }} - }) + var ar testAlertmanagerRetriever api := &API{ Queryable: suite.Storage(), @@ -447,6 +457,11 @@ func TestEndpoints(t *testing.T) { URL: "http://alertmanager.example.com:8080/api/v1/alerts", }, }, + DroppedAlertmanagers: []*AlertmanagerTarget{ + { + URL: "http://dropped.alertmanager.example.com:8080/api/v1/alerts", + }, + }, }, }, {