notifier: unit test for dropping throughput on stuck AM
Ref: https://github.com/prometheus/prometheus/issues/7676

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
parent dc926527ae
commit d13d7b26cf
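Context for the scenario under test: a "stuck" Alertmanager here is one that accepts the connection but never writes a response, so a send to it only returns when the caller enforces its own deadline. The sketch below is illustrative only; it is not Prometheus code and not part of this commit, and its names are made up. It shows the two ingredients the test combines: a handler that blocks until told to stop, and a client-side timeout, which the test then makes effectively infinite via alertmanager_config.timeout.

// Illustrative sketch only (not Prometheus code, names are made up): a request
// to a server that never answers returns only because the client enforces its
// own deadline.
package main

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"time"
)

func main() {
	stop := make(chan struct{})

	// "Black hole" server: the handler blocks until we tear it down or the
	// client gives up and its connection goes away.
	blackHole := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		select {
		case <-stop:
		case <-r.Context().Done():
		}
	}))
	defer blackHole.Close()
	defer close(stop) // runs before Close, so the handler is unblocked first

	// Client-side deadline; without it this call would block indefinitely.
	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, blackHole.URL, nil)
	if err != nil {
		panic(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if resp != nil {
		resp.Body.Close()
	}
	fmt.Println("request ended with:", err) // a "context deadline exceeded" error
}

With the 1-year timeout used in the test below, that client-side deadline effectively never fires, which is what turns the first Alertmanager set into a true black hole.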
@@ -811,3 +811,80 @@ func TestHangingNotifier(t *testing.T) {
 		})
 	}
 }
+
+// Regression test for https://github.com/prometheus/prometheus/issues/7676
+// The test creates a black hole alertmanager that never responds to any requests.
+// The alertmanager_config.timeout is set to infinite (1 year).
+// We check that the notifier does not hang and throughput is not affected.
+func TestNotifierQueueIndependentOfFailedAlertmanager(t *testing.T) {
+	stopBlackHole := make(chan struct{})
+	blackHoleAM := newBlackHoleAlertmanager(stopBlackHole)
+	defer close(stopBlackHole)
+
+	doneAlertReceive := make(chan struct{})
+	immediateAM := newImmediateAlertManager(doneAlertReceive)
+
+	h := NewManager(&Options{}, nil)
+
+	h.alertmanagers = make(map[string]*alertmanagerSet)
+
+	amCfg := config.DefaultAlertmanagerConfig
+	amCfg.Timeout = model.Duration(time.Hour * 24 * 365)
+
+	h.alertmanagers["1"] = &alertmanagerSet{
+		ams: []alertmanager{
+			alertmanagerMock{
+				urlf: func() string { return blackHoleAM.URL },
+			},
+		},
+		cfg: &amCfg,
+	}
+
+	h.alertmanagers["2"] = &alertmanagerSet{
+		ams: []alertmanager{
+			alertmanagerMock{
+				urlf: func() string { return immediateAM.URL },
+			},
+		},
+		cfg: &amCfg,
+	}
+
+	h.queue = append(h.queue, &Alert{
+		Labels: labels.FromStrings("alertname", "test"),
+	})
+
+	doneSendAll := make(chan struct{})
+	go func() {
+		h.sendAll(h.queue...)
+		close(doneSendAll)
+	}()
+
+	select {
+	case <-doneAlertReceive:
+		// This is the happy case, the alert was received by the immediate alertmanager.
+	case <-time.After(30 * time.Second):
+		t.Fatal("Timeout waiting for alert to be received by immediate alertmanager")
+	}
+
+	select {
+	case <-doneSendAll:
+		// This is the happy case, the sendAll function returned.
+	case <-time.After(30 * time.Second):
+		t.Fatal("Timeout waiting for sendAll to return")
+	}
+}
+
+func newBlackHoleAlertmanager(stop <-chan struct{}) *httptest.Server {
+	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Do nothing, wait to be canceled.
+		<-stop
+		w.WriteHeader(http.StatusOK)
+	}))
+}
+
+func newImmediateAlertManager(done chan<- struct{}) *httptest.Server {
+	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		close(done)
+	}))
+}
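Assuming the surrounding file is notifier/notifier_test.go (suggested by the TestHangingNotifier context in the hunk header, not stated in this diff), the new test can be run on its own with: go test -run TestNotifierQueueIndependentOfFailedAlertmanager ./notifier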