From 9b85354acd12834d24cf711c188d4c2246615d97 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 11 Aug 2023 16:37:53 +0000 Subject: [PATCH 1/3] remote-write: respect Retry-After header on 5xx errors If the server sent it to us, we should assume it knows better than we do and respect it. Signed-off-by: Bryan Boreham --- storage/remote/client.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/storage/remote/client.go b/storage/remote/client.go index d493e414f..fbb680498 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -236,10 +236,8 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) error { } err = fmt.Errorf("server returned HTTP status %s: %s", httpResp.Status, line) } - if httpResp.StatusCode/100 == 5 { - return RecoverableError{err, defaultBackoff} - } - if c.retryOnRateLimit && httpResp.StatusCode == http.StatusTooManyRequests { + if httpResp.StatusCode/100 == 5 || + (c.retryOnRateLimit && httpResp.StatusCode == http.StatusTooManyRequests) { return RecoverableError{err, retryAfterDuration(httpResp.Header.Get("Retry-After"))} } return err From febd62a23eaf866dff98b991c7727bafaf4743aa Mon Sep 17 00:00:00 2001 From: William Dumont Date: Thu, 7 Sep 2023 16:36:29 +0200 Subject: [PATCH 2/3] remote-write: refactor TestClientRetryAfter The new version features a set of test cases that simplify the addition of new HTTP status codes. Signed-off-by: William Dumont --- storage/remote/client_test.go | 60 ++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/storage/remote/client_test.go b/storage/remote/client_test.go index a42a52e9a..45bd7ee04 100644 --- a/storage/remote/client_test.go +++ b/storage/remote/client_test.go @@ -85,12 +85,21 @@ func TestStoreHTTPErrorHandling(t *testing.T) { } func TestClientRetryAfter(t *testing.T) { - server := httptest.NewServer( - http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - http.Error(w, longErrMessage, http.StatusTooManyRequests) - }), - ) - defer server.Close() + setupServer := func(statusCode int) *httptest.Server { + return httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, longErrMessage, statusCode) + }), + ) + } + + getClientConfig := func(serverURL *url.URL, retryOnRateLimit bool) *ClientConfig { + return &ClientConfig{ + URL: &config_util.URL{URL: serverURL}, + Timeout: model.Duration(time.Second), + RetryOnRateLimit: retryOnRateLimit, + } + } getClient := func(conf *ClientConfig) WriteClient { hash, err := toHash(conf) @@ -100,30 +109,31 @@ func TestClientRetryAfter(t *testing.T) { return c } - serverURL, err := url.Parse(server.URL) - require.NoError(t, err) - - conf := &ClientConfig{ - URL: &config_util.URL{URL: serverURL}, - Timeout: model.Duration(time.Second), - RetryOnRateLimit: false, + testCases := []struct { + name string + statusCode int + retryOnRateLimit bool + expectedRecoverable bool + }{ + {"TooManyRequests - No Retry", http.StatusTooManyRequests, false, false}, + {"TooManyRequests - With Retry", http.StatusTooManyRequests, true, true}, } - var recErr RecoverableError + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + server := setupServer(tc.statusCode) + defer server.Close() - c := getClient(conf) - err = c.Store(context.Background(), []byte{}, 0) - require.False(t, errors.As(err, &recErr), "Recoverable error not expected.") + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) - conf = &ClientConfig{ - URL: &config_util.URL{URL: serverURL}, - Timeout: model.Duration(time.Second), - RetryOnRateLimit: true, + c := getClient(getClientConfig(serverURL, tc.retryOnRateLimit)) + + var recErr RecoverableError + err = c.Store(context.Background(), []byte{}, 0) + require.Equal(t, tc.expectedRecoverable, errors.As(err, &recErr), "Mismatch in expected recoverable error status.") + }) } - - c = getClient(conf) - err = c.Store(context.Background(), []byte{}, 0) - require.True(t, errors.As(err, &recErr), "Recoverable error was expected.") } func TestRetryAfterDuration(t *testing.T) { From ce6ad15422c597bf4a14a74f560d143aadafe50a Mon Sep 17 00:00:00 2001 From: William Dumont Date: Thu, 7 Sep 2023 16:42:00 +0200 Subject: [PATCH 3/3] remote-write: TestClientRetryAfter status code 500 and compare the retryAfter values. Signed-off-by: William Dumont --- storage/remote/client_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/storage/remote/client_test.go b/storage/remote/client_test.go index 45bd7ee04..9217e1c7e 100644 --- a/storage/remote/client_test.go +++ b/storage/remote/client_test.go @@ -88,6 +88,7 @@ func TestClientRetryAfter(t *testing.T) { setupServer := func(statusCode int) *httptest.Server { return httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "5") http.Error(w, longErrMessage, statusCode) }), ) @@ -114,9 +115,11 @@ func TestClientRetryAfter(t *testing.T) { statusCode int retryOnRateLimit bool expectedRecoverable bool + expectedRetryAfter model.Duration }{ - {"TooManyRequests - No Retry", http.StatusTooManyRequests, false, false}, - {"TooManyRequests - With Retry", http.StatusTooManyRequests, true, true}, + {"TooManyRequests - No Retry", http.StatusTooManyRequests, false, false, 0}, + {"TooManyRequests - With Retry", http.StatusTooManyRequests, true, true, 5 * model.Duration(time.Second)}, + {"InternalServerError", http.StatusInternalServerError, false, true, 5 * model.Duration(time.Second)}, // HTTP 5xx errors do not depend on retryOnRateLimit. } for _, tc := range testCases { @@ -132,6 +135,9 @@ func TestClientRetryAfter(t *testing.T) { var recErr RecoverableError err = c.Store(context.Background(), []byte{}, 0) require.Equal(t, tc.expectedRecoverable, errors.As(err, &recErr), "Mismatch in expected recoverable error status.") + if tc.expectedRecoverable { + require.Equal(t, tc.expectedRetryAfter, err.(RecoverableError).retryAfter) + } }) } }