Merge pull request #12677 from bboreham/retry-on-500

remote-write: respect Retry-After header on 5xx errors
This commit is contained in:
Bryan Boreham 2023-09-20 14:02:34 +00:00 committed by GitHub
commit 2b77ad4591
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 29 deletions

View file

@ -236,10 +236,8 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) error {
}
err = fmt.Errorf("server returned HTTP status %s: %s", httpResp.Status, line)
}
if httpResp.StatusCode/100 == 5 {
return RecoverableError{err, defaultBackoff}
}
if c.retryOnRateLimit && httpResp.StatusCode == http.StatusTooManyRequests {
if httpResp.StatusCode/100 == 5 ||
(c.retryOnRateLimit && httpResp.StatusCode == http.StatusTooManyRequests) {
return RecoverableError{err, retryAfterDuration(httpResp.Header.Get("Retry-After"))}
}
return err

View file

@ -85,12 +85,22 @@ func TestStoreHTTPErrorHandling(t *testing.T) {
}
func TestClientRetryAfter(t *testing.T) {
server := httptest.NewServer(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, longErrMessage, http.StatusTooManyRequests)
}),
)
defer server.Close()
setupServer := func(statusCode int) *httptest.Server {
return httptest.NewServer(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Retry-After", "5")
http.Error(w, longErrMessage, statusCode)
}),
)
}
getClientConfig := func(serverURL *url.URL, retryOnRateLimit bool) *ClientConfig {
return &ClientConfig{
URL: &config_util.URL{URL: serverURL},
Timeout: model.Duration(time.Second),
RetryOnRateLimit: retryOnRateLimit,
}
}
getClient := func(conf *ClientConfig) WriteClient {
hash, err := toHash(conf)
@ -100,30 +110,36 @@ func TestClientRetryAfter(t *testing.T) {
return c
}
serverURL, err := url.Parse(server.URL)
require.NoError(t, err)
conf := &ClientConfig{
URL: &config_util.URL{URL: serverURL},
Timeout: model.Duration(time.Second),
RetryOnRateLimit: false,
testCases := []struct {
name string
statusCode int
retryOnRateLimit bool
expectedRecoverable bool
expectedRetryAfter model.Duration
}{
{"TooManyRequests - No Retry", http.StatusTooManyRequests, false, false, 0},
{"TooManyRequests - With Retry", http.StatusTooManyRequests, true, true, 5 * model.Duration(time.Second)},
{"InternalServerError", http.StatusInternalServerError, false, true, 5 * model.Duration(time.Second)}, // HTTP 5xx errors do not depend on retryOnRateLimit.
}
var recErr RecoverableError
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
server := setupServer(tc.statusCode)
defer server.Close()
c := getClient(conf)
err = c.Store(context.Background(), []byte{}, 0)
require.False(t, errors.As(err, &recErr), "Recoverable error not expected.")
serverURL, err := url.Parse(server.URL)
require.NoError(t, err)
conf = &ClientConfig{
URL: &config_util.URL{URL: serverURL},
Timeout: model.Duration(time.Second),
RetryOnRateLimit: true,
c := getClient(getClientConfig(serverURL, tc.retryOnRateLimit))
var recErr RecoverableError
err = c.Store(context.Background(), []byte{}, 0)
require.Equal(t, tc.expectedRecoverable, errors.As(err, &recErr), "Mismatch in expected recoverable error status.")
if tc.expectedRecoverable {
require.Equal(t, tc.expectedRetryAfter, err.(RecoverableError).retryAfter)
}
})
}
c = getClient(conf)
err = c.Store(context.Background(), []byte{}, 0)
require.True(t, errors.As(err, &recErr), "Recoverable error was expected.")
}
func TestRetryAfterDuration(t *testing.T) {