diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 13e732f96..24e666cb6 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -983,7 +983,7 @@ func main() { }, func(err error) { close(cancel) - webHandler.SetReady(false) + webHandler.SetReady(web.Stopping) }, ) } @@ -1162,7 +1162,7 @@ func main() { reloadReady.Close() - webHandler.SetReady(true) + webHandler.SetReady(web.Ready) level.Info(logger).Log("msg", "Server is ready to receive web requests.") <-cancel return nil diff --git a/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx b/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx index 52ae0485c..dbfcba555 100644 --- a/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx +++ b/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx @@ -1,17 +1,23 @@ import { FC, PropsWithChildren, useEffect, useState } from "react"; +import { IconAlertTriangle } from "@tabler/icons-react"; import { useAppDispatch } from "../state/hooks"; import { updateSettings, useSettings } from "../state/settingsSlice"; import { useSuspenseAPIQuery } from "../api/api"; import { WALReplayStatus } from "../api/responseTypes/walreplay"; -import { Progress, Stack, Title } from "@mantine/core"; +import { Progress, Alert } from "@mantine/core"; import { useSuspenseQuery } from "@tanstack/react-query"; +const STATUS_STARTING = "is starting up..."; +const STATUS_STOPPING = "is shutting down..."; +const STATUS_LOADING = "is not ready..."; + const ReadinessLoader: FC = () => { - const { pathPrefix } = useSettings(); + const { pathPrefix, agentMode } = useSettings(); const dispatch = useAppDispatch(); // Query key is incremented every second to retrigger the status fetching. const [queryKey, setQueryKey] = useState(0); + const [statusMessage, setStatusMessage] = useState(""); // Query readiness status. 
const { data: ready } = useSuspenseQuery({ @@ -28,8 +34,16 @@ const ReadinessLoader: FC = () => { }); switch (res.status) { case 200: + setStatusMessage(""); // Clear any status message when ready. return true; case 503: + // Check the custom header `X-Prometheus-Stopping` for stopping information. + if (res.headers.get("X-Prometheus-Stopping") === "true") { + setStatusMessage(STATUS_STOPPING); + } else { + setStatusMessage(STATUS_STARTING); + } + return false; default: throw new Error(res.statusText); @@ -40,14 +54,16 @@ const ReadinessLoader: FC = () => { }, }); - // Query WAL replay status. + // Only call WAL replay status API if the service is starting up. + const shouldQueryWALReplay = statusMessage === STATUS_STARTING; + const { - data: { - data: { min, max, current }, - }, + data: walData, + isSuccess: walSuccess, } = useSuspenseAPIQuery({ path: "/status/walreplay", key: ["walreplay", queryKey], + enabled: shouldQueryWALReplay, // Only enabled when service is starting up. }); useEffect(() => { @@ -62,21 +78,28 @@ const ReadinessLoader: FC = () => { }, []); return ( - - Starting up... - {max > 0 && ( + } + maw={500} + mx="auto" + mt="lg" + > + {shouldQueryWALReplay && walSuccess && walData && ( <> -

- Replaying WAL ({current}/{max}) -

+ + Replaying WAL ({walData.data.current}/{walData.data.max}) + )} -
+ ); }; diff --git a/web/web.go b/web/web.go index b4d285108..6b0d9cd18 100644 --- a/web/web.go +++ b/web/web.go @@ -102,6 +102,14 @@ var newUIReactRouterServerPaths = []string{ "/tsdb-status", } +type ReadyStatus uint32 + +const ( + NotReady ReadyStatus = iota + Ready + Stopping +) + // withStackTrace logs the stack trace in case the request panics. The function // will re-raise the error which will then be handled by the net/http package. // It is needed because the go-kit log package doesn't manage properly the @@ -331,7 +339,7 @@ func New(logger log.Logger, o *Options) *Handler { now: model.Now, } - h.SetReady(false) + h.SetReady(NotReady) factorySPr := func(_ context.Context) api_v1.ScrapePoolsRetriever { return h.scrapeManager } factoryTr := func(_ context.Context) api_v1.TargetRetriever { return h.scrapeManager } @@ -572,30 +580,39 @@ func serveDebug(w http.ResponseWriter, req *http.Request) { } // SetReady sets the ready status of our web Handler. -func (h *Handler) SetReady(v bool) { - if v { - h.ready.Store(1) +func (h *Handler) SetReady(v ReadyStatus) { + if v == Ready { + h.ready.Store(uint32(Ready)) h.metrics.readyStatus.Set(1) return } - h.ready.Store(0) + h.ready.Store(uint32(v)) h.metrics.readyStatus.Set(0) } // Verifies whether the server is ready or not. func (h *Handler) isReady() bool { - return h.ready.Load() > 0 + return ReadyStatus(h.ready.Load()) == Ready } // Checks if server is ready, calls f if it is, returns 503 if it is not. 
func (h *Handler) testReady(f http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - if h.isReady() { + switch ReadyStatus(h.ready.Load()) { + case Ready: f(w, r) - } else { + case NotReady: + w.Header().Set("X-Prometheus-Stopping", "false") + w.WriteHeader(http.StatusServiceUnavailable) + fmt.Fprintf(w, "Service Unavailable") + case Stopping: + w.Header().Set("X-Prometheus-Stopping", "true") w.WriteHeader(http.StatusServiceUnavailable) fmt.Fprintf(w, "Service Unavailable") + default: + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintf(w, "Unknown state") } } } diff --git a/web/web_test.go b/web/web_test.go index b660746b1..696ba80d1 100644 --- a/web/web_test.go +++ b/web/web_test.go @@ -156,7 +156,7 @@ func TestReadyAndHealthy(t *testing.T) { cleanupTestResponse(t, resp) // Set to ready. - webHandler.SetReady(true) + webHandler.SetReady(Ready) for _, u := range []string{ baseURL + "/-/healthy", @@ -260,7 +260,7 @@ func TestRoutePrefix(t *testing.T) { cleanupTestResponse(t, resp) // Set to ready. 
- webHandler.SetReady(true) + webHandler.SetReady(Ready) resp, err = http.Get(baseURL + opts.RoutePrefix + "/-/healthy") require.NoError(t, err) @@ -307,7 +307,7 @@ func TestDebugHandler(t *testing.T) { }, } handler := New(nil, opts) - handler.SetReady(true) + handler.SetReady(Ready) w := httptest.NewRecorder() @@ -349,7 +349,7 @@ func TestHTTPMetrics(t *testing.T) { counter := handler.metrics.requestCounter require.Equal(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusServiceUnavailable))))) - handler.SetReady(true) + handler.SetReady(Ready) for range [2]int{} { code = getReady() require.Equal(t, http.StatusOK, code) @@ -358,7 +358,7 @@ func TestHTTPMetrics(t *testing.T) { require.Equal(t, 2, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusOK))))) require.Equal(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusServiceUnavailable))))) - handler.SetReady(false) + handler.SetReady(NotReady) for range [2]int{} { code = getReady() require.Equal(t, http.StatusServiceUnavailable, code) @@ -537,7 +537,7 @@ func TestAgentAPIEndPoints(t *testing.T) { opts.Flags = map[string]string{} webHandler := New(nil, opts) - webHandler.SetReady(true) + webHandler.SetReady(Ready) webHandler.config = &config.Config{} webHandler.notifier = &notifier.Manager{} l, err := webHandler.Listeners() @@ -692,7 +692,7 @@ func TestMultipleListenAddresses(t *testing.T) { time.Sleep(5 * time.Second) // Set to ready. - webHandler.SetReady(true) + webHandler.SetReady(Ready) for _, port := range []string{port1, port2} { baseURL := "http://localhost" + port