Mirror of https://github.com/prometheus/prometheus.git
Add prometheus_ready metric (#10682)
When Prometheus starts, it can take a long time to replay the WAL before it can do anything useful, yet while it is starting it already exposes metrics that other Prometheus servers can scrape. We have alerts that fire if any Prometheus server is not ingesting samples, and so far we have dealt with instances that take a long time to start by adding a check on Prometheus process uptime. Relying on uptime isn't ideal because the time needed to start depends on the number of metrics scraped, and therefore on the amount of data in the WAL. To help write better alerts, it would be great if Prometheus exposed a metric that tells us it has fully started; that way any alert that is supposed to notify us about a runtime issue can filter out starting instances.

Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
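To illustrate the intent (this rule is not part of the commit): a minimal sketch of an alerting rule that filters out instances that are still starting. The group and alert names, the 10m duration, and the use of prometheus_tsdb_head_samples_appended_total as the ingestion signal are illustrative assumptions:

    groups:
      - name: prometheus-ingestion            # hypothetical group name
        rules:
          - alert: PrometheusNotIngestingSamples   # hypothetical alert name
            expr: |
              rate(prometheus_tsdb_head_samples_appended_total[5m]) == 0
                and on (instance) prometheus_ready == 1
            for: 10m

The "and on (instance) prometheus_ready == 1" join drops series from servers that have not finished starting up, so the alert only fires for instances that report themselves ready yet ingest nothing.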
Parent: 8a01943abc
Commit: 070e409dba
@@ -86,6 +86,10 @@ var (
 		Name: "prometheus_config_last_reload_success_timestamp_seconds",
 		Help: "Timestamp of the last successful configuration reload.",
 	})
+	readyStatus = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "prometheus_ready",
+		Help: "Whether Prometheus startup was fully completed and the server is ready for normal operation.",
+	})
 
 	defaultRetentionString   = "15d"
 	defaultRetentionDuration model.Duration
@@ -752,6 +756,7 @@ func main() {
 
 	prometheus.MustRegister(configSuccess)
 	prometheus.MustRegister(configSuccessTime)
+	prometheus.MustRegister(readyStatus)
 
 	// Start all components while we wait for TSDB to open but only load
 	// initial config and mark ourselves as ready after it completed.
@@ -946,6 +951,7 @@ func main() {
 
 				webHandler.Ready()
 				level.Info(logger).Log("msg", "Server is ready to receive web requests.")
+				readyStatus.Set(1)
 				<-cancel
 				return nil
 			},
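For reference, once the gauge is registered it appears in the server's own /metrics output. Assuming client_golang's default exposition format, it should look roughly like this, reporting 0 while the WAL is still being replayed and 1 after readyStatus.Set(1) runs:

    # HELP prometheus_ready Whether Prometheus startup was fully completed and the server is ready for normal operation.
    # TYPE prometheus_ready gauge
    prometheus_ready 1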