mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Clean up quitting behavior and add quit trigger.
Closing Prometheus now uses a sync.Once wrapper to prevent accidental multiple invocations, which could trigger corruption or a race condition. The shutdown process is made more verbose through logging. A web handler, not enabled by default, has been provided to trigger a remote shutdown if requested for debugging purposes. Change-Id: If4fee75196bbff1fb1e4a4ef7e1cfa53fef88f2e
This commit is contained in:
parent
58ef638e72
commit
2064f32662
57
main.go
57
main.go
|
@ -17,6 +17,7 @@ import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -68,6 +69,8 @@ var (
|
||||||
concurrentRetrievalAllowance = flag.Int("concurrentRetrievalAllowance", 15, "The number of concurrent metrics retrieval requests allowed.")
|
concurrentRetrievalAllowance = flag.Int("concurrentRetrievalAllowance", 15, "The number of concurrent metrics retrieval requests allowed.")
|
||||||
|
|
||||||
printVersion = flag.Bool("version", false, "print version information")
|
printVersion = flag.Bool("version", false, "print version information")
|
||||||
|
|
||||||
|
shutdownTimeout = flag.Duration("shutdownGracePeriod", 0*time.Second, "The amount of time Prometheus gives background services to finish running when shutdown is requested.")
|
||||||
)
|
)
|
||||||
|
|
||||||
type prometheus struct {
|
type prometheus struct {
|
||||||
|
@ -86,6 +89,8 @@ type prometheus struct {
|
||||||
remoteTSDBQueue *remote.TSDBQueueManager
|
remoteTSDBQueue *remote.TSDBQueueManager
|
||||||
|
|
||||||
curationState metric.CurationStateUpdater
|
curationState metric.CurationStateUpdater
|
||||||
|
|
||||||
|
closeOnce sync.Once
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *prometheus) interruptHandler() {
|
func (p *prometheus) interruptHandler() {
|
||||||
|
@ -95,7 +100,9 @@ func (p *prometheus) interruptHandler() {
|
||||||
<-notifier
|
<-notifier
|
||||||
|
|
||||||
glog.Warning("Received SIGINT/SIGTERM; Exiting gracefully...")
|
glog.Warning("Received SIGINT/SIGTERM; Exiting gracefully...")
|
||||||
p.close()
|
|
||||||
|
p.Close()
|
||||||
|
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,7 +173,23 @@ func (p *prometheus) delete(olderThan time.Duration, batchSize int) error {
|
||||||
return curator.Run(olderThan, clientmodel.Now(), processor, p.storage.DiskStorage.CurationRemarks, p.storage.DiskStorage.MetricSamples, p.storage.DiskStorage.MetricHighWatermarks, p.curationState)
|
return curator.Run(olderThan, clientmodel.Now(), processor, p.storage.DiskStorage.CurationRemarks, p.storage.DiskStorage.MetricSamples, p.storage.DiskStorage.MetricHighWatermarks, p.curationState)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *prometheus) Close() {
|
||||||
|
p.closeOnce.Do(p.close)
|
||||||
|
}
|
||||||
|
|
||||||
func (p *prometheus) close() {
|
func (p *prometheus) close() {
|
||||||
|
// The "Done" remarks are a misnomer for some subsystems due to lack of
|
||||||
|
// blocking and synchronization.
|
||||||
|
glog.Info("Shutdown has been requested; subsytems are closing:")
|
||||||
|
p.targetManager.Stop()
|
||||||
|
glog.Info("Remote Target Manager: Done")
|
||||||
|
p.ruleManager.Stop()
|
||||||
|
glog.Info("Rule Executor: Done")
|
||||||
|
|
||||||
|
// Stop any currently active curation (deletion or compaction).
|
||||||
|
close(p.stopBackgroundOperations)
|
||||||
|
glog.Info("Current Curation Workers: Requested")
|
||||||
|
|
||||||
// Disallow further curation work.
|
// Disallow further curation work.
|
||||||
close(p.curationSema)
|
close(p.curationSema)
|
||||||
|
|
||||||
|
@ -177,21 +200,27 @@ func (p *prometheus) close() {
|
||||||
if p.deletionTimer != nil {
|
if p.deletionTimer != nil {
|
||||||
p.deletionTimer.Stop()
|
p.deletionTimer.Stop()
|
||||||
}
|
}
|
||||||
|
glog.Info("Future Curation Workers: Done")
|
||||||
|
|
||||||
// Stop any currently active curation (deletion or compaction).
|
glog.Infof("Waiting %s for background systems to exit and flush before finalizing (DO NOT INTERRUPT THE PROCESS) ...", *shutdownTimeout)
|
||||||
close(p.stopBackgroundOperations)
|
|
||||||
|
// Wart: We should have a concrete form of synchronization for this, not a
|
||||||
|
// hokey sleep statement.
|
||||||
|
time.Sleep(*shutdownTimeout)
|
||||||
|
|
||||||
p.ruleManager.Stop()
|
|
||||||
p.targetManager.Stop()
|
|
||||||
close(p.unwrittenSamples)
|
close(p.unwrittenSamples)
|
||||||
|
|
||||||
p.storage.Close()
|
p.storage.Close()
|
||||||
|
glog.Info("Local Storage: Done")
|
||||||
|
|
||||||
if p.remoteTSDBQueue != nil {
|
if p.remoteTSDBQueue != nil {
|
||||||
p.remoteTSDBQueue.Close()
|
p.remoteTSDBQueue.Close()
|
||||||
|
glog.Info("Remote Storage: Done")
|
||||||
}
|
}
|
||||||
|
|
||||||
close(p.notifications)
|
close(p.notifications)
|
||||||
|
glog.Info("Sundry Queues: Done")
|
||||||
|
glog.Info("See you next time!")
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -288,13 +317,6 @@ func main() {
|
||||||
Storage: ts,
|
Storage: ts,
|
||||||
}
|
}
|
||||||
|
|
||||||
webService := &web.WebService{
|
|
||||||
StatusHandler: prometheusStatus,
|
|
||||||
MetricsHandler: metricsService,
|
|
||||||
DatabasesHandler: databasesHandler,
|
|
||||||
AlertsHandler: alertsHandler,
|
|
||||||
}
|
|
||||||
|
|
||||||
prometheus := &prometheus{
|
prometheus := &prometheus{
|
||||||
compactionTimer: compactionTimer,
|
compactionTimer: compactionTimer,
|
||||||
|
|
||||||
|
@ -313,7 +335,16 @@ func main() {
|
||||||
storage: ts,
|
storage: ts,
|
||||||
remoteTSDBQueue: remoteTSDBQueue,
|
remoteTSDBQueue: remoteTSDBQueue,
|
||||||
}
|
}
|
||||||
defer prometheus.close()
|
defer prometheus.Close()
|
||||||
|
|
||||||
|
webService := &web.WebService{
|
||||||
|
StatusHandler: prometheusStatus,
|
||||||
|
MetricsHandler: metricsService,
|
||||||
|
DatabasesHandler: databasesHandler,
|
||||||
|
AlertsHandler: alertsHandler,
|
||||||
|
|
||||||
|
QuitDelegate: prometheus.Close,
|
||||||
|
}
|
||||||
|
|
||||||
prometheus.curationSema <- struct{}{}
|
prometheus.curationSema <- struct{}{}
|
||||||
|
|
||||||
|
|
19
web/web.go
19
web/web.go
|
@ -38,6 +38,7 @@ var (
|
||||||
listenAddress = flag.String("listenAddress", ":9090", "Address to listen on for web interface.")
|
listenAddress = flag.String("listenAddress", ":9090", "Address to listen on for web interface.")
|
||||||
useLocalAssets = flag.Bool("useLocalAssets", false, "Read assets/templates from file instead of binary.")
|
useLocalAssets = flag.Bool("useLocalAssets", false, "Read assets/templates from file instead of binary.")
|
||||||
userAssetsPath = flag.String("userAssets", "", "Path to static asset directory, available at /user")
|
userAssetsPath = flag.String("userAssets", "", "Path to static asset directory, available at /user")
|
||||||
|
enableQuit = flag.Bool("web.enableRemoteShutdown", false, "Enable remote service shutdown")
|
||||||
)
|
)
|
||||||
|
|
||||||
type WebService struct {
|
type WebService struct {
|
||||||
|
@ -45,6 +46,8 @@ type WebService struct {
|
||||||
DatabasesHandler *DatabasesHandler
|
DatabasesHandler *DatabasesHandler
|
||||||
MetricsHandler *api.MetricsService
|
MetricsHandler *api.MetricsService
|
||||||
AlertsHandler *AlertsHandler
|
AlertsHandler *AlertsHandler
|
||||||
|
|
||||||
|
QuitDelegate func()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w WebService) ServeForever() error {
|
func (w WebService) ServeForever() error {
|
||||||
|
@ -77,11 +80,27 @@ func (w WebService) ServeForever() error {
|
||||||
exp.Handle("/user/", http.StripPrefix("/user/", http.FileServer(http.Dir(*userAssetsPath))))
|
exp.Handle("/user/", http.StripPrefix("/user/", http.FileServer(http.Dir(*userAssetsPath))))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if *enableQuit {
|
||||||
|
exp.HandleFunc("/-/quit", w.quitHandler)
|
||||||
|
}
|
||||||
|
|
||||||
glog.Info("listening on ", *listenAddress)
|
glog.Info("listening on ", *listenAddress)
|
||||||
|
|
||||||
return http.ListenAndServe(*listenAddress, exp.DefaultCoarseMux)
|
return http.ListenAndServe(*listenAddress, exp.DefaultCoarseMux)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s WebService) quitHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != "POST" {
|
||||||
|
w.Header().Add("Allow", "POST")
|
||||||
|
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "Requesting termination... Goodbye!")
|
||||||
|
|
||||||
|
s.QuitDelegate()
|
||||||
|
}
|
||||||
|
|
||||||
func getLocalTemplate(name string) (*template.Template, error) {
|
func getLocalTemplate(name string) (*template.Template, error) {
|
||||||
return template.ParseFiles(
|
return template.ParseFiles(
|
||||||
"web/templates/_base.html",
|
"web/templates/_base.html",
|
||||||
|
|
Loading…
Reference in a new issue