From 68e8b80ffe5f7271861034c92125a3aefee992f8 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Wed, 4 Jul 2018 13:41:16 +0100 Subject: [PATCH] Reorder startup and shutdown to prevent panics. (#4321) Start rule manager only after tsdb and config are loaded. Stop rule manager before tsdb to avoid writing to closed storage. Wait for any in-progress reloads to complete before shutting down rule manager, so that rule manager doesn't get updated after being shut down. Remove incorrect comment around shutting down query engine. Log when config reload is completed. Fixes #4133 Fixes #4262 Signed-off-by: Brian Brazil --- cmd/prometheus/main.go | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index a5985d5434..fc9e707b9f 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -472,7 +472,9 @@ func main() { }, func(err error) { - close(cancel) + // Wait for any in-progress reloads to complete to avoid + // reloading things after they have been shutdown. + cancel <- struct{}{} }, ) } @@ -506,6 +508,23 @@ func main() { }, ) } + { + // Rule manager. + // TODO(krasi) refactor ruleManager.Run() to be blocking to avoid using an extra blocking channel. + cancel := make(chan struct{}) + g.Add( + func() error { + <-reloadReady.C + ruleManager.Run() + <-cancel + return nil + }, + func(err error) { + ruleManager.Stop() + close(cancel) + }, + ) + } { // TSDB. cancel := make(chan struct{}) @@ -547,30 +566,10 @@ func main() { return nil }, func(err error) { - // Keep this interrupt before the ruleManager.Stop(). - // Shutting down the query engine before the rule manager will cause pending queries - // to be canceled and ensures a quick shutdown of the rule manager. cancelWeb() }, ) } - { - // Rule manager. - - // TODO(krasi) refactor ruleManager.Run() to be blocking to avoid using an extra blocking channel. 
- cancel := make(chan struct{}) - g.Add( - func() error { - ruleManager.Run() - <-cancel - return nil - }, - func(err error) { - ruleManager.Stop() - close(cancel) - }, - ) - } { // Notifier. @@ -627,6 +626,7 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config if failed { return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%s)", filename) } + level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename) return nil }