From 983a8eae4f796e75a582c201ecc4ec7de09c087b Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Thu, 12 Nov 2020 16:06:25 +0000 Subject: [PATCH 1/2] Protect sp.loops from concurrent access. (#8176) Manager.reload takes the mutex that would make it safe, however releases it before the goroutines spawned are finished with it. Thus more explicit locking of scrapePool.Sync/stop/reload is needed. Signed-off-by: Brian Brazil --- scrape/scrape.go | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index f6aa49810..bd89b69f6 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -191,23 +191,20 @@ func init() { type scrapePool struct { appendable storage.Appendable logger log.Logger + cancel context.CancelFunc + + // mtx must not be taken after targetMtx. + mtx sync.Mutex + config *config.ScrapeConfig + client *http.Client + loops map[uint64]loop + targetLimitHit bool // Internal state to speed up the target_limit checks. - // targetMtx protects activeTargets and droppedTargets from concurrent reads - // and writes. Only one of Sync/stop/reload may be called at once due to - // manager.mtxScrape so we only need to protect from concurrent reads from - // the ActiveTargets and DroppedTargets methods. This allows those two - // methods to always complete without having to wait on scrape loops to gracefull stop. targetMtx sync.Mutex - - config *config.ScrapeConfig - client *http.Client - // Targets and loops must always be synchronized to have the same + // activeTargets and loops must always be synchronized to have the same // set of hashes. activeTargets map[uint64]*Target droppedTargets []*Target - loops map[uint64]loop - cancel context.CancelFunc - targetLimitHit bool // Internal state to speed up the target_limit checks. // Constructor for new scrape loops. This is settable for testing convenience. newLoop func(scrapeLoopOptions) loop @@ -297,6 +294,8 @@ func (sp *scrapePool) DroppedTargets() []*Target { // stop terminates all scrape loops and returns after they all terminated. func (sp *scrapePool) stop() { + sp.mtx.Lock() + defer sp.mtx.Unlock() sp.cancel() var wg sync.WaitGroup @@ -331,6 +330,8 @@ func (sp *scrapePool) stop() { // but all scrape loops are restarted with the new scrape configuration. // This method returns after all scrape loops that were stopped have stopped scraping. func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { + sp.mtx.Lock() + defer sp.mtx.Unlock() targetScrapePoolReloads.Inc() start := time.Now() @@ -407,6 +408,8 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { // Sync converts target groups into actual scrape targets and synchronizes // the currently running scraper with the resulting set and returns all scraped and dropped targets. func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { + sp.mtx.Lock() + defer sp.mtx.Unlock() start := time.Now() sp.targetMtx.Lock() From 84b186b3fd7f23939c6b1b232366ea35e92f88b0 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Mon, 16 Nov 2020 10:48:30 +0100 Subject: [PATCH 2/2] *: Cut v2.22.2 Signed-off-by: Frederic Branczyk --- CHANGELOG.md | 4 ++++ VERSION | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94f61d38c..ff8cf8d08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.22.2 / 2020-11-16 + +* [BUGFIX] Fix race condition in syncing/stopping/reloading scrapers. #8176 + ## 2.22.1 / 2020-11-03 * [BUGFIX] Fix potential "mmap: invalid argument" errors in loading the head chunks, after an unclean shutdown, by performing read repairs. #8061 diff --git a/VERSION b/VERSION index d93847fab..acbef7695 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.22.1 +2.22.2