From 1b29975865a6545e2416c2134164ad9a7e8bcead Mon Sep 17 00:00:00 2001
From: Julius Volz
Date: Thu, 17 Apr 2014 12:28:50 +0200
Subject: [PATCH] Fix RWLock memory storage deadlock.

This fixes https://github.com/prometheus/prometheus/issues/390

The cause for the deadlock was a lock semantic in Go that wasn't
obvious to me when introducing this bug:

http://golang.org/pkg/sync/#RWMutex.Lock

Key phrase: "To ensure that the lock eventually becomes available, a
blocked Lock call excludes new readers from acquiring the lock."

In the memory series storage, we have one function
(GetFingerprintsForLabelMatchers) acquiring an RLock(), which calls
another function also acquiring the same RLock()
(GetLabelValuesForLabelName). That normally doesn't deadlock, unless a
Lock() call from another goroutine happens right in between the two
RLock() calls, blocking both the Lock() and the second RLock() call
from ever completing.

  GoRoutine 1           GoRoutine 2
  ======================================
  RLock()
  ...                   Lock() [DEADLOCK]
  RLock() [DEADLOCK]
                        Unlock()
  RUnlock()
  RUnlock()

Testing deadlocks is tricky, but the regression test I added does
reliably detect the deadlock in the original code on my machine within
a normal concurrent reader/writer run duration of 250ms.

Change-Id: Ib34c2bb8df1a80af44550cc2bf5007055cdef413
---
 storage/metric/tiered/memory.go      |  6 ++-
 storage/metric/tiered/memory_test.go | 63 ++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/storage/metric/tiered/memory.go b/storage/metric/tiered/memory.go
index 896c1f8e74..76b2ff1ada 100644
--- a/storage/metric/tiered/memory.go
+++ b/storage/metric/tiered/memory.go
@@ -367,7 +367,7 @@ func (s *memorySeriesStorage) GetFingerprintsForLabelMatchers(labelMatchers metr
 			}
 			sets = append(sets, set)
 		default:
-			values, err := s.GetLabelValuesForLabelName(matcher.Name)
+			values, err := s.getLabelValuesForLabelName(matcher.Name)
 			if err != nil {
 				return nil, err
 			}
@@ -414,6 +414,10 @@ func (s *memorySeriesStorage) GetLabelValuesForLabelName(labelName clientmodel.L
 	s.RLock()
 	defer s.RUnlock()
 
+	return s.getLabelValuesForLabelName(labelName)
+}
+
+func (s *memorySeriesStorage) getLabelValuesForLabelName(labelName clientmodel.LabelName) (clientmodel.LabelValues, error) {
 	set, ok := s.labelNameToLabelValues[labelName]
 	if !ok {
 		return nil, nil
diff --git a/storage/metric/tiered/memory_test.go b/storage/metric/tiered/memory_test.go
index e948d12675..19a157880e 100644
--- a/storage/metric/tiered/memory_test.go
+++ b/storage/metric/tiered/memory_test.go
@@ -16,6 +16,7 @@ package tiered
 import (
 	"fmt"
 	"runtime"
+	"sync"
 	"testing"
 	"time"
 
@@ -154,3 +155,65 @@ func TestDroppedSeriesIndexRegression(t *testing.T) {
 		t.Fatalf("Got %d fingerprints, expected 1", len(fps))
 	}
 }
+
+func TestReaderWriterDeadlockRegression(t *testing.T) {
+	mp := runtime.GOMAXPROCS(2)
+	defer func(mp int) {
+		runtime.GOMAXPROCS(mp)
+	}(mp)
+
+	s := NewMemorySeriesStorage(MemorySeriesOptions{})
+	lms := metric.LabelMatchers{}
+
+	for i := 0; i < 100; i++ {
+		lm, err := metric.NewLabelMatcher(metric.NotEqual, clientmodel.MetricNameLabel, "testmetric")
+		if err != nil {
+			t.Fatal(err)
+		}
+		lms = append(lms, lm)
+	}
+
+	wg := sync.WaitGroup{}
+	wg.Add(2)
+
+	start := time.Now()
+	runDuration := 250 * time.Millisecond
+
+	writer := func() {
+		for time.Since(start) < runDuration {
+			s.AppendSamples(clientmodel.Samples{
+				&clientmodel.Sample{
+					Metric: clientmodel.Metric{
+						clientmodel.MetricNameLabel: "testmetric",
+					},
+					Value:     1,
+					Timestamp: 0,
+				},
+			})
+		}
+		wg.Done()
+	}
+
+	reader := func() {
+		for time.Since(start) < runDuration {
+			s.GetFingerprintsForLabelMatchers(lms)
+		}
+		wg.Done()
+	}
+
+	go reader()
+	go writer()
+
+	allDone := make(chan struct{})
+	go func() {
+		wg.Wait()
+		allDone <- struct{}{}
+	}()
+
+	select {
+	case <-allDone:
+		break
+	case <-time.NewTimer(5 * time.Second).C:
+		t.Fatalf("Deadlock timeout")
+	}
+}
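
As an aside, the problematic RWMutex interleaving described in the
commit message is easy to reproduce in isolation. Below is a minimal
standalone sketch (illustrative only, not part of the diff above) that
triggers the same sequence of calls; since both goroutines end up
blocked in sync primitives, the Go runtime aborts the program with
"fatal error: all goroutines are asleep - deadlock!":

package main

import (
	"sync"
	"time"
)

func main() {
	var mu sync.RWMutex

	// GoRoutine 1: holds a read lock and then tries to re-acquire it.
	go func() {
		mu.RLock()                         // first RLock() succeeds
		time.Sleep(100 * time.Millisecond) // let GoRoutine 2 queue its Lock()
		mu.RLock()                         // blocks: a pending Lock() excludes new readers
		mu.RUnlock()
		mu.RUnlock()
	}()

	// GoRoutine 2 (main): a writer arriving between the two RLock() calls.
	time.Sleep(50 * time.Millisecond)
	mu.Lock() // blocks: the first RLock() is still held
	mu.Unlock()
}

The fix in the patch follows the usual pattern for this: the exported
GetLabelValuesForLabelName keeps taking the lock, while the new
unexported getLabelValuesForLabelName assumes the lock is already held,
so the re-entrant RLock() never happens.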