prometheus/storage/metric/tiered/memory_test.go
Julius Volz 1b29975865 Fix RWLock memory storage deadlock.
This fixes https://github.com/prometheus/prometheus/issues/390

The cause of the deadlock was a locking semantic in Go that wasn't
obvious to me when I introduced this bug:

http://golang.org/pkg/sync/#RWMutex.Lock

Key phrase: "To ensure that the lock eventually becomes available, a
blocked Lock call excludes new readers from acquiring the lock."

In the memory series storage, we have one function
(GetFingerprintsForLabelMatchers) acquiring an RLock(), which calls
another function also acquiring the same RLock()
(GetLabelValuesForLabelName). That normally doesn't deadlock, unless a
Lock() call from another goroutine happens right in between the two
RLock() calls, blocking both the Lock() and the second RLock() call from
ever completing.

  GoRoutine 1          GoRoutine 2
  ======================================
  RLock()
  ...                  Lock() [DEADLOCK]
  RLock() [DEADLOCK]   Unlock()
  RUnlock()
  RUnlock()

Testing deadlocks is tricky, but the regression test I added does
reliably detect the deadlock in the original code on my machine within a
normal concurrent reader/writer run duration of 250ms.

Change-Id: Ib34c2bb8df1a80af44550cc2bf5007055cdef413
2014-04-17 13:43:13 +02:00

220 lines
5.6 KiB
Go

// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tiered
import (
"fmt"
"runtime"
"sync"
"testing"
"time"
clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/prometheus/storage/metric"
)
// BenchmarkStreamAdd measures adding b.N sample pairs to an array stream in a
// single add call. The samples are built with the benchmark timer stopped, and
// the bytes allocated during the add call are reported through b.Logf.
func BenchmarkStreamAdd(b *testing.B) {
	b.StopTimer()
	s := newArrayStream(clientmodel.Metric{})

	// Length 0 with capacity b.N: the loop below appends, so a non-zero
	// length would leave b.N zero-value samples in front of the real ones and
	// double the amount of data handed to add.
	samples := make(metric.Values, 0, b.N)
	for i := 0; i < b.N; i++ {
		samples = append(samples, metric.SamplePair{
			Timestamp: clientmodel.TimestampFromTime(time.Date(i, 0, 0, 0, 0, 0, 0, time.UTC)),
			Value:     clientmodel.SampleValue(i),
		})
	}

	b.StartTimer()

	var pre runtime.MemStats
	runtime.ReadMemStats(&pre)

	s.add(samples)

	var post runtime.MemStats
	runtime.ReadMemStats(&post)

	allocated := post.TotalAlloc - pre.TotalAlloc
	b.Logf("%d cycles with %f bytes per cycle, totalling %d", b.N, float32(allocated)/float32(b.N), allocated)
}
// benchmarkAppendSamples measures appending b.N samples, one call per sample,
// to a storage created by NewMemorySeriesStorage. All samples share a single
// metric carrying the requested number of label pairs. The samples are built
// with the benchmark timer stopped, and the bytes allocated during the append
// loop are reported through b.Logf.
func benchmarkAppendSamples(b *testing.B, labels int) {
	b.StopTimer()

	storage := NewMemorySeriesStorage(MemorySeriesOptions{})

	// Build the shared metric: label_0=value_0, label_1=value_1, ...
	m := clientmodel.Metric{}
	for l := 0; l < labels; l++ {
		name := fmt.Sprintf("label_%d", l)
		value := fmt.Sprintf("value_%d", l)
		m[clientmodel.LabelName(name)] = clientmodel.LabelValue(value)
	}

	samples := make(clientmodel.Samples, 0, b.N)
	for n := 0; n < b.N; n++ {
		when := time.Date(n, 0, 0, 0, 0, 0, 0, time.UTC)
		samples = append(samples, &clientmodel.Sample{
			Metric:    m,
			Value:     clientmodel.SampleValue(n),
			Timestamp: clientmodel.TimestampFromTime(when),
		})
	}

	b.StartTimer()

	var before, after runtime.MemStats
	runtime.ReadMemStats(&before)
	for _, sample := range samples {
		storage.AppendSample(sample)
	}
	runtime.ReadMemStats(&after)

	allocated := after.TotalAlloc - before.TotalAlloc
	b.Logf("%d cycles with %f bytes per cycle, totalling %d", b.N, float32(allocated)/float32(b.N), allocated)
}
// BenchmarkAppendSample1 runs the sample-append benchmark with a metric that
// carries a single label pair.
func BenchmarkAppendSample1(b *testing.B) {
	const labelCount = 1
	benchmarkAppendSamples(b, labelCount)
}
// BenchmarkAppendSample10 runs the sample-append benchmark with a metric that
// carries 10 label pairs.
func BenchmarkAppendSample10(b *testing.B) {
	const labelCount = 10
	benchmarkAppendSamples(b, labelCount)
}
// BenchmarkAppendSample100 runs the sample-append benchmark with a metric that
// carries 100 label pairs.
func BenchmarkAppendSample100(b *testing.B) {
	const labelCount = 100
	benchmarkAppendSamples(b, labelCount)
}
// BenchmarkAppendSample1000 runs the sample-append benchmark with a metric
// that carries 1000 label pairs.
func BenchmarkAppendSample1000(b *testing.B) {
	const labelCount = 1000
	benchmarkAppendSamples(b, labelCount)
}
// TestDroppedSeriesIndexRegression guards against the bug reported in
// https://github.com/prometheus/prometheus/issues/381.
//
// The test proceeds as follows:
//
//   - Append one sample for each of two timeseries that share a label pair.
//   - Flush the memory storage with a cutoff between the two sample
//     timestamps, so that only one series is dropped from memory.
//   - Look up the fingerprints for the shared label pair again.
//   - Require that exactly one fingerprint is left.
func TestDroppedSeriesIndexRegression(t *testing.T) {
	// at builds the time value used for every timestamp in this test; only
	// the year differs between the samples and the flush cutoff.
	at := func(year int) time.Time {
		return time.Date(year, 0, 0, 0, 0, 0, 0, time.UTC)
	}

	older := &clientmodel.Sample{
		Metric: clientmodel.Metric{
			clientmodel.MetricNameLabel: "testmetric",
			"different":                 "differentvalue1",
			"common":                    "samevalue",
		},
		Value:     1,
		Timestamp: clientmodel.TimestampFromTime(at(2000)),
	}
	newer := &clientmodel.Sample{
		Metric: clientmodel.Metric{
			clientmodel.MetricNameLabel: "testmetric",
			"different":                 "differentvalue2",
			"common":                    "samevalue",
		},
		Value:     2,
		Timestamp: clientmodel.TimestampFromTime(at(2002)),
	}

	storage := NewMemorySeriesStorage(MemorySeriesOptions{})
	storage.AppendSamples(clientmodel.Samples{older, newer})

	shared := clientmodel.LabelSet{"common": "samevalue"}

	// Both series must be indexed under the shared label pair before flushing.
	before, err := storage.GetFingerprintsForLabelMatchers(labelMatchersFromLabelSet(shared))
	if err != nil {
		t.Fatal(err)
	}
	if n := len(before); n != 2 {
		t.Fatalf("Got %d fingerprints, expected 2", n)
	}

	// The cutoff lies between the two sample timestamps.
	flushed := make(chan clientmodel.Samples, 2)
	storage.Flush(clientmodel.TimestampFromTime(at(2001)), flushed)
	if n := len(flushed); n != 1 {
		t.Fatalf("Got %d disk sample lists, expected 1", n)
	}
	if batch := <-flushed; len(batch) != 1 {
		t.Fatalf("Got %d disk samples, expected 1", len(batch))
	}

	// After the flush only one series may remain indexed.
	after, err := storage.GetFingerprintsForLabelMatchers(labelMatchersFromLabelSet(shared))
	if err != nil {
		t.Fatal(err)
	}
	if n := len(after); n != 1 {
		t.Fatalf("Got %d fingerprints, expected 1", n)
	}
}
// TestReaderWriterDeadlockRegression is a regression test for
// https://github.com/prometheus/prometheus/issues/390.
//
// GetFingerprintsForLabelMatchers used to acquire the storage's read lock and
// then call another function that acquired the same read lock again. When a
// writer's Lock() call landed between those two RLock() calls, the pending
// Lock() excluded the second RLock() and neither goroutine could proceed.
//
// The test runs one reader and one writer goroutine against the same storage
// for a short period and fails if they have not both finished within a
// generous timeout.
func TestReaderWriterDeadlockRegression(t *testing.T) {
	const (
		matcherCount    = 100
		runDuration     = 250 * time.Millisecond
		deadlockTimeout = 5 * time.Second
	)

	// Let reader and writer run in parallel. The inner call executes now and
	// returns the previous setting, which the deferred call restores on exit.
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))

	s := NewMemorySeriesStorage(MemorySeriesOptions{})

	lms := make(metric.LabelMatchers, 0, matcherCount)
	for i := 0; i < matcherCount; i++ {
		lm, err := metric.NewLabelMatcher(metric.NotEqual, clientmodel.MetricNameLabel, "testmetric")
		if err != nil {
			t.Fatal(err)
		}
		lms = append(lms, lm)
	}

	var wg sync.WaitGroup
	wg.Add(2)

	start := time.Now()

	// Reader: repeatedly exercises the read-lock path.
	go func() {
		defer wg.Done()
		for time.Since(start) < runDuration {
			// Only the locking behaviour matters here; results are
			// deliberately discarded.
			_, _ = s.GetFingerprintsForLabelMatchers(lms)
		}
	}()

	// Writer: repeatedly appends the same sample.
	go func() {
		defer wg.Done()
		for time.Since(start) < runDuration {
			s.AppendSamples(clientmodel.Samples{
				&clientmodel.Sample{
					Metric: clientmodel.Metric{
						clientmodel.MetricNameLabel: "testmetric",
					},
					Value:     1,
					Timestamp: 0,
				},
			})
		}
	}()

	// Closing (rather than sending on) the channel means this goroutine never
	// blocks on a receiver that has already given up after the timeout.
	allDone := make(chan struct{})
	go func() {
		wg.Wait()
		close(allDone)
	}()

	timeout := time.NewTimer(deadlockTimeout)
	defer timeout.Stop()

	select {
	case <-allDone:
	case <-timeout.C:
		t.Fatal("Deadlock timeout")
	}
}