From 4df5c7ab18cf5e2ba697bcec69ac0efb4657a74e Mon Sep 17 00:00:00 2001
From: Julius Volz <julius@soundcloud.com>
Date: Sun, 4 May 2014 01:03:18 -0400
Subject: [PATCH] Optimize label matcher memory and runtime behavior.

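This optimizes the runtime and memory allocation behavior for label matchers
other than type "Equal". Instead of allocating a new set for every union of
fingerprints, the matching fingerprints are now added to the existing set in
place, which yields the same result. The Set.Union helper is removed from the
utility package along with this change, and the manual runtime.MemStats
logging is dropped from the existing benchmarks in memory_test.go.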

The current behavior made a production Prometheus unresponsive when running a
NotEqual match against the "instance" label (a label with high value
cardinality).

BEFORE:
BenchmarkGetFingerprintsForNotEqualMatcher        10   170430297 ns/op  39229944 B/op    40709 allocs/op

AFTER:
BenchmarkGetFingerprintsForNotEqualMatcher      5000      706260 ns/op    217717 B/op     1116 allocs/op

Change-Id: Ifd78e81e7dfbf5d7249e50ad1903a5d9c42c347a
---
 storage/metric/tiered/memory.go      |  4 ++-
 storage/metric/tiered/memory_test.go | 47 ++++++++++++++++++----------
 utility/set.go                       | 14 ---------
 3 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/storage/metric/tiered/memory.go b/storage/metric/tiered/memory.go
index 2015a35147..9b636bb889 100644
--- a/storage/metric/tiered/memory.go
+++ b/storage/metric/tiered/memory.go
@@ -403,7 +403,9 @@ func (s *memorySeriesStorage) GetFingerprintsForLabelMatchers(labelMatchers metr
 				if !ok {
 					return nil, nil
 				}
-				set = set.Union(subset)
+				for fp := range subset {
+					set.Add(fp)
+				}
 			}
 			sets = append(sets, set)
 		}
diff --git a/storage/metric/tiered/memory_test.go b/storage/metric/tiered/memory_test.go
index 1d45cbca43..b3b52c8fe9 100644
--- a/storage/metric/tiered/memory_test.go
+++ b/storage/metric/tiered/memory_test.go
@@ -38,16 +38,7 @@ func BenchmarkStreamAdd(b *testing.B) {
 	}
 
 	b.StartTimer()
-
-	var pre runtime.MemStats
-	runtime.ReadMemStats(&pre)
-
 	s.add(samples)
-
-	var post runtime.MemStats
-	runtime.ReadMemStats(&post)
-
-	b.Logf("%d cycles with %f bytes per cycle, totalling %d", b.N, float32(post.TotalAlloc-pre.TotalAlloc)/float32(b.N), post.TotalAlloc-pre.TotalAlloc)
 }
 
 func TestStreamAdd(t *testing.T) {
@@ -118,17 +109,9 @@ func benchmarkAppendSamples(b *testing.B, labels int) {
 	}
 
 	b.StartTimer()
-	var pre runtime.MemStats
-	runtime.ReadMemStats(&pre)
-
 	for i := 0; i < b.N; i++ {
 		s.AppendSample(samples[i])
 	}
-
-	var post runtime.MemStats
-	runtime.ReadMemStats(&post)
-
-	b.Logf("%d cycles with %f bytes per cycle, totalling %d", b.N, float32(post.TotalAlloc-pre.TotalAlloc)/float32(b.N), post.TotalAlloc-pre.TotalAlloc)
 }
 
 func BenchmarkAppendSample1(b *testing.B) {
@@ -267,3 +250,33 @@ func TestReaderWriterDeadlockRegression(t *testing.T) {
 		t.Fatalf("Deadlock timeout")
 	}
 }
+
+func BenchmarkGetFingerprintsForNotEqualMatcher1000(b *testing.B) {
+	numSeries := 1000 // one series per distinct "instance" value, i.e. a high-cardinality label
+	samples := make(clientmodel.Samples, 0, numSeries)
+	for i := 0; i < numSeries; i++ {
+		samples = append(samples, &clientmodel.Sample{
+			Metric: clientmodel.Metric{
+				clientmodel.MetricNameLabel: "testmetric",
+				"instance":                  clientmodel.LabelValue(fmt.Sprint("instance_", i)),
+			},
+			Value:     1,
+			Timestamp: clientmodel.TimestampFromTime(time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC)),
+		})
+	}
+
+	s := NewMemorySeriesStorage(MemorySeriesOptions{})
+	if err := s.AppendSamples(samples); err != nil {
+		b.Fatal(err)
+	}
+
+	m, err := metric.NewLabelMatcher(metric.NotEqual, "instance", "foo") // "foo" is not among the stored instance values
+	if err != nil {
+		b.Fatal(err)
+	}
+
+	b.ResetTimer() // keep the storage setup above out of the measurement
+	for i := 0; i < b.N; i++ {
+		s.GetFingerprintsForLabelMatchers(metric.LabelMatchers{m})
+	}
+}
diff --git a/utility/set.go b/utility/set.go
index eae156891b..437368fe9f 100644
--- a/utility/set.go
+++ b/utility/set.go
@@ -56,17 +56,3 @@ func (s Set) Intersection(o Set) Set {
 
 	return result
 }
-
-// Union returns a new set with all items in both sets.
-func (s Set) Union(o Set) Set {
-	result := Set{}
-
-	for k := range s {
-		result[k] = struct{}{}
-	}
-	for k := range o {
-		result[k] = struct{}{}
-	}
-
-	return result
-}