Optimize label matcher memory and runtime behavior.

This optimizes the runtime and memory allocation behavior for label matchers other than type "Equal". Instead of creating a new set for every union of fingerprints, this simply adds new fingerprints to the existing set to achieve the same effect.

The current behavior made a production Prometheus unresponsive when running a NotEqual match against the "instance" label (a label with high value cardinality).

BEFORE:
BenchmarkGetFingerprintsForNotEqualMatcher 10 170430297 ns/op 39229944 B/op 40709 allocs/op

AFTER:
BenchmarkGetFingerprintsForNotEqualMatcher 5000 706260 ns/op 217717 B/op 1116 allocs/op

Change-Id: Ifd78e81e7dfbf5d7249e50ad1903a5d9c42c347a
2025-03-05 20:59:13 -08:00 · 2014-05-04 01:03:18 -04:00 · 2014-05-04 01:03:18 -04:00 · 4df5c7ab18
parent 64811caaec
commit 4df5c7ab18
3 changed files with 33 additions and 32 deletions
--- a/storage/metric/tiered/memory.go
+++ b/storage/metric/tiered/memory.go
@ -403,7 +403,9 @@ func (s *memorySeriesStorage) GetFingerprintsForLabelMatchers(labelMatchers metr
 				if !ok {
 					return nil, nil
 				}
-				set = set.Union(subset)
+				for fp := range subset {
+					set.Add(fp)
+				}
 			}
 			sets = append(sets, set)
 		}
--- a/storage/metric/tiered/memory_test.go
+++ b/storage/metric/tiered/memory_test.go
@ -38,16 +38,7 @@ func BenchmarkStreamAdd(b *testing.B) {
 	}

 	b.StartTimer()
-
-	var pre runtime.MemStats
-	runtime.ReadMemStats(&pre)
-
 	s.add(samples)
-
-	var post runtime.MemStats
-	runtime.ReadMemStats(&post)
-
-	b.Logf("%d cycles with %f bytes per cycle, totalling %d", b.N, float32(post.TotalAlloc-pre.TotalAlloc)/float32(b.N), post.TotalAlloc-pre.TotalAlloc)
 }

 func TestStreamAdd(t *testing.T) {
@ -118,17 +109,9 @@ func benchmarkAppendSamples(b *testing.B, labels int) {
 	}

 	b.StartTimer()
-	var pre runtime.MemStats
-	runtime.ReadMemStats(&pre)
-
 	for i := 0; i < b.N; i++ {
 		s.AppendSample(samples[i])
 	}
-
-	var post runtime.MemStats
-	runtime.ReadMemStats(&post)
-
-	b.Logf("%d cycles with %f bytes per cycle, totalling %d", b.N, float32(post.TotalAlloc-pre.TotalAlloc)/float32(b.N), post.TotalAlloc-pre.TotalAlloc)
 }

 func BenchmarkAppendSample1(b *testing.B) {
@ -267,3 +250,33 @@ func TestReaderWriterDeadlockRegression(t *testing.T) {
 		t.Fatalf("Deadlock timeout")
 	}
 }
+
+func BenchmarkGetFingerprintsForNotEqualMatcher1000(b *testing.B) {
+	numSeries := 1000
+	samples := make(clientmodel.Samples, 0, numSeries)
+	for i := 0; i < numSeries; i++ {
+		samples = append(samples, &clientmodel.Sample{
+			Metric: clientmodel.Metric{
+				clientmodel.MetricNameLabel: "testmetric",
+				"instance":                  clientmodel.LabelValue(fmt.Sprint("instance_", i)),
+			},
+			Value:     1,
+			Timestamp: clientmodel.TimestampFromTime(time.Date(2000, 0, 0, 0, 0, 0, 0, time.UTC)),
+		})
+	}
+
+	s := NewMemorySeriesStorage(MemorySeriesOptions{})
+	if err := s.AppendSamples(samples); err != nil {
+		b.Fatal(err)
+	}
+
+	m, err := metric.NewLabelMatcher(metric.NotEqual, "instance", "foo")
+	if err != nil {
+		b.Fatal(err)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		s.GetFingerprintsForLabelMatchers(metric.LabelMatchers{m})
+	}
+}
--- a/utility/set.go
+++ b/utility/set.go
@ -56,17 +56,3 @@ func (s Set) Intersection(o Set) Set {

 	return result
 }
-
-// Union returns a new set with all items in both sets.
-func (s Set) Union(o Set) Set {
-	result := Set{}
-
-	for k := range s {
-		result[k] = struct{}{}
-	}
-	for k := range o {
-		result[k] = struct{}{}
-	}
-
-	return result
-}