From 05a3a7901590965129a154e1767b735eebffb708 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 31 Mar 2023 04:05:26 +0200 Subject: [PATCH] Cache optimized regexp matchers (#465) * Cache optimized regexp matchers Signed-off-by: Marco Pracucci * Added BenchmarkNewFastRegexMatcher_CacheMisses Signed-off-by: Marco Pracucci * Improved benchmark Signed-off-by: Marco Pracucci * Improved benchmark Signed-off-by: Marco Pracucci * Use LRU cache v2 Signed-off-by: Marco Pracucci * Run gofumpt Signed-off-by: Marco Pracucci --------- Signed-off-by: Marco Pracucci --- go.mod | 1 + go.sum | 2 ++ model/labels/regexp.go | 27 +++++++++++++++++++++++ model/labels/regexp_test.go | 43 ++++++++++++++++++++++++++++++++++--- 4 files changed, 70 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index edad464114..be208069fc 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd github.com/grpc-ecosystem/grpc-gateway v1.16.0 github.com/hashicorp/consul/api v1.20.0 + github.com/hashicorp/golang-lru/v2 v2.0.2 github.com/hashicorp/nomad/api v0.0.0-20230308192510-48e7d70fcd4b github.com/hetznercloud/hcloud-go v1.41.0 github.com/ionos-cloud/sdk-go/v6 v6.1.4 diff --git a/go.sum b/go.sum index 2414991588..4ace51e8f8 100644 --- a/go.sum +++ b/go.sum @@ -444,6 +444,8 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4= github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU= +github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= 
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 8173f7e421..5c0d760286 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -18,6 +18,7 @@ import ( "github.com/grafana/regexp" "github.com/grafana/regexp/syntax" + lru "github.com/hashicorp/golang-lru/v2" ) const ( @@ -29,6 +30,14 @@ const ( optimizeEqualStringMatchersThreshold = 16 ) +var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher] + +func init() { + // Ignore error because it can only return error if size is invalid, + // but we're using a hardcoded size here. + fastRegexMatcherCache, _ = lru.New[string, *FastRegexMatcher](10000) +} + type FastRegexMatcher struct { re *regexp.Regexp @@ -43,6 +52,24 @@ type FastRegexMatcher struct { } func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { + // Check the cache. + if matcher, ok := fastRegexMatcherCache.Get(v); ok { + return matcher, nil + } + + // Create a new matcher. + matcher, err := newFastRegexMatcherWithoutCache(v) + if err != nil { + return nil, err + } + + // Cache it. 
+ fastRegexMatcherCache.Add(v, matcher) + + return matcher, nil +} + +func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) { parsed, err := syntax.Parse(v, syntax.Perl) if err != nil { return nil, err diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index ead22a741a..9ebe10ee21 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -18,6 +18,7 @@ import ( "fmt" "math/rand" "os" + "strconv" "strings" "testing" "time" @@ -89,10 +90,46 @@ func TestNewFastRegexMatcher(t *testing.T) { } func BenchmarkNewFastRegexMatcher(b *testing.B) { - for _, r := range regexes { - b.Run(getTestNameFromRegexp(r), func(b *testing.B) { + runBenchmark := func(newFunc func(v string) (*FastRegexMatcher, error)) func(b *testing.B) { + return func(b *testing.B) { + for _, r := range regexes { + b.Run(getTestNameFromRegexp(r), func(b *testing.B) { + for n := 0; n < b.N; n++ { + _, err := newFunc(r) + if err != nil { + b.Fatal(err) + } + } + }) + } + } + } + + b.Run("with cache", runBenchmark(NewFastRegexMatcher)) + b.Run("without cache", runBenchmark(newFastRegexMatcherWithoutCache)) +} + +func BenchmarkNewFastRegexMatcher_CacheMisses(b *testing.B) { + // Init the random seed with a constant, so that it doesn't change between runs. + randGenerator := rand.New(rand.NewSource(1)) + + tests := map[string]string{ + "simple regexp": randString(randGenerator, 10), + "complex regexp": strings.Join(randStrings(randGenerator, 100, 10), "|"), + } + + for testName, regexpPrefix := range tests { + b.Run(testName, func(b *testing.B) { + // Ensure the cache is empty. + fastRegexMatcherCache.Purge() + + b.ResetTimer() + for n := 0; n < b.N; n++ { - _, err := NewFastRegexMatcher(r) + // Unique regexp to emulate 100% cache misses. + regexp := regexpPrefix + strconv.Itoa(n) + + _, err := NewFastRegexMatcher(regexp) if err != nil { b.Fatal(err) }