Cache optimized regexp matchers (#465)

* Cache optimized regexp matchers

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Added BenchmarkNewFastRegexMatcher_CacheMisses

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Improved benchmark

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Improved benchmark

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Use LRU cache v2

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Run gofumpt

Signed-off-by: Marco Pracucci <marco@pracucci.com>

---------

Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
Marco Pracucci 2023-03-31 04:05:26 +02:00 committed by GitHub
parent 174b26a5c0
commit 05a3a79015
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 70 additions and 3 deletions

1
go.mod
View file

@ -28,6 +28,7 @@ require (
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/consul/api v1.20.0
github.com/hashicorp/golang-lru/v2 v2.0.2
github.com/hashicorp/nomad/api v0.0.0-20230308192510-48e7d70fcd4b
github.com/hetznercloud/hcloud-go v1.41.0
github.com/ionos-cloud/sdk-go/v6 v6.1.4

2
go.sum
View file

@ -444,6 +444,8 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU=
github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=

View file

@ -18,6 +18,7 @@ import (
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
lru "github.com/hashicorp/golang-lru/v2"
)
const (
@ -29,6 +30,14 @@ const (
optimizeEqualStringMatchersThreshold = 16
)
// fastRegexMatcherCache is the package-wide LRU cache of optimized matchers,
// keyed by the regexp source string they were built from.
var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher]

func init() {
	// lru.New only reports an error when the requested size is invalid.
	// The size here is a hardcoded constant, so the error is deliberately
	// discarded.
	cache, _ := lru.New[string, *FastRegexMatcher](10000)
	fastRegexMatcherCache = cache
}
type FastRegexMatcher struct {
re *regexp.Regexp
@ -43,6 +52,24 @@ type FastRegexMatcher struct {
}
// NewFastRegexMatcher returns the optimized matcher for the regexp v.
//
// Matchers are memoized in fastRegexMatcherCache, keyed by v: a cache hit
// returns the previously built *FastRegexMatcher, so the same pointer may be
// handed to several callers. On a miss the matcher is built with
// newFastRegexMatcherWithoutCache and stored before being returned. A build
// error is returned as-is and nothing is cached for v.
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
	// Fast path: reuse a matcher already built for this exact expression.
	cached, found := fastRegexMatcherCache.Get(v)
	if found {
		return cached, nil
	}

	// Slow path: build the matcher from scratch.
	built, err := newFastRegexMatcherWithoutCache(v)
	if err != nil {
		return nil, err
	}

	// Remember it for subsequent calls with the same expression.
	fastRegexMatcherCache.Add(v, built)
	return built, nil
}
func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) {
parsed, err := syntax.Parse(v, syntax.Perl)
if err != nil {
return nil, err

View file

@ -18,6 +18,7 @@ import (
"fmt"
"math/rand"
"os"
"strconv"
"strings"
"testing"
"time"
@ -89,10 +90,46 @@ func TestNewFastRegexMatcher(t *testing.T) {
}
// BenchmarkNewFastRegexMatcher measures matcher construction for the patterns
// in regexes, once through NewFastRegexMatcher ("with cache") and once through
// newFastRegexMatcherWithoutCache ("without cache").
func BenchmarkNewFastRegexMatcher(b *testing.B) {
for _, r := range regexes {
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
// runBenchmark turns a matcher constructor into a benchmark body that runs
// one sub-benchmark per pattern and aborts on the first construction error.
runBenchmark := func(newFunc func(v string) (*FastRegexMatcher, error)) func(b *testing.B) {
return func(b *testing.B) {
for _, r := range regexes {
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
for n := 0; n < b.N; n++ {
_, err := newFunc(r)
if err != nil {
b.Fatal(err)
}
}
})
}
}
}
// Run the same per-pattern benchmark against both constructors so the
// results can be compared directly.
b.Run("with cache", runBenchmark(NewFastRegexMatcher))
b.Run("without cache", runBenchmark(newFastRegexMatcherWithoutCache))
}
func BenchmarkNewFastRegexMatcher_CacheMisses(b *testing.B) {
// Init the random seed with a constant, so that it doesn't change between runs.
randGenerator := rand.New(rand.NewSource(1))
tests := map[string]string{
"simple regexp": randString(randGenerator, 10),
"complex regexp": strings.Join(randStrings(randGenerator, 100, 10), "|"),
}
for testName, regexpPrefix := range tests {
b.Run(testName, func(b *testing.B) {
// Ensure the cache is empty.
fastRegexMatcherCache.Purge()
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, err := NewFastRegexMatcher(r)
// Unique regexp to emulate 100% cache misses.
regexp := regexpPrefix + strconv.Itoa(n)
_, err := NewFastRegexMatcher(regexp)
if err != nil {
b.Fatal(err)
}