mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Optimized very long case insensitive alternations (#444)
* Optimized very long case insensitive alternations Signed-off-by: Marco Pracucci <marco@pracucci.com> * Run common regexps in BenchmarkFastRegexMatcher Signed-off-by: Marco Pracucci <marco@pracucci.com> * Modify BenchmarkNewFastRegexMatcher to benchmark the NewFastRegexMatcher() function Signed-off-by: Marco Pracucci <marco@pracucci.com> * Reduced allocations by optimizeEqualStringMatchers() Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed typo in comments Signed-off-by: Marco Pracucci <marco@pracucci.com> * Fixed typo in test case name Signed-off-by: Marco Pracucci <marco@pracucci.com> --------- Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
parent
383ea59ce1
commit
1e7ad0ec11
|
@ -20,7 +20,14 @@ import (
|
||||||
"github.com/grafana/regexp/syntax"
|
"github.com/grafana/regexp/syntax"
|
||||||
)
|
)
|
||||||
|
|
||||||
const maxSetMatches = 256
|
const (
|
||||||
|
maxSetMatches = 256
|
||||||
|
|
||||||
|
// The minimum number of alternate values a regex should have to trigger
|
||||||
|
// the optimization done by optimizeEqualStringMatchers(). This value was
|
||||||
|
// determined by running BenchmarkOptimizeEqualStringMatchers.
|
||||||
|
optimizeEqualStringMatchersThreshold = 16
|
||||||
|
)
|
||||||
|
|
||||||
type FastRegexMatcher struct {
|
type FastRegexMatcher struct {
|
||||||
re *regexp.Regexp
|
re *regexp.Regexp
|
||||||
|
@ -326,7 +333,10 @@ type StringMatcher interface {
|
||||||
func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
||||||
clearBeginEndText(re)
|
clearBeginEndText(re)
|
||||||
|
|
||||||
return stringMatcherFromRegexpInternal(re)
|
m := stringMatcherFromRegexpInternal(re)
|
||||||
|
m = optimizeEqualStringMatchers(m, optimizeEqualStringMatchersThreshold)
|
||||||
|
|
||||||
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
|
func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
|
||||||
|
@ -503,6 +513,24 @@ func (m *equalStringMatcher) Matches(s string) bool {
|
||||||
return strings.EqualFold(m.s, s)
|
return strings.EqualFold(m.s, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// equalMultiStringMatcher is a StringMatcher that reports whether a string
// is exactly equal to one of a fixed set of values.
type equalMultiStringMatcher struct {
	// values is the set of accepted strings. When caseSensitive is false
	// every key must already be lowercase, because Matches lowercases its
	// input before doing the lookup.
	values map[string]struct{}

	// caseSensitive selects between exact matching (true) and matching
	// that ignores letter case (false).
	caseSensitive bool
}

// Matches reports whether s belongs to the configured set of values. When
// the matcher is case insensitive, s is lowercased with strings.ToLower
// before the set lookup.
//
// NOTE(review): strings.ToLower is not the same operation as the simple
// Unicode case folding performed by strings.EqualFold (e.g. U+017F "ſ"
// folds to "s" but is left unchanged by ToLower), so results may differ
// from equalStringMatcher for such runes. Confirm whether that matters
// for the label values this is used with.
func (m *equalMultiStringMatcher) Matches(s string) bool {
	key := s
	if !m.caseSensitive {
		key = strings.ToLower(s)
	}

	_, found := m.values[key]
	return found
}
|
||||||
|
|
||||||
// anyStringMatcher is a matcher that matches any string.
|
// anyStringMatcher is a matcher that matches any string.
|
||||||
// It is used for the + and * operator. matchNL tells if it should match newlines or not.
|
// It is used for the + and * operator. matchNL tells if it should match newlines or not.
|
||||||
type anyStringMatcher struct {
|
type anyStringMatcher struct {
|
||||||
|
@ -519,3 +547,92 @@ func (m *anyStringMatcher) Matches(s string) bool {
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// optimizeEqualStringMatchers optimize a specific case where all matchers are made by an
|
||||||
|
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher). In
|
||||||
|
// this specific case, when we have many strings to match against we can use a map instead
|
||||||
|
// of iterating over the list of strings.
|
||||||
|
func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher {
|
||||||
|
var (
|
||||||
|
caseSensitive bool
|
||||||
|
caseSensitiveSet bool
|
||||||
|
numValues int
|
||||||
|
)
|
||||||
|
|
||||||
|
// Analyse the input StringMatcher to count the number of occurrences
|
||||||
|
// and ensure all of them have the same case sensitivity.
|
||||||
|
analyseCallback := func(matcher *equalStringMatcher) bool {
|
||||||
|
// Ensure we don't have mixed case sensitivity.
|
||||||
|
if caseSensitiveSet && caseSensitive != matcher.caseSensitive {
|
||||||
|
return false
|
||||||
|
} else if !caseSensitiveSet {
|
||||||
|
caseSensitive = matcher.caseSensitive
|
||||||
|
caseSensitiveSet = true
|
||||||
|
}
|
||||||
|
|
||||||
|
numValues++
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if !findEqualStringMatchers(input, analyseCallback) {
|
||||||
|
return input
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the number of values found is less than the threshold, then we should skip the optimization.
|
||||||
|
if numValues < threshold {
|
||||||
|
return input
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse again the input StringMatcher to extract all values and storing them.
|
||||||
|
// We can skip the case sensitivity check because we've already checked it and
|
||||||
|
// if the code reach this point then it means all matchers have the same case sensitivity.
|
||||||
|
values := make(map[string]struct{}, numValues)
|
||||||
|
|
||||||
|
// Ignore the return value because we already iterated over the input StringMatcher
|
||||||
|
// and it was all good.
|
||||||
|
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
||||||
|
if caseSensitive {
|
||||||
|
values[matcher.s] = struct{}{}
|
||||||
|
} else {
|
||||||
|
values[strings.ToLower(matcher.s)] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
return &equalMultiStringMatcher{
|
||||||
|
values: values,
|
||||||
|
caseSensitive: caseSensitive,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
|
||||||
|
// equalStringMatcher found. Returns true if and only if the input StringMatcher is *only*
|
||||||
|
// composed by an alternation of equalStringMatcher.
|
||||||
|
func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool {
|
||||||
|
orInput, ok := input.(orStringMatcher)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range orInput {
|
||||||
|
switch casted := m.(type) {
|
||||||
|
case orStringMatcher:
|
||||||
|
if !findEqualStringMatchers(m, callback) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
case *equalStringMatcher:
|
||||||
|
if !callback(casted) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
// It's not an equal string matcher, so we have to stop searching
|
||||||
|
// cause this optimization can't be applied.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
|
@ -15,6 +15,7 @@ package labels
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -33,6 +34,8 @@ func init() {
|
||||||
var (
|
var (
|
||||||
letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
regexes = []string{
|
regexes = []string{
|
||||||
|
"foo",
|
||||||
|
"^foo",
|
||||||
"(foo|bar)",
|
"(foo|bar)",
|
||||||
"foo.*",
|
"foo.*",
|
||||||
".*foo",
|
".*foo",
|
||||||
|
@ -46,17 +49,24 @@ var (
|
||||||
"foo\n.*",
|
"foo\n.*",
|
||||||
".*foo.*",
|
".*foo.*",
|
||||||
".+foo.+",
|
".+foo.+",
|
||||||
"",
|
|
||||||
"(?s:.*)",
|
"(?s:.*)",
|
||||||
"(?s:.+)",
|
"(?s:.+)",
|
||||||
"(?s:^.*foo$)",
|
"(?s:^.*foo$)",
|
||||||
|
"(?i:foo)",
|
||||||
|
"(?i:(foo|bar))",
|
||||||
|
"(?i:(foo1|foo2|bar))",
|
||||||
"^(?i:foo|oo)|(bar)$",
|
"^(?i:foo|oo)|(bar)$",
|
||||||
|
"(?i:(foo1|foo2|aaa|bbb|ccc|ddd|eee|fff|ggg|hhh|iii|lll|mmm|nnn|ooo|ppp|qqq|rrr|sss|ttt|uuu|vvv|www|xxx|yyy|zzz))",
|
||||||
"((.*)(bar|b|buzz)(.+)|foo)$",
|
"((.*)(bar|b|buzz)(.+)|foo)$",
|
||||||
"^$",
|
"^$",
|
||||||
"(prometheus|api_prom)_api_v1_.+",
|
"(prometheus|api_prom)_api_v1_.+",
|
||||||
"10\\.0\\.(1|2)\\.+",
|
"10\\.0\\.(1|2)\\.+",
|
||||||
"10\\.0\\.(1|2).+",
|
"10\\.0\\.(1|2).+",
|
||||||
"((fo(bar))|.+foo)",
|
"((fo(bar))|.+foo)",
|
||||||
|
// A long case sensitive alternation.
|
||||||
|
"zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb",
|
||||||
|
// A long case insensitive alternation.
|
||||||
|
"(?i:(zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb))",
|
||||||
}
|
}
|
||||||
values = []string{
|
values = []string{
|
||||||
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
||||||
|
@ -83,27 +93,15 @@ func TestNewFastRegexMatcher(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkNewFastRegexMatcher(b *testing.B) {
|
func BenchmarkNewFastRegexMatcher(b *testing.B) {
|
||||||
benchValues := values
|
|
||||||
for _, v := range values {
|
|
||||||
for i := 5; i < 50; i = i + 5 {
|
|
||||||
benchValues = append(benchValues, v+RandStringRunes(i))
|
|
||||||
benchValues = append(benchValues, RandStringRunes(i)+v+RandStringRunes(i))
|
|
||||||
benchValues = append(benchValues, RandStringRunes(i)+v)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, r := range regexes {
|
for _, r := range regexes {
|
||||||
r := r
|
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
|
||||||
b.Run(r, func(b *testing.B) {
|
for n := 0; n < b.N; n++ {
|
||||||
m, err := NewFastRegexMatcher(r)
|
_, err := NewFastRegexMatcher(r)
|
||||||
require.NoError(b, err)
|
if err != nil {
|
||||||
b.ResetTimer()
|
b.Fatal(err)
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
for _, v := range benchValues {
|
|
||||||
_ = m.MatchString(v)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -232,29 +230,8 @@ func BenchmarkFastRegexMatcher(b *testing.B) {
|
||||||
y = "foo" + x
|
y = "foo" + x
|
||||||
z = x + "foo"
|
z = x + "foo"
|
||||||
)
|
)
|
||||||
regexes := []string{
|
|
||||||
"foo",
|
|
||||||
"^foo",
|
|
||||||
"(foo|bar)",
|
|
||||||
"foo.*",
|
|
||||||
".*foo",
|
|
||||||
"^.*foo$",
|
|
||||||
"^.+foo$",
|
|
||||||
".*",
|
|
||||||
".+",
|
|
||||||
"foo.+",
|
|
||||||
".+foo",
|
|
||||||
".*foo.*",
|
|
||||||
"(?i:foo)",
|
|
||||||
"(?i:(foo|bar))",
|
|
||||||
"(?i:(foo1|foo2|bar))",
|
|
||||||
"(?i:(foo1|foo2|aaa|bbb|ccc|ddd|eee|fff|ggg|hhh|iii|lll|mmm|nnn|ooo|ppp|qqq|rrr|sss|ttt|uuu|vvv|www|xxx|yyy|zzz))",
|
|
||||||
"(prometheus|api_prom)_api_v1_.+",
|
|
||||||
"((fo(bar))|.+foo)",
|
|
||||||
}
|
|
||||||
for _, r := range regexes {
|
for _, r := range regexes {
|
||||||
r := r
|
b.Run(getTestNameFromRegexp(r), func(b *testing.B) {
|
||||||
b.Run(r, func(b *testing.B) {
|
|
||||||
m, err := NewFastRegexMatcher(r)
|
m, err := NewFastRegexMatcher(r)
|
||||||
require.NoError(b, err)
|
require.NoError(b, err)
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
|
@ -331,14 +308,22 @@ func Test_OptimizeRegex(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func RandStringRunes(n int) string {
|
func randString(length int) string {
|
||||||
b := make([]rune, n)
|
b := make([]rune, length)
|
||||||
for i := range b {
|
for i := range b {
|
||||||
b[i] = letterRunes[rand.Intn(len(letterRunes))]
|
b[i] = letterRunes[rand.Intn(len(letterRunes))]
|
||||||
}
|
}
|
||||||
return string(b)
|
return string(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func randStrings(many, length int) []string {
|
||||||
|
out := make([]string, 0, many)
|
||||||
|
for i := 0; i < many; i++ {
|
||||||
|
out = append(out, randString(length))
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func FuzzFastRegexMatcher_WithStaticallyDefinedRegularExpressions(f *testing.F) {
|
func FuzzFastRegexMatcher_WithStaticallyDefinedRegularExpressions(f *testing.F) {
|
||||||
// Create all matchers.
|
// Create all matchers.
|
||||||
matchers := make([]*FastRegexMatcher, 0, len(regexes))
|
matchers := make([]*FastRegexMatcher, 0, len(regexes))
|
||||||
|
@ -428,3 +413,163 @@ func TestAnalyzeRealQueries(t *testing.T) {
|
||||||
|
|
||||||
t.Logf("Found %d (%.2f%%) optimized matchers out of %d", numOptimized, (float64(numOptimized)/float64(numChecked))*100, numChecked)
|
t.Logf("Found %d (%.2f%%) optimized matchers out of %d", numOptimized, (float64(numOptimized)/float64(numChecked))*100, numChecked)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestOptimizeEqualStringMatchers(t *testing.T) {
|
||||||
|
tests := map[string]struct {
|
||||||
|
input StringMatcher
|
||||||
|
expectedValues map[string]struct{}
|
||||||
|
expectedCaseSensitive bool
|
||||||
|
}{
|
||||||
|
"should skip optimization on orStringMatcher with containsStringMatcher": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
||||||
|
&containsStringMatcher{substrings: []string{"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
expectedValues: nil,
|
||||||
|
},
|
||||||
|
"should run optimization on orStringMatcher with equalStringMatcher and same case sensitivity": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
||||||
|
&equalStringMatcher{s: "bar", caseSensitive: true},
|
||||||
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
||||||
|
},
|
||||||
|
expectedValues: map[string]struct{}{
|
||||||
|
"FOO": {},
|
||||||
|
"bar": {},
|
||||||
|
"baz": {},
|
||||||
|
},
|
||||||
|
expectedCaseSensitive: true,
|
||||||
|
},
|
||||||
|
"should skip optimization on orStringMatcher with equalStringMatcher but different case sensitivity": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
||||||
|
&equalStringMatcher{s: "bar", caseSensitive: false},
|
||||||
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
||||||
|
},
|
||||||
|
expectedValues: nil,
|
||||||
|
},
|
||||||
|
"should run optimization on orStringMatcher with nested orStringMatcher and equalStringMatcher, and same case sensitivity": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
||||||
|
orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "bar", caseSensitive: true},
|
||||||
|
&equalStringMatcher{s: "xxx", caseSensitive: true},
|
||||||
|
},
|
||||||
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
||||||
|
},
|
||||||
|
expectedValues: map[string]struct{}{
|
||||||
|
"FOO": {},
|
||||||
|
"bar": {},
|
||||||
|
"xxx": {},
|
||||||
|
"baz": {},
|
||||||
|
},
|
||||||
|
expectedCaseSensitive: true,
|
||||||
|
},
|
||||||
|
"should skip optimization on orStringMatcher with nested orStringMatcher and equalStringMatcher, but different case sensitivity": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
||||||
|
orStringMatcher{
|
||||||
|
// Case sensitivity is different within items at the same level.
|
||||||
|
&equalStringMatcher{s: "bar", caseSensitive: true},
|
||||||
|
&equalStringMatcher{s: "xxx", caseSensitive: false},
|
||||||
|
},
|
||||||
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
||||||
|
},
|
||||||
|
expectedValues: nil,
|
||||||
|
},
|
||||||
|
"should skip optimization on orStringMatcher with nested orStringMatcher and equalStringMatcher, but different case sensitivity in the nested one": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: true},
|
||||||
|
// Case sensitivity is different between the parent and child.
|
||||||
|
orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "bar", caseSensitive: false},
|
||||||
|
&equalStringMatcher{s: "xxx", caseSensitive: false},
|
||||||
|
},
|
||||||
|
&equalStringMatcher{s: "baz", caseSensitive: true},
|
||||||
|
},
|
||||||
|
expectedValues: nil,
|
||||||
|
},
|
||||||
|
"should return lowercase values on case insensitive matchers": {
|
||||||
|
input: orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "FOO", caseSensitive: false},
|
||||||
|
orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "bAr", caseSensitive: false},
|
||||||
|
},
|
||||||
|
&equalStringMatcher{s: "baZ", caseSensitive: false},
|
||||||
|
},
|
||||||
|
expectedValues: map[string]struct{}{
|
||||||
|
"foo": {},
|
||||||
|
"bar": {},
|
||||||
|
"baz": {},
|
||||||
|
},
|
||||||
|
expectedCaseSensitive: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for testName, testData := range tests {
|
||||||
|
t.Run(testName, func(t *testing.T) {
|
||||||
|
actualMatcher := optimizeEqualStringMatchers(testData.input, 0)
|
||||||
|
|
||||||
|
if testData.expectedValues == nil {
|
||||||
|
require.IsType(t, testData.input, actualMatcher)
|
||||||
|
} else {
|
||||||
|
require.IsType(t, &equalMultiStringMatcher{}, actualMatcher)
|
||||||
|
require.Equal(t, testData.expectedValues, actualMatcher.(*equalMultiStringMatcher).values)
|
||||||
|
require.Equal(t, testData.expectedCaseSensitive, actualMatcher.(*equalMultiStringMatcher).caseSensitive)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This benchmark is used to find a good threshold to use to apply the optimization
|
||||||
|
// done by optimizeEqualStringMatchers()
|
||||||
|
func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
||||||
|
// Generate variable lengths random texts to match against.
|
||||||
|
texts := append([]string{}, randStrings(10, 10)...)
|
||||||
|
texts = append(texts, randStrings(5, 30)...)
|
||||||
|
texts = append(texts, randStrings(1, 100)...)
|
||||||
|
|
||||||
|
for numAlternations := 2; numAlternations <= 256; numAlternations *= 2 {
|
||||||
|
for _, caseSensitive := range []bool{true, false} {
|
||||||
|
b.Run(fmt.Sprintf("alternations: %d case sensitive: %t", numAlternations, caseSensitive), func(b *testing.B) {
|
||||||
|
// Generate a regex with the expected number of alternations.
|
||||||
|
re := strings.Join(randStrings(numAlternations, 10), "|")
|
||||||
|
if !caseSensitive {
|
||||||
|
re = "(?i:(" + re + "))"
|
||||||
|
}
|
||||||
|
|
||||||
|
parsed, err := syntax.Parse(re, syntax.Perl)
|
||||||
|
require.NoError(b, err)
|
||||||
|
|
||||||
|
unoptimized := stringMatcherFromRegexpInternal(parsed)
|
||||||
|
require.IsType(b, orStringMatcher{}, unoptimized)
|
||||||
|
|
||||||
|
optimized := optimizeEqualStringMatchers(unoptimized, 0)
|
||||||
|
require.IsType(b, &equalMultiStringMatcher{}, optimized)
|
||||||
|
|
||||||
|
b.Run("without optimizeEqualStringMatchers()", func(b *testing.B) {
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
for _, t := range texts {
|
||||||
|
unoptimized.Matches(t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("with optimizeEqualStringMatchers()", func(b *testing.B) {
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
for _, t := range texts {
|
||||||
|
optimized.Matches(t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTestNameFromRegexp returns a name for a sub-test or sub-benchmark
// derived from the input regexp. Regexps of up to 32 bytes are returned as
// they are; longer ones are truncated to at most 32 bytes.
//
// The cut is always done on a rune boundary, so a multi-byte UTF-8
// character is never split in half and a valid UTF-8 input always yields a
// valid UTF-8 name.
func getTestNameFromRegexp(re string) string {
	const maxLen = 32

	if len(re) <= maxLen {
		return re
	}

	// Ranging over a string yields the byte offset at which each rune
	// starts: keep the last offset that does not exceed maxLen.
	cut := 0
	for offset := range re {
		if offset > maxLen {
			break
		}
		cut = offset
	}

	return re[:cut]
}
|
||||||
|
|
Loading…
Reference in a new issue