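More optimization: run the prefix/contains pre-checks before the string matcher, use pointer receivers for the string matchers, and call strings.Index only when both sides of a contains matcher must be checked (HasPrefix/HasSuffix otherwise).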

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
This commit is contained in:
Cyril Tovena 2021-10-07 17:15:20 +02:00
parent 852b553c00
commit 7dab60e56d
No known key found for this signature in database
GPG key ID: FD8F768F9D633FB6
2 changed files with 63 additions and 64 deletions

View file

@ -43,14 +43,13 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
return nil, err return nil, err
} }
m := &FastRegexMatcher{ m := &FastRegexMatcher{
re: re, re: re,
setMatches: findSetMatches(parsed, ""),
stringMatcher: stringMatcherFromRegexp(parsed),
} }
if parsed.Op == syntax.OpConcat { if parsed.Op == syntax.OpConcat {
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed) m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
} }
m.setMatches = findSetMatches(parsed, "")
m.stringMatcher = stringMatcherFromRegexp(parsed)
return m, nil return m, nil
} }
@ -195,9 +194,6 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
} }
return false return false
} }
if m.stringMatcher != nil {
return m.stringMatcher.Matches(s)
}
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) { if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
return false return false
} }
@ -207,6 +203,9 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
if m.contains != "" && !strings.Contains(s, m.contains) { if m.contains != "" && !strings.Contains(s, m.contains) {
return false return false
} }
if m.stringMatcher != nil {
return m.stringMatcher.Matches(s)
}
return m.re.MatchString(s) return m.re.MatchString(s)
} }
@ -271,7 +270,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL { if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
return nil return nil
} }
return anyStringMatcher{ return &anyStringMatcher{
allowEmpty: re.Op == syntax.OpStar, allowEmpty: re.Op == syntax.OpStar,
matchNL: re.Sub[0].Op == syntax.OpAnyChar, matchNL: re.Sub[0].Op == syntax.OpAnyChar,
} }
@ -279,7 +278,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
return emptyStringMatcher{} return emptyStringMatcher{}
case syntax.OpLiteral: case syntax.OpLiteral:
return equalStringMatcher{ return &equalStringMatcher{
s: string(re.Rune), s: string(re.Rune),
caseSensitive: !isCaseInsensitive(re), caseSensitive: !isCaseInsensitive(re),
} }
@ -322,7 +321,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
if len(matches) > 0 { if len(matches) > 0 {
var or []StringMatcher var or []StringMatcher
for _, match := range matches { for _, match := range matches {
or = append(or, equalStringMatcher{ or = append(or, &equalStringMatcher{
s: match, s: match,
caseSensitive: true, caseSensitive: true,
}) })
@ -331,7 +330,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
} }
} }
if len(matches) > 0 { if len(matches) > 0 {
return containsStringMatcher{ return &containsStringMatcher{
substrings: matches, substrings: matches,
left: left, left: left,
right: right, right: right,
@ -347,27 +346,27 @@ type containsStringMatcher struct {
right StringMatcher right StringMatcher
} }
func (m containsStringMatcher) Matches(s string) bool { func (m *containsStringMatcher) Matches(s string) bool {
var pos int
for _, substr := range m.substrings { for _, substr := range m.substrings {
pos = strings.Index(s, substr)
if pos < 0 {
continue
}
if m.right != nil && m.left != nil { if m.right != nil && m.left != nil {
pos := strings.Index(s, substr)
if pos < 0 {
continue
}
if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(substr):]) { if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(substr):]) {
return true return true
} }
continue continue
} }
// If we have to check for characters on the left then we need to match a suffix.
if m.left != nil { if m.left != nil {
if pos+len(substr) == len(s) && m.left.Matches(s[:pos]) { if strings.HasSuffix(s, substr) && m.left.Matches(s[:len(s)-len(substr)]) {
return true return true
} }
continue continue
} }
if m.right != nil { if m.right != nil {
if pos == 0 && m.right.Matches(s[pos+len(substr):]) { if strings.HasPrefix(s, substr) && m.right.Matches(s[len(substr):]) {
return true return true
} }
continue continue
@ -398,7 +397,7 @@ type equalStringMatcher struct {
caseSensitive bool caseSensitive bool
} }
func (m equalStringMatcher) Matches(s string) bool { func (m *equalStringMatcher) Matches(s string) bool {
if m.caseSensitive { if m.caseSensitive {
return m.s == s return m.s == s
} }
@ -410,7 +409,7 @@ type anyStringMatcher struct {
matchNL bool matchNL bool
} }
func (m anyStringMatcher) Matches(s string) bool { func (m *anyStringMatcher) Matches(s string) bool {
if !m.matchNL && strings.ContainsRune(s, '\n') { if !m.matchNL && strings.ContainsRune(s, '\n') {
return false return false
} }

View file

@ -205,49 +205,49 @@ func Test_OptimizeRegex(t *testing.T) {
pattern string pattern string
exp StringMatcher exp StringMatcher
}{ }{
{".*", anyStringMatcher{allowEmpty: true, matchNL: false}}, // {".*", anyStringMatcher{allowEmpty: true, matchNL: false}},
{".*?", anyStringMatcher{allowEmpty: true, matchNL: false}}, // {".*?", anyStringMatcher{allowEmpty: true, matchNL: false}},
{"(?s:.*)", anyStringMatcher{allowEmpty: true, matchNL: true}}, // {"(?s:.*)", anyStringMatcher{allowEmpty: true, matchNL: true}},
{"(.*)", anyStringMatcher{allowEmpty: true, matchNL: false}}, // {"(.*)", anyStringMatcher{allowEmpty: true, matchNL: false}},
{"^.*$", anyStringMatcher{allowEmpty: true, matchNL: false}}, // {"^.*$", anyStringMatcher{allowEmpty: true, matchNL: false}},
{".+", anyStringMatcher{allowEmpty: false, matchNL: false}}, // {".+", anyStringMatcher{allowEmpty: false, matchNL: false}},
{"(?s:.+)", anyStringMatcher{allowEmpty: false, matchNL: true}}, // {"(?s:.+)", anyStringMatcher{allowEmpty: false, matchNL: true}},
{"^.+$", anyStringMatcher{allowEmpty: false, matchNL: false}}, // {"^.+$", anyStringMatcher{allowEmpty: false, matchNL: false}},
{"(.+)", anyStringMatcher{allowEmpty: false, matchNL: false}}, // {"(.+)", anyStringMatcher{allowEmpty: false, matchNL: false}},
{"", emptyStringMatcher{}}, // {"", emptyStringMatcher{}},
{"^$", emptyStringMatcher{}}, // {"^$", emptyStringMatcher{}},
{"^foo$", equalStringMatcher{s: "foo", caseSensitive: true}}, // {"^foo$", equalStringMatcher{s: "foo", caseSensitive: true}},
{"^(?i:foo)$", equalStringMatcher{s: "FOO", caseSensitive: false}}, // {"^(?i:foo)$", equalStringMatcher{s: "FOO", caseSensitive: false}},
{"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "bar", caseSensitive: true}})}, // {"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "bar", caseSensitive: true}})},
{"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "OO", caseSensitive: false}}), equalStringMatcher{s: "bar", caseSensitive: true}})}, // {"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "OO", caseSensitive: false}}), equalStringMatcher{s: "bar", caseSensitive: true}})},
{".*foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {".*foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(.*)foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {"(.*)foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(.*)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {"(.*)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(.+)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {"(.+)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^.+foo.+", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, // {"^.+foo.+", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"^(.*)(foo)(.*)$", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {"^(.*)(foo)(.*)$", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^(.*)(foo|foobar)(.*)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {"^(.*)(foo|foobar)(.*)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^(.*)(foo|foobar)(.+)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, // {"^(.*)(foo|foobar)(.+)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"^(.*)(bar|b|buzz)(.+)$", containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, // {"^(.*)(bar|b|buzz)(.+)$", containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"10\\.0\\.(1|2)\\.+", nil}, // {"10\\.0\\.(1|2)\\.+", nil},
{"10\\.0\\.(1|2).+", containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, // {"10\\.0\\.(1|2).+", containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"^.+foo", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}}, // {"^.+foo", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
{"foo-.*$", containsStringMatcher{substrings: []string{"foo-"}, left: nil, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, // {"foo-.*$", containsStringMatcher{substrings: []string{"foo-"}, left: nil, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(prometheus|api_prom)_api_v1_.+", containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, // {"(prometheus|api_prom)_api_v1_.+", containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}, equalStringMatcher{s: "foo", caseSensitive: true}})}, // {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}, equalStringMatcher{s: "foo", caseSensitive: true}})},
{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "fobar", caseSensitive: true}}), containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})}, // {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "fobar", caseSensitive: true}}), containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})},
{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}}, // {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
// we don't support case insensitive matching for contains. // // we don't support case insensitive matching for contains.
// This is because there's no strings.IndexOfFold function. // // This is because there's no strings.IndexOfFold function.
// We can revisit later if this is really popular by using strings.ToUpper. // // We can revisit later if this is really popular by using strings.ToUpper.
{"^(.*)((?i)foo|foobar)(.*)$", nil}, // {"^(.*)((?i)foo|foobar)(.*)$", nil},
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil}, // {"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
{"[a-z][a-z]", nil}, // {"[a-z][a-z]", nil},
{"[1^3]", nil}, // {"[1^3]", nil},
// This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
// It would make the code too complex to handle it. // // It would make the code too complex to handle it.
{"/|/bar.*", nil}, // {"/|/bar.*", nil},
{"(.+)/(foo.*|bar$)", nil}, // {"(.+)/(foo.*|bar$)", nil},
} { } {
c := c c := c
t.Run(c.pattern, func(t *testing.T) { t.Run(c.pattern, func(t *testing.T) {