Merge pull request #577 from grafana/fix-regexp-optimization-with-double-capture-group

Fix FastRegexMatcher to skip nested capture groups
This commit is contained in:
Marco Pracucci 2023-12-20 13:57:36 +01:00 committed by GitHub
commit 12d2c1038b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 1 deletion

View file

@@ -284,7 +284,8 @@ func findSetMatchesFromAlternate(re *syntax.Regexp, base string) (matches []stri
// clearCapture removes capture operations, as they are not used for matching.
func clearCapture(regs ...*syntax.Regexp) {
for _, r := range regs {
if r.Op == syntax.OpCapture {
// Iterate on the regexp because capture groups could be nested.
for r.Op == syntax.OpCapture {
*r = *r.Sub[0]
}
}

View file

@@ -286,6 +286,8 @@ func TestFindSetMatches(t *testing.T) {
// Simple sets alternate and concat and alternates with empty matches
// parsed as b(ar|(?:)|uzz) where b(?:) means literal b.
{"bar|b|buzz", []string{"bar", "b", "buzz"}, true},
// Skip nested capture groups.
{"^((bar|b|buzz))$", []string{"bar", "b", "buzz"}, true},
// Skip outer anchors (it's enforced anyway at the root).
{"^(bar|b|buzz)$", []string{"bar", "b", "buzz"}, true},
{"^(?:prod|production)$", []string{"prod", "production"}, true},
@@ -395,6 +397,8 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}},
{"^(?i:foo)$", &equalStringMatcher{s: "FOO", caseSensitive: false}},
{"^((?i:foo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
{`(?i:((foo|bar)))`, orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
{"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}},