Fixes the code and improves tests.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
This commit is contained in:
Cyril Tovena 2021-10-07 15:10:26 +02:00
parent 3c13a5e9bd
commit 852b553c00
No known key found for this signature in database
GPG key ID: FD8F768F9D633FB6
2 changed files with 51 additions and 42 deletions

View file

@ -25,6 +25,7 @@ type FastRegexMatcher struct {
re *regexp.Regexp re *regexp.Regexp
setMatches []string setMatches []string
stringMatcher StringMatcher
prefix string prefix string
suffix string suffix string
contains string contains string
@ -44,6 +45,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
m := &FastRegexMatcher{ m := &FastRegexMatcher{
re: re, re: re,
setMatches: findSetMatches(parsed, ""), setMatches: findSetMatches(parsed, ""),
stringMatcher: stringMatcherFromRegexp(parsed),
} }
if parsed.Op == syntax.OpConcat { if parsed.Op == syntax.OpConcat {
@ -193,6 +195,9 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
} }
return false return false
} }
if m.stringMatcher != nil {
return m.stringMatcher.Matches(s)
}
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) { if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
return false return false
} }
@ -262,18 +267,17 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
clearCapture(re) clearCapture(re)
clearBeginEndText(re) clearBeginEndText(re)
switch re.Op { switch re.Op {
case syntax.OpStar: case syntax.OpPlus, syntax.OpStar:
if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
return nil
}
return anyStringMatcher{ return anyStringMatcher{
allowEmpty: true, allowEmpty: re.Op == syntax.OpStar,
matchNL: re.Flags&syntax.DotNL != 0, matchNL: re.Sub[0].Op == syntax.OpAnyChar,
} }
case syntax.OpEmptyMatch: case syntax.OpEmptyMatch:
return emptyStringMatcher{} return emptyStringMatcher{}
case syntax.OpPlus:
return anyStringMatcher{
allowEmpty: false,
matchNL: re.Flags&syntax.DotNL != 0,
}
case syntax.OpLiteral: case syntax.OpLiteral:
return equalStringMatcher{ return equalStringMatcher{
s: string(re.Rune), s: string(re.Rune),
@ -328,7 +332,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
} }
if len(matches) > 0 { if len(matches) > 0 {
return containsStringMatcher{ return containsStringMatcher{
substr: matches, substrings: matches,
left: left, left: left,
right: right, right: right,
} }
@ -338,32 +342,32 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
} }
type containsStringMatcher struct { type containsStringMatcher struct {
substr []string substrings []string
left StringMatcher left StringMatcher
right StringMatcher right StringMatcher
} }
func (m containsStringMatcher) Matches(s string) bool { func (m containsStringMatcher) Matches(s string) bool {
var pos int var pos int
for _, substr := range m.substr { for _, substr := range m.substrings {
pos = strings.Index(s, substr) pos = strings.Index(s, substr)
if pos < 0 { if pos < 0 {
continue continue
} }
if m.right != nil && m.left != nil { if m.right != nil && m.left != nil {
if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(m.substr):]) { if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(substr):]) {
return true return true
} }
continue continue
} }
if m.left != nil { if m.left != nil {
if m.left.Matches(s[:pos]) { if pos+len(substr) == len(s) && m.left.Matches(s[:pos]) {
return true return true
} }
continue continue
} }
if m.right != nil { if m.right != nil {
if m.right.Matches(s[pos+len(m.substr):]) { if pos == 0 && m.right.Matches(s[pos+len(substr):]) {
return true return true
} }
continue continue
@ -395,7 +399,7 @@ type equalStringMatcher struct {
} }
func (m equalStringMatcher) Matches(s string) bool { func (m equalStringMatcher) Matches(s string) bool {
if !m.caseSensitive { if m.caseSensitive {
return m.s == s return m.s == s
} }
return strings.EqualFold(m.s, s) return strings.EqualFold(m.s, s)

View file

@ -48,14 +48,17 @@ var (
"(?s:.*)", "(?s:.*)",
"(?s:.+)", "(?s:.+)",
"(?s:^.*foo$)", "(?s:^.*foo$)",
"^(?i:foo|oo)|(bar)$",
"((.*)(bar|b|buzz)(.+)|foo)$", "((.*)(bar|b|buzz)(.+)|foo)$",
"^$", "^$",
"(prometheus|api_prom)_api_v1_.+", "(prometheus|api_prom)_api_v1_.+",
"10\\.0\\.(1|2)\\.+", "10\\.0\\.(1|2)\\.+",
"10\\.0\\.(1|2).+",
"((fo(bar))|.+foo)",
} }
values = []string{ values = []string{
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "", "foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
"\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo", "FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40", "10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
} }
) )
@ -203,6 +206,7 @@ func Test_OptimizeRegex(t *testing.T) {
exp StringMatcher exp StringMatcher
}{ }{
{".*", anyStringMatcher{allowEmpty: true, matchNL: false}}, {".*", anyStringMatcher{allowEmpty: true, matchNL: false}},
{".*?", anyStringMatcher{allowEmpty: true, matchNL: false}},
{"(?s:.*)", anyStringMatcher{allowEmpty: true, matchNL: true}}, {"(?s:.*)", anyStringMatcher{allowEmpty: true, matchNL: true}},
{"(.*)", anyStringMatcher{allowEmpty: true, matchNL: false}}, {"(.*)", anyStringMatcher{allowEmpty: true, matchNL: false}},
{"^.*$", anyStringMatcher{allowEmpty: true, matchNL: false}}, {"^.*$", anyStringMatcher{allowEmpty: true, matchNL: false}},
@ -216,22 +220,23 @@ func Test_OptimizeRegex(t *testing.T) {
{"^(?i:foo)$", equalStringMatcher{s: "FOO", caseSensitive: false}}, {"^(?i:foo)$", equalStringMatcher{s: "FOO", caseSensitive: false}},
{"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "bar", caseSensitive: true}})}, {"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "bar", caseSensitive: true}})},
{"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "OO", caseSensitive: false}}), equalStringMatcher{s: "bar", caseSensitive: true}})}, {"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "OO", caseSensitive: false}}), equalStringMatcher{s: "bar", caseSensitive: true}})},
{".*foo.*", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {".*foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(.*)foo.*", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {"(.*)foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(.*)foo(.*)", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {"(.*)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"(.+)foo(.*)", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {"(.+)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^.+foo.+", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, {"^.+foo.+", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"^(.*)(foo)(.*)$", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {"^(.*)(foo)(.*)$", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^(.*)(foo|foobar)(.*)$", containsStringMatcher{substr: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {"^(.*)(foo|foobar)(.*)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^(.*)(foo|foobar)(.+)$", containsStringMatcher{substr: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, {"^(.*)(foo|foobar)(.+)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"^(.*)(bar|b|buzz)(.+)$", containsStringMatcher{substr: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, {"^(.*)(bar|b|buzz)(.+)$", containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"10\\.0\\.(1|2)\\.+", containsStringMatcher{substr: []string{"10.0.1", "10.0.2"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, {"10\\.0\\.(1|2)\\.+", nil},
{"^.+foo", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}}, {"10\\.0\\.(1|2).+", containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"foo-.*$", containsStringMatcher{substr: []string{"foo-"}, left: nil, right: anyStringMatcher{allowEmpty: true, matchNL: false}}}, {"^.+foo", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
{"(prometheus|api_prom)_api_v1_.+", containsStringMatcher{substr: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}}, {"foo-.*$", containsStringMatcher{substrings: []string{"foo-"}, left: nil, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{containsStringMatcher{substr: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}, equalStringMatcher{s: "foo", caseSensitive: true}})}, {"(prometheus|api_prom)_api_v1_.+", containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "fobar", caseSensitive: true}}), containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})}, {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}, equalStringMatcher{s: "foo", caseSensitive: true}})},
{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", containsStringMatcher{substr: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}}, {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "fobar", caseSensitive: true}}), containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})},
{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
// we don't support case insensitive matching for contains. // we don't support case insensitive matching for contains.
// This is because there's no strings.IndexOfFold function. // This is because there's no strings.IndexOfFold function.
// We can revisit later if this is really popular by using strings.ToUpper. // We can revisit later if this is really popular by using strings.ToUpper.