Add more comments.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
This commit is contained in:
Cyril Tovena 2021-10-11 10:10:51 +02:00
parent 9c33f392e6
commit 5c5b38712f
No known key found for this signature in database
GPG key ID: FD8F768F9D633FB6
2 changed files with 19 additions and 1 deletions

View file

@ -258,10 +258,14 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
return return
} }
// StringMatcher is a matcher that matches a string in place of a regular expression.
type StringMatcher interface { type StringMatcher interface {
Matches(s string) bool Matches(s string) bool
} }
// stringMatcherFromRegexp attempts to replace a common regexp with a string matcher.
// It returns nil if the regexp is not supported.
// For example, it will replace `.*foo` with a suffix match on `foo` and `.*foo.*` with a contains match on `foo`.
func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher { func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
clearCapture(re) clearCapture(re)
clearBeginEndText(re) clearBeginEndText(re)
@ -301,7 +305,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
return stringMatcherFromRegexp(re.Sub[0]) return stringMatcherFromRegexp(re.Sub[0])
} }
var left, right StringMatcher var left, right StringMatcher
// Check whether the concatenation starts and/or ends with an any matcher.
if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar { if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar {
left = stringMatcherFromRegexp(re.Sub[0]) left = stringMatcherFromRegexp(re.Sub[0])
if left == nil { if left == nil {
@ -318,6 +322,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
} }
matches := findSetMatches(re, "") matches := findSetMatches(re, "")
if left == nil && right == nil { if left == nil && right == nil {
// If there is no any matcher on either side, it's a concat of literals.
if len(matches) > 0 { if len(matches) > 0 {
var or []StringMatcher var or []StringMatcher
for _, match := range matches { for _, match := range matches {
@ -329,6 +334,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
return orStringMatcher(or) return orStringMatcher(or)
} }
} }
// Otherwise, we found literals in the middle.
if len(matches) > 0 { if len(matches) > 0 {
return &containsStringMatcher{ return &containsStringMatcher{
substrings: matches, substrings: matches,
@ -340,6 +346,10 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
return nil return nil
} }
// containsStringMatcher matches a string if it contains any of the substrings.
// If left and right are not nil, it's a contains operation where left and right must match any string.
// If left is nil, it's a hasPrefix operation and right must match any string.
// Finally, if right is nil, it's a hasSuffix operation and left must match any string.
type containsStringMatcher struct { type containsStringMatcher struct {
substrings []string substrings []string
left StringMatcher left StringMatcher
@ -375,12 +385,14 @@ func (m *containsStringMatcher) Matches(s string) bool {
return false return false
} }
// emptyStringMatcher matches an empty string.
type emptyStringMatcher struct{} type emptyStringMatcher struct{}
func (m emptyStringMatcher) Matches(s string) bool { func (m emptyStringMatcher) Matches(s string) bool {
return len(s) == 0 return len(s) == 0
} }
// orStringMatcher matches any of the sub-matchers.
type orStringMatcher []StringMatcher type orStringMatcher []StringMatcher
func (m orStringMatcher) Matches(s string) bool { func (m orStringMatcher) Matches(s string) bool {
@ -392,6 +404,7 @@ func (m orStringMatcher) Matches(s string) bool {
return false return false
} }
// equalStringMatcher matches a string exactly and supports case-insensitive matching.
type equalStringMatcher struct { type equalStringMatcher struct {
s string s string
caseSensitive bool caseSensitive bool
@ -404,6 +417,8 @@ func (m *equalStringMatcher) Matches(s string) bool {
return strings.EqualFold(m.s, s) return strings.EqualFold(m.s, s)
} }
// anyStringMatcher is a matcher that matches any string.
// It is used for the + and * operators. matchNL tells whether it should match newlines or not.
type anyStringMatcher struct { type anyStringMatcher struct {
allowEmpty bool allowEmpty bool
matchNL bool matchNL bool

View file

@ -250,6 +250,9 @@ func Test_OptimizeRegex(t *testing.T) {
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil}, {"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
{"[a-z][a-z]", nil}, {"[a-z][a-z]", nil},
{"[1^3]", nil}, {"[1^3]", nil},
{".*foo.*bar.*", nil},
{`\d*`, nil},
{".", nil},
// This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
// It would make the code too complex to handle it. // It would make the code too complex to handle it.
{"/|/bar.*", nil}, {"/|/bar.*", nil},