mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-21 18:50:52 -08:00
Do not optimize regexps with begin/end text anchors inside (#433)
* Do not optimize regexps with being/end text anchors inside Signed-off-by: Marco Pracucci <marco@pracucci.com> * Explicit case for begin/end text in stringMatcherFromRegexpInternal() Signed-off-by: Marco Pracucci <marco@pracucci.com> * Added more test cases Signed-off-by: Marco Pracucci <marco@pracucci.com> --------- Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
parent
2e0ecc013f
commit
eeecfee885
|
@ -49,7 +49,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
|||
if parsed.Op == syntax.OpConcat {
|
||||
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
||||
}
|
||||
if matches, caseSensitive := findSetMatches(parsed, ""); caseSensitive {
|
||||
if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
|
||||
m.setMatches = matches
|
||||
}
|
||||
m.stringMatcher = stringMatcherFromRegexp(parsed)
|
||||
|
@ -60,10 +60,22 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
|||
// findSetMatches extract equality matches from a regexp.
|
||||
// Returns nil if we can't replace the regexp by only equality matchers or the regexp contains
|
||||
// a mix of case sensitive and case insensitive matchers.
|
||||
func findSetMatches(re *syntax.Regexp, base string) (matches []string, caseSensitive bool) {
|
||||
func findSetMatches(re *syntax.Regexp) (matches []string, caseSensitive bool) {
|
||||
clearBeginEndText(re)
|
||||
|
||||
return findSetMatchesInternal(re, "")
|
||||
}
|
||||
|
||||
func findSetMatchesInternal(re *syntax.Regexp, base string) (matches []string, caseSensitive bool) {
|
||||
switch re.Op {
|
||||
case syntax.OpBeginText:
|
||||
// Correctly handling the begin text operator inside a regex is tricky,
|
||||
// so in this case we fallback to the regex engine.
|
||||
return nil, false
|
||||
case syntax.OpEndText:
|
||||
// Correctly handling the end text operator inside a regex is tricky,
|
||||
// so in this case we fallback to the regex engine.
|
||||
return nil, false
|
||||
case syntax.OpLiteral:
|
||||
return []string{base + string(re.Rune)}, isCaseSensitive(re)
|
||||
case syntax.OpEmptyMatch:
|
||||
|
@ -74,7 +86,7 @@ func findSetMatches(re *syntax.Regexp, base string) (matches []string, caseSensi
|
|||
return findSetMatchesFromAlternate(re, base)
|
||||
case syntax.OpCapture:
|
||||
clearCapture(re)
|
||||
return findSetMatches(re, base)
|
||||
return findSetMatchesInternal(re, base)
|
||||
case syntax.OpConcat:
|
||||
return findSetMatchesFromConcat(re, base)
|
||||
case syntax.OpCharClass:
|
||||
|
@ -116,7 +128,7 @@ func findSetMatchesFromConcat(re *syntax.Regexp, base string) (matches []string,
|
|||
for i := 0; i < len(re.Sub); i++ {
|
||||
var newMatches []string
|
||||
for j, b := range matches {
|
||||
m, caseSensitive := findSetMatches(re.Sub[i], b)
|
||||
m, caseSensitive := findSetMatchesInternal(re.Sub[i], b)
|
||||
if m == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
@ -144,7 +156,7 @@ func findSetMatchesFromConcat(re *syntax.Regexp, base string) (matches []string,
|
|||
|
||||
func findSetMatchesFromAlternate(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) {
|
||||
for i, sub := range re.Sub {
|
||||
found, caseSensitive := findSetMatches(sub, base)
|
||||
found, caseSensitive := findSetMatchesInternal(sub, base)
|
||||
if found == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
@ -179,6 +191,12 @@ func clearCapture(regs ...*syntax.Regexp) {
|
|||
|
||||
// clearBeginEndText removes the begin and end text from the regexp. Prometheus regexp are anchored to the beginning and end of the string.
|
||||
func clearBeginEndText(re *syntax.Regexp) {
|
||||
// Do not clear begin/end text from an alternate operator because it could
|
||||
// change the actual regexp properties.
|
||||
if re.Op == syntax.OpAlternate {
|
||||
return
|
||||
}
|
||||
|
||||
if len(re.Sub) == 0 {
|
||||
return
|
||||
}
|
||||
|
@ -298,10 +316,23 @@ type StringMatcher interface {
|
|||
// It returns nil if the regexp is not supported.
|
||||
// For examples, it will replace `.*foo` with `foo.*` and `.*foo.*` with `(?i)foo`.
|
||||
func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
||||
clearCapture(re)
|
||||
clearBeginEndText(re)
|
||||
|
||||
return stringMatcherFromRegexpInternal(re)
|
||||
}
|
||||
|
||||
func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
|
||||
clearCapture(re)
|
||||
|
||||
switch re.Op {
|
||||
case syntax.OpBeginText:
|
||||
// Correctly handling the begin text operator inside a regex is tricky,
|
||||
// so in this case we fallback to the regex engine.
|
||||
return nil
|
||||
case syntax.OpEndText:
|
||||
// Correctly handling the end text operator inside a regex is tricky,
|
||||
// so in this case we fallback to the regex engine.
|
||||
return nil
|
||||
case syntax.OpPlus, syntax.OpStar:
|
||||
if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
|
||||
return nil
|
||||
|
@ -321,7 +352,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
|||
case syntax.OpAlternate:
|
||||
or := make([]StringMatcher, 0, len(re.Sub))
|
||||
for _, sub := range re.Sub {
|
||||
m := stringMatcherFromRegexp(sub)
|
||||
m := stringMatcherFromRegexpInternal(sub)
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
|
@ -334,26 +365,26 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
|||
return emptyStringMatcher{}
|
||||
}
|
||||
if len(re.Sub) == 1 {
|
||||
return stringMatcherFromRegexp(re.Sub[0])
|
||||
return stringMatcherFromRegexpInternal(re.Sub[0])
|
||||
}
|
||||
var left, right StringMatcher
|
||||
// Let's try to find if there's a first and last any matchers.
|
||||
if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar {
|
||||
left = stringMatcherFromRegexp(re.Sub[0])
|
||||
left = stringMatcherFromRegexpInternal(re.Sub[0])
|
||||
if left == nil {
|
||||
return nil
|
||||
}
|
||||
re.Sub = re.Sub[1:]
|
||||
}
|
||||
if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar {
|
||||
right = stringMatcherFromRegexp(re.Sub[len(re.Sub)-1])
|
||||
right = stringMatcherFromRegexpInternal(re.Sub[len(re.Sub)-1])
|
||||
if right == nil {
|
||||
return nil
|
||||
}
|
||||
re.Sub = re.Sub[:len(re.Sub)-1]
|
||||
}
|
||||
|
||||
matches, matchesCaseSensitive := findSetMatches(re, "")
|
||||
matches, matchesCaseSensitive := findSetMatchesInternal(re, "")
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -129,6 +129,9 @@ func TestOptimizeConcatRegex(t *testing.T) {
|
|||
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: "abc"},
|
||||
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: "abc"},
|
||||
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: "bc"},
|
||||
{regex: "^5..$", prefix: "5", suffix: "", contains: ""},
|
||||
{regex: "^release.*", prefix: "release", suffix: "", contains: ""},
|
||||
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: "laio"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
|
@ -162,8 +165,13 @@ func TestFindSetMatches(t *testing.T) {
|
|||
// Simple sets alternate and concat and alternates with empty matches
|
||||
// parsed as b(ar|(?:)|uzz) where b(?:) means literal b.
|
||||
{"bar|b|buzz", []string{"bar", "b", "buzz"}, true},
|
||||
// Skip anchors it's enforced anyway at the root.
|
||||
{"(^bar$)|(b$)|(^buzz)", []string{"bar", "b", "buzz"}, true},
|
||||
// Skip outer anchors (it's enforced anyway at the root).
|
||||
{"^(bar|b|buzz)$", []string{"bar", "b", "buzz"}, true},
|
||||
{"^(?:prod|production)$", []string{"prod", "production"}, true},
|
||||
// Do not optimize regexp with inner anchors.
|
||||
{"(bar|b|b^uz$z)", nil, false},
|
||||
// Do not optimize regexp with empty string matcher.
|
||||
{"^$|Running", nil, false},
|
||||
// Simple sets containing escaped characters.
|
||||
{"fo\\.o|bar\\?|\\^baz", []string{"fo.o", "bar?", "^baz"}, true},
|
||||
// using charclass
|
||||
|
@ -208,7 +216,7 @@ func TestFindSetMatches(t *testing.T) {
|
|||
t.Parallel()
|
||||
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
||||
require.NoError(t, err)
|
||||
matches, actualCaseSensitive := findSetMatches(parsed, "")
|
||||
matches, actualCaseSensitive := findSetMatches(parsed)
|
||||
require.Equal(t, c.expMatches, matches)
|
||||
require.Equal(t, c.expCaseSensitive, actualCaseSensitive)
|
||||
})
|
||||
|
@ -274,8 +282,8 @@ func Test_OptimizeRegex(t *testing.T) {
|
|||
{"^$", emptyStringMatcher{}},
|
||||
{"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}},
|
||||
{"^(?i:foo)$", &equalStringMatcher{s: "FOO", caseSensitive: false}},
|
||||
{"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
|
||||
{"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}}), &equalStringMatcher{s: "bar", caseSensitive: true}})},
|
||||
{"^((?i:foo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
|
||||
{"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
|
||||
{"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
|
||||
{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||
{"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||
|
@ -348,3 +356,21 @@ func FuzzFastRegexMatcher_WithStaticallyDefinedRegularExpressions(f *testing.F)
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
func FuzzFastRegexMatcher_WithFuzzyRegularExpressions(f *testing.F) {
|
||||
for _, re := range regexes {
|
||||
for _, text := range values {
|
||||
f.Add(re, text)
|
||||
}
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, re, text string) {
|
||||
m, err := NewFastRegexMatcher(re)
|
||||
if err != nil {
|
||||
// Ignore invalid regexes.
|
||||
return
|
||||
}
|
||||
|
||||
require.Equalf(t, m.re.MatchString(text), m.MatchString(text), "regexp: %s text: %s", m.re.String(), text)
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue