mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Merge pull request #537 from grafana/more-regexp-optimizations-with-dot-quest-support
Optimize .? in the FastRegexMatcher
This commit is contained in:
commit
c7c149e1d9
|
@ -480,6 +480,15 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
|
||||||
|
|
||||||
// Any string is fine (including an empty one), as far as it doesn't contain any newline.
|
// Any string is fine (including an empty one), as far as it doesn't contain any newline.
|
||||||
return anyStringWithoutNewlineMatcher{}
|
return anyStringWithoutNewlineMatcher{}
|
||||||
|
case syntax.OpQuest:
|
||||||
|
// Only optimize for ".?".
|
||||||
|
if len(re.Sub) != 1 || (re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &zeroOrOneCharacterStringMatcher{
|
||||||
|
matchNL: re.Sub[0].Op == syntax.OpAnyChar,
|
||||||
|
}
|
||||||
case syntax.OpEmptyMatch:
|
case syntax.OpEmptyMatch:
|
||||||
return emptyStringMatcher{}
|
return emptyStringMatcher{}
|
||||||
|
|
||||||
|
@ -511,14 +520,14 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
|
||||||
var left, right StringMatcher
|
var left, right StringMatcher
|
||||||
|
|
||||||
// Let's try to find if there's a first and last any matchers.
|
// Let's try to find if there's a first and last any matchers.
|
||||||
if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar {
|
if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar || re.Sub[0].Op == syntax.OpQuest {
|
||||||
left = stringMatcherFromRegexpInternal(re.Sub[0])
|
left = stringMatcherFromRegexpInternal(re.Sub[0])
|
||||||
if left == nil {
|
if left == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
re.Sub = re.Sub[1:]
|
re.Sub = re.Sub[1:]
|
||||||
}
|
}
|
||||||
if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar {
|
if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar || re.Sub[len(re.Sub)-1].Op == syntax.OpQuest {
|
||||||
right = stringMatcherFromRegexpInternal(re.Sub[len(re.Sub)-1])
|
right = stringMatcherFromRegexpInternal(re.Sub[len(re.Sub)-1])
|
||||||
if right == nil {
|
if right == nil {
|
||||||
return nil
|
return nil
|
||||||
|
@ -845,6 +854,26 @@ func (m *anyNonEmptyStringMatcher) Matches(s string) bool {
|
||||||
return len(s) > 0 && strings.IndexByte(s, '\n') == -1
|
return len(s) > 0 && strings.IndexByte(s, '\n') == -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// zeroOrOneCharacterStringMatcher is a StringMatcher which matches zero or one occurrence
|
||||||
|
// of any character. The newline character is matches only if matchNL is set to true.
|
||||||
|
type zeroOrOneCharacterStringMatcher struct {
|
||||||
|
matchNL bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
|
||||||
|
// Zero or one.
|
||||||
|
if len(s) > 1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// No need to check for the newline if the string is empty or matching a newline is OK.
|
||||||
|
if m.matchNL || len(s) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
return s[0] != '\n'
|
||||||
|
}
|
||||||
|
|
||||||
// trueMatcher is a stringMatcher which matches any string (always returns true).
|
// trueMatcher is a stringMatcher which matches any string (always returns true).
|
||||||
type trueMatcher struct{}
|
type trueMatcher struct{}
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,13 @@ var (
|
||||||
"(?i:(zQPbMkNO.*|NNSPdvMi.*|iWuuSoAl.*|qbvKMimS.*|IecrXtPa.*|seTckYqt.*|NxnyHkgB.*|fIDlOgKb.*|UhlWIygH.*|OtNoJxHG.*|cUTkFVIV.*|mTgFIHjr.*|jQkoIDtE.*|PPMKxRXl.*|AwMfwVkQ.*|CQyMrTQJ.*|BzrqxVSi.*|nTpcWuhF.*|PertdywG.*|ZZDgCtXN.*|WWdDPyyE.*|uVtNQsKk.*|BdeCHvPZ.*|wshRnFlH.*|aOUIitIp.*|RxZeCdXT.*|CFZMslCj.*|AVBZRDxl.*|IzIGCnhw.*|ythYuWiz.*|oztXVXhl.*|VbLkwqQx.*|qvaUgyVC.*|VawUjPWC.*|ecloYJuj.*|boCLTdSU.*|uPrKeAZx.*|hrMWLWBq.*|JOnUNHRM.*|rYnujkPq.*|dDEdZhIj.*|DRrfvugG.*|yEGfDxVV.*|YMYdJWuP.*|PHUQZNWM.*|AmKNrLis.*|zTxndVfn.*|FPsHoJnc.*|EIulZTua.*|KlAPhdzg.*|ScHJJCLt.*|NtTfMzME.*|eMCwuFdo.*|SEpJVJbR.*|cdhXZeCx.*|sAVtBwRh.*|kVFEVcMI.*|jzJrxraA.*|tGLHTell.*|NNWoeSaw.*|DcOKSetX.*|UXZAJyka.*|THpMphDP.*|rizheevl.*|kDCBRidd.*|pCZZRqyu.*|pSygkitl.*|SwZGkAaW.*|wILOrfNX.*|QkwVOerj.*|kHOMxPDr.*|EwOVycJv.*|AJvtzQFS.*|yEOjKYYB.*|LizIINLL.*|JBRSsfcG.*|YPiUqqNl.*|IsdEbvee.*|MjEpGcBm.*|OxXZVgEQ.*|xClXGuxa.*|UzRCGFEb.*|buJbvfvA.*|IPZQxRet.*|oFYShsMc.*|oBHffuHO.*|bzzKrcBR.*|KAjzrGCl.*|IPUsAVls.*|OGMUMbIU.*|gyDccHuR.*|bjlalnDd.*|ZLWjeMna.*|fdsuIlxQ.*|dVXtiomV.*|XxedTjNg.*|XWMHlNoA.*|nnyqArQX.*|opfkWGhb.*|wYtnhdYb.*))",
|
"(?i:(zQPbMkNO.*|NNSPdvMi.*|iWuuSoAl.*|qbvKMimS.*|IecrXtPa.*|seTckYqt.*|NxnyHkgB.*|fIDlOgKb.*|UhlWIygH.*|OtNoJxHG.*|cUTkFVIV.*|mTgFIHjr.*|jQkoIDtE.*|PPMKxRXl.*|AwMfwVkQ.*|CQyMrTQJ.*|BzrqxVSi.*|nTpcWuhF.*|PertdywG.*|ZZDgCtXN.*|WWdDPyyE.*|uVtNQsKk.*|BdeCHvPZ.*|wshRnFlH.*|aOUIitIp.*|RxZeCdXT.*|CFZMslCj.*|AVBZRDxl.*|IzIGCnhw.*|ythYuWiz.*|oztXVXhl.*|VbLkwqQx.*|qvaUgyVC.*|VawUjPWC.*|ecloYJuj.*|boCLTdSU.*|uPrKeAZx.*|hrMWLWBq.*|JOnUNHRM.*|rYnujkPq.*|dDEdZhIj.*|DRrfvugG.*|yEGfDxVV.*|YMYdJWuP.*|PHUQZNWM.*|AmKNrLis.*|zTxndVfn.*|FPsHoJnc.*|EIulZTua.*|KlAPhdzg.*|ScHJJCLt.*|NtTfMzME.*|eMCwuFdo.*|SEpJVJbR.*|cdhXZeCx.*|sAVtBwRh.*|kVFEVcMI.*|jzJrxraA.*|tGLHTell.*|NNWoeSaw.*|DcOKSetX.*|UXZAJyka.*|THpMphDP.*|rizheevl.*|kDCBRidd.*|pCZZRqyu.*|pSygkitl.*|SwZGkAaW.*|wILOrfNX.*|QkwVOerj.*|kHOMxPDr.*|EwOVycJv.*|AJvtzQFS.*|yEOjKYYB.*|LizIINLL.*|JBRSsfcG.*|YPiUqqNl.*|IsdEbvee.*|MjEpGcBm.*|OxXZVgEQ.*|xClXGuxa.*|UzRCGFEb.*|buJbvfvA.*|IPZQxRet.*|oFYShsMc.*|oBHffuHO.*|bzzKrcBR.*|KAjzrGCl.*|IPUsAVls.*|OGMUMbIU.*|gyDccHuR.*|bjlalnDd.*|ZLWjeMna.*|fdsuIlxQ.*|dVXtiomV.*|XxedTjNg.*|XWMHlNoA.*|nnyqArQX.*|opfkWGhb.*|wYtnhdYb.*))",
|
||||||
// A long case insensitive alternation where each entry starts with ".*".
|
// A long case insensitive alternation where each entry starts with ".*".
|
||||||
"(?i:(.*zQPbMkNO|.*NNSPdvMi|.*iWuuSoAl|.*qbvKMimS|.*IecrXtPa|.*seTckYqt|.*NxnyHkgB|.*fIDlOgKb|.*UhlWIygH|.*OtNoJxHG|.*cUTkFVIV|.*mTgFIHjr|.*jQkoIDtE|.*PPMKxRXl|.*AwMfwVkQ|.*CQyMrTQJ|.*BzrqxVSi|.*nTpcWuhF|.*PertdywG|.*ZZDgCtXN|.*WWdDPyyE|.*uVtNQsKk|.*BdeCHvPZ|.*wshRnFlH|.*aOUIitIp|.*RxZeCdXT|.*CFZMslCj|.*AVBZRDxl|.*IzIGCnhw|.*ythYuWiz|.*oztXVXhl|.*VbLkwqQx|.*qvaUgyVC|.*VawUjPWC|.*ecloYJuj|.*boCLTdSU|.*uPrKeAZx|.*hrMWLWBq|.*JOnUNHRM|.*rYnujkPq|.*dDEdZhIj|.*DRrfvugG|.*yEGfDxVV|.*YMYdJWuP|.*PHUQZNWM|.*AmKNrLis|.*zTxndVfn|.*FPsHoJnc|.*EIulZTua|.*KlAPhdzg|.*ScHJJCLt|.*NtTfMzME|.*eMCwuFdo|.*SEpJVJbR|.*cdhXZeCx|.*sAVtBwRh|.*kVFEVcMI|.*jzJrxraA|.*tGLHTell|.*NNWoeSaw|.*DcOKSetX|.*UXZAJyka|.*THpMphDP|.*rizheevl|.*kDCBRidd|.*pCZZRqyu|.*pSygkitl|.*SwZGkAaW|.*wILOrfNX|.*QkwVOerj|.*kHOMxPDr|.*EwOVycJv|.*AJvtzQFS|.*yEOjKYYB|.*LizIINLL|.*JBRSsfcG|.*YPiUqqNl|.*IsdEbvee|.*MjEpGcBm|.*OxXZVgEQ|.*xClXGuxa|.*UzRCGFEb|.*buJbvfvA|.*IPZQxRet|.*oFYShsMc|.*oBHffuHO|.*bzzKrcBR|.*KAjzrGCl|.*IPUsAVls|.*OGMUMbIU|.*gyDccHuR|.*bjlalnDd|.*ZLWjeMna|.*fdsuIlxQ|.*dVXtiomV|.*XxedTjNg|.*XWMHlNoA|.*nnyqArQX|.*opfkWGhb|.*wYtnhdYb))",
|
"(?i:(.*zQPbMkNO|.*NNSPdvMi|.*iWuuSoAl|.*qbvKMimS|.*IecrXtPa|.*seTckYqt|.*NxnyHkgB|.*fIDlOgKb|.*UhlWIygH|.*OtNoJxHG|.*cUTkFVIV|.*mTgFIHjr|.*jQkoIDtE|.*PPMKxRXl|.*AwMfwVkQ|.*CQyMrTQJ|.*BzrqxVSi|.*nTpcWuhF|.*PertdywG|.*ZZDgCtXN|.*WWdDPyyE|.*uVtNQsKk|.*BdeCHvPZ|.*wshRnFlH|.*aOUIitIp|.*RxZeCdXT|.*CFZMslCj|.*AVBZRDxl|.*IzIGCnhw|.*ythYuWiz|.*oztXVXhl|.*VbLkwqQx|.*qvaUgyVC|.*VawUjPWC|.*ecloYJuj|.*boCLTdSU|.*uPrKeAZx|.*hrMWLWBq|.*JOnUNHRM|.*rYnujkPq|.*dDEdZhIj|.*DRrfvugG|.*yEGfDxVV|.*YMYdJWuP|.*PHUQZNWM|.*AmKNrLis|.*zTxndVfn|.*FPsHoJnc|.*EIulZTua|.*KlAPhdzg|.*ScHJJCLt|.*NtTfMzME|.*eMCwuFdo|.*SEpJVJbR|.*cdhXZeCx|.*sAVtBwRh|.*kVFEVcMI|.*jzJrxraA|.*tGLHTell|.*NNWoeSaw|.*DcOKSetX|.*UXZAJyka|.*THpMphDP|.*rizheevl|.*kDCBRidd|.*pCZZRqyu|.*pSygkitl|.*SwZGkAaW|.*wILOrfNX|.*QkwVOerj|.*kHOMxPDr|.*EwOVycJv|.*AJvtzQFS|.*yEOjKYYB|.*LizIINLL|.*JBRSsfcG|.*YPiUqqNl|.*IsdEbvee|.*MjEpGcBm|.*OxXZVgEQ|.*xClXGuxa|.*UzRCGFEb|.*buJbvfvA|.*IPZQxRet|.*oFYShsMc|.*oBHffuHO|.*bzzKrcBR|.*KAjzrGCl|.*IPUsAVls|.*OGMUMbIU|.*gyDccHuR|.*bjlalnDd|.*ZLWjeMna|.*fdsuIlxQ|.*dVXtiomV|.*XxedTjNg|.*XWMHlNoA|.*nnyqArQX|.*opfkWGhb|.*wYtnhdYb))",
|
||||||
|
// Quest ".?".
|
||||||
|
"fo.?",
|
||||||
|
"foo.?",
|
||||||
|
"f.?o",
|
||||||
|
".*foo.?",
|
||||||
|
".?foo.+",
|
||||||
|
"foo.?|bar",
|
||||||
}
|
}
|
||||||
values = []string{
|
values = []string{
|
||||||
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
||||||
|
@ -418,6 +425,13 @@ func TestStringMatcherFromRegexp(t *testing.T) {
|
||||||
{"foo.+.+", nil},
|
{"foo.+.+", nil},
|
||||||
{".*.*foo", nil},
|
{".*.*foo", nil},
|
||||||
{".+.+foo", nil},
|
{".+.+foo", nil},
|
||||||
|
{"aaa.?.?", nil},
|
||||||
|
{"aaa.?.*", nil},
|
||||||
|
// Regexps with ".?".
|
||||||
|
{"ext.?|xfs", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
|
||||||
|
{"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
|
||||||
|
{"foo.?", &literalPrefixStringMatcher{prefix: "foo", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}},
|
||||||
|
{"f.?o", nil},
|
||||||
} {
|
} {
|
||||||
c := c
|
c := c
|
||||||
t.Run(c.pattern, func(t *testing.T) {
|
t.Run(c.pattern, func(t *testing.T) {
|
||||||
|
@ -588,6 +602,91 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStringMatcherFromRegexp_Quest(t *testing.T) {
|
||||||
|
for _, c := range []struct {
|
||||||
|
pattern string
|
||||||
|
expectedZeroOrOneMatchers int
|
||||||
|
expectedMatches []string
|
||||||
|
expectedNotMatches []string
|
||||||
|
}{
|
||||||
|
// Not match newline.
|
||||||
|
{
|
||||||
|
pattern: "test.?",
|
||||||
|
expectedZeroOrOneMatchers: 1,
|
||||||
|
expectedMatches: []string{"test", "test!"},
|
||||||
|
expectedNotMatches: []string{"test\n", "tes", "test!!"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: ".?test",
|
||||||
|
expectedZeroOrOneMatchers: 1,
|
||||||
|
expectedMatches: []string{"test", "!test"},
|
||||||
|
expectedNotMatches: []string{"\ntest", "tes", "test!"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "(aaa.?|bbb.?)",
|
||||||
|
expectedZeroOrOneMatchers: 2,
|
||||||
|
expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX"},
|
||||||
|
expectedNotMatches: []string{"aa", "aaaXX", "aaa\n", "bb", "bbbXX", "bbb\n"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: ".*aaa.?",
|
||||||
|
expectedZeroOrOneMatchers: 1,
|
||||||
|
expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX"},
|
||||||
|
expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX", "XXXaaa\n"},
|
||||||
|
},
|
||||||
|
|
||||||
|
// Match newline.
|
||||||
|
{
|
||||||
|
pattern: "(?s)test.?",
|
||||||
|
expectedZeroOrOneMatchers: 1,
|
||||||
|
expectedMatches: []string{"test", "test!", "test\n"},
|
||||||
|
expectedNotMatches: []string{"tes", "test!!", "test\n\n"},
|
||||||
|
},
|
||||||
|
|
||||||
|
// Mixed flags (a part matches newline another doesn't).
|
||||||
|
{
|
||||||
|
pattern: "(aaa.?|((?s).?bbb.+))",
|
||||||
|
expectedZeroOrOneMatchers: 2,
|
||||||
|
expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX"},
|
||||||
|
expectedNotMatches: []string{"aa", "aaa\n", "Xbbb", "\nbbb"},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(c.pattern, func(t *testing.T) {
|
||||||
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
matcher := stringMatcherFromRegexp(parsed)
|
||||||
|
require.NotNil(t, matcher)
|
||||||
|
|
||||||
|
re := regexp.MustCompile("^" + c.pattern + "$")
|
||||||
|
|
||||||
|
// Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher.
|
||||||
|
numZeroOrOneMatchers := 0
|
||||||
|
visitStringMatcher(matcher, func(matcher StringMatcher) {
|
||||||
|
if _, ok := matcher.(*zeroOrOneCharacterStringMatcher); ok {
|
||||||
|
numZeroOrOneMatchers++
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
require.Equal(t, c.expectedZeroOrOneMatchers, numZeroOrOneMatchers)
|
||||||
|
|
||||||
|
for _, value := range c.expectedMatches {
|
||||||
|
assert.Truef(t, matcher.Matches(value), "Value: %s", value)
|
||||||
|
|
||||||
|
// Ensure the golang regexp engine would return the same.
|
||||||
|
assert.Truef(t, re.MatchString(value), "Value: %s", value)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, value := range c.expectedNotMatches {
|
||||||
|
assert.Falsef(t, matcher.Matches(value), "Value: %s", value)
|
||||||
|
|
||||||
|
// Ensure the golang regexp engine would return the same.
|
||||||
|
assert.Falsef(t, re.MatchString(value), "Value: %s", value)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func randString(randGenerator *rand.Rand, length int) string {
|
func randString(randGenerator *rand.Rand, length int) string {
|
||||||
b := make([]rune, length)
|
b := make([]rune, length)
|
||||||
for i := range b {
|
for i := range b {
|
||||||
|
@ -1106,6 +1205,22 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestZeroOrOneCharacterStringMatcher(t *testing.T) {
|
||||||
|
matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
|
||||||
|
assert.True(t, matcher.Matches(""))
|
||||||
|
assert.True(t, matcher.Matches("x"))
|
||||||
|
assert.True(t, matcher.Matches("\n"))
|
||||||
|
assert.False(t, matcher.Matches("xx"))
|
||||||
|
assert.False(t, matcher.Matches("\n\n"))
|
||||||
|
|
||||||
|
matcher = &zeroOrOneCharacterStringMatcher{matchNL: false}
|
||||||
|
assert.True(t, matcher.Matches(""))
|
||||||
|
assert.True(t, matcher.Matches("x"))
|
||||||
|
assert.False(t, matcher.Matches("\n"))
|
||||||
|
assert.False(t, matcher.Matches("xx"))
|
||||||
|
assert.False(t, matcher.Matches("\n\n"))
|
||||||
|
}
|
||||||
|
|
||||||
func TestLiteralPrefixStringMatcher(t *testing.T) {
|
func TestLiteralPrefixStringMatcher(t *testing.T) {
|
||||||
m := &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &emptyStringMatcher{}}
|
m := &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &emptyStringMatcher{}}
|
||||||
assert.True(t, m.Matches("mar"))
|
assert.True(t, m.Matches("mar"))
|
||||||
|
|
Loading…
Reference in a new issue