mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 13:44:05 -08:00
Merge pull request #14173 from pracucci/fastregexmatcher-optimize-contains
Improve contains check done by FastRegexMatcher
This commit is contained in:
commit
6fb738af51
|
@ -42,7 +42,7 @@ type FastRegexMatcher struct {
|
|||
stringMatcher StringMatcher
|
||||
prefix string
|
||||
suffix string
|
||||
contains string
|
||||
contains []string
|
||||
|
||||
// matchString is the "compiled" function to run by MatchString().
|
||||
matchString func(string) bool
|
||||
|
@ -87,7 +87,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
|||
// compileMatchStringFunction returns the function to run by MatchString().
|
||||
func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
|
||||
// If the only optimization available is the string matcher, then we can just run it.
|
||||
if len(m.setMatches) == 0 && m.prefix == "" && m.suffix == "" && m.contains == "" && m.stringMatcher != nil {
|
||||
if len(m.setMatches) == 0 && m.prefix == "" && m.suffix == "" && len(m.contains) == 0 && m.stringMatcher != nil {
|
||||
return m.stringMatcher.Matches
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
|
|||
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
|
||||
return false
|
||||
}
|
||||
if m.contains != "" && !strings.Contains(s, m.contains) {
|
||||
if len(m.contains) > 0 && !containsInOrder(s, m.contains) {
|
||||
return false
|
||||
}
|
||||
if m.stringMatcher != nil {
|
||||
|
@ -119,7 +119,7 @@ func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
|
|||
// IsOptimized returns true if any fast-path optimization is applied to the
|
||||
// regex matcher.
|
||||
func (m *FastRegexMatcher) IsOptimized() bool {
|
||||
return len(m.setMatches) > 0 || m.stringMatcher != nil || m.prefix != "" || m.suffix != "" || m.contains != ""
|
||||
return len(m.setMatches) > 0 || m.stringMatcher != nil || m.prefix != "" || m.suffix != "" || len(m.contains) > 0
|
||||
}
|
||||
|
||||
// findSetMatches extracts equality matches from a regexp.
|
||||
|
@ -361,8 +361,9 @@ func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
|
|||
|
||||
// optimizeConcatRegex returns literal prefix, suffix and contained text that can be safely
|
||||
// checked against the label value before running the regexp matcher.
|
||||
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
|
||||
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []string) {
|
||||
sub := r.Sub
|
||||
clearCapture(sub...)
|
||||
|
||||
// We can safely remove begin and end text matchers respectively
|
||||
// at the beginning and end of the regexp.
|
||||
|
@ -387,13 +388,11 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
|
|||
suffix = string(sub[last].Rune)
|
||||
}
|
||||
|
||||
// If contains any literal which is not a prefix/suffix, we keep the
|
||||
// 1st one. We do not keep the whole list of literals to simplify the
|
||||
// fast path.
|
||||
// If contains any literal which is not a prefix/suffix, we keep track of
|
||||
// all the ones which are case-sensitive.
|
||||
for i := 1; i < len(sub)-1; i++ {
|
||||
if sub[i].Op == syntax.OpLiteral && (sub[i].Flags&syntax.FoldCase) == 0 {
|
||||
contains = string(sub[i].Rune)
|
||||
break
|
||||
contains = append(contains, string(sub[i].Rune))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -940,3 +939,27 @@ func hasPrefixCaseInsensitive(s, prefix string) bool {
|
|||
func hasSuffixCaseInsensitive(s, suffix string) bool {
|
||||
return len(s) >= len(suffix) && strings.EqualFold(s[len(s)-len(suffix):], suffix)
|
||||
}
|
||||
|
||||
// containsInOrder reports whether s contains every string listed in contains,
// in the given order and without overlapping: each substring is searched for
// only in the part of s that follows the previous match.
//
// An empty (or nil) contains list matches any s.
func containsInOrder(s string, contains []string) bool {
	// Optimization for the case we only have to look for 1 substring.
	if len(contains) == 1 {
		return strings.Contains(s, contains[0])
	}

	return containsInOrderMulti(s, contains)
}

// containsInOrderMulti is the general form of containsInOrder: it walks s from
// left to right, looking for each entry of contains in turn, and returns false
// as soon as one of them cannot be found in the remaining part of s.
func containsInOrderMulti(s string, contains []string) bool {
	// offset is the index in s right after the last match found so far.
	offset := 0

	for _, substr := range contains {
		at := strings.Index(s[offset:], substr)
		if at == -1 {
			return false
		}

		// Resume the search right after this match, so that the following
		// substrings can neither precede nor overlap the ones already found.
		offset += at + len(substr)
	}

	return true
}
|
||||
|
|
|
@ -81,10 +81,15 @@ var (
|
|||
".*foo.?",
|
||||
".?foo.+",
|
||||
"foo.?|bar",
|
||||
// Concat of literals and wildcards.
|
||||
".*-.*-.*-.*-.*",
|
||||
"(.+)-(.+)-(.+)-(.+)-(.+)",
|
||||
"((.*))(?i:f)((.*))o((.*))o((.*))",
|
||||
"((.*))f((.*))(?i:o)((.*))o((.*))",
|
||||
}
|
||||
values = []string{
|
||||
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
||||
"FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
|
||||
"FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
|
||||
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
|
||||
"foofoo0", "foofoo", "😀foo0",
|
||||
|
||||
|
@ -132,29 +137,29 @@ func TestOptimizeConcatRegex(t *testing.T) {
|
|||
regex string
|
||||
prefix string
|
||||
suffix string
|
||||
contains string
|
||||
contains []string
|
||||
}{
|
||||
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
|
||||
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
|
||||
{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
|
||||
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
|
||||
{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
|
||||
{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
|
||||
{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
|
||||
{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
|
||||
{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
|
||||
{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
|
||||
{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: ""},
|
||||
{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: ""},
|
||||
{regex: "(?i:abc).*", prefix: "", suffix: "", contains: ""},
|
||||
{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: ""},
|
||||
{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: "def"},
|
||||
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: "abc"},
|
||||
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: "abc"},
|
||||
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: "bc"},
|
||||
{regex: "^5..$", prefix: "5", suffix: "", contains: ""},
|
||||
{regex: "^release.*", prefix: "release", suffix: "", contains: ""},
|
||||
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: "laio"},
|
||||
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: nil},
|
||||
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: nil},
|
||||
{regex: "foo.*", prefix: "foo", suffix: "", contains: nil},
|
||||
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: []string{"hello"}},
|
||||
{regex: ".*foo", prefix: "", suffix: "foo", contains: nil},
|
||||
{regex: "^.*foo$", prefix: "", suffix: "foo", contains: nil},
|
||||
{regex: ".*foo.*", prefix: "", suffix: "", contains: []string{"foo"}},
|
||||
{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: []string{"foo", "bar"}},
|
||||
{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*[abc].*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: "(?i:abc).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: []string{"def"}},
|
||||
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: []string{"abc"}},
|
||||
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: []string{"abc"}},
|
||||
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: []string{"bc"}},
|
||||
{regex: "^5..$", prefix: "5", suffix: "", contains: nil},
|
||||
{regex: "^release.*", prefix: "release", suffix: "", contains: nil},
|
||||
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
|
@ -1089,6 +1094,15 @@ func TestHasSuffixCaseInsensitive(t *testing.T) {
|
|||
require.False(t, hasSuffixCaseInsensitive("marco", "abcdefghi"))
|
||||
}
|
||||
|
||||
// TestContainsInOrder checks containsInOrder against a fixed input string:
// it must accept substrings that occur one after the other in the listed
// order, and reject a missing substring, a substring that only occurs
// overlapping the previous match, and substrings listed in reverse order.
func TestContainsInOrder(t *testing.T) {
|
||||
// Substrings found left to right, each after the end of the previous one.
require.True(t, containsInOrder("abcdefghilmno", []string{"ab", "cd", "no"}))
|
||||
require.True(t, containsInOrder("abcdefghilmno", []string{"def", "hil"}))
|
||||

||||
// "ac" never occurs as a contiguous substring.
require.False(t, containsInOrder("abcdefghilmno", []string{"ac"}))
|
||||
// "de" only occurs overlapping the "cd" match, so it is not found after it.
require.False(t, containsInOrder("abcdefghilmno", []string{"ab", "cd", "de"}))
|
||||
// Both substrings occur, but not in the requested order.
require.False(t, containsInOrder("abcdefghilmno", []string{"cd", "ab"}))
|
||||
}
|
||||
|
||||
func getTestNameFromRegexp(re string) string {
|
||||
if len(re) > 32 {
|
||||
return re[:32]
|
||||
|
|
Loading…
Reference in a new issue