Optimise labels regex matchers containing a literal within the pattern (#7503)

* Added labels matchers regex fast path for literals within the regex

Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
Marco Pracucci 2020-07-07 10:38:04 +02:00 committed by GitHub
parent 9875afc491
commit 2f6bf7de4c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 44 additions and 15 deletions

View file

@ -23,6 +23,7 @@ type FastRegexMatcher struct {
re *regexp.Regexp
prefix string
suffix string
contains string
}
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
@ -41,7 +42,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
}
if parsed.Op == syntax.OpConcat {
m.prefix, m.suffix = optimizeConcatRegex(parsed)
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
}
return m, nil
@ -54,6 +55,9 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
return false
}
if m.contains != "" && !strings.Contains(s, m.contains) {
return false
}
return m.re.MatchString(s)
}
@ -63,7 +67,7 @@ func (m *FastRegexMatcher) GetRegexString() string {
// optimizeConcatRegex returns literal prefix/suffix/contains text that can be
// safely checked against the label value before running the regexp matcher.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
sub := r.Sub
// We can safely remove begin and end text matchers respectively
@ -89,5 +93,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
suffix = string(sub[last].Rune)
}
// If the regex contains any literal which is not a prefix/suffix, we keep
// only the first one. We do not keep the whole list of literals, in order
// to simplify the fast path.
for i := 1; i < len(sub)-1; i++ {
if sub[i].Op == syntax.OpLiteral {
contains = string(sub[i].Rune)
break
}
}
return
}

View file

@ -42,6 +42,11 @@ func TestNewFastRegexMatcher(t *testing.T) {
{regex: ".*foo", value: "\nfoo", expected: false},
{regex: "foo.*", value: "foo\n", expected: false},
{regex: "foo\n.*", value: "foo\n", expected: true},
{regex: ".*foo.*", value: "foo", expected: true},
{regex: ".*foo.*", value: "foo bar", expected: true},
{regex: ".*foo.*", value: "hello foo world", expected: true},
{regex: ".*foo.*", value: "hello foo\n world", expected: false},
{regex: ".*foo\n.*", value: "hello foo\n world", expected: true},
{regex: ".*", value: "foo", expected: true},
{regex: "", value: "foo", expected: false},
{regex: "", value: "", expected: true},
@ -59,21 +64,27 @@ func TestOptimizeConcatRegex(t *testing.T) {
regex string
prefix string
suffix string
contains string
}{
{regex: "foo(hello|bar)", prefix: "foo", suffix: ""},
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"},
{regex: "foo.*", prefix: "foo", suffix: ""},
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"},
{regex: ".*foo", prefix: "", suffix: "foo"},
{regex: "^.*foo$", prefix: "", suffix: "foo"},
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
}
for _, c := range cases {
parsed, err := syntax.Parse(c.regex, syntax.Perl)
testutil.Ok(t, err)
prefix, suffix := optimizeConcatRegex(parsed)
prefix, suffix, contains := optimizeConcatRegex(parsed)
testutil.Equals(t, c.prefix, prefix)
testutil.Equals(t, c.suffix, suffix)
testutil.Equals(t, c.contains, contains)
}
}

View file

@ -93,12 +93,14 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$")
iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$")
iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$")
iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "")
iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix)
iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$")
cases := []struct {
name string
@ -120,8 +122,10 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
{`n="1",i!="",j="foo"`, []*labels.Matcher{n1, iNotEmpty, jFoo}},
{`n="1",i=~".+",j="foo"`, []*labels.Matcher{n1, iPlus, jFoo}},
{`n="1",i=~"1.+",j="foo"`, []*labels.Matcher{n1, i1Plus, jFoo}},
{`n="1",i=~".*1.*",j="foo"`, []*labels.Matcher{n1, iStar1Star, jFoo}},
{`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}},
{`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}},
{`n="1",i=~".+",i!~".*2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNotStar2Star, jFoo}},
}
for _, c := range cases {