Optimise labels regex matchers containing a literal within the pattern (#7503)

* Added labels matchers regex fast path for literals within the regex Signed-off-by: Marco Pracucci <marco@pracucci.com>
2025-03-05 20:59:13 -08:00 · 2020-07-07 10:38:04 +02:00 · 2020-07-07 10:38:04 +02:00 · 2f6bf7de4c
parent 9875afc491
commit 2f6bf7de4c
3 changed files with 44 additions and 15 deletions
--- a/pkg/labels/regexp.go
+++ b/pkg/labels/regexp.go
@ -23,6 +23,7 @@ type FastRegexMatcher struct {
 	re       *regexp.Regexp
 	prefix   string
 	suffix   string
+	contains string
 }

 func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
@ -41,7 +42,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
 	}

 	if parsed.Op == syntax.OpConcat {
-		m.prefix, m.suffix = optimizeConcatRegex(parsed)
+		m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
 	}

 	return m, nil
@ -54,6 +55,9 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
 	if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
 		return false
 	}
+	if m.contains != "" && !strings.Contains(s, m.contains) {
+		return false
+	}
 	return m.re.MatchString(s)
 }

@ -63,7 +67,7 @@ func (m *FastRegexMatcher) GetRegexString() string {

 // optimizeConcatRegex returns literal prefix/suffix text that can be safely
 // checked against the label value before running the regexp matcher.
-func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
+func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
 	sub := r.Sub

 	// We can safely remove begin and end text matchers respectively
@ -89,5 +93,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
 		suffix = string(sub[last].Rune)
 	}

+	// If contains any literal which is not a prefix/suffix, we keep the
+	// 1st one. We do not keep the whole list of literals to simplify the
+	// fast path.
+	for i := 1; i < len(sub)-1; i++ {
+		if sub[i].Op == syntax.OpLiteral {
+			contains = string(sub[i].Rune)
+			break
+		}
+	}
+
 	return
 }
--- a/pkg/labels/regexp_test.go
+++ b/pkg/labels/regexp_test.go
@ -42,6 +42,11 @@ func TestNewFastRegexMatcher(t *testing.T) {
 		{regex: ".*foo", value: "\nfoo", expected: false},
 		{regex: "foo.*", value: "foo\n", expected: false},
 		{regex: "foo\n.*", value: "foo\n", expected: true},
+		{regex: ".*foo.*", value: "foo", expected: true},
+		{regex: ".*foo.*", value: "foo bar", expected: true},
+		{regex: ".*foo.*", value: "hello foo world", expected: true},
+		{regex: ".*foo.*", value: "hello foo\n world", expected: false},
+		{regex: ".*foo\n.*", value: "hello foo\n world", expected: true},
 		{regex: ".*", value: "foo", expected: true},
 		{regex: "", value: "foo", expected: false},
 		{regex: "", value: "", expected: true},
@ -59,21 +64,27 @@ func TestOptimizeConcatRegex(t *testing.T) {
 		regex    string
 		prefix   string
 		suffix   string
+		contains string
 	}{
-		{regex: "foo(hello|bar)", prefix: "foo", suffix: ""},
-		{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"},
-		{regex: "foo.*", prefix: "foo", suffix: ""},
-		{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"},
-		{regex: ".*foo", prefix: "", suffix: "foo"},
-		{regex: "^.*foo$", prefix: "", suffix: "foo"},
+		{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
+		{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
+		{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
+		{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
+		{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
+		{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
+		{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
+		{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
+		{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
+		{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
 	}

 	for _, c := range cases {
 		parsed, err := syntax.Parse(c.regex, syntax.Perl)
 		testutil.Ok(t, err)

-		prefix, suffix := optimizeConcatRegex(parsed)
+		prefix, suffix, contains := optimizeConcatRegex(parsed)
 		testutil.Equals(t, c.prefix, prefix)
 		testutil.Equals(t, c.suffix, suffix)
+		testutil.Equals(t, c.contains, contains)
 	}
 }
--- a/tsdb/querier_bench_test.go
+++ b/tsdb/querier_bench_test.go
@ -93,12 +93,14 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
 	iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
 	i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$")
 	iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$")
+	iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$")
 	iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
 	i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
 	iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
 	iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "")
 	iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix)
 	iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
+	iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$")

 	cases := []struct {
 		name     string
@ -120,8 +122,10 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
 		{`n="1",i!="",j="foo"`, []*labels.Matcher{n1, iNotEmpty, jFoo}},
 		{`n="1",i=~".+",j="foo"`, []*labels.Matcher{n1, iPlus, jFoo}},
 		{`n="1",i=~"1.+",j="foo"`, []*labels.Matcher{n1, i1Plus, jFoo}},
+		{`n="1",i=~".*1.*",j="foo"`, []*labels.Matcher{n1, iStar1Star, jFoo}},
 		{`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}},
 		{`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}},
+		{`n="1",i=~".+",i!~".*2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNotStar2Star, jFoo}},
 	}

 	for _, c := range cases {