Optimized label regex matcher with literal prefix and/or suffix (#7453)

* Optimized label regex matcher with literal prefix and/or suffix

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Added license

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Added more test cases with newlines

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Restored deleted test

Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
Marco Pracucci 2020-06-26 11:49:09 +02:00 committed by GitHub
parent d78656c244
commit cef4dd6fff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 179 additions and 4 deletions

View file

@ -15,7 +15,6 @@ package labels
import (
"fmt"
"regexp"
)
// MatchType is an enum for label matching types.
@ -48,7 +47,7 @@ type Matcher struct {
Name string
Value string
re *regexp.Regexp
re *FastRegexMatcher
}
// NewMatcher returns a matcher object.
@ -59,7 +58,7 @@ func NewMatcher(t MatchType, n, v string) (*Matcher, error) {
Value: v,
}
if t == MatchRegexp || t == MatchNotRegexp {
re, err := regexp.Compile("^(?:" + v + ")$")
re, err := NewFastRegexMatcher(v)
if err != nil {
return nil, err
}
@ -116,5 +115,5 @@ func (m *Matcher) GetRegexString() string {
if m.re == nil {
return ""
}
return m.re.String()
return m.re.GetRegexString()
}

93
pkg/labels/regexp.go Normal file
View file

@ -0,0 +1,93 @@
// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package labels
import (
"regexp"
"regexp/syntax"
"strings"
)
// FastRegexMatcher pairs a compiled regular expression with optional literal
// prefix and suffix strings. MatchString consults the non-empty literals with
// plain string comparisons before falling back to the regexp.
type FastRegexMatcher struct {
	// re is the compiled regexp that gives the final match verdict.
	re *regexp.Regexp

	// prefix and suffix are literal strings a matching value must start and
	// end with; an empty string means the corresponding check is skipped.
	prefix, suffix string
}
// NewFastRegexMatcher builds a FastRegexMatcher for the pattern v.
//
// The pattern is compiled wrapped as "^(?:" + v + ")$", so the resulting
// regexp only matches when the whole input matches v. The raw pattern is also
// parsed with Perl syntax; when its top-level operation is a concatenation,
// optimizeConcatRegex is used to derive the literal prefix and suffix stored
// on the matcher.
//
// An error is returned if either the compilation or the parse fails.
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
	compiled, err := regexp.Compile("^(?:" + v + ")$")
	if err != nil {
		return nil, err
	}

	tree, err := syntax.Parse(v, syntax.Perl)
	if err != nil {
		return nil, err
	}

	// Only a top-level concatenation can expose a literal head or tail.
	var head, tail string
	if tree.Op == syntax.OpConcat {
		head, tail = optimizeConcatRegex(tree)
	}

	return &FastRegexMatcher{
		re:     compiled,
		prefix: head,
		suffix: tail,
	}, nil
}
// MatchString reports whether s matches the matcher's regexp.
//
// When a literal prefix or suffix is set, s is first checked against it with
// a plain string comparison; the regexp is only run if those checks pass.
func (m *FastRegexMatcher) MatchString(s string) bool {
	switch {
	case m.prefix != "" && !strings.HasPrefix(s, m.prefix):
		// s does not start with the required literal.
		return false
	case m.suffix != "" && !strings.HasSuffix(s, m.suffix):
		// s does not end with the required literal.
		return false
	default:
		return m.re.MatchString(s)
	}
}
// GetRegexString returns the source text of the compiled regexp held by the
// matcher. For matchers built by NewFastRegexMatcher this is the anchored
// form "^(?:" + v + ")$" rather than the raw input v.
func (m *FastRegexMatcher) GetRegexString() string {
return m.re.String()
}
// optimizeConcatRegex returns the literal prefix and suffix of a parsed
// concatenation regexp that can be safely checked against the label value
// before running the regexp matcher. Either result is empty when no usable
// literal sits at that end of the concatenation.
//
// Only r.Sub is inspected, so r is expected to be an OpConcat node.
//
// Literals carrying the case-insensitive flag (for example the "foo" in
// "(?i)foo.*") are never returned: the caller compares prefix and suffix
// with case-sensitive string functions, which would wrongly reject values
// that differ from the literal only in case.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
	sub := r.Sub

	// We can safely remove begin and end text matchers respectively
	// at the beginning and end of the regexp.
	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
		sub = sub[1:]
	}
	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
		sub = sub[:len(sub)-1]
	}

	if len(sub) == 0 {
		return "", ""
	}

	// Given Prometheus regex matchers are always anchored to the begin/end
	// of the text, if the first/last operations are case-sensitive literals,
	// we can safely treat them as prefix/suffix.
	if first := sub[0]; first.Op == syntax.OpLiteral && first.Flags&syntax.FoldCase == 0 {
		prefix = string(first.Rune)
	}
	if last := sub[len(sub)-1]; last.Op == syntax.OpLiteral && last.Flags&syntax.FoldCase == 0 {
		suffix = string(last.Rune)
	}

	return prefix, suffix
}

79
pkg/labels/regexp_test.go Normal file
View file

@ -0,0 +1,79 @@
// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package labels
import (
"regexp/syntax"
"testing"
"github.com/prometheus/prometheus/util/testutil"
)
// TestNewFastRegexMatcher builds a matcher for each pattern in the table and
// checks that MatchString returns the expected verdict for the paired input.
// The table covers alternations, literal prefixes and suffixes, explicit
// anchors, inputs containing newlines, and the empty pattern.
func TestNewFastRegexMatcher(t *testing.T) {
	type testCase struct {
		pattern string
		input   string
		want    bool
	}

	table := []testCase{
		{pattern: "(foo|bar)", input: "foo", want: true},
		{pattern: "(foo|bar)", input: "foo bar", want: false},
		{pattern: "(foo|bar)", input: "bar", want: true},
		{pattern: "foo.*", input: "foo bar", want: true},
		{pattern: "foo.*", input: "bar foo", want: false},
		{pattern: ".*foo", input: "foo bar", want: false},
		{pattern: ".*foo", input: "bar foo", want: true},
		{pattern: ".*foo", input: "foo", want: true},
		{pattern: "^.*foo$", input: "foo", want: true},
		{pattern: "^.+foo$", input: "foo", want: false},
		{pattern: "^.+foo$", input: "bfoo", want: true},
		{pattern: ".*", input: "\n", want: false},
		{pattern: ".*", input: "\nfoo", want: false},
		{pattern: ".*foo", input: "\nfoo", want: false},
		{pattern: "foo.*", input: "foo\n", want: false},
		{pattern: "foo\n.*", input: "foo\n", want: true},
		{pattern: ".*", input: "foo", want: true},
		{pattern: "", input: "foo", want: false},
		{pattern: "", input: "", want: true},
	}

	for i := range table {
		tc := table[i]

		matcher, err := NewFastRegexMatcher(tc.pattern)
		testutil.Ok(t, err)

		got := matcher.MatchString(tc.input)
		testutil.Equals(t, tc.want, got)
	}
}
// TestOptimizeConcatRegex parses each pattern with Perl syntax and checks the
// literal prefix and suffix that optimizeConcatRegex extracts from it.
func TestOptimizeConcatRegex(t *testing.T) {
	type testCase struct {
		pattern    string
		wantPrefix string
		wantSuffix string
	}

	table := []testCase{
		{pattern: "foo(hello|bar)", wantPrefix: "foo", wantSuffix: ""},
		{pattern: "foo(hello|bar)world", wantPrefix: "foo", wantSuffix: "world"},
		{pattern: "foo.*", wantPrefix: "foo", wantSuffix: ""},
		{pattern: "foo.*hello.*bar", wantPrefix: "foo", wantSuffix: "bar"},
		{pattern: ".*foo", wantPrefix: "", wantSuffix: "foo"},
		{pattern: "^.*foo$", wantPrefix: "", wantSuffix: "foo"},
	}

	for i := range table {
		tc := table[i]

		tree, err := syntax.Parse(tc.pattern, syntax.Perl)
		testutil.Ok(t, err)

		gotPrefix, gotSuffix := optimizeConcatRegex(tree)
		testutil.Equals(t, tc.wantPrefix, gotPrefix)
		testutil.Equals(t, tc.wantSuffix, gotSuffix)
	}
}

View file

@ -91,6 +91,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo")
iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$")
iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$")
iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
@ -107,6 +109,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
{`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}},
{`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}},
{`i=~".*"`, []*labels.Matcher{iStar}},
{`i=~"1.*"`, []*labels.Matcher{i1Star}},
{`i=~".*1"`, []*labels.Matcher{iStar1}},
{`i=~".+"`, []*labels.Matcher{iPlus}},
{`i=~""`, []*labels.Matcher{iEmptyRe}},
{`i!=""`, []*labels.Matcher{iNotEmpty}},