mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 14:27:27 -08:00
Optimized label regex matcher with literal prefix and/or suffix (#7453)
* Optimized label regex matcher with literal prefix and/or suffix Signed-off-by: Marco Pracucci <marco@pracucci.com> * Added license Signed-off-by: Marco Pracucci <marco@pracucci.com> * Added more test cases with newlines Signed-off-by: Marco Pracucci <marco@pracucci.com> * Restored deleted test Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
parent
d78656c244
commit
cef4dd6fff
|
@ -15,7 +15,6 @@ package labels
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// MatchType is an enum for label matching types.
|
// MatchType is an enum for label matching types.
|
||||||
|
@ -48,7 +47,7 @@ type Matcher struct {
|
||||||
Name string
|
Name string
|
||||||
Value string
|
Value string
|
||||||
|
|
||||||
re *regexp.Regexp
|
re *FastRegexMatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMatcher returns a matcher object.
|
// NewMatcher returns a matcher object.
|
||||||
|
@ -59,7 +58,7 @@ func NewMatcher(t MatchType, n, v string) (*Matcher, error) {
|
||||||
Value: v,
|
Value: v,
|
||||||
}
|
}
|
||||||
if t == MatchRegexp || t == MatchNotRegexp {
|
if t == MatchRegexp || t == MatchNotRegexp {
|
||||||
re, err := regexp.Compile("^(?:" + v + ")$")
|
re, err := NewFastRegexMatcher(v)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -116,5 +115,5 @@ func (m *Matcher) GetRegexString() string {
|
||||||
if m.re == nil {
|
if m.re == nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
return m.re.String()
|
return m.re.GetRegexString()
|
||||||
}
|
}
|
||||||
|
|
93
pkg/labels/regexp.go
Normal file
93
pkg/labels/regexp.go
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
// Copyright 2020 The Prometheus Authors
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package labels
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"regexp/syntax"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FastRegexMatcher pairs a compiled regular expression with an optional
// literal prefix and suffix. MatchString checks the non-empty literals with
// plain string comparisons before it runs the regular expression.
type FastRegexMatcher struct {
	// re is the compiled, fully anchored regular expression.
	re *regexp.Regexp

	// prefix and suffix are the literals extracted from the expression by
	// optimizeConcatRegex. An empty string means there is no literal to check.
	prefix, suffix string
}
|
||||||
|
|
||||||
|
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
||||||
|
re, err := regexp.Compile("^(?:" + v + ")$")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
parsed, err := syntax.Parse(v, syntax.Perl)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
m := &FastRegexMatcher{
|
||||||
|
re: re,
|
||||||
|
}
|
||||||
|
|
||||||
|
if parsed.Op == syntax.OpConcat {
|
||||||
|
m.prefix, m.suffix = optimizeConcatRegex(parsed)
|
||||||
|
}
|
||||||
|
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *FastRegexMatcher) MatchString(s string) bool {
|
||||||
|
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return m.re.MatchString(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *FastRegexMatcher) GetRegexString() string {
|
||||||
|
return m.re.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// optimizeConcatRegex returns the literal prefix and/or suffix text that can
// be safely checked against the label value before running the regexp
// matcher.
//
// r is expected to be a parsed concatenation (syntax.OpConcat); for any other
// operation both results are empty. A result is also empty when the
// corresponding end of the concatenation is not a literal, or when it is a
// literal that has to be matched case-insensitively.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
	// Only the sub-expressions of a concatenation are matched one after the
	// other. For other operations (e.g. "a*") the first sub-expression is not
	// a mandatory prefix of the input, so there is nothing to extract.
	if r.Op != syntax.OpConcat {
		return "", ""
	}

	sub := r.Sub

	// We can safely remove begin and end text matchers respectively
	// at the beginning and end of the regexp.
	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
		sub = sub[1:]
	}
	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
		sub = sub[:len(sub)-1]
	}

	if len(sub) == 0 {
		return "", ""
	}

	// Given Prometheus regex matchers are always anchored to the begin/end
	// of the text, if the first/last operations are literals, we can safely
	// treat them as prefix/suffix.
	//
	// Literals parsed under the case-insensitive flag (e.g. "(?i)foo.*") are
	// skipped: the prefix/suffix check is a case-sensitive string comparison
	// and would reject values that the regexp itself accepts.
	if first := sub[0]; first.Op == syntax.OpLiteral && first.Flags&syntax.FoldCase == 0 {
		prefix = string(first.Rune)
	}
	if last := sub[len(sub)-1]; last.Op == syntax.OpLiteral && last.Flags&syntax.FoldCase == 0 {
		suffix = string(last.Rune)
	}

	return prefix, suffix
}
|
79
pkg/labels/regexp_test.go
Normal file
79
pkg/labels/regexp_test.go
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
// Copyright 2020 The Prometheus Authors
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package labels
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp/syntax"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/prometheus/prometheus/util/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewFastRegexMatcher(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
regex string
|
||||||
|
value string
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{regex: "(foo|bar)", value: "foo", expected: true},
|
||||||
|
{regex: "(foo|bar)", value: "foo bar", expected: false},
|
||||||
|
{regex: "(foo|bar)", value: "bar", expected: true},
|
||||||
|
{regex: "foo.*", value: "foo bar", expected: true},
|
||||||
|
{regex: "foo.*", value: "bar foo", expected: false},
|
||||||
|
{regex: ".*foo", value: "foo bar", expected: false},
|
||||||
|
{regex: ".*foo", value: "bar foo", expected: true},
|
||||||
|
{regex: ".*foo", value: "foo", expected: true},
|
||||||
|
{regex: "^.*foo$", value: "foo", expected: true},
|
||||||
|
{regex: "^.+foo$", value: "foo", expected: false},
|
||||||
|
{regex: "^.+foo$", value: "bfoo", expected: true},
|
||||||
|
{regex: ".*", value: "\n", expected: false},
|
||||||
|
{regex: ".*", value: "\nfoo", expected: false},
|
||||||
|
{regex: ".*foo", value: "\nfoo", expected: false},
|
||||||
|
{regex: "foo.*", value: "foo\n", expected: false},
|
||||||
|
{regex: "foo\n.*", value: "foo\n", expected: true},
|
||||||
|
{regex: ".*", value: "foo", expected: true},
|
||||||
|
{regex: "", value: "foo", expected: false},
|
||||||
|
{regex: "", value: "", expected: true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
m, err := NewFastRegexMatcher(c.regex)
|
||||||
|
testutil.Ok(t, err)
|
||||||
|
testutil.Equals(t, c.expected, m.MatchString(c.value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOptimizeConcatRegex(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
regex string
|
||||||
|
prefix string
|
||||||
|
suffix string
|
||||||
|
}{
|
||||||
|
{regex: "foo(hello|bar)", prefix: "foo", suffix: ""},
|
||||||
|
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"},
|
||||||
|
{regex: "foo.*", prefix: "foo", suffix: ""},
|
||||||
|
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"},
|
||||||
|
{regex: ".*foo", prefix: "", suffix: "foo"},
|
||||||
|
{regex: "^.*foo$", prefix: "", suffix: "foo"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
parsed, err := syntax.Parse(c.regex, syntax.Perl)
|
||||||
|
testutil.Ok(t, err)
|
||||||
|
|
||||||
|
prefix, suffix := optimizeConcatRegex(parsed)
|
||||||
|
testutil.Equals(t, c.prefix, prefix)
|
||||||
|
testutil.Equals(t, c.suffix, suffix)
|
||||||
|
}
|
||||||
|
}
|
|
@ -91,6 +91,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
|
||||||
jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo")
|
jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo")
|
||||||
|
|
||||||
iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
|
iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
|
||||||
|
i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$")
|
||||||
|
iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$")
|
||||||
iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
|
iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
|
||||||
i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
|
i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
|
||||||
iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
|
iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
|
||||||
|
@ -107,6 +109,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
|
||||||
{`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}},
|
{`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}},
|
||||||
{`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}},
|
{`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}},
|
||||||
{`i=~".*"`, []*labels.Matcher{iStar}},
|
{`i=~".*"`, []*labels.Matcher{iStar}},
|
||||||
|
{`i=~"1.*"`, []*labels.Matcher{i1Star}},
|
||||||
|
{`i=~".*1"`, []*labels.Matcher{iStar1}},
|
||||||
{`i=~".+"`, []*labels.Matcher{iPlus}},
|
{`i=~".+"`, []*labels.Matcher{iPlus}},
|
||||||
{`i=~""`, []*labels.Matcher{iEmptyRe}},
|
{`i=~""`, []*labels.Matcher{iEmptyRe}},
|
||||||
{`i!=""`, []*labels.Matcher{iNotEmpty}},
|
{`i!=""`, []*labels.Matcher{iNotEmpty}},
|
||||||
|
|
Loading…
Reference in a new issue