mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
implement optimization.
Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
This commit is contained in:
parent
a1f6c747ed
commit
74c5667b63
|
@ -250,41 +250,164 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo remove anchors ^foo$
|
|
||||||
// remove captures.
|
|
||||||
// .+ | .* | .*POD.* | test-.* | .*-test | test-.+ | .+-test
|
|
||||||
|
|
||||||
// regexp prefix fn ???? can we use this ?
|
|
||||||
|
|
||||||
// benchmark them
|
|
||||||
|
|
||||||
type StringMatcher interface {
|
type StringMatcher interface {
|
||||||
Matches(s string) bool
|
Matches(s string) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func contains(s, substr string) bool {
|
func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
||||||
pos := strings.Index(s, substr)
|
clearCapture(re)
|
||||||
if pos < 0 {
|
clearBeginEndText(re)
|
||||||
return false
|
switch re.Op {
|
||||||
|
case syntax.OpStar:
|
||||||
|
return anyStringMatcher{
|
||||||
|
allowEmpty: true,
|
||||||
|
matchNL: re.Flags&syntax.DotNL != 0,
|
||||||
|
}
|
||||||
|
case syntax.OpEmptyMatch:
|
||||||
|
return emptyStringMatcher{}
|
||||||
|
case syntax.OpPlus:
|
||||||
|
return anyStringMatcher{
|
||||||
|
allowEmpty: false,
|
||||||
|
matchNL: re.Flags&syntax.DotNL != 0,
|
||||||
|
}
|
||||||
|
case syntax.OpLiteral:
|
||||||
|
return equalStringMatcher{
|
||||||
|
s: string(re.Rune),
|
||||||
|
caseSensitive: !isCaseInsensitive(re),
|
||||||
|
}
|
||||||
|
case syntax.OpAlternate:
|
||||||
|
or := make([]StringMatcher, 0, len(re.Sub))
|
||||||
|
for _, sub := range re.Sub {
|
||||||
|
m := stringMatcherFromRegexp(sub)
|
||||||
|
if m == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
or = append(or, m)
|
||||||
|
}
|
||||||
|
return orStringMatcher(or)
|
||||||
|
case syntax.OpConcat:
|
||||||
|
clearCapture(re.Sub...)
|
||||||
|
if len(re.Sub) == 0 {
|
||||||
|
return emptyStringMatcher{}
|
||||||
|
}
|
||||||
|
if len(re.Sub) == 1 {
|
||||||
|
return stringMatcherFromRegexp(re.Sub[0])
|
||||||
|
}
|
||||||
|
var left, right StringMatcher
|
||||||
|
|
||||||
|
if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar {
|
||||||
|
left = stringMatcherFromRegexp(re.Sub[0])
|
||||||
|
if left == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
re.Sub = re.Sub[1:]
|
||||||
|
}
|
||||||
|
if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar {
|
||||||
|
right = stringMatcherFromRegexp(re.Sub[len(re.Sub)-1])
|
||||||
|
if right == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
re.Sub = re.Sub[:len(re.Sub)-1]
|
||||||
|
}
|
||||||
|
matches := findSetMatches(re, "")
|
||||||
|
if left == nil && right == nil {
|
||||||
|
if len(matches) > 0 {
|
||||||
|
var or []StringMatcher
|
||||||
|
for _, match := range matches {
|
||||||
|
or = append(or, equalStringMatcher{
|
||||||
|
s: match,
|
||||||
|
caseSensitive: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return orStringMatcher(or)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(matches) > 0 {
|
||||||
|
return containsStringMatcher{
|
||||||
|
substr: matches,
|
||||||
|
left: left,
|
||||||
|
right: right,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return matchesAnyZeroOrMoreNotNL(s[:pos]) && matchesAnyZeroOrMoreNotNL(s[pos+len(substr):])
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchesAnyZeroOrMoreNotNL(s string) bool {
|
type containsStringMatcher struct {
|
||||||
// strings.Contains(s string, substr string) faster ??
|
substr []string
|
||||||
for _, r := range s {
|
left StringMatcher
|
||||||
if r == '\n' {
|
right StringMatcher
|
||||||
return false
|
}
|
||||||
|
|
||||||
|
func (m containsStringMatcher) Matches(s string) bool {
|
||||||
|
var pos int
|
||||||
|
for _, substr := range m.substr {
|
||||||
|
pos = strings.Index(s, substr)
|
||||||
|
if pos < 0 {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
if m.right != nil && m.left != nil {
|
||||||
|
if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(m.substr):]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m.left != nil {
|
||||||
|
if m.left.Matches(s[:pos]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m.right != nil {
|
||||||
|
if m.right.Matches(s[pos+len(m.substr):]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
type emptyStringMatcher struct{}
|
||||||
|
|
||||||
|
func (m emptyStringMatcher) Matches(s string) bool {
|
||||||
|
return len(s) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type orStringMatcher []StringMatcher
|
||||||
|
|
||||||
|
func (m orStringMatcher) Matches(s string) bool {
|
||||||
|
for _, matcher := range m {
|
||||||
|
if matcher.Matches(s) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
type equalStringMatcher struct {
|
||||||
|
s string
|
||||||
|
caseSensitive bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m equalStringMatcher) Matches(s string) bool {
|
||||||
|
if !m.caseSensitive {
|
||||||
|
return m.s == s
|
||||||
|
}
|
||||||
|
return strings.EqualFold(m.s, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
type anyStringMatcher struct {
|
||||||
|
allowEmpty bool
|
||||||
|
matchNL bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m anyStringMatcher) Matches(s string) bool {
|
||||||
|
if !m.matchNL && strings.ContainsRune(s, '\n') {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !m.allowEmpty && len(s) == 0 {
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchesOneZeroOrMoreNotNL(s string) bool {
|
|
||||||
for _, r := range s {
|
|
||||||
if r == '\n' {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return len(s) > 0
|
|
||||||
}
|
|
||||||
|
|
|
@ -175,3 +175,60 @@ func TestFindSetMatches(t *testing.T) {
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Test_OptimizeRegex(t *testing.T) {
|
||||||
|
for _, c := range []struct {
|
||||||
|
pattern string
|
||||||
|
exp StringMatcher
|
||||||
|
}{
|
||||||
|
{".*", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
||||||
|
{"(?s:.*)", anyStringMatcher{allowEmpty: true, matchNL: true}},
|
||||||
|
{"(.*)", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
||||||
|
{"^.*$", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
||||||
|
{".+", anyStringMatcher{allowEmpty: false, matchNL: false}},
|
||||||
|
{"(?s:.+)", anyStringMatcher{allowEmpty: false, matchNL: true}},
|
||||||
|
{"^.+$", anyStringMatcher{allowEmpty: false, matchNL: false}},
|
||||||
|
{"(.+)", anyStringMatcher{allowEmpty: false, matchNL: false}},
|
||||||
|
{"", emptyStringMatcher{}},
|
||||||
|
{"^$", emptyStringMatcher{}},
|
||||||
|
{"^foo$", equalStringMatcher{s: "foo", caseSensitive: true}},
|
||||||
|
{"^(?i:foo)$", equalStringMatcher{s: "FOO", caseSensitive: false}},
|
||||||
|
{"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "bar", caseSensitive: true}})},
|
||||||
|
{"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "OO", caseSensitive: false}}), equalStringMatcher{s: "bar", caseSensitive: true}})},
|
||||||
|
{".*foo.*", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"(.*)foo.*", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"(.*)foo(.*)", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"(.+)foo(.*)", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"^.+foo.+", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
||||||
|
{"^(.*)(foo)(.*)$", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"^(.*)(foo|foobar)(.*)$", containsStringMatcher{substr: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"^(.*)(foo|foobar)(.+)$", containsStringMatcher{substr: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
||||||
|
{"^(.*)(bar|b|buzz)(.+)$", containsStringMatcher{substr: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
||||||
|
{"10\\.0\\.(1|2)\\.+", containsStringMatcher{substr: []string{"10.0.1", "10.0.2"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
||||||
|
{"^.+foo", containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
|
||||||
|
{"foo-.*$", containsStringMatcher{substr: []string{"foo-"}, left: nil, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
||||||
|
{"(prometheus|api_prom)_api_v1_.+", containsStringMatcher{substr: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
||||||
|
{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{containsStringMatcher{substr: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}, equalStringMatcher{s: "foo", caseSensitive: true}})},
|
||||||
|
{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "fobar", caseSensitive: true}}), containsStringMatcher{substr: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})},
|
||||||
|
// we don't support case insensitive matching for contains.
|
||||||
|
// This is because there's no strings.IndexOfFold function.
|
||||||
|
// We can revisit later if this is really popular by using strings.ToUpper.
|
||||||
|
{"^(.*)((?i)foo|foobar)(.*)$", nil},
|
||||||
|
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
|
||||||
|
{"[a-z][a-z]", nil},
|
||||||
|
{"[1^3]", nil},
|
||||||
|
// This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
|
||||||
|
// It would make the code too complex to handle it.
|
||||||
|
{"/|/bar.*", nil},
|
||||||
|
{"(.+)/(foo.*|bar$)", nil},
|
||||||
|
} {
|
||||||
|
c := c
|
||||||
|
t.Run(c.pattern, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
||||||
|
require.NoError(t, err)
|
||||||
|
matches := stringMatcherFromRegexp(parsed)
|
||||||
|
require.Equal(t, c.exp, matches)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue