// Source: prometheus/model/labels/regexp_test.go (316 lines, 13 KiB, Go)

// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package labels
import (
"math/rand"
"strings"
"testing"
"time"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
"github.com/stretchr/testify/require"
)
// init seeds the global math/rand source from the current wall-clock time,
// so the random strings produced by RandStringRunes differ between runs.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
var (
	// letterRunes is the alphabet RandStringRunes picks characters from:
	// the 26 lowercase followed by the 26 uppercase ASCII letters.
	letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

	// regexes lists the patterns exercised by TestNewFastRegexMatcher and
	// BenchmarkNewFastRegexMatcher. The "\n" escapes sit inside interpreted
	// string literals, so those patterns contain a real newline character.
	regexes = []string{
		"(foo|bar)",
		"foo.*",
		".*foo",
		"^.*foo$",
		"^.+foo$",
		".*",
		".+",
		"foo.+",
		".+foo",
		"foo\n.+",
		"foo\n.*",
		".*foo.*",
		".+foo.+",
		"",
		"(?s:.*)",
		"(?s:.+)",
		"(?s:^.*foo$)",
		"^(?i:foo|oo)|(bar)$",
		"((.*)(bar|b|buzz)(.+)|foo)$",
		"^$",
		"(prometheus|api_prom)_api_v1_.+",
		"10\\.0\\.(1|2)\\.+",
		"10\\.0\\.(1|2).+",
		"((fo(bar))|.+foo)",
	}

	// values lists the input strings every pattern in regexes is matched
	// against.
	values = []string{
		"foo",
		" foo bar",
		"bar",
		"buzz\nbar",
		"bar foo",
		"bfoo",
		"\n",
		"\nfoo",
		"foo\n",
		"hello foo world",
		"hello foo\n world",
		"",
		"FOO",
		"Foo",
		"OO",
		"Oo",
		"\nfoo\n",
		strings.Repeat("f", 20),
		"prometheus",
		"prometheus_api_v1",
		"prometheus_api_v1_foo",
		"10.0.1.20",
		"10.0.2.10",
		"10.0.3.30",
		"10.0.4.40",
	}
)
// TestNewFastRegexMatcher cross-checks the matcher built by
// NewFastRegexMatcher against the regexp package: for every pattern/value
// pair, MatchString must agree with the same pattern compiled as a fully
// anchored expression ("^(?:" + pattern + ")$").
func TestNewFastRegexMatcher(t *testing.T) {
	for _, pattern := range regexes {
		pattern := pattern
		for _, input := range values {
			input := input
			name := pattern + ` on "` + input + `"`
			t.Run(name, func(t *testing.T) {
				t.Parallel()

				fast, err := NewFastRegexMatcher(pattern)
				require.NoError(t, err)

				anchored := "^(?:" + pattern + ")$"
				reference, err := regexp.Compile(anchored)
				require.NoError(t, err)

				want := reference.MatchString(input)
				got := fast.MatchString(input)
				require.Equal(t, want, got)
			})
		}
	}
}
func BenchmarkNewFastRegexMatcher(b *testing.B) {
Better benchmarks values. ``` ❯ benchcmp before.txt after.txt benchmark old ns/op new ns/op delta BenchmarkNewFastRegexMatcher/(foo|bar)-16 2487 2520 +1.33% BenchmarkNewFastRegexMatcher/foo.*-16 8856 4531 -48.84% BenchmarkNewFastRegexMatcher/.*foo-16 25195 6389 -74.64% BenchmarkNewFastRegexMatcher/^.*foo$-16 25626 6253 -75.60% BenchmarkNewFastRegexMatcher/^.+foo$-16 25429 6248 -75.43% BenchmarkNewFastRegexMatcher/.*-16 429707 8439 -98.04% BenchmarkNewFastRegexMatcher/.+-16 380165 8503 -97.76% BenchmarkNewFastRegexMatcher/foo.+-16 8180 4586 -43.94% BenchmarkNewFastRegexMatcher/.+foo-16 25214 6255 -75.19% BenchmarkNewFastRegexMatcher/foo_.+-16 8116 4334 -46.60% BenchmarkNewFastRegexMatcher/foo_.*-16 8354 4287 -48.68% BenchmarkNewFastRegexMatcher/.*foo.*-16 206076 19227 -90.67% BenchmarkNewFastRegexMatcher/.+foo.+-16 208434 18793 -90.98% BenchmarkNewFastRegexMatcher/#00-16 33045 3936 -88.09% BenchmarkNewFastRegexMatcher/(?s:.*)-16 403806 4208 -98.96% BenchmarkNewFastRegexMatcher/(?s:.+)-16 418177 4150 -99.01% BenchmarkNewFastRegexMatcher/(?s:^.*foo$)-16 24452 5661 -76.85% BenchmarkNewFastRegexMatcher/^(?i:foo|oo)|(bar)$-16 48087 23183 -51.79% BenchmarkNewFastRegexMatcher/((.*)(bar|b|buzz)(.+)|foo)$-16 645430 47193 -92.69% BenchmarkNewFastRegexMatcher/^$-16 37244 3912 -89.50% BenchmarkNewFastRegexMatcher/(prometheus|api_prom)_api_v1_.+-16 17205 10006 -41.84% BenchmarkNewFastRegexMatcher/10\.0\.(1|2)\.+-16 6776 7011 +3.47% BenchmarkNewFastRegexMatcher/10\.0\.(1|2).+-16 14792 4674 -68.40% BenchmarkNewFastRegexMatcher/((fo(bar))|.+foo)-16 497007 17410 -96.50% ``` Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
2021-10-11 00:45:02 -07:00
benchValues := values
for _, v := range values {
for i := 5; i < 50; i = i + 5 {
benchValues = append(benchValues, v+RandStringRunes(i))
benchValues = append(benchValues, RandStringRunes(i)+v+RandStringRunes(i))
benchValues = append(benchValues, RandStringRunes(i)+v)
}
}
for _, r := range regexes {
r := r
b.Run(r, func(b *testing.B) {
m, err := NewFastRegexMatcher(r)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, v := range benchValues {
_ = m.MatchString(v)
}
}
})
}
}
// TestOptimizeConcatRegex checks the literal prefix, suffix and "contains"
// substring that optimizeConcatRegex extracts from a parsed regular
// expression. Each case runs as its own subtest named after the regex, so a
// failure is attributed to its pattern and does not stop the remaining cases.
func TestOptimizeConcatRegex(t *testing.T) {
	cases := []struct {
		regex    string
		prefix   string
		suffix   string
		contains string
	}{
		{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
		{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
		{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
		{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
		{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
		{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
		{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
		{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
		{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
		{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
		{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: ""},
		{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: ""},
		{regex: "(?i:abc).*", prefix: "", suffix: "", contains: ""},
		{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: ""},
		{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: "def"},
		{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: "abc"},
		{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: "abc"},
		{regex: "[aA]bc.*", prefix: "", suffix: "", contains: "bc"},
	}

	for _, c := range cases {
		// The subtest is not parallel, so it finishes before the loop
		// advances and capturing c directly is safe.
		t.Run(c.regex, func(t *testing.T) {
			parsed, err := syntax.Parse(c.regex, syntax.Perl)
			require.NoError(t, err)

			prefix, suffix, contains := optimizeConcatRegex(parsed)
			require.Equal(t, c.prefix, prefix)
			require.Equal(t, c.suffix, suffix)
			require.Equal(t, c.contains, contains)
		})
	}
}
// TestFindSetMatches checks which patterns findSetMatches can expand into an
// explicit list of literal strings, and which ones yield nil.
//
// Refer to https://github.com/prometheus/prometheus/issues/2651.
func TestFindSetMatches(t *testing.T) {
	cases := []struct {
		regex string
		want  []string
	}{
		// A single value, as produced by a `bar=~"foo"` selector.
		{"foo", []string{"foo"}},
		{"^foo", []string{"foo"}},
		{"^foo$", []string{"foo"}},
		// Plain alternation.
		{"foo|bar|zz", []string{"foo", "bar", "zz"}},
		// Alternation plus concat (bar|baz is parsed as "ba[rz]").
		{"foo|bar|baz", []string{"foo", "bar", "baz"}},
		// Alternation, concat and a capture group.
		{"foo|bar|baz|(zz)", []string{"foo", "bar", "baz", "zz"}},
		// Alternation with an empty branch: parsed as b(ar|(?:)|uzz),
		// where b(?:) stands for the literal b.
		{"bar|b|buzz", []string{"bar", "b", "buzz"}},
		// Anchors are skipped; they are enforced at the root anyway.
		{"(^bar$)|(b$)|(^buzz)", []string{"bar", "b", "buzz"}},
		// Escaped special characters.
		{"fo\\.o|bar\\?|\\^baz", []string{"fo.o", "bar?", "^baz"}},
		// Character class.
		{"[abc]d", []string{"ad", "bd", "cd"}},
		// Differing high/low charsets => A(B[CD]|EF)|BC[XY].
		{"ABC|ABD|AEF|BCX|BCY", []string{"ABC", "ABD", "AEF", "BCX", "BCY"}},
		// Triple concat.
		{"api_(v1|prom)_push", []string{"api_v1_push", "api_prom_push"}},
		// Triple concat with several alternations.
		{"(api|rpc)_(v1|prom)_push", []string{"api_v1_push", "api_prom_push", "rpc_v1_push", "rpc_prom_push"}},
		{"(api|rpc)_(v1|prom)_(push|query)", []string{"api_v1_push", "api_v1_query", "api_prom_push", "api_prom_query", "rpc_v1_push", "rpc_v1_query", "rpc_prom_push", "rpc_prom_query"}},
		// Character class starting with "-".
		{"[-1-2][a-c]", []string{"-a", "-b", "-c", "1a", "1b", "1c", "2a", "2b", "2c"}},
		{"[1^3]", []string{"1", "3", "^"}},
		// OpPlus combined with concat.
		{"(.+)/(foo|bar)", nil},
		// Unescaped special characters.
		{"fo.o|bar?|^baz", nil},
		// Case-insensitive flag.
		{"(?i)foo", nil},
		// Case-insensitive flag on an alternation.
		{"(?i)foo|bar|baz", nil},
		// Case-insensitive flag inside a concat.
		{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
		// Character class that is too large to enumerate.
		{"(api|rpc)_[^0-9]", nil},
		// Too many combinations.
		{"[a-z][a-z]", nil},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.regex, func(t *testing.T) {
			t.Parallel()

			tree, err := syntax.Parse(tc.regex, syntax.Perl)
			require.NoError(t, err)

			got := findSetMatches(tree, "")
			require.Equal(t, tc.want, got)
		})
	}
}
// BenchmarkFastRegexMatcher times MatchString for a fixed list of patterns
// against three inputs: fifty "x" characters, that filler prefixed with
// "foo", and that filler suffixed with "foo".
func BenchmarkFastRegexMatcher(b *testing.B) {
	filler := strings.Repeat("x", 50)
	fooFirst := "foo" + filler
	fooLast := filler + "foo"

	patterns := []string{
		"foo",
		"^foo",
		"(foo|bar)",
		"foo.*",
		".*foo",
		"^.*foo$",
		"^.+foo$",
		".*",
		".+",
		"foo.+",
		".+foo",
		".*foo.*",
		"(?i:foo)",
		"(prometheus|api_prom)_api_v1_.+",
		"((fo(bar))|.+foo)",
	}

	for _, pattern := range patterns {
		pattern := pattern
		b.Run(pattern, func(b *testing.B) {
			matcher, err := NewFastRegexMatcher(pattern)
			require.NoError(b, err)

			// Only the matching loop below is timed.
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				_ = matcher.MatchString(filler)
				_ = matcher.MatchString(fooFirst)
				_ = matcher.MatchString(fooLast)
			}
		})
	}
}
// Test_OptimizeRegex checks the StringMatcher that stringMatcherFromRegexp
// builds for a parsed pattern. A nil expectation means no specialised
// matcher is expected for that pattern.
func Test_OptimizeRegex(t *testing.T) {
	cases := []struct {
		regex string
		want  StringMatcher
	}{
		{".*", &anyStringMatcher{allowEmpty: true, matchNL: false}},
		{".*?", &anyStringMatcher{allowEmpty: true, matchNL: false}},
		{"(?s:.*)", &anyStringMatcher{allowEmpty: true, matchNL: true}},
		{"(.*)", &anyStringMatcher{allowEmpty: true, matchNL: false}},
		{"^.*$", &anyStringMatcher{allowEmpty: true, matchNL: false}},
		{".+", &anyStringMatcher{allowEmpty: false, matchNL: false}},
		{"(?s:.+)", &anyStringMatcher{allowEmpty: false, matchNL: true}},
		{"^.+$", &anyStringMatcher{allowEmpty: false, matchNL: false}},
		{"(.+)", &anyStringMatcher{allowEmpty: false, matchNL: false}},
		{"", emptyStringMatcher{}},
		{"^$", emptyStringMatcher{}},
		{"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}},
		{"^(?i:foo)$", &equalStringMatcher{s: "FOO", caseSensitive: false}},
		{"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
		{"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}}), &equalStringMatcher{s: "bar", caseSensitive: true}})},
		{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: false, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: false, matchNL: false}, right: &anyStringMatcher{allowEmpty: false, matchNL: false}}},
		{"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: false, matchNL: false}}},
		{"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: false, matchNL: false}}},
		{"10\\.0\\.(1|2)\\.+", nil},
		{"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyStringMatcher{allowEmpty: false, matchNL: false}}},
		{"^.+foo", &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
		{"foo-.*$", &containsStringMatcher{substrings: []string{"foo-"}, left: nil, right: &anyStringMatcher{allowEmpty: true, matchNL: false}}},
		{"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyStringMatcher{allowEmpty: false, matchNL: false}}},
		{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: &anyStringMatcher{allowEmpty: true, matchNL: false}, right: &anyStringMatcher{allowEmpty: false, matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})},
		{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &containsStringMatcher{substrings: []string{"foo"}, left: &anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})},
		{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
		// Case-insensitive matching is not supported for "contains"
		// matchers because there is no strings.IndexOfFold function. This
		// could be revisited with strings.ToUpper if it proves popular.
		{"^(.*)((?i)foo|foobar)(.*)$", nil},
		{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
		{"[a-z][a-z]", nil},
		{"[1^3]", nil},
		{".*foo.*bar.*", nil},
		{`\d*`, nil},
		{".", nil},
		// Not supported: `stringMatcherFromRegexp` is not reentrant for
		// syntax.OpConcat, and handling it would make the code too complex.
		{"/|/bar.*", nil},
		{"(.+)/(foo.*|bar$)", nil},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.regex, func(t *testing.T) {
			t.Parallel()

			tree, err := syntax.Parse(tc.regex, syntax.Perl)
			require.NoError(t, err)

			got := stringMatcherFromRegexp(tree)
			require.Equal(t, tc.want, got)
		})
	}
}
// RandStringRunes returns a string of n runes, each drawn independently from
// letterRunes using the global math/rand source.
func RandStringRunes(n int) string {
	out := make([]rune, n)
	for pos := 0; pos < n; pos++ {
		out[pos] = letterRunes[rand.Intn(len(letterRunes))]
	}
	return string(out)
}