2020-06-26 02:49:09 -07:00
|
|
|
// Copyright 2020 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package labels
|
|
|
|
|
|
|
|
import (
|
2021-10-07 05:25:31 -07:00
|
|
|
"math/rand"
|
2021-10-06 06:24:57 -07:00
|
|
|
"regexp"
|
2020-06-26 02:49:09 -07:00
|
|
|
"regexp/syntax"
|
2021-10-07 05:25:31 -07:00
|
|
|
"strings"
|
2020-06-26 02:49:09 -07:00
|
|
|
"testing"
|
2021-10-07 05:25:31 -07:00
|
|
|
"time"
|
2020-06-26 02:49:09 -07:00
|
|
|
|
2020-10-29 02:43:23 -07:00
|
|
|
"github.com/stretchr/testify/require"
|
2020-06-26 02:49:09 -07:00
|
|
|
)
|
|
|
|
|
2021-10-07 05:25:31 -07:00
|
|
|
// init seeds the global math/rand generator with the current time in
// nanoseconds. RandStringRunes draws from that generator.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
|
|
|
|
|
2021-10-06 06:24:57 -07:00
|
|
|
var (
	// letterRunes is the alphabet RandStringRunes picks its runes from.
	letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

	// regexes lists the patterns iterated over by TestNewFastRegexMatcher
	// and BenchmarkNewFastRegexMatcher.
	regexes = []string{
		"(foo|bar)",
		"foo.*",
		".*foo",
		"^.*foo$",
		"^.+foo$",
		".*",
		".+",
		"foo.+",
		".+foo",
		"foo\n.+",
		"foo\n.*",
		".*foo.*",
		".+foo.+",
		"",
		"(?s:.*)",
		"(?s:.+)",
		"(?s:^.*foo$)",
		"^(?i:foo|oo)|(bar)$",
		"((.*)(bar|b|buzz)(.+)|foo)$",
		"^$",
		"(prometheus|api_prom)_api_v1_.+",
		"10\\.0\\.(1|2)\\.+",
		"10\\.0\\.(1|2).+",
		"((fo(bar))|.+foo)",
	}

	// values lists the input strings that every pattern in regexes is
	// matched against.
	values = []string{
		"foo",
		" foo bar",
		"bar",
		"buzz\nbar",
		"bar foo",
		"bfoo",
		"\n",
		"\nfoo",
		"foo\n",
		"hello foo world",
		"hello foo\n world",
		"",
		"FOO",
		"Foo",
		"OO",
		"Oo",
		"\nfoo\n",
		strings.Repeat("f", 20),
		"prometheus",
		"prometheus_api_v1",
		"prometheus_api_v1_foo",
		"10.0.1.20",
		"10.0.2.10",
		"10.0.3.30",
		"10.0.4.40",
	}
)
|
|
|
|
|
2020-06-26 02:49:09 -07:00
|
|
|
func TestNewFastRegexMatcher(t *testing.T) {
|
2021-10-06 06:24:57 -07:00
|
|
|
for _, r := range regexes {
|
|
|
|
r := r
|
|
|
|
for _, v := range values {
|
|
|
|
v := v
|
|
|
|
t.Run(r+` on "`+v+`"`, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
m, err := NewFastRegexMatcher(r)
|
|
|
|
require.NoError(t, err)
|
|
|
|
re, err := regexp.Compile("^(?:" + r + ")$")
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, re.MatchString(v), m.MatchString(v))
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
2021-10-06 06:24:57 -07:00
|
|
|
}
|
2020-06-26 02:49:09 -07:00
|
|
|
|
2021-10-06 06:24:57 -07:00
|
|
|
func BenchmarkNewFastRegexMatcher(b *testing.B) {
|
2021-10-07 05:25:31 -07:00
|
|
|
benchValues := append(values,
|
|
|
|
RandStringRunes(128), RandStringRunes(256), RandStringRunes(1024))
|
2021-10-06 06:24:57 -07:00
|
|
|
for _, r := range regexes {
|
|
|
|
r := r
|
2021-10-07 05:25:31 -07:00
|
|
|
b.Run(r, func(b *testing.B) {
|
|
|
|
m, err := NewFastRegexMatcher(r)
|
|
|
|
require.NoError(b, err)
|
|
|
|
b.ResetTimer()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
for _, v := range benchValues {
|
|
|
|
_ = m.MatchString(v)
|
2021-10-06 06:24:57 -07:00
|
|
|
}
|
2021-10-07 05:25:31 -07:00
|
|
|
}
|
|
|
|
})
|
|
|
|
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestOptimizeConcatRegex(t *testing.T) {
|
|
|
|
cases := []struct {
|
2020-07-07 01:38:04 -07:00
|
|
|
regex string
|
|
|
|
prefix string
|
|
|
|
suffix string
|
|
|
|
contains string
|
2020-06-26 02:49:09 -07:00
|
|
|
}{
|
2020-07-07 01:38:04 -07:00
|
|
|
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""},
|
|
|
|
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""},
|
|
|
|
{regex: "foo.*", prefix: "foo", suffix: "", contains: ""},
|
|
|
|
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"},
|
|
|
|
{regex: ".*foo", prefix: "", suffix: "foo", contains: ""},
|
|
|
|
{regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""},
|
|
|
|
{regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"},
|
|
|
|
{regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"},
|
|
|
|
{regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*[abc].*", prefix: "", suffix: "", contains: ""},
|
2020-10-06 05:16:26 -07:00
|
|
|
{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: "(?i:abc).*", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: ""},
|
|
|
|
{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: "def"},
|
|
|
|
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: "abc"},
|
|
|
|
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: "abc"},
|
2020-10-12 04:17:29 -07:00
|
|
|
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: "bc"},
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, c := range cases {
|
|
|
|
parsed, err := syntax.Parse(c.regex, syntax.Perl)
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-06-26 02:49:09 -07:00
|
|
|
|
2020-07-07 01:38:04 -07:00
|
|
|
prefix, suffix, contains := optimizeConcatRegex(parsed)
|
2020-10-29 02:43:23 -07:00
|
|
|
require.Equal(t, c.prefix, prefix)
|
|
|
|
require.Equal(t, c.suffix, suffix)
|
|
|
|
require.Equal(t, c.contains, contains)
|
2020-06-26 02:49:09 -07:00
|
|
|
}
|
|
|
|
}
|
2021-10-05 04:43:41 -07:00
|
|
|
|
|
|
|
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
|
|
|
func TestFindSetMatches(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
exp []string
|
|
|
|
}{
|
|
|
|
// Single value, coming from a `bar=~"foo"` selector.
|
|
|
|
{"foo", []string{"foo"}},
|
2021-10-06 07:22:48 -07:00
|
|
|
{"^foo", []string{"foo"}},
|
|
|
|
{"^foo$", []string{"foo"}},
|
2021-10-05 04:43:41 -07:00
|
|
|
// Simple sets alternates.
|
|
|
|
{"foo|bar|zz", []string{"foo", "bar", "zz"}},
|
2021-10-06 07:44:26 -07:00
|
|
|
// Simple sets alternate and concat (bar|baz is parsed as "ba[rz]").
|
2021-10-05 04:43:41 -07:00
|
|
|
{"foo|bar|baz", []string{"foo", "bar", "baz"}},
|
|
|
|
// Simple sets alternate and concat and capture
|
|
|
|
{"foo|bar|baz|(zz)", []string{"foo", "bar", "baz", "zz"}},
|
|
|
|
// Simple sets alternate and concat and alternates with empty matches
|
|
|
|
// parsed as b(ar|(?:)|uzz) where b(?:) means literal b.
|
|
|
|
{"bar|b|buzz", []string{"bar", "b", "buzz"}},
|
2021-10-06 07:22:48 -07:00
|
|
|
// Skip anchors it's enforced anyway at the root.
|
|
|
|
{"(^bar$)|(b$)|(^buzz)", []string{"bar", "b", "buzz"}},
|
2021-10-05 04:43:41 -07:00
|
|
|
// Simple sets containing escaped characters.
|
|
|
|
{"fo\\.o|bar\\?|\\^baz", []string{"fo.o", "bar?", "^baz"}},
|
2021-10-05 06:59:40 -07:00
|
|
|
// using charclass
|
2021-10-05 06:16:21 -07:00
|
|
|
{"[abc]d", []string{"ad", "bd", "cd"}},
|
2021-10-05 04:43:41 -07:00
|
|
|
// high low charset different => A(B[CD]|EF)|BC[XY]
|
|
|
|
{"ABC|ABD|AEF|BCX|BCY", []string{"ABC", "ABD", "AEF", "BCX", "BCY"}},
|
|
|
|
// triple concat
|
|
|
|
{"api_(v1|prom)_push", []string{"api_v1_push", "api_prom_push"}},
|
|
|
|
// triple concat with multiple alternates
|
|
|
|
{"(api|rpc)_(v1|prom)_push", []string{"api_v1_push", "api_prom_push", "rpc_v1_push", "rpc_prom_push"}},
|
|
|
|
{"(api|rpc)_(v1|prom)_(push|query)", []string{"api_v1_push", "api_v1_query", "api_prom_push", "api_prom_query", "rpc_v1_push", "rpc_v1_query", "rpc_prom_push", "rpc_prom_query"}},
|
2021-10-06 07:22:48 -07:00
|
|
|
// class starting with "-"
|
|
|
|
{"[-1-2][a-c]", []string{"-a", "-b", "-c", "1a", "1b", "1c", "2a", "2b", "2c"}},
|
|
|
|
{"[1^3]", []string{"1", "3", "^"}},
|
2021-10-05 04:43:41 -07:00
|
|
|
// OpPlus with concat
|
|
|
|
{"(.+)/(foo|bar)", nil},
|
|
|
|
// Simple sets containing special characters without escaping.
|
|
|
|
{"fo.o|bar?|^baz", nil},
|
|
|
|
// case sensitive wrapper.
|
|
|
|
{"(?i)foo", nil},
|
|
|
|
// case sensitive wrapper on alternate.
|
|
|
|
{"(?i)foo|bar|baz", nil},
|
|
|
|
// case sensitive wrapper on concat.
|
|
|
|
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
|
|
|
|
// too high charset combination
|
|
|
|
{"(api|rpc)_[^0-9]", nil},
|
2021-10-05 07:46:24 -07:00
|
|
|
// too many combinations
|
|
|
|
{"[a-z][a-z]", nil},
|
2021-10-05 04:43:41 -07:00
|
|
|
} {
|
|
|
|
c := c
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
matches := findSetMatches(parsed, "")
|
|
|
|
require.Equal(t, c.exp, matches)
|
|
|
|
})
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
2021-10-07 04:56:31 -07:00
|
|
|
|
|
|
|
func Test_OptimizeRegex(t *testing.T) {
|
|
|
|
for _, c := range []struct {
|
|
|
|
pattern string
|
|
|
|
exp StringMatcher
|
|
|
|
}{
|
2021-10-07 08:15:20 -07:00
|
|
|
// {".*", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
|
|
|
// {".*?", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
|
|
|
// {"(?s:.*)", anyStringMatcher{allowEmpty: true, matchNL: true}},
|
|
|
|
// {"(.*)", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
|
|
|
// {"^.*$", anyStringMatcher{allowEmpty: true, matchNL: false}},
|
|
|
|
// {".+", anyStringMatcher{allowEmpty: false, matchNL: false}},
|
|
|
|
// {"(?s:.+)", anyStringMatcher{allowEmpty: false, matchNL: true}},
|
|
|
|
// {"^.+$", anyStringMatcher{allowEmpty: false, matchNL: false}},
|
|
|
|
// {"(.+)", anyStringMatcher{allowEmpty: false, matchNL: false}},
|
|
|
|
// {"", emptyStringMatcher{}},
|
|
|
|
// {"^$", emptyStringMatcher{}},
|
|
|
|
// {"^foo$", equalStringMatcher{s: "foo", caseSensitive: true}},
|
|
|
|
// {"^(?i:foo)$", equalStringMatcher{s: "FOO", caseSensitive: false}},
|
|
|
|
// {"^(?i:foo)|(bar)$", orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "bar", caseSensitive: true}})},
|
|
|
|
// {"^(?i:foo|oo)|(bar)$", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "FOO", caseSensitive: false}, equalStringMatcher{s: "OO", caseSensitive: false}}), equalStringMatcher{s: "bar", caseSensitive: true}})},
|
|
|
|
// {".*foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"(.*)foo.*", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"(.*)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"(.+)foo(.*)", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"^.+foo.+", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
|
|
|
// {"^(.*)(foo)(.*)$", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"^(.*)(foo|foobar)(.*)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"^(.*)(foo|foobar)(.+)$", containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
|
|
|
// {"^(.*)(bar|b|buzz)(.+)$", containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
|
|
|
// {"10\\.0\\.(1|2)\\.+", nil},
|
|
|
|
// {"10\\.0\\.(1|2).+", containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
|
|
|
// {"^.+foo", containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
|
|
|
|
// {"foo-.*$", containsStringMatcher{substrings: []string{"foo-"}, left: nil, right: anyStringMatcher{allowEmpty: true, matchNL: false}}},
|
|
|
|
// {"(prometheus|api_prom)_api_v1_.+", containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: anyStringMatcher{allowEmpty: false, matchNL: false}}},
|
|
|
|
// {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringMatcher{allowEmpty: true, matchNL: false}, right: anyStringMatcher{allowEmpty: false, matchNL: false}}, equalStringMatcher{s: "foo", caseSensitive: true}})},
|
|
|
|
// {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{equalStringMatcher{s: "fobar", caseSensitive: true}}), containsStringMatcher{substrings: []string{"foo"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}})},
|
|
|
|
// {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: anyStringMatcher{allowEmpty: false, matchNL: false}, right: nil}},
|
|
|
|
// // we don't support case insensitive matching for contains.
|
|
|
|
// // This is because there's no strings.IndexOfFold function.
|
|
|
|
// // We can revisit later if this is really popular by using strings.ToUpper.
|
|
|
|
// {"^(.*)((?i)foo|foobar)(.*)$", nil},
|
|
|
|
// {"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
|
|
|
|
// {"[a-z][a-z]", nil},
|
|
|
|
// {"[1^3]", nil},
|
|
|
|
// // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
|
|
|
|
// // It would make the code too complex to handle it.
|
|
|
|
// {"/|/bar.*", nil},
|
|
|
|
// {"(.+)/(foo.*|bar$)", nil},
|
2021-10-07 04:56:31 -07:00
|
|
|
} {
|
|
|
|
c := c
|
|
|
|
t.Run(c.pattern, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
|
|
|
require.NoError(t, err)
|
|
|
|
matches := stringMatcherFromRegexp(parsed)
|
|
|
|
require.Equal(t, c.exp, matches)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2021-10-07 05:25:31 -07:00
|
|
|
|
|
|
|
func RandStringRunes(n int) string {
|
|
|
|
b := make([]rune, n)
|
|
|
|
for i := range b {
|
|
|
|
b[i] = letterRunes[rand.Intn(len(letterRunes))]
|
|
|
|
}
|
|
|
|
return string(b)
|
|
|
|
}
|