Merge pull request #14505 from marioferh/improve_performance_regex

[CHANGE] Regexp: make `.` match `\n` (newline) and optimize performance
This commit is contained in:
Bryan Boreham 2024-09-18 09:54:16 +01:00 committed by GitHub
commit bb47f78929
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 156 additions and 64 deletions

View file

@ -63,13 +63,13 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
// available, even if the string matcher is faster. // available, even if the string matcher is faster.
m.matchString = m.stringMatcher.Matches m.matchString = m.stringMatcher.Matches
} else { } else {
parsed, err := syntax.Parse(v, syntax.Perl) parsed, err := syntax.Parse(v, syntax.Perl|syntax.DotNL)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Simplify the syntax tree to run faster. // Simplify the syntax tree to run faster.
parsed = parsed.Simplify() parsed = parsed.Simplify()
m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$") m.re, err = regexp.Compile("^(?s:" + parsed.String() + ")$")
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -121,7 +121,7 @@ func TestFastRegexMatcher_MatchString(t *testing.T) {
t.Parallel() t.Parallel()
m, err := NewFastRegexMatcher(r) m, err := NewFastRegexMatcher(r)
require.NoError(t, err) require.NoError(t, err)
re := regexp.MustCompile("^(?:" + r + ")$") re := regexp.MustCompile("^(?s:" + r + ")$")
require.Equal(t, re.MatchString(v), m.MatchString(v)) require.Equal(t, re.MatchString(v), m.MatchString(v))
}) })
} }
@ -167,7 +167,7 @@ func TestOptimizeConcatRegex(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
parsed, err := syntax.Parse(c.regex, syntax.Perl) parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL)
require.NoError(t, err) require.NoError(t, err)
prefix, suffix, contains := optimizeConcatRegex(parsed) prefix, suffix, contains := optimizeConcatRegex(parsed)
@ -248,7 +248,7 @@ func TestFindSetMatches(t *testing.T) {
c := c c := c
t.Run(c.pattern, func(t *testing.T) { t.Run(c.pattern, func(t *testing.T) {
t.Parallel() t.Parallel()
parsed, err := syntax.Parse(c.pattern, syntax.Perl) parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
require.NoError(t, err) require.NoError(t, err)
matches, actualCaseSensitive := findSetMatches(parsed) matches, actualCaseSensitive := findSetMatches(parsed)
require.Equal(t, c.expMatches, matches) require.Equal(t, c.expMatches, matches)
@ -348,15 +348,15 @@ func TestStringMatcherFromRegexp(t *testing.T) {
pattern string pattern string
exp StringMatcher exp StringMatcher
}{ }{
{".*", anyStringWithoutNewlineMatcher{}}, {".*", trueMatcher{}},
{".*?", anyStringWithoutNewlineMatcher{}}, {".*?", trueMatcher{}},
{"(?s:.*)", trueMatcher{}}, {"(?s:.*)", trueMatcher{}},
{"(.*)", anyStringWithoutNewlineMatcher{}}, {"(.*)", trueMatcher{}},
{"^.*$", anyStringWithoutNewlineMatcher{}}, {"^.*$", trueMatcher{}},
{".+", &anyNonEmptyStringMatcher{matchNL: false}}, {".+", &anyNonEmptyStringMatcher{matchNL: true}},
{"(?s:.+)", &anyNonEmptyStringMatcher{matchNL: true}}, {"(?s:.+)", &anyNonEmptyStringMatcher{matchNL: true}},
{"^.+$", &anyNonEmptyStringMatcher{matchNL: false}}, {"^.+$", &anyNonEmptyStringMatcher{matchNL: true}},
{"(.+)", &anyNonEmptyStringMatcher{matchNL: false}}, {"(.+)", &anyNonEmptyStringMatcher{matchNL: true}},
{"", emptyStringMatcher{}}, {"", emptyStringMatcher{}},
{"^$", emptyStringMatcher{}}, {"^$", emptyStringMatcher{}},
{"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}}, {"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}},
@ -366,23 +366,23 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, {"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
{"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: anyStringWithoutNewlineMatcher{}}}, {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}},
{"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}},
{"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"10\\.0\\.(1|2)\\.+", nil}, {"10\\.0\\.(1|2)\\.+", nil},
{"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}}, {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{matchNL: true}, suffix: "foo", suffixCaseSensitive: true}},
{"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: anyStringWithoutNewlineMatcher{}}}, {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: trueMatcher{}}},
{"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}, &equalStringMatcher{s: "foo", caseSensitive: true}})},
{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})}, {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: true}}})},
{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: nil}}, {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: nil}},
// we don't support case insensitive matching for contains. // we don't support case insensitive matching for contains.
// This is because there's no strings.IndexOfFold function. // This is because there's no strings.IndexOfFold function.
// We can revisit later if this is really popular by using strings.ToUpper. // We can revisit later if this is really popular by using strings.ToUpper.
@ -393,15 +393,15 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{".*foo.*bar.*", nil}, {".*foo.*bar.*", nil},
{`\d*`, nil}, {`\d*`, nil},
{".", nil}, {".", nil},
{"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: anyStringWithoutNewlineMatcher{}}}}}, {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}},
// This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
// It would make the code too complex to handle it. // It would make the code too complex to handle it.
{"(.+)/(foo.*|bar$)", nil}, {"(.+)/(foo.*|bar$)", nil},
// Case sensitive alternate with same literal prefix and .* suffix. // Case sensitive alternate with same literal prefix and .* suffix.
{"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: anyStringWithoutNewlineMatcher{}}}}}, {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: trueMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: trueMatcher{}}}}},
// Case insensitive alternate with same literal prefix and .* suffix. // Case insensitive alternate with same literal prefix and .* suffix.
{"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}},
{"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}},
// Concatenated variable length selectors are not supported. // Concatenated variable length selectors are not supported.
{"foo.*.*", nil}, {"foo.*.*", nil},
{"foo.+.+", nil}, {"foo.+.+", nil},
@ -410,15 +410,15 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{"aaa.?.?", nil}, {"aaa.?.?", nil},
{"aaa.?.*", nil}, {"aaa.?.*", nil},
// Regexps with ".?". // Regexps with ".?".
{"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
{"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
{"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}},
{"f.?o", nil}, {"f.?o", nil},
} { } {
c := c c := c
t.Run(c.pattern, func(t *testing.T) { t.Run(c.pattern, func(t *testing.T) {
t.Parallel() t.Parallel()
parsed, err := syntax.Parse(c.pattern, syntax.Perl) parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
require.NoError(t, err) require.NoError(t, err)
matches := stringMatcherFromRegexp(parsed) matches := stringMatcherFromRegexp(parsed)
require.Equal(t, c.exp, matches) require.Equal(t, c.exp, matches)
@ -437,16 +437,16 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) {
{ {
pattern: "(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", pattern: "(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)",
expectedLiteralPrefixMatchers: 3, expectedLiteralPrefixMatchers: 3,
expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX"}, expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"},
expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "xyz-016a-ixb-dp\n"}, expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"},
}, },
// Case insensitive. // Case insensitive.
{ {
pattern: "(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", pattern: "(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)",
expectedLiteralPrefixMatchers: 3, expectedLiteralPrefixMatchers: 3,
expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX"}, expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"},
expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp", "xyz-016a-ixb-dp\n"}, expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp"},
}, },
// Nested literal prefixes, case sensitive. // Nested literal prefixes, case sensitive.
@ -474,13 +474,13 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) {
}, },
} { } {
t.Run(c.pattern, func(t *testing.T) { t.Run(c.pattern, func(t *testing.T) {
parsed, err := syntax.Parse(c.pattern, syntax.Perl) parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
require.NoError(t, err) require.NoError(t, err)
matcher := stringMatcherFromRegexp(parsed) matcher := stringMatcherFromRegexp(parsed)
require.NotNil(t, matcher) require.NotNil(t, matcher)
re := regexp.MustCompile("^" + c.pattern + "$") re := regexp.MustCompile("^(?s:" + c.pattern + ")$")
// Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher. // Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher.
numPrefixMatchers := 0 numPrefixMatchers := 0
@ -523,16 +523,16 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) {
{ {
pattern: "(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", pattern: "(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)",
expectedLiteralSuffixMatchers: 2, expectedLiteralSuffixMatchers: 2,
expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op"}, expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"},
expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"},
}, },
// Case insensitive. // Case insensitive.
{ {
pattern: "(?i)(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", pattern: "(?i)(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)",
expectedLiteralSuffixMatchers: 2, expectedLiteralSuffixMatchers: 2,
expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op"}, expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"},
expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp"},
}, },
// Nested literal suffixes, case sensitive. // Nested literal suffixes, case sensitive.
@ -552,13 +552,13 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) {
}, },
} { } {
t.Run(c.pattern, func(t *testing.T) { t.Run(c.pattern, func(t *testing.T) {
parsed, err := syntax.Parse(c.pattern, syntax.Perl) parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
require.NoError(t, err) require.NoError(t, err)
matcher := stringMatcherFromRegexp(parsed) matcher := stringMatcherFromRegexp(parsed)
require.NotNil(t, matcher) require.NotNil(t, matcher)
re := regexp.MustCompile("^" + c.pattern + "$") re := regexp.MustCompile("^(?s:" + c.pattern + ")$")
// Pre-condition check: ensure it contains literalSuffixStringMatcher. // Pre-condition check: ensure it contains literalSuffixStringMatcher.
numSuffixMatchers := 0 numSuffixMatchers := 0
@ -598,26 +598,26 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) {
{ {
pattern: "test.?", pattern: "test.?",
expectedZeroOrOneMatchers: 1, expectedZeroOrOneMatchers: 1,
expectedMatches: []string{"test", "test!"}, expectedMatches: []string{"test\n", "test", "test!"},
expectedNotMatches: []string{"test\n", "tes", "test!!"}, expectedNotMatches: []string{"tes", "test!!"},
}, },
{ {
pattern: ".?test", pattern: ".?test",
expectedZeroOrOneMatchers: 1, expectedZeroOrOneMatchers: 1,
expectedMatches: []string{"test", "!test"}, expectedMatches: []string{"\ntest", "test", "!test"},
expectedNotMatches: []string{"\ntest", "tes", "test!"}, expectedNotMatches: []string{"tes", "test!"},
}, },
{ {
pattern: "(aaa.?|bbb.?)", pattern: "(aaa.?|bbb.?)",
expectedZeroOrOneMatchers: 2, expectedZeroOrOneMatchers: 2,
expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX"}, expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX", "aaa\n", "bbb\n"},
expectedNotMatches: []string{"aa", "aaaXX", "aaa\n", "bb", "bbbXX", "bbb\n"}, expectedNotMatches: []string{"aa", "aaaXX", "bb", "bbbXX"},
}, },
{ {
pattern: ".*aaa.?", pattern: ".*aaa.?",
expectedZeroOrOneMatchers: 1, expectedZeroOrOneMatchers: 1,
expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX"}, expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX", "XXXaaa\n"},
expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX", "XXXaaa\n"}, expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX"},
}, },
// Match newline. // Match newline.
@ -632,18 +632,18 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) {
{ {
pattern: "(aaa.?|((?s).?bbb.+))", pattern: "(aaa.?|((?s).?bbb.+))",
expectedZeroOrOneMatchers: 2, expectedZeroOrOneMatchers: 2,
expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX"}, expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX", "aaa\n"},
expectedNotMatches: []string{"aa", "aaa\n", "Xbbb", "\nbbb"}, expectedNotMatches: []string{"aa", "Xbbb", "\nbbb"},
}, },
} { } {
t.Run(c.pattern, func(t *testing.T) { t.Run(c.pattern, func(t *testing.T) {
parsed, err := syntax.Parse(c.pattern, syntax.Perl) parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
require.NoError(t, err) require.NoError(t, err)
matcher := stringMatcherFromRegexp(parsed) matcher := stringMatcherFromRegexp(parsed)
require.NotNil(t, matcher) require.NotNil(t, matcher)
re := regexp.MustCompile("^" + c.pattern + "$") re := regexp.MustCompile("^(?s:" + c.pattern + ")$")
// Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher. // Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher.
numZeroOrOneMatchers := 0 numZeroOrOneMatchers := 0
@ -1112,7 +1112,7 @@ func BenchmarkOptimizeEqualOrPrefixStringMatchers(b *testing.B) {
} }
b.Logf("regexp: %s", re) b.Logf("regexp: %s", re)
parsed, err := syntax.Parse(re, syntax.Perl) parsed, err := syntax.Parse(re, syntax.Perl|syntax.DotNL)
require.NoError(b, err) require.NoError(b, err)
unoptimized := stringMatcherFromRegexpInternal(parsed) unoptimized := stringMatcherFromRegexpInternal(parsed)

View file

@ -171,7 +171,7 @@ type Regexp struct {
// NewRegexp creates a new anchored Regexp and returns an error if the // NewRegexp creates a new anchored Regexp and returns an error if the
// passed-in regular expression does not compile. // passed-in regular expression does not compile.
func NewRegexp(s string) (Regexp, error) { func NewRegexp(s string) (Regexp, error) {
regex, err := regexp.Compile("^(?:" + s + ")$") regex, err := regexp.Compile("^(?s:" + s + ")$")
return Regexp{Regexp: regex}, err return Regexp{Regexp: regex}, err
} }
@ -218,8 +218,8 @@ func (re Regexp) String() string {
} }
str := re.Regexp.String() str := re.Regexp.String()
// Trim the anchor `^(?:` prefix and `)$` suffix. // Trim the anchor `^(?s:` prefix and `)$` suffix.
return str[4 : len(str)-2] return str[5 : len(str)-2]
} }
// Process returns a relabeled version of the given label set. The relabel configurations // Process returns a relabeled version of the given label set. The relabel configurations

View file

@ -569,6 +569,29 @@ func TestRelabel(t *testing.T) {
}, },
drop: true, drop: true,
}, },
{
input: labels.FromMap(map[string]string{
"a": "line1\nline2",
"b": "bar",
"c": "baz",
}),
relabel: []*Config{
{
SourceLabels: model.LabelNames{"a"},
Regex: MustNewRegexp("line1.*line2"),
TargetLabel: "d",
Separator: ";",
Replacement: "match${1}",
Action: Replace,
},
},
output: labels.FromMap(map[string]string{
"a": "line1\nline2",
"b": "bar",
"c": "baz",
"d": "match",
}),
},
} }
for _, test := range tests { for _, test := range tests {

View file

@ -1480,7 +1480,7 @@ func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressio
regexStr = stringFromArg(args[4]) regexStr = stringFromArg(args[4])
) )
regex, err := regexp.Compile("^(?:" + regexStr + ")$") regex, err := regexp.Compile("^(?s:" + regexStr + ")$")
if err != nil { if err != nil {
panic(fmt.Errorf("invalid regular expression in label_replace(): %s", regexStr)) panic(fmt.Errorf("invalid regular expression in label_replace(): %s", regexStr))
} }

View file

@ -254,6 +254,10 @@ func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matc
return nil, err return nil, err
} }
its = append(its, allPostings) its = append(its, allPostings)
case m.Type == labels.MatchRegexp && m.Value == ".*":
// .* regexp matches any string: do nothing.
case m.Type == labels.MatchNotRegexp && m.Value == ".*":
return index.EmptyPostings(), nil
case labelMustBeSet[m.Name]: case labelMustBeSet[m.Name]:
// If this matcher must be non-empty, we can be smarter. // If this matcher must be non-empty, we can be smarter.
matchesEmpty := m.Matches("") matchesEmpty := m.Matches("")

View file

@ -2689,6 +2689,7 @@ func TestPostingsForMatchers(t *testing.T) {
app.Append(0, labels.FromStrings("n", "1"), 0, 0) app.Append(0, labels.FromStrings("n", "1"), 0, 0)
app.Append(0, labels.FromStrings("n", "1", "i", "a"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "a"), 0, 0)
app.Append(0, labels.FromStrings("n", "1", "i", "b"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "b"), 0, 0)
app.Append(0, labels.FromStrings("n", "1", "i", "\n"), 0, 0)
app.Append(0, labels.FromStrings("n", "2"), 0, 0) app.Append(0, labels.FromStrings("n", "2"), 0, 0)
app.Append(0, labels.FromStrings("n", "2.5"), 0, 0) app.Append(0, labels.FromStrings("n", "2.5"), 0, 0)
require.NoError(t, app.Commit()) require.NoError(t, app.Commit())
@ -2704,6 +2705,7 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2722,6 +2724,7 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
labels.FromStrings("n", "2"), labels.FromStrings("n", "2"),
labels.FromStrings("n", "2.5"), labels.FromStrings("n", "2.5"),
}, },
@ -2739,6 +2742,7 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2750,6 +2754,7 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2757,6 +2762,7 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
// Regex. // Regex.
@ -2766,6 +2772,7 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2801,6 +2808,7 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2808,6 +2816,7 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
// Not regex. // Not regex.
@ -2816,6 +2825,7 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2849,12 +2859,14 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^a?$")}, matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^a?$")},
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2862,6 +2874,7 @@ func TestPostingsForMatchers(t *testing.T) {
exp: []labels.Labels{ exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
}, },
}, },
{ {
@ -2895,6 +2908,7 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "1"), labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
labels.FromStrings("n", "2"), labels.FromStrings("n", "2"),
}, },
}, },
@ -2942,6 +2956,57 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "2.5"), labels.FromStrings("n", "2.5"),
}, },
}, },
// Test shortcut for i=~".*"
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", ".*")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
labels.FromStrings("n", "2"),
labels.FromStrings("n", "2.5"),
},
},
// Test shortcut for n=~".*" and i=~"^.*$"
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "1", "i", "\n"),
labels.FromStrings("n", "2"),
labels.FromStrings("n", "2.5"),
},
},
// Test shortcut for n=~"^.*$"
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchEqual, "i", "a")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
},
},
// Test shortcut for i!~".*"
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")},
exp: []labels.Labels{},
},
// Test shortcut for n!~"^.*$", i!~".*". First one triggers empty result.
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")},
exp: []labels.Labels{},
},
// Test shortcut for n=~".*" and i!~".*"
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")},
exp: []labels.Labels{},
},
// Test shortcut for i!~"^.*$"
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*$")},
exp: []labels.Labels{},
},
} }
ir, err := h.Index() ir, err := h.Index()