FastRegexMatcher: small optimization for the literal prefix case

Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
Marco Pracucci 2024-07-01 10:12:50 +02:00
parent f4b1fcb73e
commit ec31acaf02
No known key found for this signature in database
GPG key ID: 74C1BD403D2DF9B5
2 changed files with 63 additions and 33 deletions

View file

@ -549,11 +549,7 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
// Right matcher with 1 fixed set match. // Right matcher with 1 fixed set match.
case left == nil && len(matches) == 1: case left == nil && len(matches) == 1:
return &literalPrefixStringMatcher{ return newLiteralPrefixStringMatcher(matches[0], matchesCaseSensitive, right)
prefix: matches[0],
prefixCaseSensitive: matchesCaseSensitive,
right: right,
}
// Left matcher with 1 fixed set match. // Left matcher with 1 fixed set match.
case right == nil && len(matches) == 1: case right == nil && len(matches) == 1:
@ -631,21 +627,47 @@ func (m *containsStringMatcher) Matches(s string) bool {
return false return false
} }
// literalPrefixStringMatcher matches a string with the given literal prefix and right side matcher. func newLiteralPrefixStringMatcher(prefix string, prefixCaseSensitive bool, right StringMatcher) StringMatcher {
type literalPrefixStringMatcher struct { if prefixCaseSensitive {
prefix string return &literalPrefixSensitiveStringMatcher{
prefixCaseSensitive bool prefix: prefix,
right: right,
}
}
return &literalPrefixInsensitiveStringMatcher{
prefix: prefix,
right: right,
}
}
// literalPrefixSensitiveStringMatcher matches a string with the given literal case-sensitive prefix and right side matcher.
type literalPrefixSensitiveStringMatcher struct {
prefix string
// The matcher that must match the right side. Can be nil. // The matcher that must match the right side. Can be nil.
right StringMatcher right StringMatcher
} }
func (m *literalPrefixStringMatcher) Matches(s string) bool { func (m *literalPrefixSensitiveStringMatcher) Matches(s string) bool {
// Ensure the prefix matches. if !strings.HasPrefix(s, m.prefix) {
if m.prefixCaseSensitive && !strings.HasPrefix(s, m.prefix) {
return false return false
} }
if !m.prefixCaseSensitive && !hasPrefixCaseInsensitive(s, m.prefix) {
// Ensure the right side matches.
return m.right.Matches(s[len(m.prefix):])
}
// literalPrefixInsensitiveStringMatcher matches a string with the given literal case-insensitive prefix and right side matcher.
type literalPrefixInsensitiveStringMatcher struct {
prefix string
// The matcher that must match the right side. Can be nil.
right StringMatcher
}
func (m *literalPrefixInsensitiveStringMatcher) Matches(s string) bool {
if !hasPrefixCaseInsensitive(s, m.prefix) {
return false return false
} }

View file

@ -376,7 +376,7 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{"10\\.0\\.(1|2)\\.+", nil}, {"10\\.0\\.(1|2)\\.+", nil},
{"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}},
{"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}}, {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}},
{"foo-.*$", &literalPrefixStringMatcher{prefix: "foo-", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}, {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: anyStringWithoutNewlineMatcher{}}},
{"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}},
{"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})},
{"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})}, {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})},
@ -391,15 +391,15 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{".*foo.*bar.*", nil}, {".*foo.*bar.*", nil},
{`\d*`, nil}, {`\d*`, nil},
{".", nil}, {".", nil},
{"/|/bar.*", &literalPrefixStringMatcher{prefix: "/", prefixCaseSensitive: true, right: orStringMatcher{emptyStringMatcher{}, &literalPrefixStringMatcher{prefix: "bar", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}}}, {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: anyStringWithoutNewlineMatcher{}}}}},
// This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat.
// It would make the code too complex to handle it. // It would make the code too complex to handle it.
{"(.+)/(foo.*|bar$)", nil}, {"(.+)/(foo.*|bar$)", nil},
// Case sensitive alternate with same literal prefix and .* suffix. // Case sensitive alternate with same literal prefix and .* suffix.
{"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixStringMatcher{prefix: "xyz-016a-ixb-", prefixCaseSensitive: true, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "dp", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "op", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}}}, {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: anyStringWithoutNewlineMatcher{}}}}},
// Case insensitive alternate with same literal prefix and .* suffix. // Case insensitive alternate with same literal prefix and .* suffix.
{"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixStringMatcher{prefix: "XYZ-016A-IXB-", prefixCaseSensitive: false, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "DP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "OP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}}}}, {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}},
{"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixStringMatcher{prefix: "XYZ-016A-IXB-", prefixCaseSensitive: false, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "DP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "OP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}}}}, {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}},
// Concatenated variable length selectors are not supported. // Concatenated variable length selectors are not supported.
{"foo.*.*", nil}, {"foo.*.*", nil},
{"foo.+.+", nil}, {"foo.+.+", nil},
@ -408,9 +408,9 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{"aaa.?.?", nil}, {"aaa.?.?", nil},
{"aaa.?.*", nil}, {"aaa.?.*", nil},
// Regexps with ".?". // Regexps with ".?".
{"ext.?|xfs", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
{"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}},
{"foo.?", &literalPrefixStringMatcher{prefix: "foo", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}},
{"f.?o", nil}, {"f.?o", nil},
} { } {
c := c c := c
@ -480,10 +480,13 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) {
re := regexp.MustCompile("^" + c.pattern + "$") re := regexp.MustCompile("^" + c.pattern + "$")
// Pre-condition check: ensure it contains literalPrefixStringMatcher. // Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher.
numPrefixMatchers := 0 numPrefixMatchers := 0
visitStringMatcher(matcher, func(matcher StringMatcher) { visitStringMatcher(matcher, func(matcher StringMatcher) {
if _, ok := matcher.(*literalPrefixStringMatcher); ok { if _, ok := matcher.(*literalPrefixSensitiveStringMatcher); ok {
numPrefixMatchers++
}
if _, ok := matcher.(*literalPrefixInsensitiveStringMatcher); ok {
numPrefixMatchers++ numPrefixMatchers++
} }
}) })
@ -1074,20 +1077,14 @@ func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) {
} }
} }
func TestLiteralPrefixStringMatcher(t *testing.T) { func TestLiteralPrefixSensitiveStringMatcher(t *testing.T) {
m := &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &emptyStringMatcher{}} m := &literalPrefixSensitiveStringMatcher{prefix: "mar", right: &emptyStringMatcher{}}
require.True(t, m.Matches("mar")) require.True(t, m.Matches("mar"))
require.False(t, m.Matches("marco")) require.False(t, m.Matches("marco"))
require.False(t, m.Matches("ma")) require.False(t, m.Matches("ma"))
require.False(t, m.Matches("mAr")) require.False(t, m.Matches("mAr"))
m = &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: false, right: &emptyStringMatcher{}} m = &literalPrefixSensitiveStringMatcher{prefix: "mar", right: &equalStringMatcher{s: "co", caseSensitive: false}}
require.True(t, m.Matches("mar"))
require.False(t, m.Matches("marco"))
require.False(t, m.Matches("ma"))
require.True(t, m.Matches("mAr"))
m = &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &equalStringMatcher{s: "co", caseSensitive: false}}
require.True(t, m.Matches("marco")) require.True(t, m.Matches("marco"))
require.True(t, m.Matches("marCO")) require.True(t, m.Matches("marCO"))
require.False(t, m.Matches("MARco")) require.False(t, m.Matches("MARco"))
@ -1095,6 +1092,14 @@ func TestLiteralPrefixStringMatcher(t *testing.T) {
require.False(t, m.Matches("marcopracucci")) require.False(t, m.Matches("marcopracucci"))
} }
func TestLiteralPrefixInsensitiveStringMatcher(t *testing.T) {
m := &literalPrefixInsensitiveStringMatcher{prefix: "mar", right: &emptyStringMatcher{}}
require.True(t, m.Matches("mar"))
require.False(t, m.Matches("marco"))
require.False(t, m.Matches("ma"))
require.True(t, m.Matches("mAr"))
}
func TestLiteralSuffixStringMatcher(t *testing.T) { func TestLiteralSuffixStringMatcher(t *testing.T) {
m := &literalSuffixStringMatcher{left: &emptyStringMatcher{}, suffix: "co", suffixCaseSensitive: true} m := &literalSuffixStringMatcher{left: &emptyStringMatcher{}, suffix: "co", suffixCaseSensitive: true}
require.True(t, m.Matches("co")) require.True(t, m.Matches("co"))
@ -1184,7 +1189,10 @@ func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatch
visitStringMatcher(casted.right, callback) visitStringMatcher(casted.right, callback)
} }
case *literalPrefixStringMatcher: case *literalPrefixSensitiveStringMatcher:
visitStringMatcher(casted.right, callback)
case *literalPrefixInsensitiveStringMatcher:
visitStringMatcher(casted.right, callback) visitStringMatcher(casted.right, callback)
case *literalSuffixStringMatcher: case *literalSuffixStringMatcher: