mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Merge pull request #13461 from pracucci/upstream-fastregexmatcher
Further optimise FastRegexMatcher
This commit is contained in:
commit
5540d34d94
2
go.mod
2
go.mod
|
@ -74,7 +74,6 @@ require (
|
||||||
go.uber.org/automaxprocs v1.5.3
|
go.uber.org/automaxprocs v1.5.3
|
||||||
go.uber.org/goleak v1.3.0
|
go.uber.org/goleak v1.3.0
|
||||||
go.uber.org/multierr v1.11.0
|
go.uber.org/multierr v1.11.0
|
||||||
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
|
|
||||||
golang.org/x/net v0.22.0
|
golang.org/x/net v0.22.0
|
||||||
golang.org/x/oauth2 v0.18.0
|
golang.org/x/oauth2 v0.18.0
|
||||||
golang.org/x/sync v0.6.0
|
golang.org/x/sync v0.6.0
|
||||||
|
@ -186,6 +185,7 @@ require (
|
||||||
go.opentelemetry.io/otel/metric v1.24.0 // indirect
|
go.opentelemetry.io/otel/metric v1.24.0 // indirect
|
||||||
go.opentelemetry.io/proto/otlp v1.1.0 // indirect
|
go.opentelemetry.io/proto/otlp v1.1.0 // indirect
|
||||||
golang.org/x/crypto v0.21.0 // indirect
|
golang.org/x/crypto v0.21.0 // indirect
|
||||||
|
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
|
||||||
golang.org/x/mod v0.16.0 // indirect
|
golang.org/x/mod v0.16.0 // indirect
|
||||||
golang.org/x/term v0.18.0 // indirect
|
golang.org/x/term v0.18.0 // indirect
|
||||||
golang.org/x/text v0.14.0 // indirect
|
golang.org/x/text v0.14.0 // indirect
|
||||||
|
|
|
@ -118,3 +118,30 @@ func (m *Matcher) GetRegexString() string {
|
||||||
}
|
}
|
||||||
return m.re.GetRegexString()
|
return m.re.GetRegexString()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetMatches returns a set of equality matchers for the current regex matchers if possible.
|
||||||
|
// For example, the regexp `a(b|f)` will return "ab" and "af".
|
||||||
|
// Returns nil if we can't replace the regexp by only equality matchers.
|
||||||
|
func (m *Matcher) SetMatches() []string {
|
||||||
|
if m.re == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return m.re.SetMatches()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prefix returns the required prefix of the value to match, if possible.
|
||||||
|
// It will be empty if it's an equality matcher or if the prefix can't be determined.
|
||||||
|
func (m *Matcher) Prefix() string {
|
||||||
|
if m.re == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return m.re.prefix
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsRegexOptimized returns whether regex is optimized.
|
||||||
|
func (m *Matcher) IsRegexOptimized() bool {
|
||||||
|
if m.re == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return m.re.IsOptimized()
|
||||||
|
}
|
||||||
|
|
|
@ -14,13 +14,14 @@
|
||||||
package labels
|
package labels
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func mustNewMatcher(t *testing.T, mType MatchType, value string) *Matcher {
|
func mustNewMatcher(t *testing.T, mType MatchType, value string) *Matcher {
|
||||||
m, err := NewMatcher(mType, "", value)
|
m, err := NewMatcher(mType, "test_label_name", value)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
@ -81,6 +82,21 @@ func TestMatcher(t *testing.T) {
|
||||||
value: "foo-bar",
|
value: "foo-bar",
|
||||||
match: false,
|
match: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "$*bar"),
|
||||||
|
value: "foo-bar",
|
||||||
|
match: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "bar^+"),
|
||||||
|
value: "foo-bar",
|
||||||
|
match: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "$+bar"),
|
||||||
|
value: "foo-bar",
|
||||||
|
match: false,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
@ -118,6 +134,82 @@ func TestInverse(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPrefix(t *testing.T) {
|
||||||
|
for i, tc := range []struct {
|
||||||
|
matcher *Matcher
|
||||||
|
prefix string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchEqual, "abc"),
|
||||||
|
prefix: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchNotEqual, "abc"),
|
||||||
|
prefix: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "abc.+"),
|
||||||
|
prefix: "abc",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "abcd|abc.+"),
|
||||||
|
prefix: "abc",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchNotRegexp, "abcd|abc.+"),
|
||||||
|
prefix: "abc",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "abc(def|ghj)|ab|a."),
|
||||||
|
prefix: "a",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "foo.+bar|foo.*baz"),
|
||||||
|
prefix: "foo",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "abc|.*"),
|
||||||
|
prefix: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "abc|def"),
|
||||||
|
prefix: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, ".+def"),
|
||||||
|
prefix: "",
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(fmt.Sprintf("%d: %s", i, tc.matcher), func(t *testing.T) {
|
||||||
|
require.Equal(t, tc.prefix, tc.matcher.Prefix())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsRegexOptimized(t *testing.T) {
|
||||||
|
for i, tc := range []struct {
|
||||||
|
matcher *Matcher
|
||||||
|
isRegexOptimized bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchEqual, "abc"),
|
||||||
|
isRegexOptimized: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "."),
|
||||||
|
isRegexOptimized: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
matcher: mustNewMatcher(t, MatchRegexp, "abc.+"),
|
||||||
|
isRegexOptimized: true,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(fmt.Sprintf("%d: %s", i, tc.matcher), func(t *testing.T) {
|
||||||
|
require.Equal(t, tc.isRegexOptimized, tc.matcher.IsRegexOptimized())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkMatchType_String(b *testing.B) {
|
func BenchmarkMatchType_String(b *testing.B) {
|
||||||
for i := 0; i <= b.N; i++ {
|
for i := 0; i <= b.N; i++ {
|
||||||
_ = MatchType(i % int(MatchNotRegexp+1)).String()
|
_ = MatchType(i % int(MatchNotRegexp+1)).String()
|
||||||
|
|
|
@ -14,73 +14,348 @@
|
||||||
package labels
|
package labels
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/grafana/regexp"
|
"github.com/grafana/regexp"
|
||||||
"github.com/grafana/regexp/syntax"
|
"github.com/grafana/regexp/syntax"
|
||||||
)
|
)
|
||||||
|
|
||||||
type FastRegexMatcher struct {
|
const (
|
||||||
re *regexp.Regexp
|
maxSetMatches = 256
|
||||||
prefix string
|
|
||||||
suffix string
|
|
||||||
contains string
|
|
||||||
|
|
||||||
// shortcut for literals
|
// The minimum number of alternate values a regex should have to trigger
|
||||||
literal bool
|
// the optimization done by optimizeEqualStringMatchers() and so use a map
|
||||||
value string
|
// to match values instead of iterating over a list. This value has
|
||||||
|
// been computed running BenchmarkOptimizeEqualStringMatchers.
|
||||||
|
minEqualMultiStringMatcherMapThreshold = 16
|
||||||
|
)
|
||||||
|
|
||||||
|
type FastRegexMatcher struct {
|
||||||
|
// Under some conditions, re is nil because the expression is never parsed.
|
||||||
|
// We store the original string to be able to return it in GetRegexString().
|
||||||
|
reString string
|
||||||
|
re *regexp.Regexp
|
||||||
|
|
||||||
|
setMatches []string
|
||||||
|
stringMatcher StringMatcher
|
||||||
|
prefix string
|
||||||
|
suffix string
|
||||||
|
contains string
|
||||||
|
|
||||||
|
// matchString is the "compiled" function to run by MatchString().
|
||||||
|
matchString func(string) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
||||||
if isLiteral(v) {
|
|
||||||
return &FastRegexMatcher{literal: true, value: v}, nil
|
|
||||||
}
|
|
||||||
re, err := regexp.Compile("^(?:" + v + ")$")
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
parsed, err := syntax.Parse(v, syntax.Perl)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
m := &FastRegexMatcher{
|
m := &FastRegexMatcher{
|
||||||
re: re,
|
reString: v,
|
||||||
}
|
}
|
||||||
|
|
||||||
if parsed.Op == syntax.OpConcat {
|
m.stringMatcher, m.setMatches = optimizeAlternatingLiterals(v)
|
||||||
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
if m.stringMatcher != nil {
|
||||||
|
// If we already have a string matcher, we don't need to parse the regex
|
||||||
|
// or compile the matchString function. This also avoids the behavior in
|
||||||
|
// compileMatchStringFunction where it prefers to use setMatches when
|
||||||
|
// available, even if the string matcher is faster.
|
||||||
|
m.matchString = m.stringMatcher.Matches
|
||||||
|
} else {
|
||||||
|
parsed, err := syntax.Parse(v, syntax.Perl)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// Simplify the syntax tree to run faster.
|
||||||
|
parsed = parsed.Simplify()
|
||||||
|
m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if parsed.Op == syntax.OpConcat {
|
||||||
|
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
||||||
|
}
|
||||||
|
if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
|
||||||
|
m.setMatches = matches
|
||||||
|
}
|
||||||
|
m.stringMatcher = stringMatcherFromRegexp(parsed)
|
||||||
|
m.matchString = m.compileMatchStringFunction()
|
||||||
}
|
}
|
||||||
|
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// compileMatchStringFunction returns the function to run by MatchString().
|
||||||
|
func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
|
||||||
|
// If the only optimization available is the string matcher, then we can just run it.
|
||||||
|
if len(m.setMatches) == 0 && m.prefix == "" && m.suffix == "" && m.contains == "" && m.stringMatcher != nil {
|
||||||
|
return m.stringMatcher.Matches
|
||||||
|
}
|
||||||
|
|
||||||
|
return func(s string) bool {
|
||||||
|
if len(m.setMatches) != 0 {
|
||||||
|
for _, match := range m.setMatches {
|
||||||
|
if match == s {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if m.contains != "" && !strings.Contains(s, m.contains) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if m.stringMatcher != nil {
|
||||||
|
return m.stringMatcher.Matches(s)
|
||||||
|
}
|
||||||
|
return m.re.MatchString(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsOptimized returns true if any fast-path optimization is applied to the
|
||||||
|
// regex matcher.
|
||||||
|
func (m *FastRegexMatcher) IsOptimized() bool {
|
||||||
|
return len(m.setMatches) > 0 || m.stringMatcher != nil || m.prefix != "" || m.suffix != "" || m.contains != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// findSetMatches extracts equality matches from a regexp.
|
||||||
|
// Returns nil if we can't replace the regexp by only equality matchers or the regexp contains
|
||||||
|
// a mix of case sensitive and case insensitive matchers.
|
||||||
|
func findSetMatches(re *syntax.Regexp) (matches []string, caseSensitive bool) {
|
||||||
|
clearBeginEndText(re)
|
||||||
|
|
||||||
|
return findSetMatchesInternal(re, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
func findSetMatchesInternal(re *syntax.Regexp, base string) (matches []string, caseSensitive bool) {
|
||||||
|
switch re.Op {
|
||||||
|
case syntax.OpBeginText:
|
||||||
|
// Correctly handling the begin text operator inside a regex is tricky,
|
||||||
|
// so in this case we fallback to the regex engine.
|
||||||
|
return nil, false
|
||||||
|
case syntax.OpEndText:
|
||||||
|
// Correctly handling the end text operator inside a regex is tricky,
|
||||||
|
// so in this case we fallback to the regex engine.
|
||||||
|
return nil, false
|
||||||
|
case syntax.OpLiteral:
|
||||||
|
return []string{base + string(re.Rune)}, isCaseSensitive(re)
|
||||||
|
case syntax.OpEmptyMatch:
|
||||||
|
if base != "" {
|
||||||
|
return []string{base}, isCaseSensitive(re)
|
||||||
|
}
|
||||||
|
case syntax.OpAlternate:
|
||||||
|
return findSetMatchesFromAlternate(re, base)
|
||||||
|
case syntax.OpCapture:
|
||||||
|
clearCapture(re)
|
||||||
|
return findSetMatchesInternal(re, base)
|
||||||
|
case syntax.OpConcat:
|
||||||
|
return findSetMatchesFromConcat(re, base)
|
||||||
|
case syntax.OpCharClass:
|
||||||
|
if len(re.Rune)%2 != 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
var matches []string
|
||||||
|
var totalSet int
|
||||||
|
for i := 0; i+1 < len(re.Rune); i += 2 {
|
||||||
|
totalSet += int(re.Rune[i+1]-re.Rune[i]) + 1
|
||||||
|
}
|
||||||
|
// Limit the total number of characters that can be used to create matches.
|
||||||
|
// In some cases, like negation [^0-9], a lot of possibilities exist and that
|
||||||
|
// can create thousands of possible matches, at which point we're better off using regexp.
|
||||||
|
if totalSet > maxSetMatches {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
for i := 0; i+1 < len(re.Rune); i += 2 {
|
||||||
|
lo, hi := re.Rune[i], re.Rune[i+1]
|
||||||
|
for c := lo; c <= hi; c++ {
|
||||||
|
matches = append(matches, base+string(c))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matches, isCaseSensitive(re)
|
||||||
|
default:
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func findSetMatchesFromConcat(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) {
|
||||||
|
if len(re.Sub) == 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
clearCapture(re.Sub...)
|
||||||
|
|
||||||
|
matches = []string{base}
|
||||||
|
|
||||||
|
for i := 0; i < len(re.Sub); i++ {
|
||||||
|
var newMatches []string
|
||||||
|
for j, b := range matches {
|
||||||
|
m, caseSensitive := findSetMatchesInternal(re.Sub[i], b)
|
||||||
|
if m == nil {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
if tooManyMatches(newMatches, m...) {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// All matches must have the same case sensitivity. If it's the first set of matches
|
||||||
|
// returned, we store its sensitivity as the expected case, and then we'll check all
|
||||||
|
// other ones.
|
||||||
|
if i == 0 && j == 0 {
|
||||||
|
matchesCaseSensitive = caseSensitive
|
||||||
|
}
|
||||||
|
if matchesCaseSensitive != caseSensitive {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
newMatches = append(newMatches, m...)
|
||||||
|
}
|
||||||
|
matches = newMatches
|
||||||
|
}
|
||||||
|
|
||||||
|
return matches, matchesCaseSensitive
|
||||||
|
}
|
||||||
|
|
||||||
|
func findSetMatchesFromAlternate(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) {
|
||||||
|
for i, sub := range re.Sub {
|
||||||
|
found, caseSensitive := findSetMatchesInternal(sub, base)
|
||||||
|
if found == nil {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
if tooManyMatches(matches, found...) {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// All matches must have the same case sensitivity. If it's the first set of matches
|
||||||
|
// returned, we store its sensitivity as the expected case, and then we'll check all
|
||||||
|
// other ones.
|
||||||
|
if i == 0 {
|
||||||
|
matchesCaseSensitive = caseSensitive
|
||||||
|
}
|
||||||
|
if matchesCaseSensitive != caseSensitive {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
matches = append(matches, found...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return matches, matchesCaseSensitive
|
||||||
|
}
|
||||||
|
|
||||||
|
// clearCapture removes capture operations, as they are not used for matching.
|
||||||
|
func clearCapture(regs ...*syntax.Regexp) {
|
||||||
|
for _, r := range regs {
|
||||||
|
// Iterate on the regexp because capture groups could be nested.
|
||||||
|
for r.Op == syntax.OpCapture {
|
||||||
|
*r = *r.Sub[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// clearBeginEndText removes the begin and end text from the regexp. Prometheus regexps are anchored to the beginning and end of the string.
|
||||||
|
func clearBeginEndText(re *syntax.Regexp) {
|
||||||
|
// Do not clear begin/end text from an alternate operator because it could
|
||||||
|
// change the actual regexp properties.
|
||||||
|
if re.Op == syntax.OpAlternate {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(re.Sub) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(re.Sub) == 1 {
|
||||||
|
if re.Sub[0].Op == syntax.OpBeginText || re.Sub[0].Op == syntax.OpEndText {
|
||||||
|
// We need to remove this element. Since it's the only one, we convert into a matcher of an empty string.
|
||||||
|
// OpEmptyMatch is regexp's nop operator.
|
||||||
|
re.Op = syntax.OpEmptyMatch
|
||||||
|
re.Sub = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if re.Sub[0].Op == syntax.OpBeginText {
|
||||||
|
re.Sub = re.Sub[1:]
|
||||||
|
}
|
||||||
|
if re.Sub[len(re.Sub)-1].Op == syntax.OpEndText {
|
||||||
|
re.Sub = re.Sub[:len(re.Sub)-1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// isCaseInsensitive tells if a regexp is case insensitive.
|
||||||
|
// The flag should be checked at each level of the syntax tree.
|
||||||
|
func isCaseInsensitive(reg *syntax.Regexp) bool {
|
||||||
|
return (reg.Flags & syntax.FoldCase) != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// isCaseSensitive tells if a regexp is case sensitive.
|
||||||
|
// The flag should be checked at each level of the syntax tree.
|
||||||
|
func isCaseSensitive(reg *syntax.Regexp) bool {
|
||||||
|
return !isCaseInsensitive(reg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tooManyMatches guards against creating too many set matches.
|
||||||
|
func tooManyMatches(matches []string, added ...string) bool {
|
||||||
|
return len(matches)+len(added) > maxSetMatches
|
||||||
|
}
|
||||||
|
|
||||||
func (m *FastRegexMatcher) MatchString(s string) bool {
|
func (m *FastRegexMatcher) MatchString(s string) bool {
|
||||||
if m.literal {
|
return m.matchString(s)
|
||||||
return s == m.value
|
}
|
||||||
}
|
|
||||||
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
|
func (m *FastRegexMatcher) SetMatches() []string {
|
||||||
return false
|
// IMPORTANT: always return a copy, otherwise if the caller manipulates this slice it will
|
||||||
}
|
// also get manipulated in the cached FastRegexMatcher instance.
|
||||||
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
|
return slices.Clone(m.setMatches)
|
||||||
return false
|
|
||||||
}
|
|
||||||
if m.contains != "" && !strings.Contains(s, m.contains) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return m.re.MatchString(s)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *FastRegexMatcher) GetRegexString() string {
|
func (m *FastRegexMatcher) GetRegexString() string {
|
||||||
if m.literal {
|
return m.reString
|
||||||
return m.value
|
|
||||||
}
|
|
||||||
return m.re.String()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isLiteral(re string) bool {
|
// optimizeAlternatingLiterals optimizes a regex of the form
|
||||||
return regexp.QuoteMeta(re) == re
|
//
|
||||||
|
// `literal1|literal2|literal3|...`
|
||||||
|
//
|
||||||
|
// this function returns an optimized StringMatcher or nil if the regex
|
||||||
|
// cannot be optimized in this way, and a list of setMatches up to maxSetMatches.
|
||||||
|
func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
|
||||||
|
if len(s) == 0 {
|
||||||
|
return emptyStringMatcher{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
estimatedAlternates := strings.Count(s, "|") + 1
|
||||||
|
|
||||||
|
// If there are no alternates, check if the string is a literal
|
||||||
|
if estimatedAlternates == 1 {
|
||||||
|
if regexp.QuoteMeta(s) == s {
|
||||||
|
return &equalStringMatcher{s: s, caseSensitive: true}, []string{s}
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates)
|
||||||
|
|
||||||
|
for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') {
|
||||||
|
// Split the string into the next literal and the remainder
|
||||||
|
subMatch := s[:end]
|
||||||
|
s = s[end+1:]
|
||||||
|
|
||||||
|
// break if any of the submatches are not literals
|
||||||
|
if regexp.QuoteMeta(subMatch) != subMatch {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
multiMatcher.add(subMatch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// break if the remainder is not a literal
|
||||||
|
if regexp.QuoteMeta(s) != s {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
multiMatcher.add(s)
|
||||||
|
|
||||||
|
return multiMatcher, multiMatcher.setMatches()
|
||||||
}
|
}
|
||||||
|
|
||||||
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
|
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
|
||||||
|
@ -123,3 +398,540 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StringMatcher is a matcher that matches a string in place of a regular expression.
|
||||||
|
type StringMatcher interface {
|
||||||
|
Matches(s string) bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringMatcherFromRegexp attempts to replace a common regexp with a string matcher.
|
||||||
|
// It returns nil if the regexp is not supported.
|
||||||
|
func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
||||||
|
clearBeginEndText(re)
|
||||||
|
|
||||||
|
m := stringMatcherFromRegexpInternal(re)
|
||||||
|
m = optimizeEqualStringMatchers(m, minEqualMultiStringMatcherMapThreshold)
|
||||||
|
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
|
||||||
|
clearCapture(re)
|
||||||
|
|
||||||
|
switch re.Op {
|
||||||
|
case syntax.OpBeginText:
|
||||||
|
// Correctly handling the begin text operator inside a regex is tricky,
|
||||||
|
// so in this case we fallback to the regex engine.
|
||||||
|
return nil
|
||||||
|
case syntax.OpEndText:
|
||||||
|
// Correctly handling the end text operator inside a regex is tricky,
|
||||||
|
// so in this case we fallback to the regex engine.
|
||||||
|
return nil
|
||||||
|
case syntax.OpPlus:
|
||||||
|
if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &anyNonEmptyStringMatcher{
|
||||||
|
matchNL: re.Sub[0].Op == syntax.OpAnyChar,
|
||||||
|
}
|
||||||
|
case syntax.OpStar:
|
||||||
|
if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the newline is valid, then this matcher literally matches any string (even empty).
|
||||||
|
if re.Sub[0].Op == syntax.OpAnyChar {
|
||||||
|
return trueMatcher{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Any string is fine (including an empty one), as long as it doesn't contain any newline.
|
||||||
|
return anyStringWithoutNewlineMatcher{}
|
||||||
|
case syntax.OpQuest:
|
||||||
|
// Only optimize for ".?".
|
||||||
|
if len(re.Sub) != 1 || (re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &zeroOrOneCharacterStringMatcher{
|
||||||
|
matchNL: re.Sub[0].Op == syntax.OpAnyChar,
|
||||||
|
}
|
||||||
|
case syntax.OpEmptyMatch:
|
||||||
|
return emptyStringMatcher{}
|
||||||
|
|
||||||
|
case syntax.OpLiteral:
|
||||||
|
return &equalStringMatcher{
|
||||||
|
s: string(re.Rune),
|
||||||
|
caseSensitive: !isCaseInsensitive(re),
|
||||||
|
}
|
||||||
|
case syntax.OpAlternate:
|
||||||
|
or := make([]StringMatcher, 0, len(re.Sub))
|
||||||
|
for _, sub := range re.Sub {
|
||||||
|
m := stringMatcherFromRegexpInternal(sub)
|
||||||
|
if m == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
or = append(or, m)
|
||||||
|
}
|
||||||
|
return orStringMatcher(or)
|
||||||
|
case syntax.OpConcat:
|
||||||
|
clearCapture(re.Sub...)
|
||||||
|
|
||||||
|
if len(re.Sub) == 0 {
|
||||||
|
return emptyStringMatcher{}
|
||||||
|
}
|
||||||
|
if len(re.Sub) == 1 {
|
||||||
|
return stringMatcherFromRegexpInternal(re.Sub[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
var left, right StringMatcher
|
||||||
|
|
||||||
|
// Check whether the first and/or last sub-expressions are "any" matchers (`+`, `*` or `?` operators).
|
||||||
|
if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar || re.Sub[0].Op == syntax.OpQuest {
|
||||||
|
left = stringMatcherFromRegexpInternal(re.Sub[0])
|
||||||
|
if left == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
re.Sub = re.Sub[1:]
|
||||||
|
}
|
||||||
|
if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar || re.Sub[len(re.Sub)-1].Op == syntax.OpQuest {
|
||||||
|
right = stringMatcherFromRegexpInternal(re.Sub[len(re.Sub)-1])
|
||||||
|
if right == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
re.Sub = re.Sub[:len(re.Sub)-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
matches, matchesCaseSensitive := findSetMatchesInternal(re, "")
|
||||||
|
|
||||||
|
if len(matches) == 0 && len(re.Sub) == 2 {
|
||||||
|
// We have not found fixed set matches. We look for other known cases that
|
||||||
|
// we can optimize.
|
||||||
|
switch {
|
||||||
|
// Prefix is literal.
|
||||||
|
case right == nil && re.Sub[0].Op == syntax.OpLiteral:
|
||||||
|
right = stringMatcherFromRegexpInternal(re.Sub[1])
|
||||||
|
if right != nil {
|
||||||
|
matches = []string{string(re.Sub[0].Rune)}
|
||||||
|
matchesCaseSensitive = !isCaseInsensitive(re.Sub[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Suffix is literal.
|
||||||
|
case left == nil && re.Sub[1].Op == syntax.OpLiteral:
|
||||||
|
left = stringMatcherFromRegexpInternal(re.Sub[0])
|
||||||
|
if left != nil {
|
||||||
|
matches = []string{string(re.Sub[1].Rune)}
|
||||||
|
matchesCaseSensitive = !isCaseInsensitive(re.Sub[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure we've found some literals to match (optionally with a left and/or right matcher).
|
||||||
|
// If not, then this optimization doesn't trigger.
|
||||||
|
if len(matches) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the right (and best) matcher based on what we've found.
|
||||||
|
switch {
|
||||||
|
// No left and right matchers (only fixed set matches).
|
||||||
|
case left == nil && right == nil:
|
||||||
|
// If there are no "any" matchers on either side, it's a concat of literals.
|
||||||
|
or := make([]StringMatcher, 0, len(matches))
|
||||||
|
for _, match := range matches {
|
||||||
|
or = append(or, &equalStringMatcher{
|
||||||
|
s: match,
|
||||||
|
caseSensitive: matchesCaseSensitive,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return orStringMatcher(or)
|
||||||
|
|
||||||
|
// Right matcher with 1 fixed set match.
|
||||||
|
case left == nil && len(matches) == 1:
|
||||||
|
return &literalPrefixStringMatcher{
|
||||||
|
prefix: matches[0],
|
||||||
|
prefixCaseSensitive: matchesCaseSensitive,
|
||||||
|
right: right,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Left matcher with 1 fixed set match.
|
||||||
|
case right == nil && len(matches) == 1:
|
||||||
|
return &literalSuffixStringMatcher{
|
||||||
|
left: left,
|
||||||
|
suffix: matches[0],
|
||||||
|
suffixCaseSensitive: matchesCaseSensitive,
|
||||||
|
}
|
||||||
|
|
||||||
|
// We found literals in the middle. We can trigger the fast path only if
|
||||||
|
// the matches are case sensitive because containsStringMatcher doesn't
|
||||||
|
// support case insensitive.
|
||||||
|
case matchesCaseSensitive:
|
||||||
|
return &containsStringMatcher{
|
||||||
|
substrings: matches,
|
||||||
|
left: left,
|
||||||
|
right: right,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// containsStringMatcher matches a string if it contains any of the substrings.
|
||||||
|
// If left and right are not nil, it's a contains operation where left and right must match.
|
||||||
|
// If left is nil, it's a hasPrefix operation and right must match.
|
||||||
|
// Finally, if right is nil it's a hasSuffix operation and left must match.
|
||||||
|
type containsStringMatcher struct {
|
||||||
|
// The matcher that must match the left side. Can be nil.
|
||||||
|
left StringMatcher
|
||||||
|
|
||||||
|
// At least one of these strings must match in the "middle", between left and right matchers.
|
||||||
|
substrings []string
|
||||||
|
|
||||||
|
// The matcher that must match the right side. Can be nil.
|
||||||
|
right StringMatcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *containsStringMatcher) Matches(s string) bool {
|
||||||
|
for _, substr := range m.substrings {
|
||||||
|
switch {
|
||||||
|
case m.right != nil && m.left != nil:
|
||||||
|
searchStartPos := 0
|
||||||
|
|
||||||
|
for {
|
||||||
|
pos := strings.Index(s[searchStartPos:], substr)
|
||||||
|
if pos < 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since we started searching from searchStartPos, we have to add that offset
|
||||||
|
// to get the actual position of the substring inside the text.
|
||||||
|
pos += searchStartPos
|
||||||
|
|
||||||
|
// If both the left and right matchers match, then we can stop searching because
|
||||||
|
// we've found a match.
|
||||||
|
if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(substr):]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue searching for another occurrence of the substring inside the text.
|
||||||
|
searchStartPos = pos + 1
|
||||||
|
}
|
||||||
|
case m.left != nil:
|
||||||
|
// If we have to check for characters on the left then we need to match a suffix.
|
||||||
|
if strings.HasSuffix(s, substr) && m.left.Matches(s[:len(s)-len(substr)]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
case m.right != nil:
|
||||||
|
if strings.HasPrefix(s, substr) && m.right.Matches(s[len(substr):]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// literalPrefixStringMatcher matches a string with the given literal prefix and right side matcher.
|
||||||
|
type literalPrefixStringMatcher struct {
|
||||||
|
prefix string
|
||||||
|
prefixCaseSensitive bool
|
||||||
|
|
||||||
|
// The matcher that must match the right side. Must not be nil: Matches calls it unconditionally.
|
||||||
|
right StringMatcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *literalPrefixStringMatcher) Matches(s string) bool {
|
||||||
|
// Ensure the prefix matches.
|
||||||
|
if m.prefixCaseSensitive && !strings.HasPrefix(s, m.prefix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !m.prefixCaseSensitive && !hasPrefixCaseInsensitive(s, m.prefix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure the right side matches.
|
||||||
|
return m.right.Matches(s[len(m.prefix):])
|
||||||
|
}
|
||||||
|
|
||||||
|
// literalSuffixStringMatcher matches a string with the given literal suffix and left side matcher.
|
||||||
|
type literalSuffixStringMatcher struct {
|
||||||
|
// The matcher that must match the left side. Can be nil.
|
||||||
|
left StringMatcher
|
||||||
|
|
||||||
|
suffix string
|
||||||
|
suffixCaseSensitive bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *literalSuffixStringMatcher) Matches(s string) bool {
|
||||||
|
// Ensure the suffix matches.
|
||||||
|
if m.suffixCaseSensitive && !strings.HasSuffix(s, m.suffix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !m.suffixCaseSensitive && !hasSuffixCaseInsensitive(s, m.suffix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure the left side matches.
|
||||||
|
return m.left.Matches(s[:len(s)-len(m.suffix)])
|
||||||
|
}
|
||||||
|
|
||||||
|
// emptyStringMatcher accepts the empty string and nothing else.
type emptyStringMatcher struct{}

// Matches reports whether s is the empty string.
func (emptyStringMatcher) Matches(s string) bool {
	return s == ""
}
|
||||||
|
|
||||||
|
// orStringMatcher matches any of the sub-matchers.
|
||||||
|
type orStringMatcher []StringMatcher
|
||||||
|
|
||||||
|
func (m orStringMatcher) Matches(s string) bool {
|
||||||
|
for _, matcher := range m {
|
||||||
|
if matcher.Matches(s) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalStringMatcher matches a single string exactly, optionally ignoring case.
type equalStringMatcher struct {
	// s is the value the input is compared against.
	s string
	// caseSensitive selects a byte-wise comparison; otherwise Unicode case folding is used.
	caseSensitive bool
}

// Matches reports whether s equals the configured value under the configured
// case sensitivity.
func (m *equalStringMatcher) Matches(s string) bool {
	if !m.caseSensitive {
		return strings.EqualFold(m.s, s)
	}
	return s == m.s
}
|
||||||
|
|
||||||
|
type multiStringMatcherBuilder interface {
|
||||||
|
StringMatcher
|
||||||
|
add(s string)
|
||||||
|
setMatches() []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize int) multiStringMatcherBuilder {
|
||||||
|
// If the estimated size is low enough, it's faster to use a slice instead of a map.
|
||||||
|
if estimatedSize < minEqualMultiStringMatcherMapThreshold {
|
||||||
|
return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &equalMultiStringMapMatcher{
|
||||||
|
values: make(map[string]struct{}, estimatedSize),
|
||||||
|
caseSensitive: caseSensitive,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalMultiStringSliceMatcher accepts a string when it equals one of the values
// kept in a slice, scanning them linearly.
type equalMultiStringSliceMatcher struct {
	// values holds the accepted strings in insertion order.
	values []string

	// caseSensitive selects exact comparison; otherwise Unicode case folding is used.
	caseSensitive bool
}

// add appends s to the accepted values.
func (m *equalMultiStringSliceMatcher) add(s string) {
	m.values = append(m.values, s)
}

// setMatches returns the accepted values (the matcher's own slice, not a copy).
func (m *equalMultiStringSliceMatcher) setMatches() []string {
	return m.values
}

// Matches reports whether s equals any accepted value under the configured
// case sensitivity.
func (m *equalMultiStringSliceMatcher) Matches(s string) bool {
	if !m.caseSensitive {
		for i := range m.values {
			if strings.EqualFold(s, m.values[i]) {
				return true
			}
		}
		return false
	}

	for i := range m.values {
		if m.values[i] == s {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// equalMultiStringMapMatcher matches a string exactly against a map of valid values.
|
||||||
|
type equalMultiStringMapMatcher struct {
|
||||||
|
// values contains values to match a string against. If the matching is case insensitive,
|
||||||
|
// the values here must be lowercase.
|
||||||
|
values map[string]struct{}
|
||||||
|
|
||||||
|
caseSensitive bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringMapMatcher) add(s string) {
|
||||||
|
if !m.caseSensitive {
|
||||||
|
s = strings.ToLower(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
m.values[s] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringMapMatcher) setMatches() []string {
|
||||||
|
if len(m.values) >= maxSetMatches {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
matches := make([]string, 0, len(m.values))
|
||||||
|
for s := range m.values {
|
||||||
|
matches = append(matches, s)
|
||||||
|
}
|
||||||
|
return matches
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringMapMatcher) Matches(s string) bool {
|
||||||
|
if !m.caseSensitive {
|
||||||
|
s = strings.ToLower(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, ok := m.values[s]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// anyStringWithoutNewlineMatcher is a StringMatcher accepting every string,
// the empty one included, provided it holds no newline character.
type anyStringWithoutNewlineMatcher struct{}

// Matches reports whether s is free of newline characters.
func (anyStringWithoutNewlineMatcher) Matches(s string) bool {
	// The newline is a single ASCII byte, so a byte search is sufficient.
	newlineAt := strings.IndexByte(s, '\n')
	return newlineAt < 0
}
|
||||||
|
|
||||||
|
// anyNonEmptyStringMatcher is a StringMatcher accepting every string of at least
// one byte; newlines are tolerated only when matchNL is set.
type anyNonEmptyStringMatcher struct {
	// matchNL allows the input to contain newline characters.
	matchNL bool
}

// Matches reports whether s is non-empty and, unless matchNL is set, free of newlines.
func (m *anyNonEmptyStringMatcher) Matches(s string) bool {
	if s == "" {
		return false
	}
	if m.matchNL {
		// Newlines are allowed, so being non-empty is all that is required.
		return true
	}

	// The newline is a single ASCII byte, so a byte search is sufficient.
	return strings.IndexByte(s, '\n') < 0
}
|
||||||
|
|
||||||
|
// zeroOrOneCharacterStringMatcher is a StringMatcher which matches zero or one occurrence
// of any character. The newline character is matched only if matchNL is set to true.
type zeroOrOneCharacterStringMatcher struct {
	// matchNL allows the single character to be a newline.
	matchNL bool
}

// Matches reports whether s is empty or made of exactly one character that is
// allowed by matchNL.
//
// Characters are counted as runes, not bytes: a single multi-byte UTF-8 character
// (e.g. "é") is one character and must match. Ranging over the string yields one
// rune per iteration, and one replacement rune per invalid byte, so a lone invalid
// byte still counts as one character.
func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
	seenOne := false
	for _, r := range s {
		// A second rune means more than one character.
		if seenOne {
			return false
		}
		seenOne = true

		// The only character is a newline, which is accepted only on request.
		if r == '\n' && !m.matchNL {
			return false
		}
	}

	// Zero or one acceptable character.
	return true
}
|
||||||
|
|
||||||
|
// trueMatcher is a StringMatcher that accepts every input unconditionally.
type trueMatcher struct{}

// Matches ignores its argument and always reports true.
func (trueMatcher) Matches(string) bool {
	return true
}
|
||||||
|
|
||||||
|
// optimizeEqualStringMatchers optimize a specific case where all matchers are made by an
|
||||||
|
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher). In
|
||||||
|
// this specific case, when we have many strings to match against we can use a map instead
|
||||||
|
// of iterating over the list of strings.
|
||||||
|
func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher {
|
||||||
|
var (
|
||||||
|
caseSensitive bool
|
||||||
|
caseSensitiveSet bool
|
||||||
|
numValues int
|
||||||
|
)
|
||||||
|
|
||||||
|
// Analyse the input StringMatcher to count the number of occurrences
|
||||||
|
// and ensure all of them have the same case sensitivity.
|
||||||
|
analyseCallback := func(matcher *equalStringMatcher) bool {
|
||||||
|
// Ensure we don't have mixed case sensitivity.
|
||||||
|
if caseSensitiveSet && caseSensitive != matcher.caseSensitive {
|
||||||
|
return false
|
||||||
|
} else if !caseSensitiveSet {
|
||||||
|
caseSensitive = matcher.caseSensitive
|
||||||
|
caseSensitiveSet = true
|
||||||
|
}
|
||||||
|
|
||||||
|
numValues++
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if !findEqualStringMatchers(input, analyseCallback) {
|
||||||
|
return input
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the number of values found is less than the threshold, then we should skip the optimization.
|
||||||
|
if numValues < threshold {
|
||||||
|
return input
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse again the input StringMatcher to extract all values and storing them.
|
||||||
|
// We can skip the case sensitivity check because we've already checked it and
|
||||||
|
// if the code reach this point then it means all matchers have the same case sensitivity.
|
||||||
|
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues)
|
||||||
|
|
||||||
|
// Ignore the return value because we already iterated over the input StringMatcher
|
||||||
|
// and it was all good.
|
||||||
|
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
||||||
|
multiMatcher.add(matcher.s)
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
return multiMatcher
|
||||||
|
}
|
||||||
|
|
||||||
|
// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
|
||||||
|
// equalStringMatcher found. Returns true if and only if the input StringMatcher is *only*
|
||||||
|
// composed by an alternation of equalStringMatcher.
|
||||||
|
func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool {
|
||||||
|
orInput, ok := input.(orStringMatcher)
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range orInput {
|
||||||
|
switch casted := m.(type) {
|
||||||
|
case orStringMatcher:
|
||||||
|
if !findEqualStringMatchers(m, callback) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
case *equalStringMatcher:
|
||||||
|
if !callback(casted) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
// It's not an equal string matcher, so we have to stop searching
|
||||||
|
// cause this optimization can't be applied.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasPrefixCaseInsensitive reports whether the first len(prefix) bytes of s are
// equal to prefix under Unicode case folding.
func hasPrefixCaseInsensitive(s, prefix string) bool {
	if len(prefix) > len(s) {
		return false
	}
	head := s[:len(prefix)]
	return strings.EqualFold(head, prefix)
}
|
||||||
|
|
||||||
|
// hasSuffixCaseInsensitive reports whether the last len(suffix) bytes of s are
// equal to suffix under Unicode case folding.
func hasSuffixCaseInsensitive(s, suffix string) bool {
	if len(suffix) > len(s) {
		return false
	}
	tail := s[len(s)-len(suffix):]
	return strings.EqualFold(tail, suffix)
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -3706,7 +3706,31 @@ func TestParseExpressions(t *testing.T) {
|
||||||
|
|
||||||
if !test.fail {
|
if !test.fail {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, test.expected, expr, "error on input '%s'", test.input)
|
expected := test.expected
|
||||||
|
|
||||||
|
// The FastRegexMatcher is not comparable with a deep equal, so only compare its String() version.
|
||||||
|
if actualVector, ok := expr.(*VectorSelector); ok {
|
||||||
|
require.IsType(t, &VectorSelector{}, test.expected, "error on input '%s'", test.input)
|
||||||
|
expectedVector := test.expected.(*VectorSelector)
|
||||||
|
|
||||||
|
require.Len(t, actualVector.LabelMatchers, len(expectedVector.LabelMatchers), "error on input '%s'", test.input)
|
||||||
|
|
||||||
|
for i := 0; i < len(actualVector.LabelMatchers); i++ {
|
||||||
|
expectedMatcher := expectedVector.LabelMatchers[i].String()
|
||||||
|
actualMatcher := actualVector.LabelMatchers[i].String()
|
||||||
|
|
||||||
|
require.Equal(t, expectedMatcher, actualMatcher, "unexpected label matcher '%s' on input '%s'", actualMatcher, test.input)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make a shallow copy of the expected expr (because the test cases are defined in a global variable)
|
||||||
|
// and then reset the LabelMatcher to not compared them with the following deep equal.
|
||||||
|
expectedCopy := *expectedVector
|
||||||
|
expectedCopy.LabelMatchers = nil
|
||||||
|
expected = &expectedCopy
|
||||||
|
actualVector.LabelMatchers = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Equal(t, expected, expr, "error on input '%s'", test.input)
|
||||||
} else {
|
} else {
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
require.Contains(t, err.Error(), test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error())
|
require.Contains(t, err.Error(), test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error())
|
||||||
|
|
|
@ -19,8 +19,6 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"github.com/oklog/ulid"
|
"github.com/oklog/ulid"
|
||||||
|
|
||||||
|
@ -35,20 +33,6 @@ import (
|
||||||
"github.com/prometheus/prometheus/util/annotations"
|
"github.com/prometheus/prometheus/util/annotations"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Bitmap used by func isRegexMetaCharacter to check whether a character needs to be escaped.
|
|
||||||
var regexMetaCharacterBytes [16]byte
|
|
||||||
|
|
||||||
// isRegexMetaCharacter reports whether byte b needs to be escaped.
|
|
||||||
func isRegexMetaCharacter(b byte) bool {
|
|
||||||
return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
for _, b := range []byte(`.+*?()|[]{}^$`) {
|
|
||||||
regexMetaCharacterBytes[b%16] |= 1 << (b / 16)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type blockBaseQuerier struct {
|
type blockBaseQuerier struct {
|
||||||
blockID ulid.ULID
|
blockID ulid.ULID
|
||||||
index IndexReader
|
index IndexReader
|
||||||
|
@ -195,55 +179,6 @@ func (q *blockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *
|
||||||
return NewBlockChunkSeriesSet(q.blockID, q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming)
|
return NewBlockChunkSeriesSet(q.blockID, q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming)
|
||||||
}
|
}
|
||||||
|
|
||||||
func findSetMatches(pattern string) []string {
|
|
||||||
// Return empty matches if the wrapper from Prometheus is missing.
|
|
||||||
if len(pattern) < 6 || pattern[:4] != "^(?:" || pattern[len(pattern)-2:] != ")$" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
escaped := false
|
|
||||||
sets := []*strings.Builder{{}}
|
|
||||||
init := 4
|
|
||||||
end := len(pattern) - 2
|
|
||||||
// If the regex is wrapped in a group we can remove the first and last parentheses
|
|
||||||
if pattern[init] == '(' && pattern[end-1] == ')' {
|
|
||||||
init++
|
|
||||||
end--
|
|
||||||
}
|
|
||||||
for i := init; i < end; i++ {
|
|
||||||
if escaped {
|
|
||||||
switch {
|
|
||||||
case isRegexMetaCharacter(pattern[i]):
|
|
||||||
sets[len(sets)-1].WriteByte(pattern[i])
|
|
||||||
case pattern[i] == '\\':
|
|
||||||
sets[len(sets)-1].WriteByte('\\')
|
|
||||||
default:
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
escaped = false
|
|
||||||
} else {
|
|
||||||
switch {
|
|
||||||
case isRegexMetaCharacter(pattern[i]):
|
|
||||||
if pattern[i] == '|' {
|
|
||||||
sets = append(sets, &strings.Builder{})
|
|
||||||
} else {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
case pattern[i] == '\\':
|
|
||||||
escaped = true
|
|
||||||
default:
|
|
||||||
sets[len(sets)-1].WriteByte(pattern[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
matches := make([]string, 0, len(sets))
|
|
||||||
for _, s := range sets {
|
|
||||||
if s.Len() > 0 {
|
|
||||||
matches = append(matches, s.String())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return matches
|
|
||||||
}
|
|
||||||
|
|
||||||
// PostingsForMatchers assembles a single postings iterator against the index reader
|
// PostingsForMatchers assembles a single postings iterator against the index reader
|
||||||
// based on the given matchers. The resulting postings are not ordered by series.
|
// based on the given matchers. The resulting postings are not ordered by series.
|
||||||
func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) {
|
func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) {
|
||||||
|
@ -385,7 +320,7 @@ func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher)
|
||||||
|
|
||||||
// Fast-path for set matching.
|
// Fast-path for set matching.
|
||||||
if m.Type == labels.MatchRegexp {
|
if m.Type == labels.MatchRegexp {
|
||||||
setMatches := findSetMatches(m.GetRegexString())
|
setMatches := m.SetMatches()
|
||||||
if len(setMatches) > 0 {
|
if len(setMatches) > 0 {
|
||||||
return ix.Postings(ctx, m.Name, setMatches...)
|
return ix.Postings(ctx, m.Name, setMatches...)
|
||||||
}
|
}
|
||||||
|
@ -416,7 +351,7 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma
|
||||||
// Inverse of a MatchNotRegexp is MatchRegexp (double negation).
|
// Inverse of a MatchNotRegexp is MatchRegexp (double negation).
|
||||||
// Fast-path for set matching.
|
// Fast-path for set matching.
|
||||||
if m.Type == labels.MatchNotRegexp {
|
if m.Type == labels.MatchNotRegexp {
|
||||||
setMatches := findSetMatches(m.GetRegexString())
|
setMatches := m.SetMatches()
|
||||||
if len(setMatches) > 0 {
|
if len(setMatches) > 0 {
|
||||||
return ix.Postings(ctx, m.Name, setMatches...)
|
return ix.Postings(ctx, m.Name, setMatches...)
|
||||||
}
|
}
|
||||||
|
|
|
@ -2658,54 +2658,6 @@ func BenchmarkSetMatcher(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
|
||||||
func TestFindSetMatches(t *testing.T) {
|
|
||||||
cases := []struct {
|
|
||||||
pattern string
|
|
||||||
exp []string
|
|
||||||
}{
|
|
||||||
// Single value, coming from a `bar=~"foo"` selector.
|
|
||||||
{
|
|
||||||
pattern: "^(?:foo)$",
|
|
||||||
exp: []string{
|
|
||||||
"foo",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
// Simple sets.
|
|
||||||
{
|
|
||||||
pattern: "^(?:foo|bar|baz)$",
|
|
||||||
exp: []string{
|
|
||||||
"foo",
|
|
||||||
"bar",
|
|
||||||
"baz",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
// Simple sets containing escaped characters.
|
|
||||||
{
|
|
||||||
pattern: "^(?:fo\\.o|bar\\?|\\^baz)$",
|
|
||||||
exp: []string{
|
|
||||||
"fo.o",
|
|
||||||
"bar?",
|
|
||||||
"^baz",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
// Simple sets containing special characters without escaping.
|
|
||||||
{
|
|
||||||
pattern: "^(?:fo.o|bar?|^baz)$",
|
|
||||||
exp: nil,
|
|
||||||
},
|
|
||||||
// Missing wrapper.
|
|
||||||
{
|
|
||||||
pattern: "foo|bar|baz",
|
|
||||||
exp: nil,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range cases {
|
|
||||||
require.Equal(t, c.exp, findSetMatches(c.pattern), "Evaluating %s, unexpected result.", c.pattern)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestPostingsForMatchers(t *testing.T) {
|
func TestPostingsForMatchers(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
|
@ -3310,7 +3262,7 @@ func TestPostingsForMatcher(t *testing.T) {
|
||||||
{
|
{
|
||||||
// Test case for double quoted regex matcher
|
// Test case for double quoted regex matcher
|
||||||
matcher: labels.MustNewMatcher(labels.MatchRegexp, "test", "^(?:a|b)$"),
|
matcher: labels.MustNewMatcher(labels.MatchRegexp, "test", "^(?:a|b)$"),
|
||||||
hasError: true,
|
hasError: false,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue