mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
Optimize long alternate lists (#463)
* Use a prefix trie for long alternate lists * Add test for non terminal node * Fix panic in FuzzFastRegexMatcher_WithFuzzyRegularExpressions when the fuzzy regex is invalid Signed-off-by: Marco Pracucci <marco@pracucci.com> * Address PR feedback * Update model/labels/regexp_test.go Co-authored-by: Marco Pracucci <marco@pracucci.com> * Replace trie with slice or map depending on input size * Fix tests * Pull in tests from @pracucci's branch * Add setMatches back in * Use stringMatcher when it's faster * Fix linter * Estimate alternates ahead of time * Simplify construction with `IndexByte` * Add test and early return for empty regexp. * Fix race conditions in tests --------- Signed-off-by: Marco Pracucci <marco@pracucci.com> Co-authored-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
parent
9fe1bd2d63
commit
ae170f644c
|
@ -126,7 +126,7 @@ func (m *Matcher) SetMatches() []string {
|
||||||
if m.re == nil {
|
if m.re == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return m.re.setMatches
|
return m.re.SetMatches()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prefix returns the required prefix of the value to match, if possible.
|
// Prefix returns the required prefix of the value to match, if possible.
|
||||||
|
|
|
@ -25,9 +25,10 @@ const (
|
||||||
maxSetMatches = 256
|
maxSetMatches = 256
|
||||||
|
|
||||||
// The minimum number of alternate values a regex should have to trigger
|
// The minimum number of alternate values a regex should have to trigger
|
||||||
// the optimization done by optimizeEqualStringMatchers(). This value has
|
// the optimization done by optimizeEqualStringMatchers() and so use a map
|
||||||
|
// to match values instead of iterating over a list. This value has
|
||||||
// been computed running BenchmarkOptimizeEqualStringMatchers.
|
// been computed running BenchmarkOptimizeEqualStringMatchers.
|
||||||
optimizeEqualStringMatchersThreshold = 16
|
minEqualMultiStringMatcherMapThreshold = 16
|
||||||
)
|
)
|
||||||
|
|
||||||
var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher]
|
var fastRegexMatcherCache *lru.Cache[string, *FastRegexMatcher]
|
||||||
|
@ -39,6 +40,9 @@ func init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
type FastRegexMatcher struct {
|
type FastRegexMatcher struct {
|
||||||
|
// Under some conditions, re is nil because the expression is never parsed.
|
||||||
|
// We store the original string to be able to return it in GetRegexString().
|
||||||
|
reString string
|
||||||
re *regexp.Regexp
|
re *regexp.Regexp
|
||||||
|
|
||||||
setMatches []string
|
setMatches []string
|
||||||
|
@ -70,19 +74,28 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) {
|
func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) {
|
||||||
|
m := &FastRegexMatcher{
|
||||||
|
reString: v,
|
||||||
|
}
|
||||||
|
|
||||||
|
m.stringMatcher, m.setMatches = optimizeAlternatingLiterals(v)
|
||||||
|
if m.stringMatcher != nil {
|
||||||
|
// If we already have a string matcher, we don't need to parse the regex
|
||||||
|
// or compile the matchString function. This also avoids the behavior in
|
||||||
|
// compileMatchStringFunction where it prefers to use setMatches when
|
||||||
|
// available, even if the string matcher is faster.
|
||||||
|
m.matchString = m.stringMatcher.Matches
|
||||||
|
} else {
|
||||||
parsed, err := syntax.Parse(v, syntax.Perl)
|
parsed, err := syntax.Parse(v, syntax.Perl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// Simplify the syntax tree to run faster.
|
// Simplify the syntax tree to run faster.
|
||||||
parsed = parsed.Simplify()
|
parsed = parsed.Simplify()
|
||||||
re, err := regexp.Compile("^(?:" + parsed.String() + ")$")
|
m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
m := &FastRegexMatcher{
|
|
||||||
re: re,
|
|
||||||
}
|
|
||||||
if parsed.Op == syntax.OpConcat {
|
if parsed.Op == syntax.OpConcat {
|
||||||
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
||||||
}
|
}
|
||||||
|
@ -90,8 +103,9 @@ func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) {
|
||||||
m.setMatches = matches
|
m.setMatches = matches
|
||||||
}
|
}
|
||||||
m.stringMatcher = stringMatcherFromRegexp(parsed)
|
m.stringMatcher = stringMatcherFromRegexp(parsed)
|
||||||
|
|
||||||
m.matchString = m.compileMatchStringFunction()
|
m.matchString = m.compileMatchStringFunction()
|
||||||
|
}
|
||||||
|
|
||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,7 +335,52 @@ func (m *FastRegexMatcher) SetMatches() []string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *FastRegexMatcher) GetRegexString() string {
|
func (m *FastRegexMatcher) GetRegexString() string {
|
||||||
return m.re.String()
|
return m.reString
|
||||||
|
}
|
||||||
|
|
||||||
|
// optimizeAlternatingLiterals optimizes a regex of the form
|
||||||
|
//
|
||||||
|
// `literal1|literal2|literal3|...`
|
||||||
|
//
|
||||||
|
// this function returns an optimized StringMatcher or nil if the regex
|
||||||
|
// cannot be optimized in this way, and a list of setMatches up to maxSetMatches
|
||||||
|
func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
|
||||||
|
if len(s) == 0 {
|
||||||
|
return emptyStringMatcher{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
estimatedAlternates := strings.Count(s, "|") + 1
|
||||||
|
|
||||||
|
// If there are no alternates, check if the string is a literal
|
||||||
|
if estimatedAlternates == 1 {
|
||||||
|
if regexp.QuoteMeta(s) == s {
|
||||||
|
return &equalStringMatcher{s: s, caseSensitive: true}, nil
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates)
|
||||||
|
|
||||||
|
for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') {
|
||||||
|
// Split the string into the next literal and the remainder
|
||||||
|
subMatch := s[:end]
|
||||||
|
s = s[end+1:]
|
||||||
|
|
||||||
|
// break if any of the submatches are not literals
|
||||||
|
if regexp.QuoteMeta(subMatch) != subMatch {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
multiMatcher.add(subMatch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// break if the remainder is not a literal
|
||||||
|
if regexp.QuoteMeta(s) != s {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
multiMatcher.add(s)
|
||||||
|
|
||||||
|
return multiMatcher, multiMatcher.setMatches()
|
||||||
}
|
}
|
||||||
|
|
||||||
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
|
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
|
||||||
|
@ -377,7 +436,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
||||||
clearBeginEndText(re)
|
clearBeginEndText(re)
|
||||||
|
|
||||||
m := stringMatcherFromRegexpInternal(re)
|
m := stringMatcherFromRegexpInternal(re)
|
||||||
m = optimizeEqualStringMatchers(m, optimizeEqualStringMatchersThreshold)
|
m = optimizeEqualStringMatchers(m, minEqualMultiStringMatcherMapThreshold)
|
||||||
|
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
@ -567,16 +626,86 @@ func (m *equalStringMatcher) Matches(s string) bool {
|
||||||
return strings.EqualFold(m.s, s)
|
return strings.EqualFold(m.s, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
// equalMultiStringMatcher matches a string exactly against a set of valid values.
|
type multiStringMatcherBuilder interface {
|
||||||
type equalMultiStringMatcher struct {
|
StringMatcher
|
||||||
// values to match a string against. If the matching is case insensitive,
|
add(s string)
|
||||||
|
setMatches() []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize int) multiStringMatcherBuilder {
|
||||||
|
// If the estimated size is low enough, it's faster to use a slice instead of a map.
|
||||||
|
if estimatedSize < minEqualMultiStringMatcherMapThreshold {
|
||||||
|
return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &equalMultiStringMapMatcher{
|
||||||
|
values: make(map[string]struct{}, estimatedSize),
|
||||||
|
caseSensitive: caseSensitive,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalMultiStringSliceMatcher matches a string exactly against a slice of valid values.
|
||||||
|
type equalMultiStringSliceMatcher struct {
|
||||||
|
values []string
|
||||||
|
|
||||||
|
caseSensitive bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringSliceMatcher) add(s string) {
|
||||||
|
m.values = append(m.values, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringSliceMatcher) setMatches() []string {
|
||||||
|
return m.values
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringSliceMatcher) Matches(s string) bool {
|
||||||
|
if m.caseSensitive {
|
||||||
|
for _, v := range m.values {
|
||||||
|
if s == v {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for _, v := range m.values {
|
||||||
|
if strings.EqualFold(s, v) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalMultiStringMapMatcher matches a string exactly against a map of valid values.
|
||||||
|
type equalMultiStringMapMatcher struct {
|
||||||
|
// values contains values to match a string against. If the matching is case insensitive,
|
||||||
// the values here must be lowercase.
|
// the values here must be lowercase.
|
||||||
values map[string]struct{}
|
values map[string]struct{}
|
||||||
|
|
||||||
caseSensitive bool
|
caseSensitive bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *equalMultiStringMatcher) Matches(s string) bool {
|
func (m *equalMultiStringMapMatcher) add(s string) {
|
||||||
|
if !m.caseSensitive {
|
||||||
|
s = strings.ToLower(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
m.values[s] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringMapMatcher) setMatches() []string {
|
||||||
|
if len(m.values) >= maxSetMatches {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
matches := make([]string, 0, len(m.values))
|
||||||
|
for s := range m.values {
|
||||||
|
matches = append(matches, s)
|
||||||
|
}
|
||||||
|
return matches
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringMapMatcher) Matches(s string) bool {
|
||||||
if !m.caseSensitive {
|
if !m.caseSensitive {
|
||||||
s = strings.ToLower(s)
|
s = strings.ToLower(s)
|
||||||
}
|
}
|
||||||
|
@ -657,24 +786,16 @@ func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatch
|
||||||
// Parse again the input StringMatcher to extract all values and storing them.
|
// Parse again the input StringMatcher to extract all values and storing them.
|
||||||
// We can skip the case sensitivity check because we've already checked it and
|
// We can skip the case sensitivity check because we've already checked it and
|
||||||
// if the code reach this point then it means all matchers have the same case sensitivity.
|
// if the code reach this point then it means all matchers have the same case sensitivity.
|
||||||
values := make(map[string]struct{}, numValues)
|
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues)
|
||||||
|
|
||||||
// Ignore the return value because we already iterated over the input StringMatcher
|
// Ignore the return value because we already iterated over the input StringMatcher
|
||||||
// and it was all good.
|
// and it was all good.
|
||||||
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
||||||
if caseSensitive {
|
multiMatcher.add(matcher.s)
|
||||||
values[matcher.s] = struct{}{}
|
|
||||||
} else {
|
|
||||||
values[strings.ToLower(matcher.s)] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
|
|
||||||
return &equalMultiStringMatcher{
|
return multiMatcher
|
||||||
values: values,
|
|
||||||
caseSensitive: caseSensitive,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
|
// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue