Merge pull request #13461 from pracucci/upstream-fastregexmatcher

Further optimise FastRegexMatcher
2025-03-05 20:59:13 -08:00 · 2024-03-25 13:49:29 +01:00 · 2024-03-25 13:49:29 +01:00 · 5540d34d94
parent 773170f372 48786ad4e8
commit 5540d34d94
8 changed files with 1991 additions and 230 deletions
--- a/go.mod
+++ b/go.mod
@ -74,7 +74,6 @@ require (
 	go.uber.org/automaxprocs v1.5.3
 	go.uber.org/goleak v1.3.0
 	go.uber.org/multierr v1.11.0
 	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
 	golang.org/x/net v0.22.0
 	golang.org/x/oauth2 v0.18.0
 	golang.org/x/sync v0.6.0
@ -186,6 +185,7 @@ require (
 	go.opentelemetry.io/otel/metric v1.24.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.1.0 // indirect
 	golang.org/x/crypto v0.21.0 // indirect
 	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
 	golang.org/x/mod v0.16.0 // indirect
 	golang.org/x/term v0.18.0 // indirect
 	golang.org/x/text v0.14.0 // indirect
--- a/model/labels/matcher.go
+++ b/model/labels/matcher.go
@ -118,3 +118,30 @@ func (m *Matcher) GetRegexString() string {
 	}
 	return m.re.GetRegexString()
 }
 // SetMatches returns the exact values the regexp of this matcher can be
 // reduced to, when such a reduction is possible. For example, the regexp
 // `a(b|f)` yields "ab" and "af".
 // The result is nil when the matcher has no regexp attached or when the
 // regexp can't be replaced by equality checks only.
 func (m *Matcher) SetMatches() []string {
 	if re := m.re; re != nil {
 		return re.SetMatches()
 	}
 	return nil
 }
 // Prefix returns the literal prefix every matching value must start with, if
 // one could be determined. It is empty for matchers without a regexp (such as
 // equality matchers) and when no required prefix is known.
 func (m *Matcher) Prefix() string {
 	if re := m.re; re != nil {
 		return re.prefix
 	}
 	return ""
 }
 // IsRegexOptimized reports whether the matcher holds a regexp for which a
 // fast-path optimization is available. Matchers without a regexp report false.
 func (m *Matcher) IsRegexOptimized() bool {
 	return m.re != nil && m.re.IsOptimized()
 }
--- a/model/labels/matcher_test.go
+++ b/model/labels/matcher_test.go
@ -14,13 +14,14 @@
 package labels
 import (
 	"fmt"
 	"testing"
 	"github.com/stretchr/testify/require"
 )
 func mustNewMatcher(t *testing.T, mType MatchType, value string) *Matcher {
-	m, err := NewMatcher(mType, "", value)
+	m, err := NewMatcher(mType, "test_label_name", value)
 	require.NoError(t, err)
 	return m
 }
@ -81,6 +82,21 @@ func TestMatcher(t *testing.T) {
 			value:   "foo-bar",
 			match:   false,
 		},
 		{
 			matcher: mustNewMatcher(t, MatchRegexp, "$*bar"),
 			value:   "foo-bar",
 			match:   false,
 		},
 		{
 			matcher: mustNewMatcher(t, MatchRegexp, "bar^+"),
 			value:   "foo-bar",
 			match:   false,
 		},
 		{
 			matcher: mustNewMatcher(t, MatchRegexp, "$+bar"),
 			value:   "foo-bar",
 			match:   false,
 		},
 	}
 	for _, test := range tests {
@ -118,6 +134,82 @@ func TestInverse(t *testing.T) {
 	}
 }
 // TestPrefix checks the prefix reported by Matcher.Prefix() for a range of
 // matcher types and regular expressions.
 func TestPrefix(t *testing.T) {
 	type testCase struct {
 		matcher *Matcher
 		prefix  string
 	}
 	// Each case pairs a matcher with the prefix it is expected to report.
 	cases := []testCase{
 		// Non-regexp matchers never expose a prefix.
 		{mustNewMatcher(t, MatchEqual, "abc"), ""},
 		{mustNewMatcher(t, MatchNotEqual, "abc"), ""},
 		// Regexps for which a prefix is expected.
 		{mustNewMatcher(t, MatchRegexp, "abc.+"), "abc"},
 		{mustNewMatcher(t, MatchRegexp, "abcd|abc.+"), "abc"},
 		{mustNewMatcher(t, MatchNotRegexp, "abcd|abc.+"), "abc"},
 		{mustNewMatcher(t, MatchRegexp, "abc(def|ghj)|ab|a."), "a"},
 		{mustNewMatcher(t, MatchRegexp, "foo.+bar|foo.*baz"), "foo"},
 		// Regexps for which no prefix is expected.
 		{mustNewMatcher(t, MatchRegexp, "abc|.*"), ""},
 		{mustNewMatcher(t, MatchRegexp, "abc|def"), ""},
 		{mustNewMatcher(t, MatchRegexp, ".+def"), ""},
 	}
 	for idx, c := range cases {
 		name := fmt.Sprintf("%d: %s", idx, c.matcher)
 		t.Run(name, func(t *testing.T) {
 			require.Equal(t, c.prefix, c.matcher.Prefix())
 		})
 	}
 }
 // TestIsRegexOptimized checks the value reported by Matcher.IsRegexOptimized()
 // for an equality matcher and for two regexp matchers.
 func TestIsRegexOptimized(t *testing.T) {
 	type testCase struct {
 		matcher          *Matcher
 		isRegexOptimized bool
 	}
 	cases := []testCase{
 		{mustNewMatcher(t, MatchEqual, "abc"), false},
 		{mustNewMatcher(t, MatchRegexp, "."), false},
 		{mustNewMatcher(t, MatchRegexp, "abc.+"), true},
 	}
 	for idx, c := range cases {
 		name := fmt.Sprintf("%d: %s", idx, c.matcher)
 		t.Run(name, func(t *testing.T) {
 			require.Equal(t, c.isRegexOptimized, c.matcher.IsRegexOptimized())
 		})
 	}
 }
 func BenchmarkMatchType_String(b *testing.B) {
 	for i := 0; i <= b.N; i++ {
 		_ = MatchType(i % int(MatchNotRegexp+1)).String()
--- a/model/labels/regexp.go
+++ b/model/labels/regexp.go
@ -14,73 +14,348 @@
 package labels
 import (
 	"slices"
 	"strings"
 	"github.com/grafana/regexp"
 	"github.com/grafana/regexp/syntax"
 )
-type FastRegexMatcher struct {
+const (
-	re       *regexp.Regexp
+	maxSetMatches = 256
 	prefix   string
 	suffix   string
 	contains string
-	// shortcut for literals
+	// The minimum number of alternate values a regex should have to trigger
-	literal bool
+	// the optimization done by optimizeEqualStringMatchers() and so use a map
-	value   string
+	// to match values instead of iterating over a list. This value has
 	// been computed running BenchmarkOptimizeEqualStringMatchers.
 	minEqualMultiStringMatcherMapThreshold = 16
 )
 // FastRegexMatcher matches strings against a regular expression, using cheaper
 // string checks in place of the regexp engine whenever they are available.
 type FastRegexMatcher struct {
 	// Under some conditions, re is nil because the expression is never parsed.
 	// We store the original string to be able to return it in GetRegexString().
 	reString string
 	re       *regexp.Regexp
 	// setMatches, when non-empty, is the list of exact values accepted by the matcher.
 	setMatches    []string
 	// stringMatcher, when non-nil, is run instead of the regexp engine.
 	stringMatcher StringMatcher
 	// prefix, suffix and contains are literals a matching value must respectively
 	// start with, end with and contain. Each one is ignored when empty.
 	prefix        string
 	suffix        string
 	contains      string
 	// matchString is the "compiled" function to run by MatchString().
 	matchString func(string) bool
 }
 func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
 	if isLiteral(v) {
 		return &FastRegexMatcher{literal: true, value: v}, nil
 	}
 	re, err := regexp.Compile("^(?:" + v + ")$")
 	if err != nil {
 		return nil, err
 	}
 	parsed, err := syntax.Parse(v, syntax.Perl)
 	if err != nil {
 		return nil, err
 	}
 	m := &FastRegexMatcher{
-		re: re,
+		reString: v,
 	}
-	if parsed.Op == syntax.OpConcat {
+	m.stringMatcher, m.setMatches = optimizeAlternatingLiterals(v)
-		m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
+	if m.stringMatcher != nil {
 		// If we already have a string matcher, we don't need to parse the regex
 		// or compile the matchString function. This also avoids the behavior in
 		// compileMatchStringFunction where it prefers to use setMatches when
 		// available, even if the string matcher is faster.
 		m.matchString = m.stringMatcher.Matches
 	} else {
 		parsed, err := syntax.Parse(v, syntax.Perl)
 		if err != nil {
 			return nil, err
 		}
 		// Simplify the syntax tree to run faster.
 		parsed = parsed.Simplify()
 		m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$")
 		if err != nil {
 			return nil, err
 		}
 		if parsed.Op == syntax.OpConcat {
 			m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
 		}
 		if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
 			m.setMatches = matches
 		}
 		m.stringMatcher = stringMatcherFromRegexp(parsed)
 		m.matchString = m.compileMatchStringFunction()
 	}
 	return m, nil
 }
 // compileMatchStringFunction builds the function MatchString() delegates to,
 // based on the optimizations that have been found for this matcher.
 func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
 	// When the string matcher is the sole optimization available, skip the
 	// wrapper below and expose the string matcher directly.
 	onlyStringMatcher := m.stringMatcher != nil &&
 		len(m.setMatches) == 0 &&
 		m.prefix == "" && m.suffix == "" && m.contains == ""
 	if onlyStringMatcher {
 		return m.stringMatcher.Matches
 	}
 	return func(s string) bool {
 		// A fixed set of accepted values is decisive on its own.
 		if n := len(m.setMatches); n != 0 {
 			for i := 0; i < n; i++ {
 				if m.setMatches[i] == s {
 					return true
 				}
 			}
 			return false
 		}
 		// Cheap literal checks, to reject the value before running anything heavier.
 		switch {
 		case m.prefix != "" && !strings.HasPrefix(s, m.prefix):
 			return false
 		case m.suffix != "" && !strings.HasSuffix(s, m.suffix):
 			return false
 		case m.contains != "" && !strings.Contains(s, m.contains):
 			return false
 		}
 		// Prefer the string matcher over the regexp engine when there is one.
 		if sm := m.stringMatcher; sm != nil {
 			return sm.Matches(s)
 		}
 		return m.re.MatchString(s)
 	}
 }
 // IsOptimized reports whether at least one fast-path optimization (set
 // matches, string matcher, or a literal prefix/suffix/contains check) is
 // available for the regex matcher.
 func (m *FastRegexMatcher) IsOptimized() bool {
 	switch {
 	case len(m.setMatches) > 0, m.stringMatcher != nil:
 		return true
 	case m.prefix != "", m.suffix != "", m.contains != "":
 		return true
 	}
 	return false
 }
 // findSetMatches extracts the list of exact values a regexp is equivalent to.
 // The result is nil when the regexp can't be expressed as equality checks only,
 // or when it mixes case sensitive and case insensitive parts.
 // Begin/end text anchors are stripped from re (in place) before the analysis.
 func findSetMatches(re *syntax.Regexp) (matches []string, caseSensitive bool) {
 	clearBeginEndText(re)
 	matches, caseSensitive = findSetMatchesInternal(re, "")
 	return matches, caseSensitive
 }
 // findSetMatchesInternal walks re and returns every exact value it can match,
 // each one prepended with base, together with the case sensitivity of those
 // values. It returns (nil, false) when re can't be reduced to a set of values.
 func findSetMatchesInternal(re *syntax.Regexp, base string) (matches []string, caseSensitive bool) {
 	switch re.Op {
 	case syntax.OpBeginText, syntax.OpEndText:
 		// Correctly handling the begin/end text operators inside a regex is
 		// tricky, so in this case we fallback to the regex engine.
 		return nil, false
 	case syntax.OpLiteral:
 		return []string{base + string(re.Rune)}, isCaseSensitive(re)
 	case syntax.OpEmptyMatch:
 		// An empty match only contributes a value if something precedes it.
 		if base == "" {
 			return nil, false
 		}
 		return []string{base}, isCaseSensitive(re)
 	case syntax.OpAlternate:
 		return findSetMatchesFromAlternate(re, base)
 	case syntax.OpCapture:
 		clearCapture(re)
 		return findSetMatchesInternal(re, base)
 	case syntax.OpConcat:
 		return findSetMatchesFromConcat(re, base)
 	case syntax.OpCharClass:
 		// The runes of a character class are read as consecutive (lo, hi) pairs.
 		ranges := re.Rune
 		if len(ranges)%2 != 0 {
 			return nil, false
 		}
 		total := 0
 		for i := 0; i+1 < len(ranges); i += 2 {
 			total += int(ranges[i+1]-ranges[i]) + 1
 		}
 		// Cap the number of characters expanded into matches. A class such as
 		// the negation [^0-9] covers so many characters that it would create
 		// thousands of matches, at which point the regexp is the better option.
 		if total > maxSetMatches {
 			return nil, false
 		}
 		var values []string
 		for i := 0; i+1 < len(ranges); i += 2 {
 			for c := ranges[i]; c <= ranges[i+1]; c++ {
 				values = append(values, base+string(c))
 			}
 		}
 		return values, isCaseSensitive(re)
 	}
 	// Any other operator is not supported.
 	return nil, false
 }
 // findSetMatchesFromConcat returns the values matched by a concatenation, built
 // by extending base with the values of each sub-expression in turn. It returns
 // (nil, false) if any sub-expression can't be reduced to a set of values, if
 // the set grows too large, or if the case sensitivity is not uniform.
 func findSetMatchesFromConcat(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) {
 	if len(re.Sub) == 0 {
 		return nil, false
 	}
 	clearCapture(re.Sub...)
 	// Start from the base alone and grow every partial match by one
 	// sub-expression at a time.
 	matches = []string{base}
 	for subIdx, sub := range re.Sub {
 		var expanded []string
 		for matchIdx, partial := range matches {
 			found, caseSensitive := findSetMatchesInternal(sub, partial)
 			if found == nil || tooManyMatches(expanded, found...) {
 				return nil, false
 			}
 			// Case sensitivity must be uniform: the very first set of matches
 			// defines the expected one, every following set is checked against it.
 			if subIdx == 0 && matchIdx == 0 {
 				matchesCaseSensitive = caseSensitive
 			} else if caseSensitive != matchesCaseSensitive {
 				return nil, false
 			}
 			expanded = append(expanded, found...)
 		}
 		matches = expanded
 	}
 	return matches, matchesCaseSensitive
 }
 // findSetMatchesFromAlternate returns the union of the values matched by each
 // branch of an alternation, every value being prepended with base. It returns
 // (nil, false) if any branch can't be reduced to a set of values, if the set
 // grows too large, or if the case sensitivity is not uniform.
 func findSetMatchesFromAlternate(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) {
 	for idx := range re.Sub {
 		found, caseSensitive := findSetMatchesInternal(re.Sub[idx], base)
 		if found == nil || tooManyMatches(matches, found...) {
 			return nil, false
 		}
 		// Case sensitivity must be uniform: the first branch defines the
 		// expected one, every following branch is checked against it.
 		if idx == 0 {
 			matchesCaseSensitive = caseSensitive
 		} else if caseSensitive != matchesCaseSensitive {
 			return nil, false
 		}
 		matches = append(matches, found...)
 	}
 	return matches, matchesCaseSensitive
 }
 // clearCapture strips capture groups from the given regexps, in place, since
 // they play no role in matching. Each capture node is overwritten with the
 // content of its first sub-expression.
 func clearCapture(regs ...*syntax.Regexp) {
 	for _, node := range regs {
 		// Capture groups can be nested, so keep unwrapping until none is left.
 		for {
 			if node.Op != syntax.OpCapture {
 				break
 			}
 			*node = *node.Sub[0]
 		}
 	}
 }
 // clearBeginEndText strips, in place, a leading begin-text and a trailing
 // end-text operator from the sub-expressions of re. Prometheus regexps are
 // anchored to the beginning and end of the string.
 func clearBeginEndText(re *syntax.Regexp) {
 	// Leave alternations alone: removing begin/end text from one of them could
 	// change the actual regexp properties. With no sub-expression there is
 	// nothing to remove either.
 	if re.Op == syntax.OpAlternate || len(re.Sub) == 0 {
 		return
 	}
 	if len(re.Sub) == 1 {
 		switch re.Sub[0].Op {
 		case syntax.OpBeginText, syntax.OpEndText:
 			// The only element has to go, so the whole node is turned into a
 			// matcher of an empty string. OpEmptyMatch is regexp's nop operator.
 			re.Op = syntax.OpEmptyMatch
 			re.Sub = nil
 			return
 		}
 	}
 	if first := re.Sub[0]; first.Op == syntax.OpBeginText {
 		re.Sub = re.Sub[1:]
 	}
 	if last := len(re.Sub) - 1; re.Sub[last].Op == syntax.OpEndText {
 		re.Sub = re.Sub[:last]
 	}
 }
 // isCaseInsensitive reports whether the FoldCase flag is set on the given
 // regexp node. The flag is per node, so it should be checked at each level of
 // the syntax tree.
 func isCaseInsensitive(reg *syntax.Regexp) bool {
 	return reg.Flags&syntax.FoldCase == syntax.FoldCase
 }
 // isCaseSensitive reports whether the FoldCase flag is unset on the given
 // regexp node. The flag is per node, so it should be checked at each level of
 // the syntax tree.
 func isCaseSensitive(reg *syntax.Regexp) bool {
 	return reg.Flags&syntax.FoldCase == 0
 }
 // tooManyMatches reports whether appending added to matches would exceed
 // maxSetMatches entries.
 func tooManyMatches(matches []string, added ...string) bool {
 	total := len(matches) + len(added)
 	return total > maxSetMatches
 }
 func (m *FastRegexMatcher) MatchString(s string) bool {
-	if m.literal {
+	return m.matchString(s)
-		return s == m.value
+}
-	}
+
-	if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
+func (m *FastRegexMatcher) SetMatches() []string {
-		return false
+	// IMPORTANT: always return a copy, otherwise if the caller manipulate this slice it will
-	}
+	// also get manipulated in the cached FastRegexMatcher instance.
-	if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
+	return slices.Clone(m.setMatches)
 		return false
 	}
 	if m.contains != "" && !strings.Contains(s, m.contains) {
 		return false
 	}
 	return m.re.MatchString(s)
 }
 func (m *FastRegexMatcher) GetRegexString() string {
-	if m.literal {
+	return m.reString
 		return m.value
 	}
 	return m.re.String()
 }
-func isLiteral(re string) bool {
+// optimizeAlternatingLiterals optimizes a regex of the form
-	return regexp.QuoteMeta(re) == re
+//
 //	`literal1|literal2|literal3|...`
 //
 // this function returns an optimized StringMatcher or nil if the regex
 // cannot be optimized in this way, and a list of setMatches up to maxSetMatches.
 func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
 	if s == "" {
 		return emptyStringMatcher{}, nil
 	}
 	// A string is a literal when quoting regexp metacharacters leaves it unchanged.
 	isLiteral := func(v string) bool {
 		return regexp.QuoteMeta(v) == v
 	}
 	estimatedAlternates := 1 + strings.Count(s, "|")
 	// Without any alternation, the whole string is either a literal or unsupported.
 	if estimatedAlternates == 1 {
 		if !isLiteral(s) {
 			return nil, nil
 		}
 		return &equalStringMatcher{s: s, caseSensitive: true}, []string{s}
 	}
 	multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates)
 	rest := s
 	for {
 		// Split off the next alternate from what is left of the input.
 		literal, tail, found := strings.Cut(rest, "|")
 		if !found {
 			break
 		}
 		// Give up as soon as one alternate is not a literal.
 		if !isLiteral(literal) {
 			return nil, nil
 		}
 		multiMatcher.add(literal)
 		rest = tail
 	}
 	// The last alternate must be a literal too.
 	if !isLiteral(rest) {
 		return nil, nil
 	}
 	multiMatcher.add(rest)
 	return multiMatcher, multiMatcher.setMatches()
 }
 // optimizeConcatRegex returns literal prefix/suffix text that can be safely
@ -123,3 +398,540 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) {
 	return
 }
 // StringMatcher is a matcher that matches a string in place of a regular expression.
 type StringMatcher interface {
 	// Matches reports whether s is accepted by the matcher.
 	Matches(s string) bool
 }
 // stringMatcherFromRegexp tries to build a string matcher equivalent to a
 // common regexp, returning nil when the regexp is not supported.
 // Begin/end text anchors are stripped from re (in place) first, and the
 // resulting matcher goes through optimizeEqualStringMatchers().
 func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
 	clearBeginEndText(re)
 	return optimizeEqualStringMatchers(
 		stringMatcherFromRegexpInternal(re),
 		minEqualMultiStringMatcherMapThreshold,
 	)
 }
 // stringMatcherFromRegexpInternal returns a StringMatcher equivalent to re, or
 // nil when no such matcher is available for this kind of expression.
 // It calls itself recursively on sub-expressions and may modify re in place:
 // capture groups are removed and, for concatenations, a leading and/or a
 // trailing `+`, `*` or `?` operator is cut off re.Sub once it has been
 // converted into the left/right matcher.
 func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
 	clearCapture(re)
 	switch re.Op {
 	case syntax.OpBeginText:
 		// Correctly handling the begin text operator inside a regex is tricky,
 		// so in this case we fallback to the regex engine.
 		return nil
 	case syntax.OpEndText:
 		// Correctly handling the end text operator inside a regex is tricky,
 		// so in this case we fallback to the regex engine.
 		return nil
 	case syntax.OpPlus:
 		// Only optimize when the repeated expression is an "any char" operator.
 		if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
 			return nil
 		}
 		return &anyNonEmptyStringMatcher{
 			matchNL: re.Sub[0].Op == syntax.OpAnyChar,
 		}
 	case syntax.OpStar:
 		// Only optimize when the repeated expression is an "any char" operator.
 		if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL {
 			return nil
 		}
 		// If the newline is valid, then this matcher literally matches any string (even empty).
 		if re.Sub[0].Op == syntax.OpAnyChar {
 			return trueMatcher{}
 		}
 		// Any string is fine (including an empty one), as long as it doesn't contain any newline.
 		return anyStringWithoutNewlineMatcher{}
 	case syntax.OpQuest:
 		// Only optimize for ".?".
 		if len(re.Sub) != 1 || (re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL) {
 			return nil
 		}
 		return &zeroOrOneCharacterStringMatcher{
 			matchNL: re.Sub[0].Op == syntax.OpAnyChar,
 		}
 	case syntax.OpEmptyMatch:
 		return emptyStringMatcher{}
 	case syntax.OpLiteral:
 		return &equalStringMatcher{
 			s:             string(re.Rune),
 			caseSensitive: !isCaseInsensitive(re),
 		}
 	case syntax.OpAlternate:
 		// Every branch must be convertible, otherwise the whole alternation is unsupported.
 		or := make([]StringMatcher, 0, len(re.Sub))
 		for _, sub := range re.Sub {
 			m := stringMatcherFromRegexpInternal(sub)
 			if m == nil {
 				return nil
 			}
 			or = append(or, m)
 		}
 		return orStringMatcher(or)
 	case syntax.OpConcat:
 		clearCapture(re.Sub...)
 		if len(re.Sub) == 0 {
 			return emptyStringMatcher{}
 		}
 		if len(re.Sub) == 1 {
 			return stringMatcherFromRegexpInternal(re.Sub[0])
 		}
 		var left, right StringMatcher
 		// Let's try to find if there's a first and last any matchers.
 		if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar || re.Sub[0].Op == syntax.OpQuest {
 			left = stringMatcherFromRegexpInternal(re.Sub[0])
 			if left == nil {
 				return nil
 			}
 			// The first sub-expression is now covered by the left matcher.
 			re.Sub = re.Sub[1:]
 		}
 		if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar || re.Sub[len(re.Sub)-1].Op == syntax.OpQuest {
 			right = stringMatcherFromRegexpInternal(re.Sub[len(re.Sub)-1])
 			if right == nil {
 				return nil
 			}
 			// The last sub-expression is now covered by the right matcher.
 			re.Sub = re.Sub[:len(re.Sub)-1]
 		}
 		// Look for a fixed set of values matched by what is left of the concatenation.
 		matches, matchesCaseSensitive := findSetMatchesInternal(re, "")
 		if len(matches) == 0 && len(re.Sub) == 2 {
 			// We have not found fixed set matches. We look for other known cases that
 			// we can optimize.
 			switch {
 			// Prefix is literal.
 			case right == nil && re.Sub[0].Op == syntax.OpLiteral:
 				right = stringMatcherFromRegexpInternal(re.Sub[1])
 				if right != nil {
 					matches = []string{string(re.Sub[0].Rune)}
 					matchesCaseSensitive = !isCaseInsensitive(re.Sub[0])
 				}
 			// Suffix is literal.
 			case left == nil && re.Sub[1].Op == syntax.OpLiteral:
 				left = stringMatcherFromRegexpInternal(re.Sub[0])
 				if left != nil {
 					matches = []string{string(re.Sub[1].Rune)}
 					matchesCaseSensitive = !isCaseInsensitive(re.Sub[1])
 				}
 			}
 		}
 		// Ensure we've found some literals to match (optionally with a left and/or right matcher).
 		// If not, then this optimization doesn't trigger.
 		if len(matches) == 0 {
 			return nil
 		}
 		// Use the right (and best) matcher based on what we've found.
 		switch {
 		// No left and right matchers (only fixed set matches).
 		case left == nil && right == nil:
 			// If there are no any matchers on both sides, it's a concat of literals.
 			or := make([]StringMatcher, 0, len(matches))
 			for _, match := range matches {
 				or = append(or, &equalStringMatcher{
 					s:             match,
 					caseSensitive: matchesCaseSensitive,
 				})
 			}
 			return orStringMatcher(or)
 		// Right matcher with 1 fixed set match.
 		case left == nil && len(matches) == 1:
 			return &literalPrefixStringMatcher{
 				prefix:              matches[0],
 				prefixCaseSensitive: matchesCaseSensitive,
 				right:               right,
 			}
 		// Left matcher with 1 fixed set match.
 		case right == nil && len(matches) == 1:
 			return &literalSuffixStringMatcher{
 				left:                left,
 				suffix:              matches[0],
 				suffixCaseSensitive: matchesCaseSensitive,
 			}
 		// We found literals in the middle. We can trigger the fast path only if
 		// the matches are case sensitive because containsStringMatcher doesn't
 		// support case insensitive.
 		case matchesCaseSensitive:
 			return &containsStringMatcher{
 				substrings: matches,
 				left:       left,
 				right:      right,
 			}
 		}
 	}
 	// Unsupported operator, or a concatenation none of the cases above could handle.
 	return nil
 }
 // containsStringMatcher matches a string made of one of the substrings,
 // surrounded by text accepted by the left and right matchers.
 // With both left and right set, the substring can be anywhere in the string.
 // With only right set, the substring must be a prefix of the string, and with
 // only left set it must be a suffix. With neither set, nothing ever matches.
 type containsStringMatcher struct {
 	// Matcher for the text before the substring. Can be nil.
 	left StringMatcher
 	// Candidates for the "middle" part, between the left and right matchers.
 	// At least one of them must be found.
 	substrings []string
 	// Matcher for the text after the substring. Can be nil.
 	right StringMatcher
 }
 // Matches reports whether s contains one of the substrings at a position where
 // the surrounding text is accepted by the left and right matchers.
 func (m *containsStringMatcher) Matches(s string) bool {
 	hasLeft, hasRight := m.left != nil, m.right != nil
 	for _, needle := range m.substrings {
 		switch {
 		case hasLeft && hasRight:
 			// Try every occurrence of the substring, until one is found where
 			// both sides are accepted.
 			for from := 0; ; {
 				idx := strings.Index(s[from:], needle)
 				if idx < 0 {
 					break
 				}
 				// The search ran on s[from:], so shift the index back to a
 				// position inside s.
 				idx += from
 				if m.left.Matches(s[:idx]) && m.right.Matches(s[idx+len(needle):]) {
 					return true
 				}
 				// Resume the search right after the start of this occurrence.
 				from = idx + 1
 			}
 		case hasLeft:
 			// Only text on the left is allowed, so the substring must be a suffix.
 			if strings.HasSuffix(s, needle) && m.left.Matches(s[:len(s)-len(needle)]) {
 				return true
 			}
 		case hasRight:
 			// Only text on the right is allowed, so the substring must be a prefix.
 			if strings.HasPrefix(s, needle) && m.right.Matches(s[len(needle):]) {
 				return true
 			}
 		}
 	}
 	return false
 }
 // literalPrefixStringMatcher matches a string starting with a literal prefix,
 // followed by text accepted by the right matcher.
 type literalPrefixStringMatcher struct {
 	prefix              string
 	prefixCaseSensitive bool
 	// The matcher applied to what follows the prefix. Matches() calls it
 	// unconditionally once the prefix has been found.
 	right StringMatcher
 }
 // Matches reports whether s starts with the prefix (compared according to
 // prefixCaseSensitive) and the rest of s is accepted by the right matcher.
 func (m *literalPrefixStringMatcher) Matches(s string) bool {
 	var prefixFound bool
 	if m.prefixCaseSensitive {
 		prefixFound = strings.HasPrefix(s, m.prefix)
 	} else {
 		prefixFound = hasPrefixCaseInsensitive(s, m.prefix)
 	}
 	if !prefixFound {
 		return false
 	}
 	// The prefix is there, the decision is up to the right side.
 	return m.right.Matches(s[len(m.prefix):])
 }
 // literalSuffixStringMatcher matches a string ending with a literal suffix,
 // preceded by text accepted by the left matcher.
 type literalSuffixStringMatcher struct {
 	// The matcher applied to what precedes the suffix. Matches() calls it
 	// unconditionally once the suffix has been found.
 	left StringMatcher
 	suffix              string
 	suffixCaseSensitive bool
 }
 // Matches reports whether s ends with the suffix (compared according to
 // suffixCaseSensitive) and the rest of s is accepted by the left matcher.
 func (m *literalSuffixStringMatcher) Matches(s string) bool {
 	var suffixFound bool
 	if m.suffixCaseSensitive {
 		suffixFound = strings.HasSuffix(s, m.suffix)
 	} else {
 		suffixFound = hasSuffixCaseInsensitive(s, m.suffix)
 	}
 	if !suffixFound {
 		return false
 	}
 	// The suffix is there, the decision is up to the left side.
 	return m.left.Matches(s[:len(s)-len(m.suffix)])
 }
 // emptyStringMatcher is a StringMatcher accepting the empty string only.
 type emptyStringMatcher struct{}
 // Matches reports whether s is the empty string.
 func (emptyStringMatcher) Matches(s string) bool {
 	return s == ""
 }
 // orStringMatcher is a list of matchers accepting a string as soon as one of
 // them does.
 type orStringMatcher []StringMatcher
 // Matches reports whether at least one of the sub-matchers accepts s. An empty
 // list accepts nothing.
 func (m orStringMatcher) Matches(s string) bool {
 	for i := range m {
 		if m[i].Matches(s) {
 			return true
 		}
 	}
 	return false
 }
 // equalStringMatcher accepts a single value, compared either exactly or
 // ignoring case.
 type equalStringMatcher struct {
 	s             string
 	caseSensitive bool
 }
 // Matches reports whether s equals the expected value. When the matcher is
 // not case sensitive, the comparison is done with strings.EqualFold.
 func (m *equalStringMatcher) Matches(s string) bool {
 	if !m.caseSensitive {
 		return strings.EqualFold(m.s, s)
 	}
 	return s == m.s
 }
 // multiStringMatcherBuilder is a StringMatcher matching against a set of
 // values, which is filled one value at a time.
 type multiStringMatcherBuilder interface {
 	StringMatcher
 	// add adds s to the values accepted by the matcher.
 	add(s string)
 	// setMatches returns the values held by the matcher. Implementations may
 	// return nil instead of the values.
 	setMatches() []string
 }
 // newEqualMultiStringMatcher returns an empty matcher to be filled with the
 // values to accept. The implementation is picked from the estimated number of
 // values: map based from minEqualMultiStringMatcherMapThreshold values on,
 // slice based below that, since a slice is faster for small sizes.
 func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize int) multiStringMatcherBuilder {
 	if estimatedSize >= minEqualMultiStringMatcherMapThreshold {
 		return &equalMultiStringMapMatcher{
 			values:        make(map[string]struct{}, estimatedSize),
 			caseSensitive: caseSensitive,
 		}
 	}
 	return &equalMultiStringSliceMatcher{
 		caseSensitive: caseSensitive,
 		values:        make([]string, 0, estimatedSize),
 	}
 }
 // equalMultiStringSliceMatcher accepts any of the values kept in a slice,
 // compared either exactly or ignoring case.
 type equalMultiStringSliceMatcher struct {
 	values []string
 	caseSensitive bool
 }
 // add appends s to the accepted values.
 func (m *equalMultiStringSliceMatcher) add(s string) {
 	m.values = append(m.values, s)
 }
 // setMatches returns the accepted values. The returned slice is the internal
 // one, not a copy.
 func (m *equalMultiStringSliceMatcher) setMatches() []string {
 	return m.values
 }
 // Matches reports whether s equals one of the accepted values. When the
 // matcher is not case sensitive, values are compared with strings.EqualFold.
 func (m *equalMultiStringSliceMatcher) Matches(s string) bool {
 	for i := range m.values {
 		candidate := m.values[i]
 		if m.caseSensitive {
 			if candidate == s {
 				return true
 			}
 			continue
 		}
 		if strings.EqualFold(s, candidate) {
 			return true
 		}
 	}
 	return false
 }
 // equalMultiStringMapMatcher accepts any of the values kept as keys of a map,
 // compared either exactly or after lowercasing.
 type equalMultiStringMapMatcher struct {
 	// values holds the accepted values. When the matching is case insensitive
 	// they have to be stored lowercase, which add() takes care of.
 	values map[string]struct{}
 	caseSensitive bool
 }
 // add adds s to the accepted values, lowercasing it first when the matcher is
 // not case sensitive.
 func (m *equalMultiStringMapMatcher) add(s string) {
 	key := s
 	if !m.caseSensitive {
 		key = strings.ToLower(key)
 	}
 	m.values[key] = struct{}{}
 }
 // setMatches returns the accepted values, in no particular order, or nil when
 // there are maxSetMatches values or more.
 func (m *equalMultiStringMapMatcher) setMatches() []string {
 	size := len(m.values)
 	if size >= maxSetMatches {
 		return nil
 	}
 	matches := make([]string, size)
 	next := 0
 	for value := range m.values {
 		matches[next] = value
 		next++
 	}
 	return matches
 }
 // Matches reports whether s is one of the accepted values. When the matcher is
 // not case sensitive, s is lowercased before the lookup.
 func (m *equalMultiStringMapMatcher) Matches(s string) bool {
 	key := s
 	if !m.caseSensitive {
 		key = strings.ToLower(key)
 	}
 	_, found := m.values[key]
 	return found
 }
 // anyStringWithoutNewlineMatcher is a stringMatcher accepting every string,
 // the empty one included, as long as it holds no newline character.
 type anyStringWithoutNewlineMatcher struct{}
 // Matches reports whether s is free of newline characters.
 func (anyStringWithoutNewlineMatcher) Matches(s string) bool {
 	return !strings.Contains(s, "\n")
 }
 // anyNonEmptyStringMatcher is a stringMatcher accepting every non-empty
 // string. Strings holding a newline are accepted only if matchNL is true.
 type anyNonEmptyStringMatcher struct {
 	matchNL bool
 }
 // Matches reports whether s is non-empty and, unless matchNL is set, free of
 // newline characters.
 func (m *anyNonEmptyStringMatcher) Matches(s string) bool {
 	if len(s) == 0 {
 		return false
 	}
 	// Newlines either don't matter, or have to be looked for. Since the newline
 	// is an ASCII character, searching for the byte is enough.
 	return m.matchNL || strings.IndexByte(s, '\n') < 0
 }
 // zeroOrOneCharacterStringMatcher is a StringMatcher which matches zero or one occurrence
 // of any character. The newline character is matched only if matchNL is set to true.
 type zeroOrOneCharacterStringMatcher struct {
 	matchNL bool
 }
 // Matches reports whether s is empty or made of exactly one character. A
 // character is a rune here, not a byte: a single multi-byte character such as
 // "ñ" matches, like it does with the `.?` regexp this matcher stands for.
 func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
 	// Zero or one. Ranging over a string yields the byte offset at which each
 	// rune starts, so an offset greater than zero means there is a second
 	// rune. A byte that is not valid UTF-8 counts as one rune of width 1.
 	for offset := range s {
 		if offset > 0 {
 			return false
 		}
 	}
 	// No need to check for the newline if the string is empty or matching a newline is OK.
 	if m.matchNL || len(s) == 0 {
 		return true
 	}
 	// The newline is an ASCII character, so checking the first byte is enough.
 	return s[0] != '\n'
 }
 // trueMatcher is a stringMatcher accepting every string.
 type trueMatcher struct{}
 // Matches always returns true, whatever the input is.
 func (trueMatcher) Matches(string) bool {
 	return true
 }
 // optimizeEqualStringMatchers handles the specific case of an input made only
 // of an alternation (orStringMatcher) of equality checks (equalStringMatcher),
 // all with the same case sensitivity. When at least threshold values are found,
 // they are moved to a single matcher created by newEqualMultiStringMatcher(),
 // so that matching can look up a map instead of iterating over a list.
 // In any other case the input is returned unchanged.
 func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher {
 	var (
 		sensitivity      bool
 		sensitivityKnown bool
 		count            int
 	)
 	// First pass: count the values, refusing any mix of case sensitivities.
 	countSameCase := func(eq *equalStringMatcher) bool {
 		switch {
 		case !sensitivityKnown:
 			sensitivity, sensitivityKnown = eq.caseSensitive, true
 		case sensitivity != eq.caseSensitive:
 			return false
 		}
 		count++
 		return true
 	}
 	// Leave the input alone if it's not of the expected shape, or if there are
 	// too few values for the optimization to be worth it.
 	if !findEqualStringMatchers(input, countSameCase) || count < threshold {
 		return input
 	}
 	// Second pass: collect the values. There is nothing left to verify, since
 	// the first pass succeeded on the very same input, which is also why the
 	// returned value is ignored.
 	multiMatcher := newEqualMultiStringMatcher(sensitivity, count)
 	findEqualStringMatchers(input, func(eq *equalStringMatcher) bool {
 		multiMatcher.add(eq.s)
 		return true
 	})
 	return multiMatcher
 }
 // findEqualStringMatchers visits the input StringMatcher, calling the callback
 // on each equalStringMatcher it finds. It returns true if and only if the
 // input is made *only* of an alternation of equalStringMatcher (nested
 // alternations included) and the callback never returned false.
 func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool {
 	alternatives, isOr := input.(orStringMatcher)
 	if !isOr {
 		return false
 	}
 	for _, sub := range alternatives {
 		if eq, isEqual := sub.(*equalStringMatcher); isEqual {
 			if !callback(eq) {
 				return false
 			}
 			continue
 		}
 		if _, isNested := sub.(orStringMatcher); isNested {
 			if !findEqualStringMatchers(sub, callback) {
 				return false
 			}
 			continue
 		}
 		// Any other kind of matcher rules the optimization out, so there is
 		// no point in searching further.
 		return false
 	}
 	return true
 }
 // hasPrefixCaseInsensitive reports whether the first len(prefix) bytes of s
 // are equal to prefix under strings.EqualFold.
 func hasPrefixCaseInsensitive(s, prefix string) bool {
 	n := len(prefix)
 	if len(s) < n {
 		return false
 	}
 	return strings.EqualFold(s[:n], prefix)
 }
 // hasSuffixCaseInsensitive reports whether the last len(suffix) bytes of s
 // are equal to suffix under strings.EqualFold.
 func hasSuffixCaseInsensitive(s, suffix string) bool {
 	n := len(suffix)
 	if len(s) < n {
 		return false
 	}
 	return strings.EqualFold(s[len(s)-n:], suffix)
 }
--- a/model/labels/regexp_test.go
+++ b/model/labels/regexp_test.go
--- a/promql/parser/parse_test.go
+++ b/promql/parser/parse_test.go
@ -3706,7 +3706,31 @@ func TestParseExpressions(t *testing.T) {
 			if !test.fail {
 				require.NoError(t, err)
-				require.Equal(t, test.expected, expr, "error on input '%s'", test.input)
+				expected := test.expected
 				// The FastRegexMatcher is not comparable with a deep equal, so only compare its String() version.
 				if actualVector, ok := expr.(*VectorSelector); ok {
 					require.IsType(t, &VectorSelector{}, test.expected, "error on input '%s'", test.input)
 					expectedVector := test.expected.(*VectorSelector)
 					require.Len(t, actualVector.LabelMatchers, len(expectedVector.LabelMatchers), "error on input '%s'", test.input)
 					for i := 0; i < len(actualVector.LabelMatchers); i++ {
 						expectedMatcher := expectedVector.LabelMatchers[i].String()
 						actualMatcher := actualVector.LabelMatchers[i].String()
 						require.Equal(t, expectedMatcher, actualMatcher, "unexpected label matcher '%s' on input '%s'", actualMatcher, test.input)
 					}
 					// Make a shallow copy of the expected expr (because the test cases are defined in a global variable)
 					// and then reset the LabelMatchers so that they are not compared by the following deep equal.
 					expectedCopy := *expectedVector
 					expectedCopy.LabelMatchers = nil
 					expected = &expectedCopy
 					actualVector.LabelMatchers = nil
 				}
 				require.Equal(t, expected, expr, "error on input '%s'", test.input)
 			} else {
 				require.Error(t, err)
 				require.Contains(t, err.Error(), test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error())
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@ -19,8 +19,6 @@ import (
 	"fmt"
 	"math"
 	"slices"
 	"strings"
 	"unicode/utf8"
 	"github.com/oklog/ulid"
@ -35,20 +33,6 @@ import (
 	"github.com/prometheus/prometheus/util/annotations"
 )
 // regexMetaCharacterBytes is the bitmap consulted by isRegexMetaCharacter to check whether
 // a character needs to be escaped. The bit for byte b lives in element b%16 at bit
 // position b/16; it is populated by init below.
 var regexMetaCharacterBytes [16]byte
 // isRegexMetaCharacter reports whether byte b needs to be escaped.
 // Bytes outside the ASCII range (b >= utf8.RuneSelf) are never reported.
 func isRegexMetaCharacter(b byte) bool {
 	if b >= utf8.RuneSelf {
 		return false
 	}
 	mask := byte(1) << (b >> 4)
 	return regexMetaCharacterBytes[b&0x0f]&mask != 0
 }
 // init sets the bitmap bit of every character in `.+*?()|[]{}^$`.
 func init() {
 	const metaCharacters = `.+*?()|[]{}^$`
 	for i := 0; i < len(metaCharacters); i++ {
 		c := metaCharacters[i]
 		regexMetaCharacterBytes[c&0x0f] |= 1 << (c >> 4)
 	}
 }
 type blockBaseQuerier struct {
 	blockID    ulid.ULID
 	index      IndexReader
@ -195,55 +179,6 @@ func (q *blockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *
 	return NewBlockChunkSeriesSet(q.blockID, q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming)
 }
 // findSetMatches extracts the literal alternatives from a pattern of the form
 // `^(?:a|b|c)$` (optionally with one extra pair of parentheses around the whole
 // alternation) and returns them with escapes resolved.
 //
 // It returns nil when the `^(?:` ... `)$` wrapper is missing, when the pattern contains
 // an unescaped meta character other than '|', or when a backslash escapes something that
 // is neither a meta character nor a backslash. Empty alternatives are dropped, so the
 // result may be an empty, non-nil slice.
 func findSetMatches(pattern string) []string {
 	const (
 		wrapperPrefix = "^(?:"
 		wrapperSuffix = ")$"
 	)
 	// Return no matches if the wrapper from Prometheus is missing.
 	if len(pattern) < len(wrapperPrefix)+len(wrapperSuffix) ||
 		!strings.HasPrefix(pattern, wrapperPrefix) ||
 		!strings.HasSuffix(pattern, wrapperSuffix) {
 		return nil
 	}
 	body := pattern[len(wrapperPrefix) : len(pattern)-len(wrapperSuffix)]
 	// A group wrapping the whole regex can lose its first and last parentheses.
 	if len(body) >= 2 && body[0] == '(' && body[len(body)-1] == ')' {
 		body = body[1 : len(body)-1]
 	}
 	var (
 		alternatives = make([]string, 0, 1)
 		literal      []byte
 		afterEscape  bool
 	)
 	for i := 0; i < len(body); i++ {
 		c := body[i]
 		switch {
 		case afterEscape:
 			// Only meta characters and the backslash itself may be escaped.
 			if !isRegexMetaCharacter(c) && c != '\\' {
 				return nil
 			}
 			literal = append(literal, c)
 			afterEscape = false
 		case c == '|':
 			// Alternation separator: close the current literal and start a new one.
 			alternatives = append(alternatives, string(literal))
 			literal = literal[:0]
 		case isRegexMetaCharacter(c):
 			// Any other unescaped meta character means this is not a plain set.
 			return nil
 		case c == '\\':
 			afterEscape = true
 		default:
 			literal = append(literal, c)
 		}
 	}
 	alternatives = append(alternatives, string(literal))
 	matches := make([]string, 0, len(alternatives))
 	for _, alt := range alternatives {
 		if alt != "" {
 			matches = append(matches, alt)
 		}
 	}
 	return matches
 }
 // PostingsForMatchers assembles a single postings iterator against the index reader
 // based on the given matchers. The resulting postings are not ordered by series.
 func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) {
@ -385,7 +320,7 @@ func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher)
 	// Fast-path for set matching.
 	if m.Type == labels.MatchRegexp {
-		setMatches := findSetMatches(m.GetRegexString())
+		setMatches := m.SetMatches()
 		if len(setMatches) > 0 {
 			return ix.Postings(ctx, m.Name, setMatches...)
 		}
@ -416,7 +351,7 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma
 	// Inverse of a MatchNotRegexp is MatchRegexp (double negation).
 	// Fast-path for set matching.
 	if m.Type == labels.MatchNotRegexp {
-		setMatches := findSetMatches(m.GetRegexString())
+		setMatches := m.SetMatches()
 		if len(setMatches) > 0 {
 			return ix.Postings(ctx, m.Name, setMatches...)
 		}
--- a/tsdb/querier_test.go
+++ b/tsdb/querier_test.go
@ -2658,54 +2658,6 @@ func BenchmarkSetMatcher(b *testing.B) {
 	}
 }
 // TestFindSetMatches checks the values findSetMatches extracts from a table of patterns.
 // Refer to https://github.com/prometheus/prometheus/issues/2651.
 func TestFindSetMatches(t *testing.T) {
 	type testCase struct {
 		pattern  string
 		expected []string
 	}
 	testCases := []testCase{
 		// A lone value, as produced by a `bar=~"foo"` selector.
 		{pattern: "^(?:foo)$", expected: []string{"foo"}},
 		// Plain sets.
 		{pattern: "^(?:foo|bar|baz)$", expected: []string{"foo", "bar", "baz"}},
 		// Sets whose values contain escaped characters.
 		{pattern: "^(?:fo\\.o|bar\\?|\\^baz)$", expected: []string{"fo.o", "bar?", "^baz"}},
 		// Sets with special characters left unescaped: nothing is extracted.
 		{pattern: "^(?:fo.o|bar?|^baz)$", expected: nil},
 		// Pattern without the wrapper: nothing is extracted.
 		{pattern: "foo|bar|baz", expected: nil},
 	}
 	for i := range testCases {
 		tc := testCases[i]
 		require.Equal(t, tc.expected, findSetMatches(tc.pattern), "Evaluating %s, unexpected result.", tc.pattern)
 	}
 }
 func TestPostingsForMatchers(t *testing.T) {
 	ctx := context.Background()
@ -3310,7 +3262,7 @@ func TestPostingsForMatcher(t *testing.T) {
 		{
 			// Test case for double quoted regex matcher
 			matcher:  labels.MustNewMatcher(labels.MatchRegexp, "test", "^(?:a|b)$"),
-			hasError: true,
+			hasError: false,
 		},
 	}