diff --git a/pkg/labels/matcher.go b/pkg/labels/matcher.go index bed26b10f..1b40f77fa 100644 --- a/pkg/labels/matcher.go +++ b/pkg/labels/matcher.go @@ -113,6 +113,7 @@ func (m *Matcher) Inverse() (*Matcher, error) { // SetMatches returns a set of equality matchers for the current regex matchers if possible. // For examples the regexp `a(b|f)` will returns "ab" and "af". +// Returns nil if we can't replace the regexp by only equality matchers. func (m *Matcher) SetMatches() []string { if m.re == nil { return nil diff --git a/pkg/labels/regexp.go b/pkg/labels/regexp.go index 3e1fe0dc3..559d2a4ae 100644 --- a/pkg/labels/regexp.go +++ b/pkg/labels/regexp.go @@ -56,7 +56,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { // findSetMatches extract equality matches from a regexp. // Returns nil if we can't replace the regexp by only equality matchers. func findSetMatches(re *syntax.Regexp, base string) []string { - // Matches are not case sensitive, if we find a case insensitive regexp. + // Matches are case sensitive, if we find a case insensitive regexp. // We have to abort. 
if isCaseInsensitive(re) { return nil @@ -69,25 +69,19 @@ func findSetMatches(re *syntax.Regexp, base string) []string { return []string{base} } case syntax.OpAlternate: - found := findSetMatchesFromAlternate(re, base) - if found != nil { - return found - } + return findSetMatchesFromAlternate(re, base) case syntax.OpCapture: clearCapture(re) return findSetMatches(re, base) case syntax.OpConcat: - found := findSetMatchesFromConcat(re, base) - if found != nil { - return found - } + return findSetMatchesFromConcat(re, base) case syntax.OpCharClass: - if len(re.Rune) == 1 { - return []string{base + string(re.Rune)} + if len(re.Rune)%2 != 0 { + return nil } var matches []string var totalSet int - for i := 0; i < len(re.Rune); i = i + 2 { + for i := 0; i+1 < len(re.Rune); i = i + 2 { totalSet += int(re.Rune[i+1]-re.Rune[i]) + 1 } // limits the total characters that can be used to create matches. @@ -96,7 +90,7 @@ func findSetMatches(re *syntax.Regexp, base string) []string { if totalSet > maxSetMatches { return nil } - for i := 0; i < len(re.Rune); i = i + 2 { + for i := 0; i+1 < len(re.Rune); i = i + 2 { lo, hi := re.Rune[i], re.Rune[i+1] for c := lo; c <= hi; c++ { matches = append(matches, base+string(c)) @@ -111,19 +105,14 @@ func findSetMatches(re *syntax.Regexp, base string) []string { } func findSetMatchesFromConcat(re *syntax.Regexp, base string) []string { - if isCaseInsensitive(re) { - return nil - } if len(re.Sub) == 0 { return nil } + clearBeginEndText(re) clearCapture(re.Sub...) - matches := findSetMatches(re.Sub[0], base) - if matches == nil { - return nil - } + matches := []string{base} - for i := 1; i < len(re.Sub); i++ { + for i := 0; i < len(re.Sub); i++ { var newMatches []string for _, b := range matches { m := findSetMatches(re.Sub[i], b) @@ -165,6 +154,25 @@ func clearCapture(regs ...*syntax.Regexp) { } } +// clearBeginEndText removes a leading begin-text anchor and a trailing end-text anchor from the regexp. Prometheus regexps are anchored to the beginning and end of the string. 
+func clearBeginEndText(re *syntax.Regexp) { + if len(re.Sub) == 0 { + return + } + if len(re.Sub) == 1 { + if re.Sub[0].Op == syntax.OpBeginText || re.Sub[0].Op == syntax.OpEndText { + re.Sub = nil + return + } + } + if re.Sub[0].Op == syntax.OpBeginText { + re.Sub = re.Sub[1:] + } + if re.Sub[len(re.Sub)-1].Op == syntax.OpEndText { + re.Sub = re.Sub[:len(re.Sub)-1] + } +} + // isCaseInsensitive tells if a regexp is case insensitive. // The flag should be check at each level of the syntax tree. func isCaseInsensitive(reg *syntax.Regexp) bool { diff --git a/pkg/labels/regexp_test.go b/pkg/labels/regexp_test.go index 86d69248b..4e29bf21e 100644 --- a/pkg/labels/regexp_test.go +++ b/pkg/labels/regexp_test.go @@ -122,6 +122,8 @@ func TestFindSetMatches(t *testing.T) { }{ // Single value, coming from a `bar=~"foo"` selector. {"foo", []string{"foo"}}, + {"^foo", []string{"foo"}}, + {"^foo$", []string{"foo"}}, // Simple sets alternates. {"foo|bar|zz", []string{"foo", "bar", "zz"}}, // Simple sets alternate and concat (bar|baz is parsed as ba(r|z)). @@ -131,6 +133,8 @@ func TestFindSetMatches(t *testing.T) { // Simple sets alternate and concat and alternates with empty matches // parsed as b(ar|(?:)|uzz) where b(?:) means literal b. {"bar|b|buzz", []string{"bar", "b", "buzz"}}, + // Skip anchors; they are enforced anyway at the root. + {"(^bar$)|(b$)|(^buzz)", []string{"bar", "b", "buzz"}}, // Simple sets containing escaped characters. 
{"fo\\.o|bar\\?|\\^baz", []string{"fo.o", "bar?", "^baz"}}, // using charclass @@ -142,6 +146,9 @@ func TestFindSetMatches(t *testing.T) { // triple concat with multiple alternates {"(api|rpc)_(v1|prom)_push", []string{"api_v1_push", "api_prom_push", "rpc_v1_push", "rpc_prom_push"}}, {"(api|rpc)_(v1|prom)_(push|query)", []string{"api_v1_push", "api_v1_query", "api_prom_push", "api_prom_query", "rpc_v1_push", "rpc_v1_query", "rpc_prom_push", "rpc_prom_query"}}, + // class starting with "-" + {"[-1-2][a-c]", []string{"-a", "-b", "-c", "1a", "1b", "1c", "2a", "2b", "2c"}}, + {"[1^3]", []string{"1", "3", "^"}}, // OpPlus with concat {"(.+)/(foo|bar)", nil}, // Simple sets containing special characters without escaping. diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index 242759b79..72e4b287f 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -108,7 +108,9 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix) iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$") iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$") - + jFooBar := labels.MustNewMatcher(labels.MatchRegexp, "j", "foo|bar") + iCharSet := labels.MustNewMatcher(labels.MatchRegexp, "i", "1[0-9]") + iAlternate := labels.MustNewMatcher(labels.MatchRegexp, "i", "(1|2|3|4|5|6|20|55)") cases := []struct { name string matchers []*labels.Matcher @@ -117,6 +119,9 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { {`n="1",j="foo"`, []*labels.Matcher{n1, jFoo}}, {`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}}, {`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}}, + {`i=~"1[0-9]",j=~"foo|bar"`, []*labels.Matcher{iCharSet, jFooBar}}, + {`j=~"foo|bar"`, []*labels.Matcher{jFooBar}}, + {`i=~"(1|2|3|4|5|6|20|55)"`, []*labels.Matcher{iAlternate}}, {`i=~".*"`, []*labels.Matcher{iStar}}, {`i=~"1.*"`, []*labels.Matcher{i1Star}}, 
{`i=~".*1"`, []*labels.Matcher{iStar1}},