Fix regexp set matches for literal matchers

I was surprised to find out that posting lookups for `foo=~"(bar|bar)"`
are faster than `foo=~"bar"`. It turns out we introduced a performance
regression in https://github.com/grafana/mimir-prometheus/pull/463.

When we added the `optimizeAlternatingLiterals` function, we subtly
broke one edge case. A regexp matcher that matches a single literal,
like `foo=~"bar"`, used to return `bar` from `SetMatches()`, but
currently does not. The implication is that the tsdb will first do a
LabelValues call to get all values for `foo`, then match them against
the regexp `bar`. This PR restores the previous behavior, which is able
to look up postings for `foo="bar"` directly instead.
This commit is contained in:
Patrick Oyarzun 2024-01-02 13:42:01 -06:00
parent 12d2c1038b
commit 84841a4c4e
No known key found for this signature in database
GPG key ID: 44B349E67EC0257A
2 changed files with 9 additions and 1 deletions

View file

@ -366,7 +366,7 @@ func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
// If there are no alternates, check if the string is a literal
if estimatedAlternates == 1 {
if regexp.QuoteMeta(s) == s {
return &equalStringMatcher{s: s, caseSensitive: true}, nil
return &equalStringMatcher{s: s, caseSensitive: true}, []string{s}
}
return nil, nil
}

View file

@ -342,6 +342,14 @@ func TestFindSetMatches(t *testing.T) {
matches, actualCaseSensitive := findSetMatches(parsed)
require.Equal(t, c.expMatches, matches)
require.Equal(t, c.expCaseSensitive, actualCaseSensitive)
if c.expCaseSensitive {
// When the regexp is case sensitive, we want to ensure that the
// set matches are maintained in the final matcher.
r, err := newFastRegexMatcherWithoutCache(c.pattern)
require.NoError(t, err)
require.Equal(t, c.expMatches, r.SetMatches())
}
})
}
}