mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 05:34:05 -08:00
Merge pull request #15 from grafana/improvesetmatches
Improve `findSetMatches` to support concatenation.
This commit is contained in:
commit
9173cade01
|
@ -118,6 +118,8 @@ ifeq ($(GOHOSTARCH),amd64)
|
|||
endif
|
||||
endif
|
||||
|
||||
test-flags += -timeout 20m
|
||||
|
||||
# This rule is used to forward a target like "build" to "common-build". This
|
||||
# allows a new "build" target to be defined in a Makefile which includes this
|
||||
# one and override "common-build" without override warnings.
|
||||
|
|
|
@ -111,10 +111,19 @@ func (m *Matcher) Inverse() (*Matcher, error) {
|
|||
panic("labels.Matcher.Matches: invalid match type")
|
||||
}
|
||||
|
||||
// GetRegexString returns the regex string.
|
||||
func (m *Matcher) GetRegexString() string {
|
||||
if m.re == nil {
|
||||
return ""
|
||||
}
|
||||
return m.re.GetRegexString()
|
||||
}
|
||||
|
||||
// SetMatches returns a set of equality matchers for the current regex matchers if possible.
|
||||
// For examples the regexp `a(b|f)` will returns "ab" and "af".
|
||||
// Returns nil if we can't replace the regexp by only equality matchers.
|
||||
func (m *Matcher) SetMatches() []string {
|
||||
if m.re == nil {
|
||||
return nil
|
||||
}
|
||||
return m.re.setMatches
|
||||
}
|
||||
|
|
|
@ -19,26 +19,31 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
const maxSetMatches = 256
|
||||
|
||||
type FastRegexMatcher struct {
|
||||
re *regexp.Regexp
|
||||
prefix string
|
||||
suffix string
|
||||
contains string
|
||||
re *regexp.Regexp
|
||||
|
||||
setMatches []string
|
||||
prefix string
|
||||
suffix string
|
||||
contains string
|
||||
}
|
||||
|
||||
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
||||
re, err := regexp.Compile("^(?:" + v + ")$")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
parsed, err := syntax.Parse(v, syntax.Perl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Simplify the syntax tree to run faster.
|
||||
parsed = parsed.Simplify()
|
||||
re, err := regexp.Compile("^(?:" + parsed.String() + ")$")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := &FastRegexMatcher{
|
||||
re: re,
|
||||
re: re,
|
||||
setMatches: findSetMatches(parsed, ""),
|
||||
}
|
||||
|
||||
if parsed.Op == syntax.OpConcat {
|
||||
|
@ -48,7 +53,146 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
|||
return m, nil
|
||||
}
|
||||
|
||||
// findSetMatches extract equality matches from a regexp.
|
||||
// Returns nil if we can't replace the regexp by only equality matchers.
|
||||
func findSetMatches(re *syntax.Regexp, base string) []string {
|
||||
// Matches are case sensitive, if we find a case insensitive regexp.
|
||||
// We have to abort.
|
||||
if isCaseInsensitive(re) {
|
||||
return nil
|
||||
}
|
||||
clearBeginEndText(re)
|
||||
switch re.Op {
|
||||
case syntax.OpLiteral:
|
||||
return []string{base + string(re.Rune)}
|
||||
case syntax.OpEmptyMatch:
|
||||
if base != "" {
|
||||
return []string{base}
|
||||
}
|
||||
case syntax.OpAlternate:
|
||||
return findSetMatchesFromAlternate(re, base)
|
||||
case syntax.OpCapture:
|
||||
clearCapture(re)
|
||||
return findSetMatches(re, base)
|
||||
case syntax.OpConcat:
|
||||
return findSetMatchesFromConcat(re, base)
|
||||
case syntax.OpCharClass:
|
||||
if len(re.Rune)%2 != 0 {
|
||||
return nil
|
||||
}
|
||||
var matches []string
|
||||
var totalSet int
|
||||
for i := 0; i+1 < len(re.Rune); i = i + 2 {
|
||||
totalSet += int(re.Rune[i+1]-re.Rune[i]) + 1
|
||||
}
|
||||
// limits the total characters that can be used to create matches.
|
||||
// In some case like negation [^0-9] a lot of possibilities exists and that
|
||||
// can create thousands of possible matches at which points we're better off using regexp.
|
||||
if totalSet > maxSetMatches {
|
||||
return nil
|
||||
}
|
||||
for i := 0; i+1 < len(re.Rune); i = i + 2 {
|
||||
lo, hi := re.Rune[i], re.Rune[i+1]
|
||||
for c := lo; c <= hi; c++ {
|
||||
matches = append(matches, base+string(c))
|
||||
}
|
||||
|
||||
}
|
||||
return matches
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func findSetMatchesFromConcat(re *syntax.Regexp, base string) []string {
|
||||
if len(re.Sub) == 0 {
|
||||
return nil
|
||||
}
|
||||
clearCapture(re.Sub...)
|
||||
matches := []string{base}
|
||||
|
||||
for i := 0; i < len(re.Sub); i++ {
|
||||
var newMatches []string
|
||||
for _, b := range matches {
|
||||
m := findSetMatches(re.Sub[i], b)
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
if tooManyMatches(newMatches, m...) {
|
||||
return nil
|
||||
}
|
||||
newMatches = append(newMatches, m...)
|
||||
}
|
||||
matches = newMatches
|
||||
}
|
||||
|
||||
return matches
|
||||
}
|
||||
|
||||
func findSetMatchesFromAlternate(re *syntax.Regexp, base string) []string {
|
||||
var setMatches []string
|
||||
for _, sub := range re.Sub {
|
||||
found := findSetMatches(sub, base)
|
||||
if found == nil {
|
||||
return nil
|
||||
}
|
||||
if tooManyMatches(setMatches, found...) {
|
||||
return nil
|
||||
}
|
||||
setMatches = append(setMatches, found...)
|
||||
}
|
||||
return setMatches
|
||||
}
|
||||
|
||||
// clearCapture removes capture operations, as they are not used for
// matching: every node whose operator is OpCapture is overwritten in place
// with its first sub-expression. Other nodes are left untouched.
func clearCapture(regs ...*syntax.Regexp) {
	for i := range regs {
		node := regs[i]
		if node.Op != syntax.OpCapture {
			continue
		}
		*node = *node.Sub[0]
	}
}
|
||||
|
||||
// clearBeginEndText removes a leading begin-text and a trailing end-text
// anchor from the direct sub-expressions of re. Prometheus regexps are
// anchored to the beginning and end of the string, so these anchors are
// redundant there.
//
// Alternations are left untouched: removing an anchor branch from an
// alternation such as `^|foo` would drop the branch that matches the empty
// string and change what the regexp matches.
//
// When the only sub-expression is an anchor, re is turned into an
// OpEmptyMatch node. Clearing only Sub would leave an operator (for
// example OpCapture) without the sub-expression it requires, and later
// code that dereferences Sub[0] would panic.
func clearBeginEndText(re *syntax.Regexp) {
	if re.Op == syntax.OpAlternate {
		return
	}
	if len(re.Sub) == 0 {
		return
	}

	if len(re.Sub) == 1 {
		if op := re.Sub[0].Op; op == syntax.OpBeginText || op == syntax.OpEndText {
			// The anchor was the whole content: what is left matches
			// only the empty string.
			re.Op = syntax.OpEmptyMatch
			re.Sub = nil
			return
		}
	}

	if re.Sub[0].Op == syntax.OpBeginText {
		re.Sub = re.Sub[1:]
	}
	// Sub cannot be empty here: a single anchor was handled above, and
	// stripping the leading anchor from two or more elements leaves at
	// least one.
	if last := len(re.Sub) - 1; re.Sub[last].Op == syntax.OpEndText {
		re.Sub = re.Sub[:last]
	}
}
|
||||
|
||||
// isCaseInsensitive reports whether the FoldCase flag is set on reg.
// Only the given node is inspected, so the flag should be checked at each
// level of the syntax tree.
func isCaseInsensitive(reg *syntax.Regexp) bool {
	folded := reg.Flags & syntax.FoldCase
	return folded != 0
}
|
||||
|
||||
// tooManyMatches guards against creating too many set matches
|
||||
func tooManyMatches(matches []string, new ...string) bool {
|
||||
return len(matches)+len(new) > maxSetMatches
|
||||
}
|
||||
|
||||
func (m *FastRegexMatcher) MatchString(s string) bool {
|
||||
if len(m.setMatches) != 0 {
|
||||
for _, match := range m.setMatches {
|
||||
if match == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
|
||||
return false
|
||||
}
|
||||
|
@ -61,6 +205,10 @@ func (m *FastRegexMatcher) MatchString(s string) bool {
|
|||
return m.re.MatchString(s)
|
||||
}
|
||||
|
||||
func (m *FastRegexMatcher) SetMatches() []string {
|
||||
return m.setMatches
|
||||
}
|
||||
|
||||
func (m *FastRegexMatcher) GetRegexString() string {
|
||||
return m.re.String()
|
||||
}
|
||||
|
|
|
@ -96,3 +96,65 @@ func TestOptimizeConcatRegex(t *testing.T) {
|
|||
require.Equal(t, c.contains, contains)
|
||||
}
|
||||
}
|
||||
|
||||
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
||||
func TestFindSetMatches(t *testing.T) {
|
||||
for _, c := range []struct {
|
||||
pattern string
|
||||
exp []string
|
||||
}{
|
||||
// Single value, coming from a `bar=~"foo"` selector.
|
||||
{"foo", []string{"foo"}},
|
||||
{"^foo", []string{"foo"}},
|
||||
{"^foo$", []string{"foo"}},
|
||||
// Simple sets alternates.
|
||||
{"foo|bar|zz", []string{"foo", "bar", "zz"}},
|
||||
// Simple sets alternate and concat (bar|baz is parsed as "ba[rz]").
|
||||
{"foo|bar|baz", []string{"foo", "bar", "baz"}},
|
||||
// Simple sets alternate and concat and capture
|
||||
{"foo|bar|baz|(zz)", []string{"foo", "bar", "baz", "zz"}},
|
||||
// Simple sets alternate and concat and alternates with empty matches
|
||||
// parsed as b(ar|(?:)|uzz) where b(?:) means literal b.
|
||||
{"bar|b|buzz", []string{"bar", "b", "buzz"}},
|
||||
// Skip anchors it's enforced anyway at the root.
|
||||
{"(^bar$)|(b$)|(^buzz)", []string{"bar", "b", "buzz"}},
|
||||
// Simple sets containing escaped characters.
|
||||
{"fo\\.o|bar\\?|\\^baz", []string{"fo.o", "bar?", "^baz"}},
|
||||
// using charclass
|
||||
{"[abc]d", []string{"ad", "bd", "cd"}},
|
||||
// high low charset different => A(B[CD]|EF)|BC[XY]
|
||||
{"ABC|ABD|AEF|BCX|BCY", []string{"ABC", "ABD", "AEF", "BCX", "BCY"}},
|
||||
// triple concat
|
||||
{"api_(v1|prom)_push", []string{"api_v1_push", "api_prom_push"}},
|
||||
// triple concat with multiple alternates
|
||||
{"(api|rpc)_(v1|prom)_push", []string{"api_v1_push", "api_prom_push", "rpc_v1_push", "rpc_prom_push"}},
|
||||
{"(api|rpc)_(v1|prom)_(push|query)", []string{"api_v1_push", "api_v1_query", "api_prom_push", "api_prom_query", "rpc_v1_push", "rpc_v1_query", "rpc_prom_push", "rpc_prom_query"}},
|
||||
// class starting with "-"
|
||||
{"[-1-2][a-c]", []string{"-a", "-b", "-c", "1a", "1b", "1c", "2a", "2b", "2c"}},
|
||||
{"[1^3]", []string{"1", "3", "^"}},
|
||||
// OpPlus with concat
|
||||
{"(.+)/(foo|bar)", nil},
|
||||
// Simple sets containing special characters without escaping.
|
||||
{"fo.o|bar?|^baz", nil},
|
||||
// case sensitive wrapper.
|
||||
{"(?i)foo", nil},
|
||||
// case sensitive wrapper on alternate.
|
||||
{"(?i)foo|bar|baz", nil},
|
||||
// case sensitive wrapper on concat.
|
||||
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
|
||||
// too high charset combination
|
||||
{"(api|rpc)_[^0-9]", nil},
|
||||
// too many combinations
|
||||
{"[a-z][a-z]", nil},
|
||||
} {
|
||||
c := c
|
||||
t.Run(c.pattern, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
parsed, err := syntax.Parse(c.pattern, syntax.Perl)
|
||||
require.NoError(t, err)
|
||||
matches := findSetMatches(parsed, "")
|
||||
require.Equal(t, c.exp, matches)
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,8 +16,6 @@ package tsdb
|
|||
import (
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
|
@ -30,20 +28,6 @@ import (
|
|||
"github.com/prometheus/prometheus/tsdb/tombstones"
|
||||
)
|
||||
|
||||
// regexMetaCharacterBytes is the bitmap used by isRegexMetaCharacter to
// check whether a character needs to be escaped. A byte b is recorded as
// bit b/16 of entry b%16.
var regexMetaCharacterBytes [16]byte

// isRegexMetaCharacter reports whether byte b needs to be escaped.
// Bytes at or above utf8.RuneSelf never do.
func isRegexMetaCharacter(b byte) bool {
	if b >= utf8.RuneSelf {
		return false
	}
	mask := byte(1) << (b / 16)
	return regexMetaCharacterBytes[b%16]&mask != 0
}

// init fills the bitmap with the regex meta characters.
func init() {
	const metaChars = `.+*?()|[]{}^$`
	for i := 0; i < len(metaChars); i++ {
		c := metaChars[i]
		regexMetaCharacterBytes[c%16] |= 1 << (c / 16)
	}
}
|
||||
|
||||
type blockBaseQuerier struct {
|
||||
index IndexReader
|
||||
chunks ChunkReader
|
||||
|
@ -180,48 +164,6 @@ func (q *blockChunkQuerier) Select(sortSeries bool, hints *storage.SelectHints,
|
|||
return newBlockChunkSeriesSet(q.index, q.chunks, q.tombstones, p, mint, maxt)
|
||||
}
|
||||
|
||||
func findSetMatches(pattern string) []string {
|
||||
// Return empty matches if the wrapper from Prometheus is missing.
|
||||
if len(pattern) < 6 || pattern[:4] != "^(?:" || pattern[len(pattern)-2:] != ")$" {
|
||||
return nil
|
||||
}
|
||||
escaped := false
|
||||
sets := []*strings.Builder{{}}
|
||||
for i := 4; i < len(pattern)-2; i++ {
|
||||
if escaped {
|
||||
switch {
|
||||
case isRegexMetaCharacter(pattern[i]):
|
||||
sets[len(sets)-1].WriteByte(pattern[i])
|
||||
case pattern[i] == '\\':
|
||||
sets[len(sets)-1].WriteByte('\\')
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
escaped = false
|
||||
} else {
|
||||
switch {
|
||||
case isRegexMetaCharacter(pattern[i]):
|
||||
if pattern[i] == '|' {
|
||||
sets = append(sets, &strings.Builder{})
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
case pattern[i] == '\\':
|
||||
escaped = true
|
||||
default:
|
||||
sets[len(sets)-1].WriteByte(pattern[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
matches := make([]string, 0, len(sets))
|
||||
for _, s := range sets {
|
||||
if s.Len() > 0 {
|
||||
matches = append(matches, s.String())
|
||||
}
|
||||
}
|
||||
return matches
|
||||
}
|
||||
|
||||
// PostingsForMatchers assembles a single postings iterator against the index reader
|
||||
// based on the given matchers. The resulting postings are not ordered by series.
|
||||
func PostingsForMatchers(ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) {
|
||||
|
@ -316,7 +258,7 @@ func postingsForMatcher(ix IndexReader, m *labels.Matcher) (index.Postings, erro
|
|||
|
||||
// Fast-path for set matching.
|
||||
if m.Type == labels.MatchRegexp {
|
||||
setMatches := findSetMatches(m.GetRegexString())
|
||||
setMatches := m.SetMatches()
|
||||
if len(setMatches) > 0 {
|
||||
sort.Strings(setMatches)
|
||||
return ix.Postings(m.Name, setMatches...)
|
||||
|
@ -612,6 +554,7 @@ func (p *populateWithDelGenericSeriesIterator) Err() error { return p.err }
|
|||
func (p *populateWithDelGenericSeriesIterator) toSeriesIterator() chunkenc.Iterator {
|
||||
return &populateWithDelSeriesIterator{populateWithDelGenericSeriesIterator: p}
|
||||
}
|
||||
|
||||
func (p *populateWithDelGenericSeriesIterator) toChunkSeriesIterator() chunks.Iterator {
|
||||
return &populateWithDelChunkSeriesIterator{populateWithDelGenericSeriesIterator: p}
|
||||
}
|
||||
|
@ -881,7 +824,6 @@ Outer:
|
|||
|
||||
if ts <= tr.Maxt {
|
||||
return true
|
||||
|
||||
}
|
||||
it.Intervals = it.Intervals[1:]
|
||||
}
|
||||
|
|
|
@ -108,7 +108,9 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
|
|||
iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix)
|
||||
iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
|
||||
iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$")
|
||||
|
||||
jFooBar := labels.MustNewMatcher(labels.MatchRegexp, "j", "foo|bar")
|
||||
iCharSet := labels.MustNewMatcher(labels.MatchRegexp, "i", "1[0-9]")
|
||||
iAlternate := labels.MustNewMatcher(labels.MatchRegexp, "i", "(1|2|3|4|5|6|20|55)")
|
||||
cases := []struct {
|
||||
name string
|
||||
matchers []*labels.Matcher
|
||||
|
@ -117,6 +119,9 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
|
|||
{`n="1",j="foo"`, []*labels.Matcher{n1, jFoo}},
|
||||
{`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}},
|
||||
{`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}},
|
||||
{`i=~"1[0-9]",j=~"foo|bar"`, []*labels.Matcher{iCharSet, jFooBar}},
|
||||
{`j=~"foo|bar"`, []*labels.Matcher{jFooBar}},
|
||||
{`i=~"(1|2|3|4|5|6|20|55)"`, []*labels.Matcher{iAlternate}},
|
||||
{`i=~".*"`, []*labels.Matcher{iStar}},
|
||||
{`i=~"1.*"`, []*labels.Matcher{i1Star}},
|
||||
{`i=~".*1"`, []*labels.Matcher{iStar1}},
|
||||
|
|
|
@ -918,7 +918,7 @@ func TestPopulateWithDelSeriesIterator_NextWithMinTime(t *testing.T) {
|
|||
// The subset are all equivalent so this does not capture merging of partial or non-overlapping sets well.
|
||||
// TODO(bwplotka): Merge with storage merged series set benchmark.
|
||||
func BenchmarkMergedSeriesSet(b *testing.B) {
|
||||
var sel = func(sets []storage.SeriesSet) storage.SeriesSet {
|
||||
sel := func(sets []storage.SeriesSet) storage.SeriesSet {
|
||||
return storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
|
||||
}
|
||||
|
||||
|
@ -1560,69 +1560,6 @@ func BenchmarkSetMatcher(b *testing.B) {
|
|||
}
|
||||
}
|
||||
|
||||
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
||||
func TestFindSetMatches(t *testing.T) {
|
||||
cases := []struct {
|
||||
pattern string
|
||||
exp []string
|
||||
}{
|
||||
// Single value, coming from a `bar=~"foo"` selector.
|
||||
{
|
||||
pattern: "^(?:foo)$",
|
||||
exp: []string{
|
||||
"foo",
|
||||
},
|
||||
},
|
||||
// Simple sets.
|
||||
{
|
||||
pattern: "^(?:foo|bar|baz)$",
|
||||
exp: []string{
|
||||
"foo",
|
||||
"bar",
|
||||
"baz",
|
||||
},
|
||||
},
|
||||
// Simple sets containing escaped characters.
|
||||
{
|
||||
pattern: "^(?:fo\\.o|bar\\?|\\^baz)$",
|
||||
exp: []string{
|
||||
"fo.o",
|
||||
"bar?",
|
||||
"^baz",
|
||||
},
|
||||
},
|
||||
// Simple sets containing special characters without escaping.
|
||||
{
|
||||
pattern: "^(?:fo.o|bar?|^baz)$",
|
||||
exp: nil,
|
||||
},
|
||||
// Missing wrapper.
|
||||
{
|
||||
pattern: "foo|bar|baz",
|
||||
exp: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
matches := findSetMatches(c.pattern)
|
||||
if len(c.exp) == 0 {
|
||||
if len(matches) != 0 {
|
||||
t.Errorf("Evaluating %s, unexpected result %v", c.pattern, matches)
|
||||
}
|
||||
} else {
|
||||
if len(matches) != len(c.exp) {
|
||||
t.Errorf("Evaluating %s, length of result not equal to exp", c.pattern)
|
||||
} else {
|
||||
for i := 0; i < len(c.exp); i++ {
|
||||
if c.exp[i] != matches[i] {
|
||||
t.Errorf("Evaluating %s, unexpected result %s", c.pattern, matches[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPostingsForMatchers(t *testing.T) {
|
||||
chunkDir, err := ioutil.TempDir("", "chunk_dir")
|
||||
require.NoError(t, err)
|
||||
|
@ -1881,7 +1818,6 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
t.Errorf("Evaluating %v, missing results %+v", c.matchers, exp)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TestClose ensures that calling Close more than once doesn't block and doesn't panic.
|
||||
|
@ -2106,7 +2042,7 @@ func TestPostingsForMatcher(t *testing.T) {
|
|||
{
|
||||
// Test case for double quoted regex matcher
|
||||
matcher: labels.MustNewMatcher(labels.MatchRegexp, "test", "^(?:a|b)$"),
|
||||
hasError: true,
|
||||
hasError: false,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -2141,7 +2077,12 @@ func TestBlockBaseSeriesSet(t *testing.T) {
|
|||
{
|
||||
lset: labels.New([]labels.Label{{Name: "a", Value: "a"}}...),
|
||||
chunks: []chunks.Meta{
|
||||
{Ref: 29}, {Ref: 45}, {Ref: 245}, {Ref: 123}, {Ref: 4232}, {Ref: 5344},
|
||||
{Ref: 29},
|
||||
{Ref: 45},
|
||||
{Ref: 245},
|
||||
{Ref: 123},
|
||||
{Ref: 4232},
|
||||
{Ref: 5344},
|
||||
{Ref: 121},
|
||||
},
|
||||
ref: 12,
|
||||
|
|
Loading…
Reference in a new issue