From 259847a6b1326f0a9a2382a045ca8c40b03f5965 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Tue, 9 Apr 2019 11:59:45 +0100 Subject: [PATCH] Be smarter in how we look at matchers. (#572) * Add unittests for PostingsForMatcher. * Selector methods are all stateless, don't need a reference. * Be smarter in how we look at matchers. Look at all matchers to see if a label can be empty. Optimise Not handling, so i!="2" is a simple lookup rather than an inverse postings list. All all the Withouts together, rather than having to subtract each from all postings. Change the pre-expand the postings logic to always do it before doing a Without only. Don't do that if it's already a list. The initial goal here was that the oft-seen pattern i=~"something.+",i!="foo",i!="bar" becomes more efficient. benchmark old ns/op new ns/op delta BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62% BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65% BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90% BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57% BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04% BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75% BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10% BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70% BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12% BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96% BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03% BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12% BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39% BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93% BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01% Signed-off-by: Brian Brazil --- CHANGELOG.md | 2 + head_bench_test.go | 8 +- index/postings.go | 16 ++-- labels/selector.go | 26 ++++-- querier.go | 112 ++++++++++++++++-------- querier_test.go | 210 ++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 318 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3c8a62fb..aa8949865 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ - [BUGFIX] Fsync the meta file to persist it on disk to avoid data loss in case of a host crash. - [BUGFIX] Fix fd and vm_area leak on error path in chunks.NewDirReader. - [BUGFIX] Fix fd and vm_area leak on error path in index.NewFileReader. + - [ENHANCEMENT] Be smarter in how we look at matchers. + - [ENHANCEMENT] PostListings and NotMatcher now public. ## 0.6.1 - [BUGFIX] Update `last` after appending a non-overlapping chunk in `chunks.MergeOverlappingChunks`. [#539](https://github.com/prometheus/tsdb/pull/539) diff --git a/head_bench_test.go b/head_bench_test.go index ebae304d7..68505ea2c 100644 --- a/head_bench_test.go +++ b/head_bench_test.go @@ -49,9 +49,7 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) { }) } -// TODO: generalize benchmark and pass all postings for matchers here func BenchmarkHeadPostingForMatchers(b *testing.B) { - // Put a series, select it. GC it and then access it. h, err := NewHead(nil, nil, nil, 1000) testutil.Ok(b, err) defer func() { @@ -66,6 +64,12 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) { // Have some series that won't be matched, to properly test inverted matches. h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(i), "j", "bar")) hash++ + h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "0_"+strconv.Itoa(i), "j", "bar")) + hash++ + h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "1_"+strconv.Itoa(i), "j", "bar")) + hash++ + h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "2_"+strconv.Itoa(i), "j", "bar")) + hash++ } } diff --git a/index/postings.go b/index/postings.go index 9270c6818..84cda2b21 100644 --- a/index/postings.go +++ b/index/postings.go @@ -600,8 +600,8 @@ func (rp *removedPostings) Err() error { return rp.remove.Err() } -// listPostings implements the Postings interface over a plain list. -type listPostings struct { +// ListPostings implements the Postings interface over a plain list. +type ListPostings struct { list []uint64 cur uint64 } @@ -610,15 +610,15 @@ func NewListPostings(list []uint64) Postings { return newListPostings(list...) } -func newListPostings(list ...uint64) *listPostings { - return &listPostings{list: list} +func newListPostings(list ...uint64) *ListPostings { + return &ListPostings{list: list} } -func (it *listPostings) At() uint64 { +func (it *ListPostings) At() uint64 { return it.cur } -func (it *listPostings) Next() bool { +func (it *ListPostings) Next() bool { if len(it.list) > 0 { it.cur = it.list[0] it.list = it.list[1:] @@ -628,7 +628,7 @@ func (it *listPostings) Next() bool { return false } -func (it *listPostings) Seek(x uint64) bool { +func (it *ListPostings) Seek(x uint64) bool { // If the current value satisfies, then return. if it.cur >= x { return true @@ -650,7 +650,7 @@ func (it *listPostings) Seek(x uint64) bool { return false } -func (it *listPostings) Err() error { +func (it *ListPostings) Err() error { return nil } diff --git a/labels/selector.go b/labels/selector.go index bcc5b51e6..a0565f57e 100644 --- a/labels/selector.go +++ b/labels/selector.go @@ -14,6 +14,7 @@ package labels import ( + "fmt" "regexp" ) @@ -36,6 +37,8 @@ type Matcher interface { Name() string // Matches checks whether a value fulfills the constraints. Matches(v string) bool + // String returns a human readable matcher. + String() string } // EqualMatcher matches on equality. @@ -44,13 +47,16 @@ type EqualMatcher struct { } // Name implements Matcher interface. -func (m *EqualMatcher) Name() string { return m.name } +func (m EqualMatcher) Name() string { return m.name } // Matches implements Matcher interface. -func (m *EqualMatcher) Matches(v string) bool { return v == m.value } +func (m EqualMatcher) Matches(v string) bool { return v == m.value } + +// String implements Matcher interface. +func (m EqualMatcher) String() string { return fmt.Sprintf("%s=%q", m.name, m.value) } // Value returns the matched value. -func (m *EqualMatcher) Value() string { return m.value } +func (m EqualMatcher) Value() string { return m.value } // NewEqualMatcher returns a new matcher matching an exact label value. func NewEqualMatcher(name, value string) Matcher { @@ -62,8 +68,9 @@ type regexpMatcher struct { re *regexp.Regexp } -func (m *regexpMatcher) Name() string { return m.name } -func (m *regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) } +func (m regexpMatcher) Name() string { return m.name } +func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) } +func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) } // NewRegexpMatcher returns a new matcher verifying that a value matches // the regular expression pattern. @@ -87,14 +94,15 @@ func NewMustRegexpMatcher(name, pattern string) Matcher { } -// notMatcher inverts the matching result for a matcher. -type notMatcher struct { +// NotMatcher inverts the matching result for a matcher. +type NotMatcher struct { Matcher } -func (m *notMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) } +func (m NotMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) } +func (m NotMatcher) String() string { return fmt.Sprintf("not(%s)", m.Matcher.String()) } // Not inverts the matcher's matching result. func Not(m Matcher) Matcher { - return ¬Matcher{m} + return &NotMatcher{m} } diff --git a/querier.go b/querier.go index 71910f18c..3ec2ff3dc 100644 --- a/querier.go +++ b/querier.go @@ -262,37 +262,92 @@ func (q *blockQuerier) Close() error { } // PostingsForMatchers assembles a single postings iterator against the index reader -// based on the given matchers. It returns a list of label names that must be manually -// checked to not exist in series the postings list points to. -// It returns EmptyPostings() if it can be determined beforehand that no results will be found. +// based on the given matchers. func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) { - var its []index.Postings + var its, notIts []index.Postings + // See which label must be non-empty. + labelMustBeSet := make(map[string]bool, len(ms)) + for _, m := range ms { + if !m.Matches("") { + labelMustBeSet[m.Name()] = true + } + } for _, m := range ms { - it, err := postingsForMatcher(ix, m) + matchesEmpty := m.Matches("") + if labelMustBeSet[m.Name()] || !matchesEmpty { + // If this matcher must be non-empty, we can be smarter. + nm, isNot := m.(*labels.NotMatcher) + if isNot && matchesEmpty { // l!="foo" + // If the label can't be empty and is a Not and the inner matcher + // doesn't match empty, then subtract it out at the end. + it, err := postingsForMatcher(ix, nm.Matcher) + if err != nil { + return nil, err + } + notIts = append(notIts, it) + } else if isNot && !matchesEmpty { // l!="" + // If the label can't be empty and is a Not, but the inner matcher can + // be empty we need to use inversePostingsForMatcher. + it, err := inversePostingsForMatcher(ix, nm.Matcher) + if err != nil { + return nil, err + } + its = append(its, it) + } else { // l="a" + // Non-Not matcher, use normal postingsForMatcher. + it, err := postingsForMatcher(ix, m) + if err != nil { + return nil, err + } + its = append(its, it) + } + } else { // l="" + // If the matchers for a labelname selects an empty value, it selects all + // the series which don't have the label name set too. See: + // https://github.com/prometheus/prometheus/issues/3575 and + // https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555 + it, err := inversePostingsForMatcher(ix, m) + if err != nil { + return nil, err + } + notIts = append(notIts, it) + } + } + + // If there's nothing to subtract from, add in everything and remove the notIts later. + if len(its) == 0 && len(notIts) != 0 { + allPostings, err := ix.Postings(index.AllPostingsKey()) if err != nil { return nil, err } - its = append(its, it) + its = append(its, allPostings) } - return ix.SortedPostings(index.Intersect(its...)), nil + + it := index.Intersect(its...) + + for _, n := range notIts { + if _, ok := n.(*index.ListPostings); !ok { + // Best to pre-calculate the merged lists via next rather than have a ton + // of seeks in Without. + pl, err := index.ExpandPostings(n) + if err != nil { + return nil, err + } + n = index.NewListPostings(pl) + } + it = index.Without(it, n) + } + + return ix.SortedPostings(it), nil } func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) { - // If the matcher selects an empty value, it selects all the series which don't - // have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575 - // and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555 - if m.Matches("") { - return postingsForUnsetLabelMatcher(ix, m) - } + // This method will not return postings for missing labels. // Fast-path for equal matching. if em, ok := m.(*labels.EqualMatcher); ok { - it, err := ix.Postings(em.Name(), em.Value()) - if err != nil { - return nil, err - } - return it, nil + return ix.Postings(em.Name(), em.Value()) } tpls, err := ix.LabelValues(m.Name()) @@ -328,7 +383,8 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error return index.Merge(rit...), nil } -func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) { +// inversePostingsForMatcher eeturns the postings for the series with the label name set but not matching the matcher. +func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) { tpls, err := ix.LabelValues(m.Name()) if err != nil { return nil, err @@ -356,23 +412,7 @@ func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Posti rit = append(rit, it) } - merged := index.Merge(rit...) - // With many many postings, it's best to pre-calculate - // the merged list via next rather than have a ton of seeks - // in Without/Intersection. - if len(rit) > 100 { - pl, err := index.ExpandPostings(merged) - if err != nil { - return nil, err - } - merged = index.NewListPostings(pl) - } - - allPostings, err := ix.Postings(index.AllPostingsKey()) - if err != nil { - return nil, err - } - return index.Without(allPostings, merged), nil + return index.Merge(rit...), nil } func mergeStrings(a, b []string) []string { diff --git a/querier_test.go b/querier_test.go index 0ce114d9b..a9bb0086c 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1366,7 +1366,7 @@ func (m mockIndex) Symbols() (map[string]struct{}, error) { return m.symbols, nil } -func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta) error { +func (m *mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta) error { if _, ok := m.series[ref]; ok { return errors.Errorf("series with reference %d already added", ref) } @@ -1691,3 +1691,211 @@ func BenchmarkQuerySeek(b *testing.B) { } } } + +func TestPostingsForMatchers(t *testing.T) { + h, err := NewHead(nil, nil, nil, 1000) + testutil.Ok(t, err) + defer func() { + testutil.Ok(t, h.Close()) + }() + + app := h.Appender() + app.Add(labels.FromStrings("n", "1"), 0, 0) + app.Add(labels.FromStrings("n", "1", "i", "a"), 0, 0) + app.Add(labels.FromStrings("n", "1", "i", "b"), 0, 0) + app.Add(labels.FromStrings("n", "2"), 0, 0) + testutil.Ok(t, app.Commit()) + + cases := []struct { + matchers []labels.Matcher + exp []labels.Labels + }{ + // Simple equals. + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewEqualMatcher("i", "a")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewEqualMatcher("i", "missing")}, + exp: []labels.Labels{}, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("missing", "")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "2"), + }, + }, + // Not equals. + { + matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("n", "1"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "2"), + }, + }, + { + matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("i", ""))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("missing", ""))}, + exp: []labels.Labels{}, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", "a"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", ""))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + // Regex. + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^1$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^a$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^a?$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + }, + }, + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "^$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "2"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^.*$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^.+$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + // Not regex. + { + matchers: []labels.Matcher{labels.Not(labels.NewMustRegexpMatcher("n", "^1$"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "2"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^a$"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^a?$"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^$"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^.*$"))}, + exp: []labels.Labels{}, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^.+$"))}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + }, + }, + // Combinations. + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", "")), labels.NewEqualMatcher("i", "a")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + { + matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", "b")), labels.NewMustRegexpMatcher("i", "^(b|a).*$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + } + + ir, err := h.Index() + testutil.Ok(t, err) + + for _, c := range cases { + exp := map[string]struct{}{} + for _, l := range c.exp { + exp[l.String()] = struct{}{} + } + p, err := PostingsForMatchers(ir, c.matchers...) + testutil.Ok(t, err) + + for p.Next() { + lbls := labels.Labels{} + ir.Series(p.At(), &lbls, &[]chunks.Meta{}) + if _, ok := exp[lbls.String()]; !ok { + t.Errorf("Evaluating %v, unexpected result %s", c.matchers, lbls.String()) + } else { + delete(exp, lbls.String()) + } + } + testutil.Ok(t, p.Err()) + if len(exp) != 0 { + t.Errorf("Evaluating %v, missing results %+v", c.matchers, exp) + } + } + +}