Be smarter in how we look at matchers. (#572)

* Add unittests for PostingsForMatcher.

* Selector methods are all stateless, don't need a reference.

* Be smarter in how we look at matchers.

Look at all matchers to see if a label can be empty.

Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.

All all the Withouts together, rather than
having to subtract each from all postings.

Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.

The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.

benchmark                                                            old ns/op     new ns/op     delta
BenchmarkHeadPostingForMatchers/n="1"-4                              5888          6160          +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4                      7190          6640          -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4                      6038          5923          -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4                     6030884       4850525       -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4                            887377940     230329137     -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4                            490316101     319931758     -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4                              594961991     130279313     -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4                              537542388     318751015     -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4              10460243      8565195       -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4       44964267      8561546       -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4                42244885      29137737      -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4              35285834      32774584      -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4             8951047       8379024       -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4       63813335      30672688      -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4     45381112      44924397      -1.01%

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
Brian Brazil 2019-04-09 11:59:45 +01:00 committed by GitHub
parent 7ab060c864
commit 259847a6b1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 318 additions and 56 deletions

View file

@ -7,6 +7,8 @@
- [BUGFIX] Fsync the meta file to persist it on disk to avoid data loss in case of a host crash.
- [BUGFIX] Fix fd and vm_area leak on error path in chunks.NewDirReader.
- [BUGFIX] Fix fd and vm_area leak on error path in index.NewFileReader.
- [ENHANCEMENT] Be smarter in how we look at matchers.
- [ENHANCEMENT] PostListings and NotMatcher now public.
## 0.6.1
- [BUGFIX] Update `last` after appending a non-overlapping chunk in `chunks.MergeOverlappingChunks`. [#539](https://github.com/prometheus/tsdb/pull/539)

View file

@ -49,9 +49,7 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) {
})
}
// TODO: generalize benchmark and pass all postings for matchers here
func BenchmarkHeadPostingForMatchers(b *testing.B) {
// Put a series, select it. GC it and then access it.
h, err := NewHead(nil, nil, nil, 1000)
testutil.Ok(b, err)
defer func() {
@ -66,6 +64,12 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
// Have some series that won't be matched, to properly test inverted matches.
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "0_"+strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "1_"+strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "2_"+strconv.Itoa(i), "j", "bar"))
hash++
}
}

View file

@ -600,8 +600,8 @@ func (rp *removedPostings) Err() error {
return rp.remove.Err()
}
// listPostings implements the Postings interface over a plain list.
type listPostings struct {
// ListPostings implements the Postings interface over a plain list.
type ListPostings struct {
list []uint64
cur uint64
}
@ -610,15 +610,15 @@ func NewListPostings(list []uint64) Postings {
return newListPostings(list...)
}
func newListPostings(list ...uint64) *listPostings {
return &listPostings{list: list}
func newListPostings(list ...uint64) *ListPostings {
return &ListPostings{list: list}
}
func (it *listPostings) At() uint64 {
func (it *ListPostings) At() uint64 {
return it.cur
}
func (it *listPostings) Next() bool {
func (it *ListPostings) Next() bool {
if len(it.list) > 0 {
it.cur = it.list[0]
it.list = it.list[1:]
@ -628,7 +628,7 @@ func (it *listPostings) Next() bool {
return false
}
func (it *listPostings) Seek(x uint64) bool {
func (it *ListPostings) Seek(x uint64) bool {
// If the current value satisfies, then return.
if it.cur >= x {
return true
@ -650,7 +650,7 @@ func (it *listPostings) Seek(x uint64) bool {
return false
}
func (it *listPostings) Err() error {
func (it *ListPostings) Err() error {
return nil
}

View file

@ -14,6 +14,7 @@
package labels
import (
"fmt"
"regexp"
)
@ -36,6 +37,8 @@ type Matcher interface {
Name() string
// Matches checks whether a value fulfills the constraints.
Matches(v string) bool
// String returns a human readable matcher.
String() string
}
// EqualMatcher matches on equality.
@ -44,13 +47,16 @@ type EqualMatcher struct {
}
// Name implements Matcher interface.
func (m *EqualMatcher) Name() string { return m.name }
func (m EqualMatcher) Name() string { return m.name }
// Matches implements Matcher interface.
func (m *EqualMatcher) Matches(v string) bool { return v == m.value }
func (m EqualMatcher) Matches(v string) bool { return v == m.value }
// String implements Matcher interface.
func (m EqualMatcher) String() string { return fmt.Sprintf("%s=%q", m.name, m.value) }
// Value returns the matched value.
func (m *EqualMatcher) Value() string { return m.value }
func (m EqualMatcher) Value() string { return m.value }
// NewEqualMatcher returns a new matcher matching an exact label value.
func NewEqualMatcher(name, value string) Matcher {
@ -62,8 +68,9 @@ type regexpMatcher struct {
re *regexp.Regexp
}
func (m *regexpMatcher) Name() string { return m.name }
func (m *regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
func (m regexpMatcher) Name() string { return m.name }
func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
// NewRegexpMatcher returns a new matcher verifying that a value matches
// the regular expression pattern.
@ -87,14 +94,15 @@ func NewMustRegexpMatcher(name, pattern string) Matcher {
}
// notMatcher inverts the matching result for a matcher.
type notMatcher struct {
// NotMatcher inverts the matching result for a matcher.
type NotMatcher struct {
Matcher
}
func (m *notMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) }
func (m NotMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) }
func (m NotMatcher) String() string { return fmt.Sprintf("not(%s)", m.Matcher.String()) }
// Not inverts the matcher's matching result.
func Not(m Matcher) Matcher {
return &notMatcher{m}
return &NotMatcher{m}
}

View file

@ -262,37 +262,92 @@ func (q *blockQuerier) Close() error {
}
// PostingsForMatchers assembles a single postings iterator against the index reader
// based on the given matchers. It returns a list of label names that must be manually
// checked to not exist in series the postings list points to.
// It returns EmptyPostings() if it can be determined beforehand that no results will be found.
// based on the given matchers.
func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) {
var its []index.Postings
var its, notIts []index.Postings
// See which label must be non-empty.
labelMustBeSet := make(map[string]bool, len(ms))
for _, m := range ms {
if !m.Matches("") {
labelMustBeSet[m.Name()] = true
}
}
for _, m := range ms {
matchesEmpty := m.Matches("")
if labelMustBeSet[m.Name()] || !matchesEmpty {
// If this matcher must be non-empty, we can be smarter.
nm, isNot := m.(*labels.NotMatcher)
if isNot && matchesEmpty { // l!="foo"
// If the label can't be empty and is a Not and the inner matcher
// doesn't match empty, then subtract it out at the end.
it, err := postingsForMatcher(ix, nm.Matcher)
if err != nil {
return nil, err
}
notIts = append(notIts, it)
} else if isNot && !matchesEmpty { // l!=""
// If the label can't be empty and is a Not, but the inner matcher can
// be empty we need to use inversePostingsForMatcher.
it, err := inversePostingsForMatcher(ix, nm.Matcher)
if err != nil {
return nil, err
}
its = append(its, it)
} else { // l="a"
// Non-Not matcher, use normal postingsForMatcher.
it, err := postingsForMatcher(ix, m)
if err != nil {
return nil, err
}
its = append(its, it)
}
return ix.SortedPostings(index.Intersect(its...)), nil
}
func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
// If the matcher selects an empty value, it selects all the series which don't
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
if m.Matches("") {
return postingsForUnsetLabelMatcher(ix, m)
}
// Fast-path for equal matching.
if em, ok := m.(*labels.EqualMatcher); ok {
it, err := ix.Postings(em.Name(), em.Value())
} else { // l=""
// If the matchers for a labelname selects an empty value, it selects all
// the series which don't have the label name set too. See:
// https://github.com/prometheus/prometheus/issues/3575 and
// https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
it, err := inversePostingsForMatcher(ix, m)
if err != nil {
return nil, err
}
return it, nil
notIts = append(notIts, it)
}
}
// If there's nothing to subtract from, add in everything and remove the notIts later.
if len(its) == 0 && len(notIts) != 0 {
allPostings, err := ix.Postings(index.AllPostingsKey())
if err != nil {
return nil, err
}
its = append(its, allPostings)
}
it := index.Intersect(its...)
for _, n := range notIts {
if _, ok := n.(*index.ListPostings); !ok {
// Best to pre-calculate the merged lists via next rather than have a ton
// of seeks in Without.
pl, err := index.ExpandPostings(n)
if err != nil {
return nil, err
}
n = index.NewListPostings(pl)
}
it = index.Without(it, n)
}
return ix.SortedPostings(it), nil
}
func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
// This method will not return postings for missing labels.
// Fast-path for equal matching.
if em, ok := m.(*labels.EqualMatcher); ok {
return ix.Postings(em.Name(), em.Value())
}
tpls, err := ix.LabelValues(m.Name())
@ -328,7 +383,8 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error
return index.Merge(rit...), nil
}
func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
// inversePostingsForMatcher eeturns the postings for the series with the label name set but not matching the matcher.
func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
tpls, err := ix.LabelValues(m.Name())
if err != nil {
return nil, err
@ -356,23 +412,7 @@ func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Posti
rit = append(rit, it)
}
merged := index.Merge(rit...)
// With many many postings, it's best to pre-calculate
// the merged list via next rather than have a ton of seeks
// in Without/Intersection.
if len(rit) > 100 {
pl, err := index.ExpandPostings(merged)
if err != nil {
return nil, err
}
merged = index.NewListPostings(pl)
}
allPostings, err := ix.Postings(index.AllPostingsKey())
if err != nil {
return nil, err
}
return index.Without(allPostings, merged), nil
return index.Merge(rit...), nil
}
func mergeStrings(a, b []string) []string {

View file

@ -1366,7 +1366,7 @@ func (m mockIndex) Symbols() (map[string]struct{}, error) {
return m.symbols, nil
}
func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta) error {
func (m *mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta) error {
if _, ok := m.series[ref]; ok {
return errors.Errorf("series with reference %d already added", ref)
}
@ -1691,3 +1691,211 @@ func BenchmarkQuerySeek(b *testing.B) {
}
}
}
func TestPostingsForMatchers(t *testing.T) {
h, err := NewHead(nil, nil, nil, 1000)
testutil.Ok(t, err)
defer func() {
testutil.Ok(t, h.Close())
}()
app := h.Appender()
app.Add(labels.FromStrings("n", "1"), 0, 0)
app.Add(labels.FromStrings("n", "1", "i", "a"), 0, 0)
app.Add(labels.FromStrings("n", "1", "i", "b"), 0, 0)
app.Add(labels.FromStrings("n", "2"), 0, 0)
testutil.Ok(t, app.Commit())
cases := []struct {
matchers []labels.Matcher
exp []labels.Labels
}{
// Simple equals.
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewEqualMatcher("i", "a")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewEqualMatcher("i", "missing")},
exp: []labels.Labels{},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("missing", "")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
labels.FromStrings("n", "2"),
},
},
// Not equals.
{
matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("n", "1"))},
exp: []labels.Labels{
labels.FromStrings("n", "2"),
},
},
{
matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("i", ""))},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("missing", ""))},
exp: []labels.Labels{},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", "a"))},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", ""))},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
// Regex.
{
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^1$")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^a$")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^a?$")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
},
},
{
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "^$")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "2"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^$")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^.*$")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.NewMustRegexpMatcher("i", "^.+$")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
// Not regex.
{
matchers: []labels.Matcher{labels.Not(labels.NewMustRegexpMatcher("n", "^1$"))},
exp: []labels.Labels{
labels.FromStrings("n", "2"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^a$"))},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^a?$"))},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^$"))},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^.*$"))},
exp: []labels.Labels{},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewMustRegexpMatcher("i", "^.+$"))},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
},
},
// Combinations.
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", "")), labels.NewEqualMatcher("i", "a")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
},
},
{
matchers: []labels.Matcher{labels.NewEqualMatcher("n", "1"), labels.Not(labels.NewEqualMatcher("i", "b")), labels.NewMustRegexpMatcher("i", "^(b|a).*$")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
},
},
}
ir, err := h.Index()
testutil.Ok(t, err)
for _, c := range cases {
exp := map[string]struct{}{}
for _, l := range c.exp {
exp[l.String()] = struct{}{}
}
p, err := PostingsForMatchers(ir, c.matchers...)
testutil.Ok(t, err)
for p.Next() {
lbls := labels.Labels{}
ir.Series(p.At(), &lbls, &[]chunks.Meta{})
if _, ok := exp[lbls.String()]; !ok {
t.Errorf("Evaluating %v, unexpected result %s", c.matchers, lbls.String())
} else {
delete(exp, lbls.String())
}
}
testutil.Ok(t, p.Err())
if len(exp) != 0 {
t.Errorf("Evaluating %v, missing results %+v", c.matchers, exp)
}
}
}