Simplify mergedPostings.Seek (#595)

The current implementation leads to very slow behaviour when there's
many lists, this no worse than n log k, where k is the number of posting
lists.

Adjust benchmark to catch this.

Remove flattening of without lists, not needed anymore.

Benchmark versus 0.4.0 (used in Prometheus 2.7):
```
benchmark                                                            old ns/op      new ns/op      delta
BenchmarkHeadPostingForMatchers/n="1"-8                              189907976      188863880      -0.55%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8                      113950106      110791414      -2.77%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8                      104965646      102388760      -2.45%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8                     138743592      104424250      -24.74%
BenchmarkHeadPostingForMatchers/i=~".*"-8                            5279594954     5206096267     -1.39%
BenchmarkHeadPostingForMatchers/i=~".+"-8                            8004610589     6184527719     -22.74%
BenchmarkHeadPostingForMatchers/i=~""-8                              2476042646     1003920432     -59.45%
BenchmarkHeadPostingForMatchers/i!=""-8                              7178244655     6059725323     -15.58%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8              199342649      166642946      -16.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8       215774683      167515095      -22.37%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8                        2214714769     392943663      -82.26%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8                2148727410     322289262      -85.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8              2170658009     338458171      -84.41%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8             235720135      70597905       -70.05%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8       2190570590     343034307      -84.34%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8     2373784439     387297908      -83.68%

benchmark                                                            old allocs     new allocs     delta
BenchmarkHeadPostingForMatchers/n="1"-8                              33             33             +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8                      33             33             +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8                      33             33             +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8                     41             39             -4.88%
BenchmarkHeadPostingForMatchers/i=~".*"-8                            56             56             +0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8                            251577         100115         -60.21%
BenchmarkHeadPostingForMatchers/i=~""-8                              251123         100077         -60.15%
BenchmarkHeadPostingForMatchers/i!=""-8                              251525         100112         -60.20%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8              42             39             -7.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8       52             42             -19.23%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8                        251069         100101         -60.13%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8                251473         100101         -60.19%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8              250914         100102         -60.11%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8             30038          11181          -62.78%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8       250813         100105         -60.09%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8     281503         111260         -60.48%

benchmark                                                            old bytes     new bytes     delta
BenchmarkHeadPostingForMatchers/n="1"-8                              10887600      10887600      +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8                      5456416       5456416       +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8                      5456416       5456416       +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8                     5456640       5456544       -0.00%
BenchmarkHeadPostingForMatchers/i=~".*"-8                            258254504     258254472     -0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8                            520126192     281554792     -45.87%
BenchmarkHeadPostingForMatchers/i=~""-8                              263446640     24908456      -90.55%
BenchmarkHeadPostingForMatchers/i!=""-8                              520121144     281553664     -45.87%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8              7062448       7062272       -0.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8       7063473       7062384       -0.02%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8                        274325656     35793776      -86.95%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8                268926824     30362624      -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8              268882992     30363000      -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8             33193401      4269304       -87.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8       268875024     30363096      -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8     300589656     33099784      -88.99%
```

Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
This commit is contained in:
Brian Brazil 2019-05-13 10:51:07 +01:00 committed by GitHub
parent 6ac81cc7a9
commit 89a90fe96c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 48 deletions

View file

@ -56,20 +56,21 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
testutil.Ok(b, h.Close())
}()
var hash uint64
var ref uint64
addSeries := func(l labels.Labels) {
ref++
h.getOrCreateWithID(ref, l.Hash(), l)
}
for n := 0; n < 10; n++ {
for i := 0; i < 100000; i++ {
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(i), "j", "foo"))
hash++
addSeries(labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(n), "j", "foo"))
// Have some series that won't be matched, to properly test inverted matches.
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "0_"+strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "1_"+strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "2_"+strconv.Itoa(i), "j", "bar"))
hash++
addSeries(labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(n), "j", "bar"))
addSeries(labels.FromStrings("i", strconv.Itoa(i), "n", "0_"+strconv.Itoa(n), "j", "bar"))
addSeries(labels.FromStrings("i", strconv.Itoa(i), "n", "1_"+strconv.Itoa(n), "j", "bar"))
addSeries(labels.FromStrings("i", strconv.Itoa(i), "n", "2_"+strconv.Itoa(n), "j", "foo"))
}
}
@ -100,6 +101,7 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
{`i!=""`, []labels.Matcher{iNotEmpty}},
{`n="1",i=~".*",j="foo"`, []labels.Matcher{n1, iStar, jFoo}},
{`n="1",i=~".*",i!="2",j="foo"`, []labels.Matcher{n1, iStar, iNot2, jFoo}},
{`n="1",i!=""`, []labels.Matcher{n1, iNotEmpty}},
{`n="1",i!="",j="foo"`, []labels.Matcher{n1, iNotEmpty, jFoo}},
{`n="1",i=~".+",j="foo"`, []labels.Matcher{n1, iPlus, jFoo}},
{`n="1",i=~"1.+",j="foo"`, []labels.Matcher{n1, i1Plus, jFoo}},

View file

@ -404,7 +404,6 @@ func (h *postingsHeap) Pop() interface{} {
type mergedPostings struct {
h postingsHeap
initilized bool
heaped bool
cur uint64
err error
}
@ -434,12 +433,9 @@ func (it *mergedPostings) Next() bool {
return false
}
if !it.heaped {
heap.Init(&it.h)
it.heaped = true
}
// The user must issue an initial Next.
if !it.initilized {
heap.Init(&it.h)
it.cur = it.h[0].At()
it.initilized = true
return true
@ -477,33 +473,24 @@ func (it *mergedPostings) Seek(id uint64) bool {
return false
}
}
if it.cur >= id {
return true
}
// Heapifying when there is lots of Seeks is inefficient,
// mark to be re-heapified on the Next() call.
it.heaped = false
lowest := ^uint64(0)
n := 0
for _, i := range it.h {
if i.Seek(id) {
it.h[n] = i
n++
if i.At() < lowest {
lowest = i.At()
}
} else {
if i.Err() != nil {
it.err = i.Err()
for it.cur < id {
cur := it.h[0]
if !cur.Seek(id) {
heap.Pop(&it.h)
if cur.Err() != nil {
it.err = cur.Err()
return false
}
if it.h.Len() == 0 {
return false
}
} else {
// Value of top of heap has changed, re-heapify.
heap.Fix(&it.h, 0)
}
it.cur = it.h[0].At()
}
it.h = it.h[:n]
if len(it.h) == 0 {
return false
}
it.cur = lowest
return true
}

View file

@ -332,15 +332,6 @@ func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings,
it := index.Intersect(its...)
for _, n := range notIts {
if _, ok := n.(*index.ListPostings); !ok {
// Best to pre-calculate the merged lists via next rather than have a ton
// of seeks in Without.
pl, err := index.ExpandPostings(n)
if err != nil {
return nil, err
}
n = index.NewListPostings(pl)
}
it = index.Without(it, n)
}