Improvement on postings intersection (#616)

* improvement on postings intersection

Signed-off-by: naivewong <867245430@qq.com>
This commit is contained in:
naivewong 2019-06-11 16:14:25 +08:00 committed by Krasi Georgiev
parent e809cb477d
commit 6ab483071a
2 changed files with 114 additions and 60 deletions

View file

@ -303,68 +303,68 @@ func Intersect(its ...Postings) Postings {
if len(its) == 1 {
return its[0]
}
l := len(its) / 2
a := Intersect(its[:l]...)
b := Intersect(its[l:]...)
if a == EmptyPostings() || b == EmptyPostings() {
for _, p := range its {
if p == EmptyPostings() {
return EmptyPostings()
}
return newIntersectPostings(a, b)
}
return newIntersectPostings(its...)
}
// intersectPostings holds the state used to intersect Postings iterators.
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost. `a, b Postings` looks like the removed two-iterator field pair and
// `arr []Postings` like its slice-based replacement; confirm against the commit.
type intersectPostings struct {
a, b Postings
arr []Postings
// cur is the value returned by At.
cur uint64
}
// newIntersectPostings wraps the given iterators in an intersectPostings.
// NOTE(review): two signatures appear because the diff markers were lost. The
// (a, b) form looks like the removed version and the variadic form like the
// added one; confirm against the commit.
// The variadic form stores the `its` slice as passed, without copying it.
func newIntersectPostings(a, b Postings) *intersectPostings {
return &intersectPostings{a: a, b: b}
func newIntersectPostings(its ...Postings) *intersectPostings {
return &intersectPostings{arr: its}
}
// At returns the value currently stored in it.cur.
func (it *intersectPostings) At() uint64 {
return it.cur
}
// doNext tries to bring the underlying iterators onto a common value and
// reports whether it succeeded.
// NOTE(review): the diff markers were lost here. Lines using it.a, it.b and
// the `id` parameter look like the removed two-iterator version; lines ranging
// over it.arr look like the added one. Confirm against the commit.
// In the it.arr form, each iterator is asked to Seek(it.cur). A failed Seek
// returns false. A pass in which no iterator reports a value above it.cur
// returns true.
func (it *intersectPostings) doNext(id uint64) bool {
func (it *intersectPostings) doNext() bool {
Loop:
for {
if !it.b.Seek(id) {
for _, p := range it.arr {
if !p.Seek(it.cur) {
return false
}
if vb := it.b.At(); vb != id {
if !it.a.Seek(vb) {
return false
}
id = it.a.At()
if vb != id {
continue
// This iterator is past it.cur: adopt its value and restart the pass
// over it.arr from the first iterator.
if p.At() > it.cur {
it.cur = p.At()
continue Loop
}
}
it.cur = id
return true
}
}
// Next advances the intersection and reports whether doNext found a value.
// NOTE(review): the diff markers were lost here. Lines using it.a look like
// the removed version; lines ranging over it.arr look like the added one.
// Confirm against the commit.
// In the it.arr form, every iterator is advanced once with Next, and a failed
// Next returns false. it.cur is raised to any larger value an iterator
// reports, then doNext is called.
func (it *intersectPostings) Next() bool {
if !it.a.Next() {
for _, p := range it.arr {
if !p.Next() {
return false
}
return it.doNext(it.a.At())
if p.At() > it.cur {
it.cur = p.At()
}
}
return it.doNext()
}
// Seek positions the intersection using id and reports whether doNext found a
// value.
// NOTE(review): the diff markers were lost here. The it.a.Seek(id) lines look
// like the removed version; `it.cur = id` followed by doNext() looks like the
// added one. Confirm against the commit.
// In the added form, it.cur is set to id and doNext then calls Seek(it.cur) on
// every iterator in it.arr.
func (it *intersectPostings) Seek(id uint64) bool {
if !it.a.Seek(id) {
return false
}
return it.doNext(it.a.At())
it.cur = id
return it.doNext()
}
// Err reports an error from the underlying iterators, if any.
// NOTE(review): the diff markers were lost here. The it.a.Err() / it.b.Err()
// lines look like the removed version; the loop over it.arr looks like the
// added one. Confirm against the commit.
// In the it.arr form, it returns the first non-nil Err in slice order, or nil
// when no iterator reports one.
func (it *intersectPostings) Err() error {
if it.a.Err() != nil {
return it.a.Err()
for _, p := range it.arr {
if p.Err() != nil {
return p.Err()
}
return it.b.Err()
}
return nil
}
// Merge returns a new iterator over the union of the input iterators.

View file

@ -221,6 +221,7 @@ func TestMultiIntersect(t *testing.T) {
}
func BenchmarkIntersect(t *testing.B) {
t.Run("LongPostings1", func(bench *testing.B) {
var a, b, c, d []uint64
for i := 0; i < 10000000; i += 2 {
@ -244,13 +245,66 @@ func BenchmarkIntersect(t *testing.B) {
i3 := newListPostings(c...)
i4 := newListPostings(d...)
t.ResetTimer()
for i := 0; i < t.N; i++ {
bench.ResetTimer()
bench.ReportAllocs()
for i := 0; i < bench.N; i++ {
if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil {
t.Fatal(err)
bench.Fatal(err)
}
}
})
// LongPostings2 intersects four long, contiguous, partially overlapping ranges:
// a = [0, 12500000), b = [7500000, 12500000), c = [9000000, 20000000),
// d = [10000000, 12000000). The only range shared by all four is
// [10000000, 12000000).
t.Run("LongPostings2", func(bench *testing.B) {
var a, b, c, d []uint64
for i := 0; i < 12500000; i++ {
a = append(a, uint64(i))
}
for i := 7500000; i < 12500000; i++ {
b = append(b, uint64(i))
}
for i := 9000000; i < 20000000; i++ {
c = append(c, uint64(i))
}
for i := 10000000; i < 12000000; i++ {
d = append(d, uint64(i))
}
// NOTE(review): i1..i4 are created once and reused by every bench.N
// iteration. Whether later iterations see fresh or already-consumed
// iterators depends on newListPostings, which is not visible here; confirm.
i1 := newListPostings(a...)
i2 := newListPostings(b...)
i3 := newListPostings(c...)
i4 := newListPostings(d...)
// Keep the slice construction above out of the measured time.
bench.ResetTimer()
bench.ReportAllocs()
for i := 0; i < bench.N; i++ {
if _, err := ExpandPostings(Intersect(i1, i2, i3, i4)); err != nil {
bench.Fatal(err)
}
}
})
// Many matchers (k >> n): far more postings lists (k) than entries in each
// list (presumably n; 99 entries per list here).
t.Run("ManyPostings", func(bench *testing.B) {
var its []Postings
// Build 100000 postings lists (k = 100000), each holding the values 1..99.
for i := 0; i < 100000; i++ {
var temp []uint64
for j := 1; j < 100; j++ {
temp = append(temp, uint64(j))
}
its = append(its, newListPostings(temp...))
}
// Keep the list construction above out of the measured time.
bench.ResetTimer()
bench.ReportAllocs()
// NOTE(review): the same `its` iterators are reused by every bench.N
// iteration. Whether later iterations see fresh or already-consumed
// iterators depends on newListPostings, which is not visible here; confirm.
for i := 0; i < bench.N; i++ {
if _, err := ExpandPostings(Intersect(its...)); err != nil {
bench.Fatal(err)
}
}
})
}
func TestMultiMerge(t *testing.T) {