2017-04-10 11:59:45 -07:00
|
|
|
// Copyright 2017 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2016-12-04 04:16:11 -08:00
|
|
|
package tsdb
|
|
|
|
|
2016-12-09 01:41:51 -08:00
|
|
|
import (
|
2017-03-26 11:10:12 -07:00
|
|
|
"encoding/binary"
|
2016-12-09 01:41:51 -08:00
|
|
|
"sort"
|
|
|
|
"strings"
|
2017-09-05 02:45:18 -07:00
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/prometheus/tsdb/labels"
|
2016-12-09 01:41:51 -08:00
|
|
|
)
|
2016-12-04 04:16:11 -08:00
|
|
|
|
2016-12-21 16:12:28 -08:00
|
|
|
type memPostings struct {
|
2017-09-05 02:45:18 -07:00
|
|
|
mtx sync.RWMutex
|
|
|
|
m map[labels.Label][]uint64
|
2016-12-10 00:44:00 -08:00
|
|
|
}
|
|
|
|
|
2017-09-05 02:45:18 -07:00
|
|
|
func newMemPostings() *memPostings {
|
|
|
|
return &memPostings{
|
|
|
|
m: make(map[labels.Label][]uint64, 512),
|
|
|
|
}
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2016-12-09 01:41:51 -08:00
|
|
|
// Postings returns an iterator over the postings list for s.
|
2017-09-05 02:45:18 -07:00
|
|
|
func (p *memPostings) get(name, value string) Postings {
|
|
|
|
p.mtx.RLock()
|
|
|
|
l := p.m[labels.Label{Name: name, Value: value}]
|
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
2016-12-31 01:19:02 -08:00
|
|
|
if l == nil {
|
|
|
|
return emptyPostings
|
|
|
|
}
|
2017-04-23 16:53:56 -07:00
|
|
|
return newListPostings(l)
|
2016-12-09 01:41:51 -08:00
|
|
|
}
|
|
|
|
|
2017-09-05 02:45:18 -07:00
|
|
|
var allLabel = labels.Label{}
|
|
|
|
|
2016-12-09 01:41:51 -08:00
|
|
|
// add adds a document to the index. The caller has to ensure that no
|
|
|
|
// term argument appears twice.
|
2017-09-05 02:45:18 -07:00
|
|
|
func (p *memPostings) add(id uint64, lset labels.Labels) {
|
|
|
|
p.mtx.Lock()
|
|
|
|
|
|
|
|
for _, l := range lset {
|
|
|
|
p.m[l] = append(p.m[l], id)
|
2016-12-09 01:41:51 -08:00
|
|
|
}
|
2017-09-05 02:45:18 -07:00
|
|
|
p.m[allLabel] = append(p.m[allLabel], id)
|
|
|
|
|
|
|
|
p.mtx.Unlock()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
// Postings is an iterator over a postings list.
type Postings interface {
	// Next moves the iterator forward by one entry and reports whether
	// an entry was available.
	Next() bool

	// Seek moves the iterator to the first entry that is greater than
	// or equal to v and reports whether such an entry was found.
	Seek(v uint64) bool

	// At returns the entry the iterator is currently positioned on.
	At() uint64

	// Err returns the last error encountered by the iterator.
	Err() error
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
// errPostings is a Postings implementation that never yields a value
// and reports a fixed error from Err.
type errPostings struct {
	err error
}

// Next always reports false.
func (errPostings) Next() bool { return false }

// Seek always reports false, whatever the target.
func (errPostings) Seek(uint64) bool { return false }

// At always returns 0.
func (errPostings) At() uint64 { return 0 }

// Err returns the error the iterator was created with.
func (e errPostings) Err() error { return e.err }

// emptyPostings is an errPostings without an error; it serves as an
// empty postings list.
var emptyPostings = errPostings{}
|
2016-12-27 02:32:10 -08:00
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
// Intersect returns a new postings list over the intersection of the
|
|
|
|
// input postings.
|
|
|
|
func Intersect(its ...Postings) Postings {
|
2016-12-04 04:16:11 -08:00
|
|
|
if len(its) == 0 {
|
2017-04-23 16:53:56 -07:00
|
|
|
return emptyPostings
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2017-06-12 23:25:13 -07:00
|
|
|
if len(its) == 1 {
|
|
|
|
return its[0]
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2017-06-12 23:25:13 -07:00
|
|
|
l := len(its) / 2
|
|
|
|
return newIntersectPostings(Intersect(its[:l]...), Intersect(its[l:]...))
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
type intersectPostings struct {
|
2016-12-27 02:32:10 -08:00
|
|
|
a, b Postings
|
|
|
|
aok, bok bool
|
2017-09-04 07:08:38 -07:00
|
|
|
cur uint64
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func newIntersectPostings(a, b Postings) *intersectPostings {
|
2017-04-23 16:53:56 -07:00
|
|
|
return &intersectPostings{a: a, b: b}
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *intersectPostings) At() uint64 {
|
2016-12-27 02:32:10 -08:00
|
|
|
return it.cur
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *intersectPostings) doNext(id uint64) bool {
|
2016-12-27 02:32:10 -08:00
|
|
|
for {
|
2017-04-23 16:53:56 -07:00
|
|
|
if !it.b.Seek(id) {
|
2016-12-27 02:32:10 -08:00
|
|
|
return false
|
|
|
|
}
|
2017-04-23 16:53:56 -07:00
|
|
|
if vb := it.b.At(); vb != id {
|
|
|
|
if !it.a.Seek(vb) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
id = it.a.At()
|
|
|
|
if vb != id {
|
|
|
|
continue
|
|
|
|
}
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
2017-04-23 16:53:56 -07:00
|
|
|
it.cur = id
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *intersectPostings) Next() bool {
|
|
|
|
if !it.a.Next() {
|
|
|
|
return false
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
2017-04-23 16:53:56 -07:00
|
|
|
return it.doNext(it.a.At())
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *intersectPostings) Seek(id uint64) bool {
|
2017-04-23 16:53:56 -07:00
|
|
|
if !it.a.Seek(id) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return it.doNext(it.a.At())
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
func (it *intersectPostings) Err() error {
|
2016-12-27 02:32:10 -08:00
|
|
|
if it.a.Err() != nil {
|
|
|
|
return it.a.Err()
|
|
|
|
}
|
|
|
|
return it.b.Err()
|
2016-12-13 06:26:58 -08:00
|
|
|
}
|
|
|
|
|
2016-12-04 04:16:11 -08:00
|
|
|
// Merge returns a new iterator over the union of the input iterators.
|
2016-12-14 12:58:29 -08:00
|
|
|
func Merge(its ...Postings) Postings {
|
2016-12-04 04:16:11 -08:00
|
|
|
if len(its) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2017-06-12 23:25:13 -07:00
|
|
|
if len(its) == 1 {
|
|
|
|
return its[0]
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2017-06-12 23:25:13 -07:00
|
|
|
l := len(its) / 2
|
|
|
|
return newMergedPostings(Merge(its[:l]...), Merge(its[l:]...))
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-04-08 08:12:29 -07:00
|
|
|
type mergedPostings struct {
|
2017-04-23 16:53:56 -07:00
|
|
|
a, b Postings
|
|
|
|
initialized bool
|
|
|
|
aok, bok bool
|
2017-09-04 07:08:38 -07:00
|
|
|
cur uint64
|
2016-12-28 02:02:19 -08:00
|
|
|
}
|
|
|
|
|
2017-04-08 08:12:29 -07:00
|
|
|
func newMergedPostings(a, b Postings) *mergedPostings {
|
2017-04-23 16:53:56 -07:00
|
|
|
return &mergedPostings{a: a, b: b}
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *mergedPostings) At() uint64 {
|
2016-12-28 02:02:19 -08:00
|
|
|
return it.cur
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-04-08 08:12:29 -07:00
|
|
|
func (it *mergedPostings) Next() bool {
|
2017-04-23 16:53:56 -07:00
|
|
|
if !it.initialized {
|
|
|
|
it.aok = it.a.Next()
|
|
|
|
it.bok = it.b.Next()
|
|
|
|
it.initialized = true
|
|
|
|
}
|
|
|
|
|
2016-12-28 02:02:19 -08:00
|
|
|
if !it.aok && !it.bok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if !it.aok {
|
2017-01-02 04:27:52 -08:00
|
|
|
it.cur = it.b.At()
|
2016-12-28 02:02:19 -08:00
|
|
|
it.bok = it.b.Next()
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if !it.bok {
|
2017-01-02 04:27:52 -08:00
|
|
|
it.cur = it.a.At()
|
2016-12-28 02:02:19 -08:00
|
|
|
it.aok = it.a.Next()
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2017-01-02 04:27:52 -08:00
|
|
|
acur, bcur := it.a.At(), it.b.At()
|
2016-12-28 02:02:19 -08:00
|
|
|
|
|
|
|
if acur < bcur {
|
|
|
|
it.cur = acur
|
|
|
|
it.aok = it.a.Next()
|
2017-04-23 16:53:56 -07:00
|
|
|
} else if acur > bcur {
|
2016-12-28 02:02:19 -08:00
|
|
|
it.cur = bcur
|
|
|
|
it.bok = it.b.Next()
|
2017-04-23 16:53:56 -07:00
|
|
|
} else {
|
|
|
|
it.cur = acur
|
|
|
|
it.aok = it.a.Next()
|
|
|
|
it.bok = it.b.Next()
|
2016-12-28 02:02:19 -08:00
|
|
|
}
|
|
|
|
return true
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *mergedPostings) Seek(id uint64) bool {
|
Fix missing postings in Merge and Intersect (#77)
* Test for a previous implematation of Intersect
Before we were moving the postings list everytime we create a new
chained `intersectPostings`. That was causing some postings to be
skipped. This test fails on the older version.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Advance on Seek only when valid.
Issue:
Before in mergedPostings and others we advance everytime we `Seek`,
which causes issues with `Intersect`.
Take the case, where we have a mergedPostings = m merging, a: {10, 20, 30} and
b: {15, 25, 35}. Everytime we `Seek`, we do a.Seek and b.Seek.
Now if we Intersect m with {21, 22, 23, 30}, we would do Seek({21,22,23}) which
would advance a and b beyond 30.
Fix:
Now we advance only when the seeking value is greater than the current
value, as the definition specifies.
Also, posting 0 will not be a valid posting and will be used to signal
finished or un-initialized PostingsList.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add test for Merge+Intersect edgecase.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add comments to trivial tests.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2017-05-12 00:44:41 -07:00
|
|
|
if it.cur >= id {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2016-12-28 02:02:19 -08:00
|
|
|
it.aok = it.a.Seek(id)
|
|
|
|
it.bok = it.b.Seek(id)
|
2017-04-23 16:53:56 -07:00
|
|
|
it.initialized = true
|
Fix missing postings in Merge and Intersect (#77)
* Test for a previous implematation of Intersect
Before we were moving the postings list everytime we create a new
chained `intersectPostings`. That was causing some postings to be
skipped. This test fails on the older version.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Advance on Seek only when valid.
Issue:
Before in mergedPostings and others we advance everytime we `Seek`,
which causes issues with `Intersect`.
Take the case, where we have a mergedPostings = m merging, a: {10, 20, 30} and
b: {15, 25, 35}. Everytime we `Seek`, we do a.Seek and b.Seek.
Now if we Intersect m with {21, 22, 23, 30}, we would do Seek({21,22,23}) which
would advance a and b beyond 30.
Fix:
Now we advance only when the seeking value is greater than the current
value, as the definition specifies.
Also, posting 0 will not be a valid posting and will be used to signal
finished or un-initialized PostingsList.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add test for Merge+Intersect edgecase.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add comments to trivial tests.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2017-05-12 00:44:41 -07:00
|
|
|
|
|
|
|
return it.Next()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-04-08 08:12:29 -07:00
|
|
|
func (it *mergedPostings) Err() error {
|
2016-12-28 02:02:19 -08:00
|
|
|
if it.a.Err() != nil {
|
|
|
|
return it.a.Err()
|
|
|
|
}
|
|
|
|
return it.b.Err()
|
2016-12-13 06:26:58 -08:00
|
|
|
}
|
|
|
|
|
2016-12-19 02:44:11 -08:00
|
|
|
// listPostings implements the Postings interface over a plain slice.
// Seek uses binary search, so the slice is expected to be sorted in
// ascending order.
type listPostings struct {
	// list holds the entries that have not been consumed yet.
	list []uint64
	// cur is the entry the iterator is positioned on.
	cur uint64
}

// newListPostings returns an iterator over list. The slice is not copied.
func newListPostings(list []uint64) *listPostings {
	it := new(listPostings)
	it.list = list
	return it
}

// At returns the entry the iterator is positioned on.
func (it *listPostings) At() uint64 {
	return it.cur
}

// Next consumes the first remaining entry. When nothing remains it
// resets the current value to 0 and reports false.
func (it *listPostings) Next() bool {
	if len(it.list) == 0 {
		it.cur = 0
		return false
	}
	it.cur, it.list = it.list[0], it.list[1:]
	return true
}

// Seek positions the iterator on the first entry >= x. If the current
// entry already satisfies that, nothing is consumed. When no remaining
// entry qualifies, the remaining list is dropped and false is returned;
// the current value is left untouched in that case.
func (it *listPostings) Seek(x uint64) bool {
	if x <= it.cur {
		return true
	}

	// Binary search over the entries that have not been consumed yet.
	rest := it.list
	idx := sort.Search(len(rest), func(i int) bool {
		return rest[i] >= x
	})
	if idx == len(rest) {
		it.list = nil
		return false
	}
	it.cur, it.list = rest[idx], rest[idx+1:]
	return true
}

// Err always returns nil.
func (it *listPostings) Err() error {
	return nil
}
|
|
|
|
|
2017-03-27 01:34:42 -07:00
|
|
|
// bigEndianPostings implements the Postings interface over a byte
// stream of big endian 32-bit numbers. Seek uses binary search, so the
// encoded numbers are expected to be in ascending order.
type bigEndianPostings struct {
	// list holds the encoded entries that have not been consumed yet.
	list []byte
	// cur is the entry the iterator is positioned on.
	cur uint32
}

// newBigEndianPostings returns an iterator over list. The slice is not copied.
func newBigEndianPostings(list []byte) *bigEndianPostings {
	return &bigEndianPostings{list: list}
}

// At returns the entry the iterator is positioned on, widened to uint64.
func (it *bigEndianPostings) At() uint64 {
	return uint64(it.cur)
}

// Next decodes and consumes the next 4-byte entry. It reports false
// when fewer than 4 bytes remain.
func (it *bigEndianPostings) Next() bool {
	if len(it.list) >= 4 {
		it.cur = binary.BigEndian.Uint32(it.list)
		it.list = it.list[4:]
		return true
	}
	return false
}

// Seek positions the iterator on the first entry >= x. If the current
// entry already satisfies that, nothing is consumed. When no remaining
// entry qualifies, the remaining list is dropped and false is returned.
func (it *bigEndianPostings) Seek(x uint64) bool {
	if uint64(it.cur) >= x {
		return true
	}

	num := len(it.list) / 4
	// Do binary search between current position and end. The comparison
	// is done in uint64: narrowing x to uint32 would wrap targets above
	// math.MaxUint32 around and let them match small stored values.
	i := sort.Search(num, func(i int) bool {
		return uint64(binary.BigEndian.Uint32(it.list[i*4:])) >= x
	})
	if i < num {
		j := i * 4
		it.cur = binary.BigEndian.Uint32(it.list[j:])
		it.list = it.list[j+4:]
		return true
	}
	it.list = nil
	return false
}

// Err always returns nil.
func (it *bigEndianPostings) Err() error {
	return nil
}
|
|
|
|
|
2016-12-09 01:41:51 -08:00
|
|
|
// stringset is a set of strings backed by a map.
type stringset map[string]struct{}

// set adds s to the set.
func (ss stringset) set(s string) {
	ss[s] = struct{}{}
}

// has reports whether s is in the set.
func (ss stringset) has(s string) bool {
	_, found := ss[s]
	return found
}

// String returns the members in sorted order, joined by commas.
func (ss stringset) String() string {
	members := ss.slice()
	return strings.Join(members, ",")
}

// slice returns the members as a sorted slice.
func (ss stringset) slice() []string {
	members := make([]string, len(ss))
	i := 0
	for member := range ss {
		members[i] = member
		i++
	}
	sort.Strings(members)
	return members
}
|