2017-04-10 11:59:45 -07:00
|
|
|
// Copyright 2017 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
package index
|
2016-12-04 04:16:11 -08:00
|
|
|
|
2016-12-09 01:41:51 -08:00
|
|
|
import (
|
2019-02-28 09:23:55 -08:00
|
|
|
"container/heap"
|
2017-03-26 11:10:12 -07:00
|
|
|
"encoding/binary"
|
2017-10-05 13:22:14 -07:00
|
|
|
"runtime"
|
2016-12-09 01:41:51 -08:00
|
|
|
"sort"
|
|
|
|
"strings"
|
2017-09-05 02:45:18 -07:00
|
|
|
"sync"
|
|
|
|
|
2019-11-18 11:53:33 -08:00
|
|
|
"github.com/prometheus/prometheus/pkg/labels"
|
2016-12-09 01:41:51 -08:00
|
|
|
)
|
2016-12-04 04:16:11 -08:00
|
|
|
|
2017-12-22 00:43:34 -08:00
|
|
|
var allPostingsKey = labels.Label{}
|
|
|
|
|
|
|
|
// AllPostingsKey returns the label key that is used to store the postings list of all existing IDs.
|
|
|
|
func AllPostingsKey() (name, value string) {
|
|
|
|
return allPostingsKey.Name, allPostingsKey.Value
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// MemPostings holds the postings list of series IDs for every label
// name/value pair. The lists may be written to out of order; in that case
// EnsureOrder must be called once before any reads are done. This allows for
// quick unordered batch fills on startup.
type MemPostings struct {
	// mtx is taken by the methods of MemPostings before touching m or ordered.
	mtx sync.RWMutex
	// m maps label name -> label value -> postings list of series IDs.
	m map[string]map[string][]uint64
	// ordered reports whether inserts keep every postings list sorted.
	ordered bool
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// NewMemPostings returns a memPostings that's ready for reads and writes.
|
|
|
|
func NewMemPostings() *MemPostings {
|
|
|
|
return &MemPostings{
|
2018-11-02 07:27:19 -07:00
|
|
|
m: make(map[string]map[string][]uint64, 512),
|
2017-10-05 13:22:14 -07:00
|
|
|
ordered: true,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// NewUnorderedMemPostings returns a memPostings that is not safe to be read from
|
2017-10-05 13:22:14 -07:00
|
|
|
// until ensureOrder was called once.
|
2017-11-30 06:34:49 -08:00
|
|
|
func NewUnorderedMemPostings() *MemPostings {
|
|
|
|
return &MemPostings{
|
2018-11-02 07:27:19 -07:00
|
|
|
m: make(map[string]map[string][]uint64, 512),
|
2017-10-05 13:22:14 -07:00
|
|
|
ordered: false,
|
2017-09-05 02:45:18 -07:00
|
|
|
}
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2021-09-08 02:18:48 -07:00
|
|
|
// Symbols returns an iterator over all unique name and value strings, in order.
|
|
|
|
func (p *MemPostings) Symbols() StringIter {
|
|
|
|
p.mtx.RLock()
|
|
|
|
|
|
|
|
// Add all the strings to a map to de-duplicate.
|
|
|
|
symbols := make(map[string]struct{}, 512)
|
|
|
|
for n, e := range p.m {
|
|
|
|
symbols[n] = struct{}{}
|
|
|
|
for v := range e {
|
|
|
|
symbols[v] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
|
|
|
res := make([]string, 0, len(symbols))
|
|
|
|
for k := range symbols {
|
|
|
|
res = append(res, k)
|
|
|
|
}
|
|
|
|
|
|
|
|
sort.Strings(res)
|
|
|
|
return NewStringListIter(res)
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// SortedKeys returns a list of sorted label keys of the postings.
|
|
|
|
func (p *MemPostings) SortedKeys() []labels.Label {
|
2017-11-28 23:28:55 -08:00
|
|
|
p.mtx.RLock()
|
|
|
|
keys := make([]labels.Label, 0, len(p.m))
|
|
|
|
|
2018-11-02 07:27:19 -07:00
|
|
|
for n, e := range p.m {
|
|
|
|
for v := range e {
|
|
|
|
keys = append(keys, labels.Label{Name: n, Value: v})
|
|
|
|
}
|
2017-11-28 23:28:55 -08:00
|
|
|
}
|
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
|
|
|
sort.Slice(keys, func(i, j int) bool {
|
|
|
|
if d := strings.Compare(keys[i].Name, keys[j].Name); d != 0 {
|
|
|
|
return d < 0
|
|
|
|
}
|
|
|
|
return keys[i].Value < keys[j].Value
|
|
|
|
})
|
|
|
|
return keys
|
|
|
|
}
|
|
|
|
|
2020-09-10 08:05:47 -07:00
|
|
|
// LabelNames returns all the unique label names.
|
|
|
|
func (p *MemPostings) LabelNames() []string {
|
|
|
|
p.mtx.RLock()
|
|
|
|
defer p.mtx.RUnlock()
|
|
|
|
n := len(p.m)
|
|
|
|
if n == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
names := make([]string, 0, n-1)
|
|
|
|
for name := range p.m {
|
|
|
|
if name != allPostingsKey.Name {
|
|
|
|
names = append(names, name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return names
|
|
|
|
}
|
|
|
|
|
|
|
|
// LabelValues returns label values for the given name.
|
|
|
|
func (p *MemPostings) LabelValues(name string) []string {
|
|
|
|
p.mtx.RLock()
|
|
|
|
defer p.mtx.RUnlock()
|
|
|
|
|
|
|
|
values := make([]string, 0, len(p.m[name]))
|
|
|
|
for v := range p.m[name] {
|
|
|
|
values = append(values, v)
|
|
|
|
}
|
|
|
|
return values
|
|
|
|
}
|
|
|
|
|
2019-11-04 18:06:13 -08:00
|
|
|
// PostingsStats contains cardinality based statistics for postings.
|
|
|
|
type PostingsStats struct {
|
|
|
|
CardinalityMetricsStats []Stat
|
|
|
|
CardinalityLabelStats []Stat
|
|
|
|
LabelValueStats []Stat
|
|
|
|
LabelValuePairsStats []Stat
|
2021-01-06 22:41:32 -08:00
|
|
|
NumLabelPairs int
|
2019-11-04 18:06:13 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Stats calculates the cardinality statistics from postings.
|
|
|
|
func (p *MemPostings) Stats(label string) *PostingsStats {
|
|
|
|
const maxNumOfRecords = 10
|
|
|
|
var size uint64
|
|
|
|
|
|
|
|
p.mtx.RLock()
|
|
|
|
|
|
|
|
metrics := &maxHeap{}
|
|
|
|
labels := &maxHeap{}
|
2019-11-19 13:03:24 -08:00
|
|
|
labelValueLength := &maxHeap{}
|
2019-11-04 18:06:13 -08:00
|
|
|
labelValuePairs := &maxHeap{}
|
2021-01-06 22:41:32 -08:00
|
|
|
numLabelPairs := 0
|
2019-11-04 18:06:13 -08:00
|
|
|
|
|
|
|
metrics.init(maxNumOfRecords)
|
|
|
|
labels.init(maxNumOfRecords)
|
2019-11-19 13:03:24 -08:00
|
|
|
labelValueLength.init(maxNumOfRecords)
|
2019-11-04 18:06:13 -08:00
|
|
|
labelValuePairs.init(maxNumOfRecords)
|
|
|
|
|
|
|
|
for n, e := range p.m {
|
|
|
|
if n == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
labels.push(Stat{Name: n, Count: uint64(len(e))})
|
2021-01-06 22:41:32 -08:00
|
|
|
numLabelPairs += len(e)
|
2019-11-04 18:06:13 -08:00
|
|
|
size = 0
|
|
|
|
for name, values := range e {
|
|
|
|
if n == label {
|
|
|
|
metrics.push(Stat{Name: name, Count: uint64(len(values))})
|
|
|
|
}
|
|
|
|
labelValuePairs.push(Stat{Name: n + "=" + name, Count: uint64(len(values))})
|
|
|
|
size += uint64(len(name))
|
|
|
|
}
|
2019-11-19 13:03:24 -08:00
|
|
|
labelValueLength.push(Stat{Name: n, Count: size})
|
2019-11-04 18:06:13 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
|
|
|
return &PostingsStats{
|
|
|
|
CardinalityMetricsStats: metrics.get(),
|
|
|
|
CardinalityLabelStats: labels.get(),
|
2019-11-19 13:03:24 -08:00
|
|
|
LabelValueStats: labelValueLength.get(),
|
2019-11-04 18:06:13 -08:00
|
|
|
LabelValuePairsStats: labelValuePairs.get(),
|
2021-01-06 22:41:32 -08:00
|
|
|
NumLabelPairs: numLabelPairs,
|
2019-11-04 18:06:13 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// Get returns a postings list for the given label pair.
|
|
|
|
func (p *MemPostings) Get(name, value string) Postings {
|
2018-11-02 07:27:19 -07:00
|
|
|
var lp []uint64
|
2017-09-05 02:45:18 -07:00
|
|
|
p.mtx.RLock()
|
2018-11-02 07:27:19 -07:00
|
|
|
l := p.m[name]
|
|
|
|
if l != nil {
|
|
|
|
lp = l[value]
|
|
|
|
}
|
2017-09-05 02:45:18 -07:00
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
2018-11-02 07:27:19 -07:00
|
|
|
if lp == nil {
|
2017-11-30 06:34:49 -08:00
|
|
|
return EmptyPostings()
|
2016-12-31 01:19:02 -08:00
|
|
|
}
|
2019-03-21 09:23:00 -07:00
|
|
|
return newListPostings(lp...)
|
2016-12-09 01:41:51 -08:00
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// All returns a postings list over all documents ever added.
|
|
|
|
func (p *MemPostings) All() Postings {
|
2017-12-22 00:43:34 -08:00
|
|
|
return p.Get(AllPostingsKey())
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
// EnsureOrder ensures that all postings lists are sorted. After it returns all further
|
2017-10-05 13:22:14 -07:00
|
|
|
// calls to add and addFor will insert new IDs in a sorted manner.
|
2017-11-30 06:34:49 -08:00
|
|
|
func (p *MemPostings) EnsureOrder() {
|
2017-10-05 13:22:14 -07:00
|
|
|
p.mtx.Lock()
|
|
|
|
defer p.mtx.Unlock()
|
|
|
|
|
|
|
|
if p.ordered {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
n := runtime.GOMAXPROCS(0)
|
|
|
|
workc := make(chan []uint64)
|
|
|
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(n)
|
|
|
|
|
|
|
|
for i := 0; i < n; i++ {
|
|
|
|
go func() {
|
|
|
|
for l := range workc {
|
2020-04-28 03:02:26 -07:00
|
|
|
sort.Slice(l, func(a, b int) bool { return l[a] < l[b] })
|
2017-10-05 13:22:14 -07:00
|
|
|
}
|
|
|
|
wg.Done()
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
2018-11-02 07:27:19 -07:00
|
|
|
for _, e := range p.m {
|
|
|
|
for _, l := range e {
|
|
|
|
workc <- l
|
|
|
|
}
|
2017-10-05 13:22:14 -07:00
|
|
|
}
|
|
|
|
close(workc)
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
p.ordered = true
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// Delete removes all ids in the given map from the postings lists.
|
|
|
|
func (p *MemPostings) Delete(deleted map[uint64]struct{}) {
|
2018-11-02 07:27:19 -07:00
|
|
|
var keys, vals []string
|
2017-11-30 06:34:49 -08:00
|
|
|
|
2017-12-22 00:43:34 -08:00
|
|
|
// Collect all keys relevant for deletion once. New keys added afterwards
|
|
|
|
// can by definition not be affected by any of the given deletes.
|
2017-11-30 06:34:49 -08:00
|
|
|
p.mtx.RLock()
|
2018-11-02 07:27:19 -07:00
|
|
|
for n := range p.m {
|
|
|
|
keys = append(keys, n)
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
2018-11-02 07:27:19 -07:00
|
|
|
for _, n := range keys {
|
|
|
|
p.mtx.RLock()
|
|
|
|
vals = vals[:0]
|
|
|
|
for v := range p.m[n] {
|
|
|
|
vals = append(vals, v)
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
2018-11-02 07:27:19 -07:00
|
|
|
p.mtx.RUnlock()
|
|
|
|
|
|
|
|
// For each posting we first analyse whether the postings list is affected by the deletes.
|
|
|
|
// If yes, we actually reallocate a new postings list.
|
|
|
|
for _, l := range vals {
|
|
|
|
// Only lock for processing one postings list so we don't block reads for too long.
|
|
|
|
p.mtx.Lock()
|
|
|
|
|
|
|
|
found := false
|
|
|
|
for _, id := range p.m[n][l] {
|
|
|
|
if _, ok := deleted[id]; ok {
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !found {
|
|
|
|
p.mtx.Unlock()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
repl := make([]uint64, 0, len(p.m[n][l]))
|
2017-11-30 06:34:49 -08:00
|
|
|
|
2018-11-02 07:27:19 -07:00
|
|
|
for _, id := range p.m[n][l] {
|
|
|
|
if _, ok := deleted[id]; !ok {
|
|
|
|
repl = append(repl, id)
|
|
|
|
}
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
2018-11-02 07:27:19 -07:00
|
|
|
if len(repl) > 0 {
|
|
|
|
p.m[n][l] = repl
|
|
|
|
} else {
|
|
|
|
delete(p.m[n], l)
|
|
|
|
}
|
|
|
|
p.mtx.Unlock()
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
2018-11-02 07:27:19 -07:00
|
|
|
p.mtx.Lock()
|
|
|
|
if len(p.m[n]) == 0 {
|
|
|
|
delete(p.m, n)
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
|
|
|
p.mtx.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iter calls f for each postings list. It aborts if f returns an error and returns it.
|
|
|
|
func (p *MemPostings) Iter(f func(labels.Label, Postings) error) error {
|
|
|
|
p.mtx.RLock()
|
|
|
|
defer p.mtx.RUnlock()
|
|
|
|
|
2018-11-02 07:27:19 -07:00
|
|
|
for n, e := range p.m {
|
|
|
|
for v, p := range e {
|
2019-03-21 09:23:00 -07:00
|
|
|
if err := f(labels.Label{Name: n, Value: v}, newListPostings(p...)); err != nil {
|
2018-11-02 07:27:19 -07:00
|
|
|
return err
|
|
|
|
}
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add a label set to the postings index.
|
|
|
|
func (p *MemPostings) Add(id uint64, lset labels.Labels) {
|
2017-09-05 02:45:18 -07:00
|
|
|
p.mtx.Lock()
|
|
|
|
|
|
|
|
for _, l := range lset {
|
2017-09-20 09:08:57 -07:00
|
|
|
p.addFor(id, l)
|
2016-12-09 01:41:51 -08:00
|
|
|
}
|
2017-09-20 09:08:57 -07:00
|
|
|
p.addFor(id, allPostingsKey)
|
2017-09-05 02:45:18 -07:00
|
|
|
|
|
|
|
p.mtx.Unlock()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
func (p *MemPostings) addFor(id uint64, l labels.Label) {
|
2018-11-02 07:27:19 -07:00
|
|
|
nm, ok := p.m[l.Name]
|
|
|
|
if !ok {
|
|
|
|
nm = map[string][]uint64{}
|
|
|
|
p.m[l.Name] = nm
|
|
|
|
}
|
|
|
|
list := append(nm[l.Value], id)
|
|
|
|
nm[l.Value] = list
|
2017-09-20 09:08:57 -07:00
|
|
|
|
2017-10-05 13:22:14 -07:00
|
|
|
if !p.ordered {
|
|
|
|
return
|
|
|
|
}
|
2017-09-20 09:08:57 -07:00
|
|
|
// There is no guarantee that no higher ID was inserted before as they may
|
|
|
|
// be generated independently before adding them to postings.
|
|
|
|
// We repair order violations on insert. The invariant is that the first n-1
|
|
|
|
// items in the list are already sorted.
|
|
|
|
for i := len(list) - 1; i >= 1; i-- {
|
|
|
|
if list[i] >= list[i-1] {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
list[i], list[i-1] = list[i-1], list[i]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// ExpandPostings returns the postings expanded as a slice.
|
|
|
|
func ExpandPostings(p Postings) (res []uint64, err error) {
|
2017-09-20 09:08:57 -07:00
|
|
|
for p.Next() {
|
|
|
|
res = append(res, p.At())
|
|
|
|
}
|
|
|
|
return res, p.Err()
|
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
// Postings provides iterative access over a postings list.
type Postings interface {
	// Next advances the iterator and reports whether another value was found.
	Next() bool

	// Seek advances the iterator to value v or greater and reports whether
	// such a value was found.
	Seek(v uint64) bool

	// At returns the value at the current iterator position.
	At() uint64

	// Err returns the last error of the iterator.
	Err() error
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
// errPostings is an iterator that never yields a value and reports the error
// it was created with.
type errPostings struct {
	err error
}

// Next always reports that no further value is available.
func (e errPostings) Next() bool {
	return false
}

// Seek always reports that no value was found.
func (e errPostings) Seek(uint64) bool {
	return false
}

// At always returns 0, as there is no current value.
func (e errPostings) At() uint64 {
	return 0
}

// Err returns the stored error, which may be nil.
func (e errPostings) Err() error {
	return e.err
}
|
2016-12-13 06:26:58 -08:00
|
|
|
|
2017-04-23 16:53:56 -07:00
|
|
|
var emptyPostings = errPostings{}
|
2016-12-27 02:32:10 -08:00
|
|
|
|
2017-11-13 03:16:58 -08:00
|
|
|
// EmptyPostings returns a postings list that's always empty.
|
2019-03-21 09:23:00 -07:00
|
|
|
// NOTE: Returning EmptyPostings sentinel when index.Postings struct has no postings is recommended.
|
|
|
|
// It triggers optimized flow in other functions like Intersect, Without etc.
|
2017-11-13 03:16:58 -08:00
|
|
|
func EmptyPostings() Postings {
|
|
|
|
return emptyPostings
|
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// ErrPostings returns new postings that immediately error.
|
|
|
|
func ErrPostings(err error) Postings {
|
|
|
|
return errPostings{err}
|
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
// Intersect returns a new postings list over the intersection of the
|
|
|
|
// input postings.
|
|
|
|
func Intersect(its ...Postings) Postings {
|
2016-12-04 04:16:11 -08:00
|
|
|
if len(its) == 0 {
|
2019-03-21 09:23:00 -07:00
|
|
|
return EmptyPostings()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2017-06-12 23:25:13 -07:00
|
|
|
if len(its) == 1 {
|
|
|
|
return its[0]
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2019-06-11 01:14:25 -07:00
|
|
|
for _, p := range its {
|
|
|
|
if p == EmptyPostings() {
|
|
|
|
return EmptyPostings()
|
|
|
|
}
|
2019-03-21 09:23:00 -07:00
|
|
|
}
|
2019-06-11 01:14:25 -07:00
|
|
|
|
|
|
|
return newIntersectPostings(its...)
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
type intersectPostings struct {
|
2019-06-11 01:14:25 -07:00
|
|
|
arr []Postings
|
|
|
|
cur uint64
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
|
|
|
|
2019-06-11 01:14:25 -07:00
|
|
|
func newIntersectPostings(its ...Postings) *intersectPostings {
|
|
|
|
return &intersectPostings{arr: its}
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *intersectPostings) At() uint64 {
|
2016-12-27 02:32:10 -08:00
|
|
|
return it.cur
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2019-06-11 01:14:25 -07:00
|
|
|
func (it *intersectPostings) doNext() bool {
|
|
|
|
Loop:
|
2016-12-27 02:32:10 -08:00
|
|
|
for {
|
2019-06-11 01:14:25 -07:00
|
|
|
for _, p := range it.arr {
|
|
|
|
if !p.Seek(it.cur) {
|
2017-04-23 16:53:56 -07:00
|
|
|
return false
|
|
|
|
}
|
2019-06-11 01:14:25 -07:00
|
|
|
if p.At() > it.cur {
|
|
|
|
it.cur = p.At()
|
|
|
|
continue Loop
|
2017-04-23 16:53:56 -07:00
|
|
|
}
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
2017-04-23 16:53:56 -07:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *intersectPostings) Next() bool {
|
2019-06-11 01:14:25 -07:00
|
|
|
for _, p := range it.arr {
|
|
|
|
if !p.Next() {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if p.At() > it.cur {
|
|
|
|
it.cur = p.At()
|
|
|
|
}
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
2019-06-11 01:14:25 -07:00
|
|
|
return it.doNext()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2017-09-04 07:08:38 -07:00
|
|
|
func (it *intersectPostings) Seek(id uint64) bool {
|
2019-06-11 01:14:25 -07:00
|
|
|
it.cur = id
|
|
|
|
return it.doNext()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
2016-12-14 12:58:29 -08:00
|
|
|
func (it *intersectPostings) Err() error {
|
2019-06-11 01:14:25 -07:00
|
|
|
for _, p := range it.arr {
|
|
|
|
if p.Err() != nil {
|
|
|
|
return p.Err()
|
|
|
|
}
|
2016-12-27 02:32:10 -08:00
|
|
|
}
|
2019-06-11 01:14:25 -07:00
|
|
|
return nil
|
2016-12-13 06:26:58 -08:00
|
|
|
}
|
|
|
|
|
2016-12-04 04:16:11 -08:00
|
|
|
// Merge returns a new iterator over the union of the input iterators.
|
2016-12-14 12:58:29 -08:00
|
|
|
func Merge(its ...Postings) Postings {
|
2016-12-04 04:16:11 -08:00
|
|
|
if len(its) == 0 {
|
2017-12-17 10:08:21 -08:00
|
|
|
return EmptyPostings()
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2017-06-12 23:25:13 -07:00
|
|
|
if len(its) == 1 {
|
|
|
|
return its[0]
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2019-03-21 09:23:00 -07:00
|
|
|
|
|
|
|
p, ok := newMergedPostings(its)
|
|
|
|
if !ok {
|
|
|
|
return EmptyPostings()
|
|
|
|
}
|
|
|
|
return p
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
type postingsHeap []Postings
|
|
|
|
|
|
|
|
func (h postingsHeap) Len() int { return len(h) }
|
|
|
|
func (h postingsHeap) Less(i, j int) bool { return h[i].At() < h[j].At() }
|
|
|
|
func (h *postingsHeap) Swap(i, j int) { (*h)[i], (*h)[j] = (*h)[j], (*h)[i] }
|
|
|
|
|
|
|
|
func (h *postingsHeap) Push(x interface{}) {
|
|
|
|
*h = append(*h, x.(Postings))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *postingsHeap) Pop() interface{} {
|
|
|
|
old := *h
|
|
|
|
n := len(old)
|
|
|
|
x := old[n-1]
|
|
|
|
*h = old[0 : n-1]
|
|
|
|
return x
|
|
|
|
}
|
|
|
|
|
|
|
|
type mergedPostings struct {
|
2020-01-02 06:54:09 -08:00
|
|
|
h postingsHeap
|
|
|
|
initialized bool
|
|
|
|
cur uint64
|
|
|
|
err error
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
|
|
|
|
2019-03-21 09:23:00 -07:00
|
|
|
func newMergedPostings(p []Postings) (m *mergedPostings, nonEmpty bool) {
|
2019-02-28 09:23:55 -08:00
|
|
|
ph := make(postingsHeap, 0, len(p))
|
2019-03-21 09:23:00 -07:00
|
|
|
|
2019-02-28 09:23:55 -08:00
|
|
|
for _, it := range p {
|
2019-03-21 09:23:00 -07:00
|
|
|
// NOTE: mergedPostings struct requires the user to issue an initial Next.
|
2019-02-28 09:23:55 -08:00
|
|
|
if it.Next() {
|
|
|
|
ph = append(ph, it)
|
|
|
|
} else {
|
|
|
|
if it.Err() != nil {
|
2019-03-21 09:23:00 -07:00
|
|
|
return &mergedPostings{err: it.Err()}, true
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-03-21 09:23:00 -07:00
|
|
|
|
|
|
|
if len(ph) == 0 {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
return &mergedPostings{h: ph}, true
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *mergedPostings) Next() bool {
|
|
|
|
if it.h.Len() == 0 || it.err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// The user must issue an initial Next.
|
2020-01-02 06:54:09 -08:00
|
|
|
if !it.initialized {
|
Simplify mergedPostings.Seek (#595)
The current implementation leads to very slow behaviour when there's
many lists, this no worse than n log k, where k is the number of posting
lists.
Adjust benchmark to catch this.
Remove flattening of without lists, not needed anymore.
Benchmark versus 0.4.0 (used in Prometheus 2.7):
```
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-8 189907976 188863880 -0.55%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 113950106 110791414 -2.77%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 104965646 102388760 -2.45%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 138743592 104424250 -24.74%
BenchmarkHeadPostingForMatchers/i=~".*"-8 5279594954 5206096267 -1.39%
BenchmarkHeadPostingForMatchers/i=~".+"-8 8004610589 6184527719 -22.74%
BenchmarkHeadPostingForMatchers/i=~""-8 2476042646 1003920432 -59.45%
BenchmarkHeadPostingForMatchers/i!=""-8 7178244655 6059725323 -15.58%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 199342649 166642946 -16.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 215774683 167515095 -22.37%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 2214714769 392943663 -82.26%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 2148727410 322289262 -85.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 2170658009 338458171 -84.41%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 235720135 70597905 -70.05%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 2190570590 343034307 -84.34%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 2373784439 387297908 -83.68%
benchmark old allocs new allocs delta
BenchmarkHeadPostingForMatchers/n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 41 39 -4.88%
BenchmarkHeadPostingForMatchers/i=~".*"-8 56 56 +0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 251577 100115 -60.21%
BenchmarkHeadPostingForMatchers/i=~""-8 251123 100077 -60.15%
BenchmarkHeadPostingForMatchers/i!=""-8 251525 100112 -60.20%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 42 39 -7.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 52 42 -19.23%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 251069 100101 -60.13%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 251473 100101 -60.19%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 250914 100102 -60.11%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 30038 11181 -62.78%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 250813 100105 -60.09%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 281503 111260 -60.48%
benchmark old bytes new bytes delta
BenchmarkHeadPostingForMatchers/n="1"-8 10887600 10887600 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 5456640 5456544 -0.00%
BenchmarkHeadPostingForMatchers/i=~".*"-8 258254504 258254472 -0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 520126192 281554792 -45.87%
BenchmarkHeadPostingForMatchers/i=~""-8 263446640 24908456 -90.55%
BenchmarkHeadPostingForMatchers/i!=""-8 520121144 281553664 -45.87%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 7062448 7062272 -0.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 7063473 7062384 -0.02%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 274325656 35793776 -86.95%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 268926824 30362624 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 268882992 30363000 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 33193401 4269304 -87.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 268875024 30363096 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 300589656 33099784 -88.99%
```
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-05-13 02:51:07 -07:00
|
|
|
heap.Init(&it.h)
|
2019-02-28 09:23:55 -08:00
|
|
|
it.cur = it.h[0].At()
|
2020-01-02 06:54:09 -08:00
|
|
|
it.initialized = true
|
2019-02-28 09:23:55 -08:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
cur := it.h[0]
|
|
|
|
if !cur.Next() {
|
|
|
|
heap.Pop(&it.h)
|
|
|
|
if cur.Err() != nil {
|
|
|
|
it.err = cur.Err()
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if it.h.Len() == 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Value of top of heap has changed, re-heapify.
|
|
|
|
heap.Fix(&it.h, 0)
|
2019-01-03 08:59:52 -08:00
|
|
|
}
|
2019-02-28 09:23:55 -08:00
|
|
|
|
|
|
|
if it.h[0].At() != it.cur {
|
|
|
|
it.cur = it.h[0].At()
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *mergedPostings) Seek(id uint64) bool {
|
|
|
|
if it.h.Len() == 0 || it.err != nil {
|
|
|
|
return false
|
|
|
|
}
|
2020-01-02 06:54:09 -08:00
|
|
|
if !it.initialized {
|
2019-02-28 09:23:55 -08:00
|
|
|
if !it.Next() {
|
|
|
|
return false
|
2019-01-03 08:59:52 -08:00
|
|
|
}
|
Fix missing postings in Merge and Intersect (#77)
* Test for a previous implematation of Intersect
Before we were moving the postings list everytime we create a new
chained `intersectPostings`. That was causing some postings to be
skipped. This test fails on the older version.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Advance on Seek only when valid.
Issue:
Before in mergedPostings and others we advance everytime we `Seek`,
which causes issues with `Intersect`.
Take the case, where we have a mergedPostings = m merging, a: {10, 20, 30} and
b: {15, 25, 35}. Everytime we `Seek`, we do a.Seek and b.Seek.
Now if we Intersect m with {21, 22, 23, 30}, we would do Seek({21,22,23}) which
would advance a and b beyond 30.
Fix:
Now we advance only when the seeking value is greater than the current
value, as the definition specifies.
Also, posting 0 will not be a valid posting and will be used to signal
finished or un-initialized PostingsList.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add test for Merge+Intersect edgecase.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add comments to trivial tests.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2017-05-12 00:44:41 -07:00
|
|
|
}
|
Simplify mergedPostings.Seek (#595)
The current implementation leads to very slow behaviour when there's
many lists, this no worse than n log k, where k is the number of posting
lists.
Adjust benchmark to catch this.
Remove flattening of without lists, not needed anymore.
Benchmark versus 0.4.0 (used in Prometheus 2.7):
```
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-8 189907976 188863880 -0.55%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 113950106 110791414 -2.77%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 104965646 102388760 -2.45%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 138743592 104424250 -24.74%
BenchmarkHeadPostingForMatchers/i=~".*"-8 5279594954 5206096267 -1.39%
BenchmarkHeadPostingForMatchers/i=~".+"-8 8004610589 6184527719 -22.74%
BenchmarkHeadPostingForMatchers/i=~""-8 2476042646 1003920432 -59.45%
BenchmarkHeadPostingForMatchers/i!=""-8 7178244655 6059725323 -15.58%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 199342649 166642946 -16.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 215774683 167515095 -22.37%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 2214714769 392943663 -82.26%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 2148727410 322289262 -85.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 2170658009 338458171 -84.41%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 235720135 70597905 -70.05%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 2190570590 343034307 -84.34%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 2373784439 387297908 -83.68%
benchmark old allocs new allocs delta
BenchmarkHeadPostingForMatchers/n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 41 39 -4.88%
BenchmarkHeadPostingForMatchers/i=~".*"-8 56 56 +0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 251577 100115 -60.21%
BenchmarkHeadPostingForMatchers/i=~""-8 251123 100077 -60.15%
BenchmarkHeadPostingForMatchers/i!=""-8 251525 100112 -60.20%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 42 39 -7.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 52 42 -19.23%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 251069 100101 -60.13%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 251473 100101 -60.19%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 250914 100102 -60.11%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 30038 11181 -62.78%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 250813 100105 -60.09%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 281503 111260 -60.48%
benchmark old bytes new bytes delta
BenchmarkHeadPostingForMatchers/n="1"-8 10887600 10887600 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 5456640 5456544 -0.00%
BenchmarkHeadPostingForMatchers/i=~".*"-8 258254504 258254472 -0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 520126192 281554792 -45.87%
BenchmarkHeadPostingForMatchers/i=~""-8 263446640 24908456 -90.55%
BenchmarkHeadPostingForMatchers/i!=""-8 520121144 281553664 -45.87%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 7062448 7062272 -0.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 7063473 7062384 -0.02%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 274325656 35793776 -86.95%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 268926824 30362624 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 268882992 30363000 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 33193401 4269304 -87.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 268875024 30363096 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 300589656 33099784 -88.99%
```
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-05-13 02:51:07 -07:00
|
|
|
for it.cur < id {
|
|
|
|
cur := it.h[0]
|
|
|
|
if !cur.Seek(id) {
|
|
|
|
heap.Pop(&it.h)
|
|
|
|
if cur.Err() != nil {
|
|
|
|
it.err = cur.Err()
|
|
|
|
return false
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
Simplify mergedPostings.Seek (#595)
The current implementation leads to very slow behaviour when there are
many lists; this one is no worse than n log k, where k is the number of posting
lists.
Adjust benchmark to catch this.
Remove flattening of without lists, not needed anymore.
Benchmark versus 0.4.0 (used in Prometheus 2.7):
```
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-8 189907976 188863880 -0.55%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 113950106 110791414 -2.77%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 104965646 102388760 -2.45%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 138743592 104424250 -24.74%
BenchmarkHeadPostingForMatchers/i=~".*"-8 5279594954 5206096267 -1.39%
BenchmarkHeadPostingForMatchers/i=~".+"-8 8004610589 6184527719 -22.74%
BenchmarkHeadPostingForMatchers/i=~""-8 2476042646 1003920432 -59.45%
BenchmarkHeadPostingForMatchers/i!=""-8 7178244655 6059725323 -15.58%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 199342649 166642946 -16.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 215774683 167515095 -22.37%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 2214714769 392943663 -82.26%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 2148727410 322289262 -85.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 2170658009 338458171 -84.41%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 235720135 70597905 -70.05%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 2190570590 343034307 -84.34%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 2373784439 387297908 -83.68%
benchmark old allocs new allocs delta
BenchmarkHeadPostingForMatchers/n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 41 39 -4.88%
BenchmarkHeadPostingForMatchers/i=~".*"-8 56 56 +0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 251577 100115 -60.21%
BenchmarkHeadPostingForMatchers/i=~""-8 251123 100077 -60.15%
BenchmarkHeadPostingForMatchers/i!=""-8 251525 100112 -60.20%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 42 39 -7.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 52 42 -19.23%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 251069 100101 -60.13%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 251473 100101 -60.19%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 250914 100102 -60.11%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 30038 11181 -62.78%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 250813 100105 -60.09%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 281503 111260 -60.48%
benchmark old bytes new bytes delta
BenchmarkHeadPostingForMatchers/n="1"-8 10887600 10887600 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 5456640 5456544 -0.00%
BenchmarkHeadPostingForMatchers/i=~".*"-8 258254504 258254472 -0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 520126192 281554792 -45.87%
BenchmarkHeadPostingForMatchers/i=~""-8 263446640 24908456 -90.55%
BenchmarkHeadPostingForMatchers/i!=""-8 520121144 281553664 -45.87%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 7062448 7062272 -0.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 7063473 7062384 -0.02%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 274325656 35793776 -86.95%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 268926824 30362624 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 268882992 30363000 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 33193401 4269304 -87.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 268875024 30363096 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 300589656 33099784 -88.99%
```
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-05-13 02:51:07 -07:00
|
|
|
if it.h.Len() == 0 {
|
2019-02-28 09:23:55 -08:00
|
|
|
return false
|
|
|
|
}
|
Simplify mergedPostings.Seek (#595)
The current implementation leads to very slow behaviour when there are
many lists; this one is no worse than n log k, where k is the number of posting
lists.
Adjust benchmark to catch this.
Remove flattening of without lists, not needed anymore.
Benchmark versus 0.4.0 (used in Prometheus 2.7):
```
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-8 189907976 188863880 -0.55%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 113950106 110791414 -2.77%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 104965646 102388760 -2.45%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 138743592 104424250 -24.74%
BenchmarkHeadPostingForMatchers/i=~".*"-8 5279594954 5206096267 -1.39%
BenchmarkHeadPostingForMatchers/i=~".+"-8 8004610589 6184527719 -22.74%
BenchmarkHeadPostingForMatchers/i=~""-8 2476042646 1003920432 -59.45%
BenchmarkHeadPostingForMatchers/i!=""-8 7178244655 6059725323 -15.58%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 199342649 166642946 -16.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 215774683 167515095 -22.37%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 2214714769 392943663 -82.26%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 2148727410 322289262 -85.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 2170658009 338458171 -84.41%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 235720135 70597905 -70.05%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 2190570590 343034307 -84.34%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 2373784439 387297908 -83.68%
benchmark old allocs new allocs delta
BenchmarkHeadPostingForMatchers/n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 41 39 -4.88%
BenchmarkHeadPostingForMatchers/i=~".*"-8 56 56 +0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 251577 100115 -60.21%
BenchmarkHeadPostingForMatchers/i=~""-8 251123 100077 -60.15%
BenchmarkHeadPostingForMatchers/i!=""-8 251525 100112 -60.20%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 42 39 -7.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 52 42 -19.23%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 251069 100101 -60.13%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 251473 100101 -60.19%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 250914 100102 -60.11%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 30038 11181 -62.78%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 250813 100105 -60.09%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 281503 111260 -60.48%
benchmark old bytes new bytes delta
BenchmarkHeadPostingForMatchers/n="1"-8 10887600 10887600 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 5456640 5456544 -0.00%
BenchmarkHeadPostingForMatchers/i=~".*"-8 258254504 258254472 -0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 520126192 281554792 -45.87%
BenchmarkHeadPostingForMatchers/i=~""-8 263446640 24908456 -90.55%
BenchmarkHeadPostingForMatchers/i!=""-8 520121144 281553664 -45.87%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 7062448 7062272 -0.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 7063473 7062384 -0.02%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 274325656 35793776 -86.95%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 268926824 30362624 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 268882992 30363000 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 33193401 4269304 -87.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 268875024 30363096 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 300589656 33099784 -88.99%
```
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-05-13 02:51:07 -07:00
|
|
|
} else {
|
|
|
|
// Value of top of heap has changed, re-heapify.
|
|
|
|
heap.Fix(&it.h, 0)
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
Simplify mergedPostings.Seek (#595)
The current implementation leads to very slow behaviour when there are
many lists; this one is no worse than n log k, where k is the number of posting
lists.
Adjust benchmark to catch this.
Remove flattening of without lists, not needed anymore.
Benchmark versus 0.4.0 (used in Prometheus 2.7):
```
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-8 189907976 188863880 -0.55%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 113950106 110791414 -2.77%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 104965646 102388760 -2.45%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 138743592 104424250 -24.74%
BenchmarkHeadPostingForMatchers/i=~".*"-8 5279594954 5206096267 -1.39%
BenchmarkHeadPostingForMatchers/i=~".+"-8 8004610589 6184527719 -22.74%
BenchmarkHeadPostingForMatchers/i=~""-8 2476042646 1003920432 -59.45%
BenchmarkHeadPostingForMatchers/i!=""-8 7178244655 6059725323 -15.58%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 199342649 166642946 -16.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 215774683 167515095 -22.37%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 2214714769 392943663 -82.26%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 2148727410 322289262 -85.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 2170658009 338458171 -84.41%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 235720135 70597905 -70.05%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 2190570590 343034307 -84.34%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 2373784439 387297908 -83.68%
benchmark old allocs new allocs delta
BenchmarkHeadPostingForMatchers/n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 33 33 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 41 39 -4.88%
BenchmarkHeadPostingForMatchers/i=~".*"-8 56 56 +0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 251577 100115 -60.21%
BenchmarkHeadPostingForMatchers/i=~""-8 251123 100077 -60.15%
BenchmarkHeadPostingForMatchers/i!=""-8 251525 100112 -60.20%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 42 39 -7.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 52 42 -19.23%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 251069 100101 -60.13%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 251473 100101 -60.19%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 250914 100102 -60.11%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 30038 11181 -62.78%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 250813 100105 -60.09%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 281503 111260 -60.48%
benchmark old bytes new bytes delta
BenchmarkHeadPostingForMatchers/n="1"-8 10887600 10887600 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-8 5456416 5456416 +0.00%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-8 5456640 5456544 -0.00%
BenchmarkHeadPostingForMatchers/i=~".*"-8 258254504 258254472 -0.00%
BenchmarkHeadPostingForMatchers/i=~".+"-8 520126192 281554792 -45.87%
BenchmarkHeadPostingForMatchers/i=~""-8 263446640 24908456 -90.55%
BenchmarkHeadPostingForMatchers/i!=""-8 520121144 281553664 -45.87%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-8 7062448 7062272 -0.00%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-8 7063473 7062384 -0.02%
BenchmarkHeadPostingForMatchers/n="1",i!=""-8 274325656 35793776 -86.95%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-8 268926824 30362624 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-8 268882992 30363000 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-8 33193401 4269304 -87.14%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-8 268875024 30363096 -88.71%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-8 300589656 33099784 -88.99%
```
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-05-13 02:51:07 -07:00
|
|
|
|
|
|
|
it.cur = it.h[0].At()
|
2019-02-28 09:23:55 -08:00
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it mergedPostings) At() uint64 {
|
|
|
|
return it.cur
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it mergedPostings) Err() error {
|
|
|
|
return it.err
|
2016-12-13 06:26:58 -08:00
|
|
|
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
// Without returns a new postings list that contains all elements from the full list that
|
2019-03-21 09:23:00 -07:00
|
|
|
// are not in the drop list.
|
2017-11-30 06:34:49 -08:00
|
|
|
func Without(full, drop Postings) Postings {
|
2019-03-21 09:23:00 -07:00
|
|
|
if full == EmptyPostings() {
|
|
|
|
return EmptyPostings()
|
|
|
|
}
|
|
|
|
|
|
|
|
if drop == EmptyPostings() {
|
|
|
|
return full
|
|
|
|
}
|
2017-11-30 06:34:49 -08:00
|
|
|
return newRemovedPostings(full, drop)
|
|
|
|
}
|
|
|
|
|
2017-12-17 10:08:21 -08:00
|
|
|
type removedPostings struct {
|
|
|
|
full, remove Postings
|
|
|
|
|
|
|
|
cur uint64
|
|
|
|
|
|
|
|
initialized bool
|
|
|
|
fok, rok bool
|
|
|
|
}
|
|
|
|
|
|
|
|
func newRemovedPostings(full, remove Postings) *removedPostings {
|
|
|
|
return &removedPostings{
|
|
|
|
full: full,
|
|
|
|
remove: remove,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rp *removedPostings) At() uint64 {
|
|
|
|
return rp.cur
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rp *removedPostings) Next() bool {
|
|
|
|
if !rp.initialized {
|
|
|
|
rp.fok = rp.full.Next()
|
|
|
|
rp.rok = rp.remove.Next()
|
|
|
|
rp.initialized = true
|
|
|
|
}
|
2018-02-14 05:43:04 -08:00
|
|
|
for {
|
|
|
|
if !rp.fok {
|
|
|
|
return false
|
|
|
|
}
|
2017-12-17 10:08:21 -08:00
|
|
|
|
2018-02-14 05:43:04 -08:00
|
|
|
if !rp.rok {
|
|
|
|
rp.cur = rp.full.At()
|
|
|
|
rp.fok = rp.full.Next()
|
|
|
|
return true
|
|
|
|
}
|
2017-12-17 10:08:21 -08:00
|
|
|
|
2018-02-14 05:43:04 -08:00
|
|
|
fcur, rcur := rp.full.At(), rp.remove.At()
|
|
|
|
if fcur < rcur {
|
|
|
|
rp.cur = fcur
|
|
|
|
rp.fok = rp.full.Next()
|
2017-12-17 10:08:21 -08:00
|
|
|
|
2018-02-14 05:43:04 -08:00
|
|
|
return true
|
|
|
|
} else if rcur < fcur {
|
|
|
|
// Forward the remove postings to the right position.
|
|
|
|
rp.rok = rp.remove.Seek(fcur)
|
|
|
|
} else {
|
|
|
|
// Skip the current posting.
|
|
|
|
rp.fok = rp.full.Next()
|
|
|
|
}
|
2017-12-17 10:08:21 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rp *removedPostings) Seek(id uint64) bool {
|
|
|
|
if rp.cur >= id {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
rp.fok = rp.full.Seek(id)
|
|
|
|
rp.rok = rp.remove.Seek(id)
|
|
|
|
rp.initialized = true
|
|
|
|
|
|
|
|
return rp.Next()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rp *removedPostings) Err() error {
|
|
|
|
if rp.full.Err() != nil {
|
|
|
|
return rp.full.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
return rp.remove.Err()
|
|
|
|
}
|
|
|
|
|
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher.
* Selector methods are all stateless, don't need a reference.
* Be smarter in how we look at matchers.
Look at all matchers to see if a label can be empty.
Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.
Add all the Withouts together, rather than
having to subtract each from all postings.
Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.
The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-04-09 03:59:45 -07:00
|
|
|
// ListPostings implements the Postings interface over a plain list.
type ListPostings struct {
	// list holds the entries that have not been consumed yet; Next and Seek
	// re-slice it as they advance.
	list []uint64
	// cur is the entry the iterator is currently positioned on. It is 0
	// before the first advance and is reset to 0 when Next runs out of data.
	cur uint64
}
|
|
|
|
|
2017-11-30 06:34:49 -08:00
|
|
|
func NewListPostings(list []uint64) Postings {
|
2019-03-21 09:23:00 -07:00
|
|
|
return newListPostings(list...)
|
2017-11-30 06:34:49 -08:00
|
|
|
}
|
|
|
|
|
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher.
* Selector methods are all stateless, don't need a reference.
* Be smarter in how we look at matchers.
Look at all matchers to see if a label can be empty.
Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.
Add all the Withouts together, rather than
having to subtract each from all postings.
Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.
The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-04-09 03:59:45 -07:00
|
|
|
func newListPostings(list ...uint64) *ListPostings {
|
|
|
|
return &ListPostings{list: list}
|
2016-12-27 23:50:20 -08:00
|
|
|
}
|
|
|
|
|
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher.
* Selector methods are all stateless, don't need a reference.
* Be smarter in how we look at matchers.
Look at all matchers to see if a label can be empty.
Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.
Add all the Withouts together, rather than
having to subtract each from all postings.
Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.
The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-04-09 03:59:45 -07:00
|
|
|
func (it *ListPostings) At() uint64 {
|
2017-04-23 16:53:56 -07:00
|
|
|
return it.cur
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher.
* Selector methods are all stateless, don't need a reference.
* Be smarter in how we look at matchers.
Look at all matchers to see if a label can be empty.
Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.
Add all the Withouts together, rather than
having to subtract each from all postings.
Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.
The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-04-09 03:59:45 -07:00
|
|
|
func (it *ListPostings) Next() bool {
|
2017-04-23 16:53:56 -07:00
|
|
|
if len(it.list) > 0 {
|
|
|
|
it.cur = it.list[0]
|
|
|
|
it.list = it.list[1:]
|
|
|
|
return true
|
|
|
|
}
|
Fix missing postings in Merge and Intersect (#77)
* Test for a previous implematation of Intersect
Before we were moving the postings list everytime we create a new
chained `intersectPostings`. That was causing some postings to be
skipped. This test fails on the older version.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Advance on Seek only when valid.
Issue:
Before in mergedPostings and others we advance everytime we `Seek`,
which causes issues with `Intersect`.
Take the case, where we have a mergedPostings = m merging, a: {10, 20, 30} and
b: {15, 25, 35}. Everytime we `Seek`, we do a.Seek and b.Seek.
Now if we Intersect m with {21, 22, 23, 30}, we would do Seek({21,22,23}) which
would advance a and b beyond 30.
Fix:
Now we advance only when the seeking value is greater than the current
value, as the definition specifies.
Also, posting 0 will not be a valid posting and will be used to signal
finished or un-initialized PostingsList.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add test for Merge+Intersect edgecase.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add comments to trivial tests.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2017-05-12 00:44:41 -07:00
|
|
|
it.cur = 0
|
2017-04-23 16:53:56 -07:00
|
|
|
return false
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
|
|
|
|
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher.
* Selector methods are all stateless, don't need a reference.
* Be smarter in how we look at matchers.
Look at all matchers to see if a label can be empty.
Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.
Add all the Withouts together, rather than
having to subtract each from all postings.
Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.
The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-04-09 03:59:45 -07:00
|
|
|
func (it *ListPostings) Seek(x uint64) bool {
|
Fix missing postings in Merge and Intersect (#77)
* Test for a previous implematation of Intersect
Before we were moving the postings list everytime we create a new
chained `intersectPostings`. That was causing some postings to be
skipped. This test fails on the older version.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Advance on Seek only when valid.
Issue:
Before in mergedPostings and others we advance everytime we `Seek`,
which causes issues with `Intersect`.
Take the case, where we have a mergedPostings = m merging, a: {10, 20, 30} and
b: {15, 25, 35}. Everytime we `Seek`, we do a.Seek and b.Seek.
Now if we Intersect m with {21, 22, 23, 30}, we would do Seek({21,22,23}) which
would advance a and b beyond 30.
Fix:
Now we advance only when the seeking value is greater than the current
value, as the definition specifies.
Also, posting 0 will not be a valid posting and will be used to signal
finished or un-initialized PostingsList.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add test for Merge+Intersect edgecase.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add comments to trivial tests.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2017-05-12 00:44:41 -07:00
|
|
|
// If the current value satisfies, then return.
|
|
|
|
if it.cur >= x {
|
|
|
|
return true
|
|
|
|
}
|
2019-02-28 09:23:55 -08:00
|
|
|
if len(it.list) == 0 {
|
|
|
|
return false
|
|
|
|
}
|
Fix missing postings in Merge and Intersect (#77)
* Test for a previous implematation of Intersect
Before we were moving the postings list everytime we create a new
chained `intersectPostings`. That was causing some postings to be
skipped. This test fails on the older version.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Advance on Seek only when valid.
Issue:
Before in mergedPostings and others we advance everytime we `Seek`,
which causes issues with `Intersect`.
Take the case, where we have a mergedPostings = m merging, a: {10, 20, 30} and
b: {15, 25, 35}. Everytime we `Seek`, we do a.Seek and b.Seek.
Now if we Intersect m with {21, 22, 23, 30}, we would do Seek({21,22,23}) which
would advance a and b beyond 30.
Fix:
Now we advance only when the seeking value is greater than the current
value, as the definition specifies.
Also, posting 0 will not be a valid posting and will be used to signal
finished or un-initialized PostingsList.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add test for Merge+Intersect edgecase.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
* Add comments to trivial tests.
Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2017-05-12 00:44:41 -07:00
|
|
|
|
2016-12-04 04:16:11 -08:00
|
|
|
// Do binary search between current position and end.
|
2017-04-23 16:53:56 -07:00
|
|
|
i := sort.Search(len(it.list), func(i int) bool {
|
|
|
|
return it.list[i] >= x
|
2016-12-04 04:16:11 -08:00
|
|
|
})
|
2017-04-23 16:53:56 -07:00
|
|
|
if i < len(it.list) {
|
|
|
|
it.cur = it.list[i]
|
|
|
|
it.list = it.list[i+1:]
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
it.list = nil
|
|
|
|
return false
|
2016-12-04 04:16:11 -08:00
|
|
|
}
|
2016-12-09 01:41:51 -08:00
|
|
|
|
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher.
* Selector methods are all stateless, don't need a reference.
* Be smarter in how we look at matchers.
Look at all matchers to see if a label can be empty.
Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.
Add all the Withouts together, rather than
having to subtract each from all postings.
Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.
The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.
benchmark old ns/op new ns/op delta
BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01%
Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2019-04-09 03:59:45 -07:00
|
|
|
func (it *ListPostings) Err() error {
|
2016-12-13 06:26:58 -08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-03-27 01:34:42 -07:00
|
|
|
// bigEndianPostings implements the Postings interface over a byte stream of
// big endian numbers.
//
// list holds the entries that have not been consumed yet, encoded as
// consecutive 4-byte big-endian unsigned integers. cur is the entry decoded by
// the most recent successful Next or Seek call (zero before the first one).
type bigEndianPostings struct {
	list []byte
	cur  uint32
}

// newBigEndianPostings returns an iterator over list. The slice is not copied;
// the iterator only re-slices it as entries are consumed.
func newBigEndianPostings(list []byte) *bigEndianPostings {
	return &bigEndianPostings{list: list}
}

// At returns the current entry widened to uint64.
func (it *bigEndianPostings) At() uint64 {
	return uint64(it.cur)
}

// Next decodes the next 4-byte entry into the current position and reports
// whether one was available. Fewer than four remaining bytes are treated as
// the end of the stream.
func (it *bigEndianPostings) Next() bool {
	if len(it.list) < 4 {
		return false
	}
	it.cur = binary.BigEndian.Uint32(it.list)
	it.list = it.list[4:]
	return true
}

// Seek positions the iterator on the first entry that is greater than or equal
// to x and reports whether such an entry exists. If the current entry already
// satisfies that, the iterator does not move. When no entry qualifies, the
// remaining list is dropped so that subsequent Next calls return false.
//
// The lookup is a binary search, so the remaining entries are assumed to be in
// ascending order.
func (it *bigEndianPostings) Seek(x uint64) bool {
	if uint64(it.cur) >= x {
		return true
	}

	num := len(it.list) / 4
	// Do binary search between current position and end.
	i := sort.Search(num, func(i int) bool {
		// Widen the stored entry instead of narrowing x: converting x to
		// uint32 would wrap for targets above the 32-bit range and make the
		// search match entries that are actually smaller than x.
		return uint64(binary.BigEndian.Uint32(it.list[i*4:])) >= x
	})
	if i < num {
		j := i * 4
		it.cur = binary.BigEndian.Uint32(it.list[j:])
		it.list = it.list[j+4:]
		return true
	}
	it.list = nil
	return false
}

// Err always returns nil; this iterator never records an error.
func (it *bigEndianPostings) Err() error {
	return nil
}
|