Histogram performance: optimize floatBucketIterator

Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
This commit is contained in:
Linas Medziunas 2023-10-09 09:40:59 +03:00
parent cdad64002a
commit c5c5c569fa
4 changed files with 110 additions and 37 deletions

View file

@ -434,25 +434,25 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool {
}
currIt := h.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
prevIt := previous.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
if detectReset(currIt, prevIt) {
if detectReset(&currIt, &prevIt) {
return true
}
currIt = h.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
prevIt = previous.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
return detectReset(currIt, prevIt)
return detectReset(&currIt, &prevIt)
}
func detectReset(currIt, prevIt BucketIterator[float64]) bool {
func detectReset(currIt, prevIt *floatBucketIterator) bool {
if !prevIt.Next() {
return false // If no buckets in previous histogram, nothing can be reset.
}
prevBucket := prevIt.At()
prevBucket := prevIt.strippedAt()
if !currIt.Next() {
// No bucket in current, but at least one in previous
// histogram. Check if any of those are non-zero, in which case
// this is a reset.
for {
if prevBucket.Count != 0 {
if prevBucket.count != 0 {
return true
}
if !prevIt.Next() {
@ -460,10 +460,10 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
}
}
}
currBucket := currIt.At()
currBucket := currIt.strippedAt()
for {
// Forward currIt until we find the bucket corresponding to prevBucket.
for currBucket.Index < prevBucket.Index {
for currBucket.index < prevBucket.index {
if !currIt.Next() {
// Reached end of currIt early, therefore
// previous histogram has a bucket that the
@ -471,7 +471,7 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
// remaining buckets in the previous histogram
// are unpopulated, this is a reset.
for {
if prevBucket.Count != 0 {
if prevBucket.count != 0 {
return true
}
if !prevIt.Next() {
@ -479,18 +479,18 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
}
}
}
currBucket = currIt.At()
currBucket = currIt.strippedAt()
}
if currBucket.Index > prevBucket.Index {
if currBucket.index > prevBucket.index {
// Previous histogram has a bucket the current one does
// not have. If it's populated, it's a reset.
if prevBucket.Count != 0 {
if prevBucket.count != 0 {
return true
}
} else {
// We have reached corresponding buckets in both iterators.
// We can finally compare the counts.
if currBucket.Count < prevBucket.Count {
if currBucket.count < prevBucket.count {
return true
}
}
@ -498,35 +498,39 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
// Reached end of prevIt without finding offending buckets.
return false
}
prevBucket = prevIt.At()
prevBucket = prevIt.strippedAt()
}
}
// PositiveBucketIterator returns a BucketIterator to iterate over all positive
// buckets in ascending order (starting next to the zero bucket and going up).
func (h *FloatHistogram) PositiveBucketIterator() BucketIterator[float64] {
return h.floatBucketIterator(true, 0, h.Schema)
it := h.floatBucketIterator(true, 0, h.Schema)
return &it
}
// NegativeBucketIterator returns a BucketIterator to iterate over all negative
// buckets in descending order (starting next to the zero bucket and going
// down).
func (h *FloatHistogram) NegativeBucketIterator() BucketIterator[float64] {
return h.floatBucketIterator(false, 0, h.Schema)
it := h.floatBucketIterator(false, 0, h.Schema)
return &it
}
// PositiveReverseBucketIterator returns a BucketIterator to iterate over all
// positive buckets in descending order (starting at the highest bucket and
// going down towards the zero bucket).
func (h *FloatHistogram) PositiveReverseBucketIterator() BucketIterator[float64] {
return newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
it := newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
return &it
}
// NegativeReverseBucketIterator returns a BucketIterator to iterate over all
// negative buckets in ascending order (starting at the lowest bucket and going
// up towards the zero bucket).
func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] {
return newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
it := newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
return &it
}
// AllBucketIterator returns a BucketIterator to iterate over all negative,
@ -537,8 +541,8 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64]
func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{
h: h,
leftIter: h.NegativeReverseBucketIterator(),
rightIter: h.PositiveBucketIterator(),
leftIter: newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false),
rightIter: h.floatBucketIterator(true, 0, h.Schema),
state: -1,
}
}
@ -551,8 +555,8 @@ func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{
h: h,
leftIter: h.PositiveReverseBucketIterator(),
rightIter: h.NegativeBucketIterator(),
leftIter: newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true),
rightIter: h.floatBucketIterator(false, 0, h.Schema),
state: -1,
}
}
@ -683,11 +687,11 @@ func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 {
// targetSchema prior to iterating (without mutating FloatHistogram).
func (h *FloatHistogram) floatBucketIterator(
positive bool, absoluteStartValue float64, targetSchema int32,
) *floatBucketIterator {
) floatBucketIterator {
if targetSchema > h.Schema {
panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema))
}
i := &floatBucketIterator{
i := floatBucketIterator{
baseBucketIterator: baseBucketIterator[float64, float64]{
schema: h.Schema,
positive: positive,
@ -705,11 +709,11 @@ func (h *FloatHistogram) floatBucketIterator(
return i
}
// reverseFloatbucketiterator is a low-level constructor for reverse bucket iterators.
// reverseFloatBucketIterator is a low-level constructor for reverse bucket iterators.
func newReverseFloatBucketIterator(
spans []Span, buckets []float64, schema int32, positive bool,
) *reverseFloatBucketIterator {
r := &reverseFloatBucketIterator{
) reverseFloatBucketIterator {
r := reverseFloatBucketIterator{
baseBucketIterator: baseBucketIterator[float64, float64]{
schema: schema,
spans: spans,
@ -737,6 +741,8 @@ type floatBucketIterator struct {
targetSchema int32 // targetSchema is the schema to merge to and must be ≤ schema.
origIdx int32 // The bucket index within the original schema.
absoluteStartValue float64 // Never return buckets with an upper bound ≤ this value.
boundReachedStartValue bool // Has getBound reached absoluteStartValue already?
}
func (i *floatBucketIterator) At() Bucket[float64] {
@ -800,9 +806,10 @@ mergeLoop: // Merge together all buckets from the original schema that fall into
}
// Skip buckets before absoluteStartValue.
// TODO(beorn7): Maybe do something more efficient than this recursive call.
if getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
if !i.boundReachedStartValue && getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
return i.Next()
}
i.boundReachedStartValue = true
return true
}
@ -843,8 +850,9 @@ func (i *reverseFloatBucketIterator) Next() bool {
}
type allFloatBucketIterator struct {
h *FloatHistogram
leftIter, rightIter BucketIterator[float64]
h *FloatHistogram
leftIter reverseFloatBucketIterator
rightIter floatBucketIterator
// -1 means we are iterating negative buckets.
// 0 means it is time for the zero bucket.
// 1 means we are iterating positive buckets.

View file

@ -16,6 +16,7 @@ package histogram
import (
"fmt"
"math"
"math/rand"
"testing"
"github.com/stretchr/testify/require"
@ -2291,3 +2292,51 @@ func TestFloatBucketIteratorTargetSchema(t *testing.T) {
}
require.False(t, it.Next(), "negative iterator not exhausted")
}
func BenchmarkFloatHistogramAllBucketIterator(b *testing.B) {
rng := rand.New(rand.NewSource(0))
fh := createRandomFloatHistogram(rng, 50)
b.ReportAllocs() // the current implementation reports 1 alloc
b.ResetTimer()
for n := 0; n < b.N; n++ {
for it := fh.AllBucketIterator(); it.Next(); {
}
}
}
func BenchmarkFloatHistogramDetectReset(b *testing.B) {
rng := rand.New(rand.NewSource(0))
fh := createRandomFloatHistogram(rng, 50)
b.ReportAllocs() // the current implementation reports 0 allocs
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Detect against the itself (no resets is the worst case input).
fh.DetectReset(fh)
}
}
func createRandomFloatHistogram(rng *rand.Rand, spanNum int32) *FloatHistogram {
f := &FloatHistogram{}
f.PositiveSpans, f.PositiveBuckets = createRandomSpans(rng, spanNum)
f.NegativeSpans, f.NegativeBuckets = createRandomSpans(rng, spanNum)
return f
}
func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) {
Spans := make([]Span, spanNum)
Buckets := make([]float64, 0)
for i := 0; i < int(spanNum); i++ {
Spans[i].Offset = rng.Int31n(spanNum) + 1
Spans[i].Length = uint32(rng.Int31n(spanNum) + 1)
for j := 0; j < int(Spans[i].Length); j++ {
Buckets = append(Buckets, float64(rng.Int31n(spanNum)+1))
}
}
return Spans, Buckets
}

View file

@ -53,6 +53,13 @@ type Bucket[BC BucketCount] struct {
Index int32
}
// strippedBucket is Bucket without bound values (which are expensive to calculate
// and not used in certain use cases).
type strippedBucket[BC BucketCount] struct {
count BC
index int32
}
// String returns a string representation of a Bucket, using the usual
// mathematical notation of '['/']' for inclusive bounds and '('/')' for
// non-inclusive bounds.
@ -101,13 +108,12 @@ type baseBucketIterator[BC BucketCount, IBC InternalBucketCount] struct {
currIdx int32 // The actual bucket index.
}
func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] {
func (b *baseBucketIterator[BC, IBC]) At() Bucket[BC] {
return b.at(b.schema)
}
// at is an internal version of the exported At to enable using a different
// schema.
func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
// at is an internal version of the exported At to enable using a different schema.
func (b *baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
bucket := Bucket[BC]{
Count: BC(b.currCount),
Index: b.currIdx,
@ -124,6 +130,14 @@ func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
return bucket
}
// strippedAt returns current strippedBucket (which lacks bucket bounds but is cheaper to compute).
func (b *baseBucketIterator[BC, IBC]) strippedAt() strippedBucket[BC] {
return strippedBucket[BC]{
count: BC(b.currCount),
index: b.currIdx,
}
}
// compactBuckets is a generic function used by both Histogram.Compact and
// FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are
// deltas. Set it to false if the buckets contain absolute counts.

View file

@ -148,13 +148,15 @@ func (h *Histogram) ZeroBucket() Bucket[uint64] {
// PositiveBucketIterator returns a BucketIterator to iterate over all positive
// buckets in ascending order (starting next to the zero bucket and going up).
func (h *Histogram) PositiveBucketIterator() BucketIterator[uint64] {
return newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
it := newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
return &it
}
// NegativeBucketIterator returns a BucketIterator to iterate over all negative
// buckets in descending order (starting next to the zero bucket and going down).
func (h *Histogram) NegativeBucketIterator() BucketIterator[uint64] {
return newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
it := newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
return &it
}
// CumulativeBucketIterator returns a BucketIterator to iterate over a
@ -325,14 +327,14 @@ type regularBucketIterator struct {
baseBucketIterator[uint64, int64]
}
func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) *regularBucketIterator {
func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) regularBucketIterator {
i := baseBucketIterator[uint64, int64]{
schema: schema,
spans: spans,
buckets: buckets,
positive: positive,
}
return &regularBucketIterator{i}
return regularBucketIterator{i}
}
func (r *regularBucketIterator) Next() bool {