Merge pull request #12954 from linasm/linasm/optimize-floatBucketIterator2

Histogram performance: optimize floatBucketIterator
2025-03-05 20:59:13 -08:00 · 2023-11-02 23:59:32 +01:00 · 2023-11-02 23:59:32 +01:00 · fae4561369
parent 0b02cc5277 e3e22abadb
commit fae4561369
4 changed files with 110 additions and 37 deletions
--- a/model/histogram/float_histogram.go
+++ b/model/histogram/float_histogram.go
@ -466,25 +466,25 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool {
 	}
 	currIt := h.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
 	prevIt := previous.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
-	if detectReset(currIt, prevIt) {
+	if detectReset(&currIt, &prevIt) {
 		return true
 	}
 	currIt = h.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
 	prevIt = previous.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
-	return detectReset(currIt, prevIt)
+	return detectReset(&currIt, &prevIt)
 }
-func detectReset(currIt, prevIt BucketIterator[float64]) bool {
+func detectReset(currIt, prevIt *floatBucketIterator) bool {
 	if !prevIt.Next() {
 		return false // If no buckets in previous histogram, nothing can be reset.
 	}
-	prevBucket := prevIt.At()
+	prevBucket := prevIt.strippedAt()
 	if !currIt.Next() {
 		// No bucket in current, but at least one in previous
 		// histogram. Check if any of those are non-zero, in which case
 		// this is a reset.
 		for {
-			if prevBucket.Count != 0 {
+			if prevBucket.count != 0 {
 				return true
 			}
 			if !prevIt.Next() {
@ -492,10 +492,10 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
 			}
 		}
 	}
-	currBucket := currIt.At()
+	currBucket := currIt.strippedAt()
 	for {
 		// Forward currIt until we find the bucket corresponding to prevBucket.
-		for currBucket.Index < prevBucket.Index {
+		for currBucket.index < prevBucket.index {
 			if !currIt.Next() {
 				// Reached end of currIt early, therefore
 				// previous histogram has a bucket that the
@ -503,7 +503,7 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
 				// remaining buckets in the previous histogram
 				// are unpopulated, this is a reset.
 				for {
-					if prevBucket.Count != 0 {
+					if prevBucket.count != 0 {
 						return true
 					}
 					if !prevIt.Next() {
@ -511,18 +511,18 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
 					}
 				}
 			}
-			currBucket = currIt.At()
+			currBucket = currIt.strippedAt()
 		}
-		if currBucket.Index > prevBucket.Index {
+		if currBucket.index > prevBucket.index {
 			// Previous histogram has a bucket the current one does
 			// not have. If it's populated, it's a reset.
-			if prevBucket.Count != 0 {
+			if prevBucket.count != 0 {
 				return true
 			}
 		} else {
 			// We have reached corresponding buckets in both iterators.
 			// We can finally compare the counts.
-			if currBucket.Count < prevBucket.Count {
+			if currBucket.count < prevBucket.count {
 				return true
 			}
 		}
@ -530,35 +530,39 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
 			// Reached end of prevIt without finding offending buckets.
 			return false
 		}
-		prevBucket = prevIt.At()
+		prevBucket = prevIt.strippedAt()
 	}
 }
 // PositiveBucketIterator returns a BucketIterator to iterate over all positive
 // buckets in ascending order (starting next to the zero bucket and going up).
 func (h *FloatHistogram) PositiveBucketIterator() BucketIterator[float64] {
-	return h.floatBucketIterator(true, 0, h.Schema)
+	it := h.floatBucketIterator(true, 0, h.Schema)
 	return &it
 }
 // NegativeBucketIterator returns a BucketIterator to iterate over all negative
 // buckets in descending order (starting next to the zero bucket and going
 // down).
 func (h *FloatHistogram) NegativeBucketIterator() BucketIterator[float64] {
-	return h.floatBucketIterator(false, 0, h.Schema)
+	it := h.floatBucketIterator(false, 0, h.Schema)
 	return &it
 }
 // PositiveReverseBucketIterator returns a BucketIterator to iterate over all
 // positive buckets in descending order (starting at the highest bucket and
 // going down towards the zero bucket).
 func (h *FloatHistogram) PositiveReverseBucketIterator() BucketIterator[float64] {
-	return newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
+	it := newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
 	return &it
 }
 // NegativeReverseBucketIterator returns a BucketIterator to iterate over all
 // negative buckets in ascending order (starting at the lowest bucket and going
 // up towards the zero bucket).
 func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] {
-	return newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
+	it := newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
 	return &it
 }
 // AllBucketIterator returns a BucketIterator to iterate over all negative,
@ -569,8 +573,8 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64]
 func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
 	return &allFloatBucketIterator{
 		h:         h,
-		leftIter:  h.NegativeReverseBucketIterator(),
+		leftIter:  newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false),
-		rightIter: h.PositiveBucketIterator(),
+		rightIter: h.floatBucketIterator(true, 0, h.Schema),
 		state:     -1,
 	}
 }
@ -583,8 +587,8 @@ func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
 func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
 	return &allFloatBucketIterator{
 		h:         h,
-		leftIter:  h.PositiveReverseBucketIterator(),
+		leftIter:  newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true),
-		rightIter: h.NegativeBucketIterator(),
+		rightIter: h.floatBucketIterator(false, 0, h.Schema),
 		state:     -1,
 	}
 }
@ -715,11 +719,11 @@ func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 {
 // targetSchema prior to iterating (without mutating FloatHistogram).
 func (h *FloatHistogram) floatBucketIterator(
 	positive bool, absoluteStartValue float64, targetSchema int32,
-) *floatBucketIterator {
+) floatBucketIterator {
 	if targetSchema > h.Schema {
 		panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema))
 	}
-	i := &floatBucketIterator{
+	i := floatBucketIterator{
 		baseBucketIterator: baseBucketIterator[float64, float64]{
 			schema:   h.Schema,
 			positive: positive,
@ -737,11 +741,11 @@ func (h *FloatHistogram) floatBucketIterator(
 	return i
 }
-// reverseFloatbucketiterator is a low-level constructor for reverse bucket iterators.
+// reverseFloatBucketIterator is a low-level constructor for reverse bucket iterators.
 func newReverseFloatBucketIterator(
 	spans []Span, buckets []float64, schema int32, positive bool,
-) *reverseFloatBucketIterator {
+) reverseFloatBucketIterator {
-	r := &reverseFloatBucketIterator{
+	r := reverseFloatBucketIterator{
 		baseBucketIterator: baseBucketIterator[float64, float64]{
 			schema:   schema,
 			spans:    spans,
@ -769,6 +773,8 @@ type floatBucketIterator struct {
 	targetSchema       int32   // targetSchema is the schema to merge to and must be ≤ schema.
 	origIdx            int32   // The bucket index within the original schema.
 	absoluteStartValue float64 // Never return buckets with an upper bound ≤ this value.
 	boundReachedStartValue bool // Has getBound reached absoluteStartValue already?
 }
 func (i *floatBucketIterator) At() Bucket[float64] {
@ -832,9 +838,10 @@ mergeLoop: // Merge together all buckets from the original schema that fall into
 	}
 	// Skip buckets before absoluteStartValue.
 	// TODO(beorn7): Maybe do something more efficient than this recursive call.
-	if getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
+	if !i.boundReachedStartValue && getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
 		return i.Next()
 	}
 	i.boundReachedStartValue = true
 	return true
 }
@ -875,8 +882,9 @@ func (i *reverseFloatBucketIterator) Next() bool {
 }
 type allFloatBucketIterator struct {
-	h                   *FloatHistogram
+	h         *FloatHistogram
-	leftIter, rightIter BucketIterator[float64]
+	leftIter  reverseFloatBucketIterator
 	rightIter floatBucketIterator
 	// -1 means we are iterating negative buckets.
 	// 0 means it is time for the zero bucket.
 	// 1 means we are iterating positive buckets.
--- a/model/histogram/float_histogram_test.go
+++ b/model/histogram/float_histogram_test.go
@ -16,6 +16,7 @@ package histogram
 import (
 	"fmt"
 	"math"
 	"math/rand"
 	"testing"
 	"github.com/stretchr/testify/require"
@ -2393,3 +2394,51 @@ func TestFloatHistogramSize(t *testing.T) {
 		})
 	}
 }
 func BenchmarkFloatHistogramAllBucketIterator(b *testing.B) {
 	rng := rand.New(rand.NewSource(0))
 	fh := createRandomFloatHistogram(rng, 50)
 	b.ReportAllocs() // the current implementation reports 1 alloc
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		for it := fh.AllBucketIterator(); it.Next(); {
 		}
 	}
 }
 func BenchmarkFloatHistogramDetectReset(b *testing.B) {
 	rng := rand.New(rand.NewSource(0))
 	fh := createRandomFloatHistogram(rng, 50)
 	b.ReportAllocs() // the current implementation reports 0 allocs
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		// Detect against the itself (no resets is the worst case input).
 		fh.DetectReset(fh)
 	}
 }
 func createRandomFloatHistogram(rng *rand.Rand, spanNum int32) *FloatHistogram {
 	f := &FloatHistogram{}
 	f.PositiveSpans, f.PositiveBuckets = createRandomSpans(rng, spanNum)
 	f.NegativeSpans, f.NegativeBuckets = createRandomSpans(rng, spanNum)
 	return f
 }
 func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) {
 	Spans := make([]Span, spanNum)
 	Buckets := make([]float64, 0)
 	for i := 0; i < int(spanNum); i++ {
 		Spans[i].Offset = rng.Int31n(spanNum) + 1
 		Spans[i].Length = uint32(rng.Int31n(spanNum) + 1)
 		for j := 0; j < int(Spans[i].Length); j++ {
 			Buckets = append(Buckets, float64(rng.Int31n(spanNum)+1))
 		}
 	}
 	return Spans, Buckets
 }
--- a/model/histogram/generic.go
+++ b/model/histogram/generic.go
@ -53,6 +53,13 @@ type Bucket[BC BucketCount] struct {
 	Index int32
 }
 // strippedBucket is Bucket without bound values (which are expensive to calculate
 // and not used in certain use cases).
 type strippedBucket[BC BucketCount] struct {
 	count BC
 	index int32
 }
 // String returns a string representation of a Bucket, using the usual
 // mathematical notation of '['/']' for inclusive bounds and '('/')' for
 // non-inclusive bounds.
@ -101,13 +108,12 @@ type baseBucketIterator[BC BucketCount, IBC InternalBucketCount] struct {
 	currIdx   int32 // The actual bucket index.
 }
-func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] {
+func (b *baseBucketIterator[BC, IBC]) At() Bucket[BC] {
 	return b.at(b.schema)
 }
-// at is an internal version of the exported At to enable using a different
+// at is an internal version of the exported At to enable using a different schema.
-// schema.
+func (b *baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
 func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
 	bucket := Bucket[BC]{
 		Count: BC(b.currCount),
 		Index: b.currIdx,
@ -124,6 +130,14 @@ func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
 	return bucket
 }
 // strippedAt returns current strippedBucket (which lacks bucket bounds but is cheaper to compute).
 func (b *baseBucketIterator[BC, IBC]) strippedAt() strippedBucket[BC] {
 	return strippedBucket[BC]{
 		count: BC(b.currCount),
 		index: b.currIdx,
 	}
 }
 // compactBuckets is a generic function used by both Histogram.Compact and
 // FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are
 // deltas. Set it to false if the buckets contain absolute counts.
--- a/model/histogram/histogram.go
+++ b/model/histogram/histogram.go
@ -150,13 +150,15 @@ func (h *Histogram) ZeroBucket() Bucket[uint64] {
 // PositiveBucketIterator returns a BucketIterator to iterate over all positive
 // buckets in ascending order (starting next to the zero bucket and going up).
 func (h *Histogram) PositiveBucketIterator() BucketIterator[uint64] {
-	return newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
+	it := newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
 	return &it
 }
 // NegativeBucketIterator returns a BucketIterator to iterate over all negative
 // buckets in descending order (starting next to the zero bucket and going down).
 func (h *Histogram) NegativeBucketIterator() BucketIterator[uint64] {
-	return newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
+	it := newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
 	return &it
 }
 // CumulativeBucketIterator returns a BucketIterator to iterate over a
@ -330,14 +332,14 @@ type regularBucketIterator struct {
 	baseBucketIterator[uint64, int64]
 }
-func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) *regularBucketIterator {
+func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) regularBucketIterator {
 	i := baseBucketIterator[uint64, int64]{
 		schema:   schema,
 		spans:    spans,
 		buckets:  buckets,
 		positive: positive,
 	}
-	return &regularBucketIterator{i}
+	return regularBucketIterator{i}
 }
 func (r *regularBucketIterator) Next() bool {