Merge pull request #12954 from linasm/linasm/optimize-floatBucketIterator2

Histogram performance: optimize floatBucketIterator
This commit is contained in:
Björn Rabenstein 2023-11-02 23:59:32 +01:00 committed by GitHub
commit fae4561369
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 37 deletions

View file

@ -466,25 +466,25 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool {
} }
currIt := h.floatBucketIterator(true, h.ZeroThreshold, h.Schema) currIt := h.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
prevIt := previous.floatBucketIterator(true, h.ZeroThreshold, h.Schema) prevIt := previous.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
if detectReset(currIt, prevIt) { if detectReset(&currIt, &prevIt) {
return true return true
} }
currIt = h.floatBucketIterator(false, h.ZeroThreshold, h.Schema) currIt = h.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
prevIt = previous.floatBucketIterator(false, h.ZeroThreshold, h.Schema) prevIt = previous.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
return detectReset(currIt, prevIt) return detectReset(&currIt, &prevIt)
} }
func detectReset(currIt, prevIt BucketIterator[float64]) bool { func detectReset(currIt, prevIt *floatBucketIterator) bool {
if !prevIt.Next() { if !prevIt.Next() {
return false // If no buckets in previous histogram, nothing can be reset. return false // If no buckets in previous histogram, nothing can be reset.
} }
prevBucket := prevIt.At() prevBucket := prevIt.strippedAt()
if !currIt.Next() { if !currIt.Next() {
// No bucket in current, but at least one in previous // No bucket in current, but at least one in previous
// histogram. Check if any of those are non-zero, in which case // histogram. Check if any of those are non-zero, in which case
// this is a reset. // this is a reset.
for { for {
if prevBucket.Count != 0 { if prevBucket.count != 0 {
return true return true
} }
if !prevIt.Next() { if !prevIt.Next() {
@ -492,10 +492,10 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
} }
} }
} }
currBucket := currIt.At() currBucket := currIt.strippedAt()
for { for {
// Forward currIt until we find the bucket corresponding to prevBucket. // Forward currIt until we find the bucket corresponding to prevBucket.
for currBucket.Index < prevBucket.Index { for currBucket.index < prevBucket.index {
if !currIt.Next() { if !currIt.Next() {
// Reached end of currIt early, therefore // Reached end of currIt early, therefore
// previous histogram has a bucket that the // previous histogram has a bucket that the
@ -503,7 +503,7 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
// remaining buckets in the previous histogram // remaining buckets in the previous histogram
// are unpopulated, this is a reset. // are unpopulated, this is a reset.
for { for {
if prevBucket.Count != 0 { if prevBucket.count != 0 {
return true return true
} }
if !prevIt.Next() { if !prevIt.Next() {
@ -511,18 +511,18 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
} }
} }
} }
currBucket = currIt.At() currBucket = currIt.strippedAt()
} }
if currBucket.Index > prevBucket.Index { if currBucket.index > prevBucket.index {
// Previous histogram has a bucket the current one does // Previous histogram has a bucket the current one does
// not have. If it's populated, it's a reset. // not have. If it's populated, it's a reset.
if prevBucket.Count != 0 { if prevBucket.count != 0 {
return true return true
} }
} else { } else {
// We have reached corresponding buckets in both iterators. // We have reached corresponding buckets in both iterators.
// We can finally compare the counts. // We can finally compare the counts.
if currBucket.Count < prevBucket.Count { if currBucket.count < prevBucket.count {
return true return true
} }
} }
@ -530,35 +530,39 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
// Reached end of prevIt without finding offending buckets. // Reached end of prevIt without finding offending buckets.
return false return false
} }
prevBucket = prevIt.At() prevBucket = prevIt.strippedAt()
} }
} }
// PositiveBucketIterator returns a BucketIterator to iterate over all positive // PositiveBucketIterator returns a BucketIterator to iterate over all positive
// buckets in ascending order (starting next to the zero bucket and going up). // buckets in ascending order (starting next to the zero bucket and going up).
func (h *FloatHistogram) PositiveBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) PositiveBucketIterator() BucketIterator[float64] {
return h.floatBucketIterator(true, 0, h.Schema) it := h.floatBucketIterator(true, 0, h.Schema)
return &it
} }
// NegativeBucketIterator returns a BucketIterator to iterate over all negative // NegativeBucketIterator returns a BucketIterator to iterate over all negative
// buckets in descending order (starting next to the zero bucket and going // buckets in descending order (starting next to the zero bucket and going
// down). // down).
func (h *FloatHistogram) NegativeBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) NegativeBucketIterator() BucketIterator[float64] {
return h.floatBucketIterator(false, 0, h.Schema) it := h.floatBucketIterator(false, 0, h.Schema)
return &it
} }
// PositiveReverseBucketIterator returns a BucketIterator to iterate over all // PositiveReverseBucketIterator returns a BucketIterator to iterate over all
// positive buckets in descending order (starting at the highest bucket and // positive buckets in descending order (starting at the highest bucket and
// going down towards the zero bucket). // going down towards the zero bucket).
func (h *FloatHistogram) PositiveReverseBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) PositiveReverseBucketIterator() BucketIterator[float64] {
return newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true) it := newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
return &it
} }
// NegativeReverseBucketIterator returns a BucketIterator to iterate over all // NegativeReverseBucketIterator returns a BucketIterator to iterate over all
// negative buckets in ascending order (starting at the lowest bucket and going // negative buckets in ascending order (starting at the lowest bucket and going
// up towards the zero bucket). // up towards the zero bucket).
func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] {
return newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false) it := newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
return &it
} }
// AllBucketIterator returns a BucketIterator to iterate over all negative, // AllBucketIterator returns a BucketIterator to iterate over all negative,
@ -569,8 +573,8 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64]
func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{ return &allFloatBucketIterator{
h: h, h: h,
leftIter: h.NegativeReverseBucketIterator(), leftIter: newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false),
rightIter: h.PositiveBucketIterator(), rightIter: h.floatBucketIterator(true, 0, h.Schema),
state: -1, state: -1,
} }
} }
@ -583,8 +587,8 @@ func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{ return &allFloatBucketIterator{
h: h, h: h,
leftIter: h.PositiveReverseBucketIterator(), leftIter: newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true),
rightIter: h.NegativeBucketIterator(), rightIter: h.floatBucketIterator(false, 0, h.Schema),
state: -1, state: -1,
} }
} }
@ -715,11 +719,11 @@ func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 {
// targetSchema prior to iterating (without mutating FloatHistogram). // targetSchema prior to iterating (without mutating FloatHistogram).
func (h *FloatHistogram) floatBucketIterator( func (h *FloatHistogram) floatBucketIterator(
positive bool, absoluteStartValue float64, targetSchema int32, positive bool, absoluteStartValue float64, targetSchema int32,
) *floatBucketIterator { ) floatBucketIterator {
if targetSchema > h.Schema { if targetSchema > h.Schema {
panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema)) panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema))
} }
i := &floatBucketIterator{ i := floatBucketIterator{
baseBucketIterator: baseBucketIterator[float64, float64]{ baseBucketIterator: baseBucketIterator[float64, float64]{
schema: h.Schema, schema: h.Schema,
positive: positive, positive: positive,
@ -737,11 +741,11 @@ func (h *FloatHistogram) floatBucketIterator(
return i return i
} }
// reverseFloatbucketiterator is a low-level constructor for reverse bucket iterators. // reverseFloatBucketIterator is a low-level constructor for reverse bucket iterators.
func newReverseFloatBucketIterator( func newReverseFloatBucketIterator(
spans []Span, buckets []float64, schema int32, positive bool, spans []Span, buckets []float64, schema int32, positive bool,
) *reverseFloatBucketIterator { ) reverseFloatBucketIterator {
r := &reverseFloatBucketIterator{ r := reverseFloatBucketIterator{
baseBucketIterator: baseBucketIterator[float64, float64]{ baseBucketIterator: baseBucketIterator[float64, float64]{
schema: schema, schema: schema,
spans: spans, spans: spans,
@ -769,6 +773,8 @@ type floatBucketIterator struct {
targetSchema int32 // targetSchema is the schema to merge to and must be ≤ schema. targetSchema int32 // targetSchema is the schema to merge to and must be ≤ schema.
origIdx int32 // The bucket index within the original schema. origIdx int32 // The bucket index within the original schema.
absoluteStartValue float64 // Never return buckets with an upper bound ≤ this value. absoluteStartValue float64 // Never return buckets with an upper bound ≤ this value.
boundReachedStartValue bool // Has getBound reached absoluteStartValue already?
} }
func (i *floatBucketIterator) At() Bucket[float64] { func (i *floatBucketIterator) At() Bucket[float64] {
@ -832,9 +838,10 @@ mergeLoop: // Merge together all buckets from the original schema that fall into
} }
// Skip buckets before absoluteStartValue. // Skip buckets before absoluteStartValue.
// TODO(beorn7): Maybe do something more efficient than this recursive call. // TODO(beorn7): Maybe do something more efficient than this recursive call.
if getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue { if !i.boundReachedStartValue && getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
return i.Next() return i.Next()
} }
i.boundReachedStartValue = true
return true return true
} }
@ -875,8 +882,9 @@ func (i *reverseFloatBucketIterator) Next() bool {
} }
type allFloatBucketIterator struct { type allFloatBucketIterator struct {
h *FloatHistogram h *FloatHistogram
leftIter, rightIter BucketIterator[float64] leftIter reverseFloatBucketIterator
rightIter floatBucketIterator
// -1 means we are iterating negative buckets. // -1 means we are iterating negative buckets.
// 0 means it is time for the zero bucket. // 0 means it is time for the zero bucket.
// 1 means we are iterating positive buckets. // 1 means we are iterating positive buckets.

View file

@ -16,6 +16,7 @@ package histogram
import ( import (
"fmt" "fmt"
"math" "math"
"math/rand"
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -2393,3 +2394,51 @@ func TestFloatHistogramSize(t *testing.T) {
}) })
} }
} }
func BenchmarkFloatHistogramAllBucketIterator(b *testing.B) {
rng := rand.New(rand.NewSource(0))
fh := createRandomFloatHistogram(rng, 50)
b.ReportAllocs() // the current implementation reports 1 alloc
b.ResetTimer()
for n := 0; n < b.N; n++ {
for it := fh.AllBucketIterator(); it.Next(); {
}
}
}
func BenchmarkFloatHistogramDetectReset(b *testing.B) {
rng := rand.New(rand.NewSource(0))
fh := createRandomFloatHistogram(rng, 50)
b.ReportAllocs() // the current implementation reports 0 allocs
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Detect against the itself (no resets is the worst case input).
fh.DetectReset(fh)
}
}
func createRandomFloatHistogram(rng *rand.Rand, spanNum int32) *FloatHistogram {
f := &FloatHistogram{}
f.PositiveSpans, f.PositiveBuckets = createRandomSpans(rng, spanNum)
f.NegativeSpans, f.NegativeBuckets = createRandomSpans(rng, spanNum)
return f
}
func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) {
Spans := make([]Span, spanNum)
Buckets := make([]float64, 0)
for i := 0; i < int(spanNum); i++ {
Spans[i].Offset = rng.Int31n(spanNum) + 1
Spans[i].Length = uint32(rng.Int31n(spanNum) + 1)
for j := 0; j < int(Spans[i].Length); j++ {
Buckets = append(Buckets, float64(rng.Int31n(spanNum)+1))
}
}
return Spans, Buckets
}

View file

@ -53,6 +53,13 @@ type Bucket[BC BucketCount] struct {
Index int32 Index int32
} }
// strippedBucket is Bucket without bound values (which are expensive to calculate
// and not used in certain use cases).
type strippedBucket[BC BucketCount] struct {
count BC
index int32
}
// String returns a string representation of a Bucket, using the usual // String returns a string representation of a Bucket, using the usual
// mathematical notation of '['/']' for inclusive bounds and '('/')' for // mathematical notation of '['/']' for inclusive bounds and '('/')' for
// non-inclusive bounds. // non-inclusive bounds.
@ -101,13 +108,12 @@ type baseBucketIterator[BC BucketCount, IBC InternalBucketCount] struct {
currIdx int32 // The actual bucket index. currIdx int32 // The actual bucket index.
} }
func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] { func (b *baseBucketIterator[BC, IBC]) At() Bucket[BC] {
return b.at(b.schema) return b.at(b.schema)
} }
// at is an internal version of the exported At to enable using a different // at is an internal version of the exported At to enable using a different schema.
// schema. func (b *baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
bucket := Bucket[BC]{ bucket := Bucket[BC]{
Count: BC(b.currCount), Count: BC(b.currCount),
Index: b.currIdx, Index: b.currIdx,
@ -124,6 +130,14 @@ func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
return bucket return bucket
} }
// strippedAt returns current strippedBucket (which lacks bucket bounds but is cheaper to compute).
func (b *baseBucketIterator[BC, IBC]) strippedAt() strippedBucket[BC] {
return strippedBucket[BC]{
count: BC(b.currCount),
index: b.currIdx,
}
}
// compactBuckets is a generic function used by both Histogram.Compact and // compactBuckets is a generic function used by both Histogram.Compact and
// FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are // FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are
// deltas. Set it to false if the buckets contain absolute counts. // deltas. Set it to false if the buckets contain absolute counts.

View file

@ -150,13 +150,15 @@ func (h *Histogram) ZeroBucket() Bucket[uint64] {
// PositiveBucketIterator returns a BucketIterator to iterate over all positive // PositiveBucketIterator returns a BucketIterator to iterate over all positive
// buckets in ascending order (starting next to the zero bucket and going up). // buckets in ascending order (starting next to the zero bucket and going up).
func (h *Histogram) PositiveBucketIterator() BucketIterator[uint64] { func (h *Histogram) PositiveBucketIterator() BucketIterator[uint64] {
return newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true) it := newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
return &it
} }
// NegativeBucketIterator returns a BucketIterator to iterate over all negative // NegativeBucketIterator returns a BucketIterator to iterate over all negative
// buckets in descending order (starting next to the zero bucket and going down). // buckets in descending order (starting next to the zero bucket and going down).
func (h *Histogram) NegativeBucketIterator() BucketIterator[uint64] { func (h *Histogram) NegativeBucketIterator() BucketIterator[uint64] {
return newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false) it := newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
return &it
} }
// CumulativeBucketIterator returns a BucketIterator to iterate over a // CumulativeBucketIterator returns a BucketIterator to iterate over a
@ -330,14 +332,14 @@ type regularBucketIterator struct {
baseBucketIterator[uint64, int64] baseBucketIterator[uint64, int64]
} }
func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) *regularBucketIterator { func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) regularBucketIterator {
i := baseBucketIterator[uint64, int64]{ i := baseBucketIterator[uint64, int64]{
schema: schema, schema: schema,
spans: spans, spans: spans,
buckets: buckets, buckets: buckets,
positive: positive, positive: positive,
} }
return &regularBucketIterator{i} return regularBucketIterator{i}
} }
func (r *regularBucketIterator) Next() bool { func (r *regularBucketIterator) Next() bool {