prometheus/model/histogram/generic.go
Björn Rabenstein dccfb9db4e
histogram: Remove code replication via generics (#11361)
* histogram: Simplify iterators

We don't really need currLower and currUpper and can calculate it when
needed (as already done for the floatBucketIterator). The calculation
is cheap, while keeping those extra variables around costs RAM
(potentially a lot with many iterators).

* histogram: Convert Bucket/FloatBucket to one generic type

* histogram: Move some bucket iterator code into generic base iterator

* histogram: Remove cumulative iterator for FloatHistogram

We added it in the past for completeness (Histogram has one), but it
has never been used. Plus, even the cumulative iterator for Histogram
is only there for test reasons.

We can always add it back, and then maybe even using generics.

Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-03 16:45:27 +05:30

328 lines
9.6 KiB
Go

// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package histogram
import (
"fmt"
"strings"
)
// BucketCount is a type constraint for the count in a bucket, which can be
// float64 (for type FloatHistogram) or uint64 (for type Histogram).
type BucketCount interface {
float64 | uint64
}
// internalBucketCount is used internally by Histogram and FloatHistogram. The
// difference to the BucketCount above is that Histogram internally uses deltas
// between buckets rather than absolute counts (while FloatHistogram uses
// absolute counts directly). Go type parameters don't allow type
// specialization. Therefore, where special treatment of deltas between buckets
// vs. absolute counts is important, this information has to be provided as a
// separate boolean parameter "deltaBuckets"
type internalBucketCount interface {
float64 | int64
}
// Bucket represents a bucket with lower and upper limit and the absolute count
// of samples in the bucket. It also specifies if each limit is inclusive or
// not. (Mathematically, inclusive limits create a closed interval, and
// non-inclusive limits an open interval.)
//
// To represent cumulative buckets, Lower is set to -Inf, and the Count is then
// cumulative (including the counts of all buckets for smaller values).
type Bucket[BC BucketCount] struct {
Lower, Upper float64
LowerInclusive, UpperInclusive bool
Count BC
// Index within schema. To easily compare buckets that share the same
// schema and sign (positive or negative). Irrelevant for the zero bucket.
Index int32
}
// String returns a string representation of a Bucket, using the usual
// mathematical notation of '['/']' for inclusive bounds and '('/')' for
// non-inclusive bounds.
func (b Bucket[BC]) String() string {
var sb strings.Builder
if b.LowerInclusive {
sb.WriteRune('[')
} else {
sb.WriteRune('(')
}
fmt.Fprintf(&sb, "%g,%g", b.Lower, b.Upper)
if b.UpperInclusive {
sb.WriteRune(']')
} else {
sb.WriteRune(')')
}
fmt.Fprintf(&sb, ":%v", b.Count)
return sb.String()
}
// BucketIterator iterates over the buckets of a Histogram, returning decoded
// buckets.
type BucketIterator[BC BucketCount] interface {
// Next advances the iterator by one.
Next() bool
// At returns the current bucket.
At() Bucket[BC]
}
// baseBucketIterator provides a struct that is shared by most BucketIterator
// implementations, together with an implementation of the At method. This
// iterator can be embedded in full implementations of BucketIterator to save on
// code replication.
type baseBucketIterator[BC BucketCount, IBC internalBucketCount] struct {
schema int32
spans []Span
buckets []IBC
positive bool // Whether this is for positive buckets.
spansIdx int // Current span within spans slice.
idxInSpan uint32 // Index in the current span. 0 <= idxInSpan < span.Length.
bucketsIdx int // Current bucket within buckets slice.
currCount IBC // Count in the current bucket.
currIdx int32 // The actual bucket index.
}
func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] {
bucket := Bucket[BC]{
Count: BC(b.currCount),
Index: b.currIdx,
}
if b.positive {
bucket.Upper = getBound(b.currIdx, b.schema)
bucket.Lower = getBound(b.currIdx-1, b.schema)
} else {
bucket.Lower = -getBound(b.currIdx, b.schema)
bucket.Upper = -getBound(b.currIdx-1, b.schema)
}
bucket.LowerInclusive = bucket.Lower < 0
bucket.UpperInclusive = bucket.Upper > 0
return bucket
}
// compactBuckets is a generic function used by both Histogram.Compact and
// FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are
// deltas. Set it to false if the buckets contain absolute counts.
func compactBuckets[IBC internalBucketCount](buckets []IBC, spans []Span, maxEmptyBuckets int, deltaBuckets bool) ([]IBC, []Span) {
// Fast path: If there are no empty buckets AND no offset in any span is
// <= maxEmptyBuckets AND no span has length 0, there is nothing to do and we can return
// immediately. We check that first because it's cheap and presumably
// common.
nothingToDo := true
var currentBucketAbsolute IBC
for _, bucket := range buckets {
if deltaBuckets {
currentBucketAbsolute += bucket
} else {
currentBucketAbsolute = bucket
}
if currentBucketAbsolute == 0 {
nothingToDo = false
break
}
}
if nothingToDo {
for _, span := range spans {
if int(span.Offset) <= maxEmptyBuckets || span.Length == 0 {
nothingToDo = false
break
}
}
if nothingToDo {
return buckets, spans
}
}
var iBucket, iSpan int
var posInSpan uint32
currentBucketAbsolute = 0
// Helper function.
emptyBucketsHere := func() int {
i := 0
abs := currentBucketAbsolute
for uint32(i)+posInSpan < spans[iSpan].Length && abs == 0 {
i++
if i+iBucket >= len(buckets) {
break
}
abs = buckets[i+iBucket]
}
return i
}
// Merge spans with zero-offset to avoid special cases later.
if len(spans) > 1 {
for i, span := range spans[1:] {
if span.Offset == 0 {
spans[iSpan].Length += span.Length
continue
}
iSpan++
if i+1 != iSpan {
spans[iSpan] = span
}
}
spans = spans[:iSpan+1]
iSpan = 0
}
// Merge spans with zero-length to avoid special cases later.
for i, span := range spans {
if span.Length == 0 {
if i+1 < len(spans) {
spans[i+1].Offset += span.Offset
}
continue
}
if i != iSpan {
spans[iSpan] = span
}
iSpan++
}
spans = spans[:iSpan]
iSpan = 0
// Cut out empty buckets from start and end of spans, no matter
// what. Also cut out empty buckets from the middle of a span but only
// if there are more than maxEmptyBuckets consecutive empty buckets.
for iBucket < len(buckets) {
if deltaBuckets {
currentBucketAbsolute += buckets[iBucket]
} else {
currentBucketAbsolute = buckets[iBucket]
}
if nEmpty := emptyBucketsHere(); nEmpty > 0 {
if posInSpan > 0 &&
nEmpty < int(spans[iSpan].Length-posInSpan) &&
nEmpty <= maxEmptyBuckets {
// The empty buckets are in the middle of a
// span, and there are few enough to not bother.
// Just fast-forward.
iBucket += nEmpty
if deltaBuckets {
currentBucketAbsolute = 0
}
posInSpan += uint32(nEmpty)
continue
}
// In all other cases, we cut out the empty buckets.
if deltaBuckets && iBucket+nEmpty < len(buckets) {
currentBucketAbsolute = -buckets[iBucket]
buckets[iBucket+nEmpty] += buckets[iBucket]
}
buckets = append(buckets[:iBucket], buckets[iBucket+nEmpty:]...)
if posInSpan == 0 {
// Start of span.
if nEmpty == int(spans[iSpan].Length) {
// The whole span is empty.
offset := spans[iSpan].Offset
spans = append(spans[:iSpan], spans[iSpan+1:]...)
if len(spans) > iSpan {
spans[iSpan].Offset += offset + int32(nEmpty)
}
continue
}
spans[iSpan].Length -= uint32(nEmpty)
spans[iSpan].Offset += int32(nEmpty)
continue
}
// It's in the middle or in the end of the span.
// Split the current span.
newSpan := Span{
Offset: int32(nEmpty),
Length: spans[iSpan].Length - posInSpan - uint32(nEmpty),
}
spans[iSpan].Length = posInSpan
// In any case, we have to split to the next span.
iSpan++
posInSpan = 0
if newSpan.Length == 0 {
// The span is empty, so we were already at the end of a span.
// We don't have to insert the new span, just adjust the next
// span's offset, if there is one.
if iSpan < len(spans) {
spans[iSpan].Offset += int32(nEmpty)
}
continue
}
// Insert the new span.
spans = append(spans, Span{})
if iSpan+1 < len(spans) {
copy(spans[iSpan+1:], spans[iSpan:])
}
spans[iSpan] = newSpan
continue
}
iBucket++
posInSpan++
if posInSpan >= spans[iSpan].Length {
posInSpan = 0
iSpan++
}
}
if maxEmptyBuckets == 0 || len(buckets) == 0 {
return buckets, spans
}
// Finally, check if any offsets between spans are small enough to merge
// the spans.
iBucket = int(spans[0].Length)
if deltaBuckets {
currentBucketAbsolute = 0
for _, bucket := range buckets[:iBucket] {
currentBucketAbsolute += bucket
}
}
iSpan = 1
for iSpan < len(spans) {
if int(spans[iSpan].Offset) > maxEmptyBuckets {
l := int(spans[iSpan].Length)
if deltaBuckets {
for _, bucket := range buckets[iBucket : iBucket+l] {
currentBucketAbsolute += bucket
}
}
iBucket += l
iSpan++
continue
}
// Merge span with previous one and insert empty buckets.
offset := int(spans[iSpan].Offset)
spans[iSpan-1].Length += uint32(offset) + spans[iSpan].Length
spans = append(spans[:iSpan], spans[iSpan+1:]...)
newBuckets := make([]IBC, len(buckets)+offset)
copy(newBuckets, buckets[:iBucket])
copy(newBuckets[iBucket+offset:], buckets[iBucket:])
if deltaBuckets {
newBuckets[iBucket] = -currentBucketAbsolute
newBuckets[iBucket+offset] += currentBucketAbsolute
}
iBucket += offset
buckets = newBuckets
currentBucketAbsolute = buckets[iBucket]
// Note that with many merges, it would be more efficient to
// first record all the chunks of empty buckets to insert and
// then do it in one go through all the buckets.
}
return buckets, spans
}