convert classic histograms to int nhcb where possible instead

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
This commit is contained in:
Jeanette Tan 2024-07-03 17:56:48 +08:00 committed by György Krajcsovits
parent 62e7f0438d
commit 4503145c8b
3 changed files with 94 additions and 25 deletions

View file

@ -534,10 +534,17 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
// Convert the collated classic histogram data into native histograms
// with custom bounds and append them to the storage.
for _, histogramWrapper := range histogramMap {
upperBounds, fhBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds)
upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true)
fhBase := hBase.ToFloat(nil)
samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs))
for t, histogram := range histogramWrapper.histogramByTs {
fh := convertnhcb.ConvertHistogramWrapper(histogram, upperBounds, fhBase)
h, fh := convertnhcb.ConvertHistogramWrapper(histogram, upperBounds, hBase, fhBase)
if fh == nil {
if err := h.Validate(); err != nil {
return err
}
fh = h.ToFloat(nil)
}
if err := fh.Validate(); err != nil {
return err
}

View file

@ -1812,15 +1812,24 @@ loop:
for b := range th.BucketCounts {
ub = append(ub, b)
}
upperBounds, fhBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(ub)
fh := convertnhcb.ConvertHistogramWrapper(th, upperBounds, fhBase)
if err := fh.Validate(); err != nil {
continue
}
upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(ub, false)
fhBase := hBase.ToFloat(nil)
h, fh := convertnhcb.ConvertHistogramWrapper(th, upperBounds, hBase, fhBase)
// fmt.Printf("FINAL lset: %s, timestamp: %v, val: %v\n", lset, defTime, fh)
_, err = app.AppendHistogram(0, lset, defTime, nil, fh)
if err != nil {
continue
if h != nil {
if err := h.Validate(); err != nil {
continue
}
if _, err = app.AppendHistogram(0, lset, defTime, h, nil); err != nil {
continue
}
} else if fh != nil {
if err := fh.Validate(); err != nil {
continue
}
if _, err = app.AppendHistogram(0, lset, defTime, nil, fh); err != nil {
continue
}
}
}
sl.cache.resetNhcb()

View file

@ -14,6 +14,7 @@
package convertnhcb
import (
"fmt"
"math"
"sort"
"strings"
@ -26,6 +27,7 @@ type TempHistogram struct {
BucketCounts map[float64]float64
Count float64
Sum float64
HasFloat bool
}
func NewTempHistogram() TempHistogram {
@ -34,15 +36,32 @@ func NewTempHistogram() TempHistogram {
}
}
func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) {
sort.Float64s(upperBounds0)
upperBounds := make([]float64, 0, len(upperBounds0))
prevLE := math.Inf(-1)
for _, le := range upperBounds0 {
if le != prevLE { // deduplicate
upperBounds = append(upperBounds, le)
prevLE = le
// getIntBucketCounts returns a copy of h.BucketCounts with every count
// converted to int64, keyed by the same upper bound (le).
//
// It returns a nil map and an error as soon as it meets a count that cannot be
// represented exactly as an int64: a fractional value, NaN, ±Inf, or a value
// outside the int64 range. The range is checked explicitly before converting
// because the result of converting an out-of-range float64 to int64 is
// implementation-dependent in Go; a round-trip comparison alone can accept
// 2^63 on platforms whose conversion saturates.
func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) {
	// 2^63 is exactly representable as a float64; valid values lie in [-2^63, 2^63).
	const int64Limit = 1 << 63

	bucketCounts := make(map[float64]int64, len(h.BucketCounts))
	for le, count := range h.BucketCounts {
		// NaN never equals its truncation, so it is rejected here as well.
		if count != math.Trunc(count) {
			return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le)
		}
		if count < -int64Limit || count >= int64Limit {
			return nil, fmt.Errorf("bucket count %f for le %g is out of int64 range", count, le)
		}
		bucketCounts[le] = int64(count)
	}
	return bucketCounts, nil
}
// ProcessUpperBoundsAndCreateBaseHistogram sorts upperBounds0 and builds an
// empty integer histogram template that uses the custom-buckets schema and has
// one positive bucket per returned upper bound.
//
// upperBounds0 is sorted in place, so the caller's slice is modified. When
// needsDedup is true a new slice with repeated bounds removed is returned;
// otherwise the sorted input slice itself is returned.
//
// NOTE(review): the input must be non-empty. The last element is indexed
// below without a length check, so an empty slice panics.
// NOTE(review): the dedup loop starts with prevLE = -Inf, so a -Inf bound in
// the input is dropped rather than kept. Confirm this is intended.
func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) {
// Sorting first makes equal bounds adjacent, which the dedup loop relies on.
sort.Float64s(upperBounds0)
var upperBounds []float64
if needsDedup {
upperBounds = make([]float64, 0, len(upperBounds0))
prevLE := math.Inf(-1)
for _, le := range upperBounds0 {
if le != prevLE {
upperBounds = append(upperBounds, le)
prevLE = le
}
}
} else {
// No copy is made: the returned slice aliases the caller's slice.
upperBounds = upperBounds0
}
var customBounds []float64
// The +Inf bound gets special handling; the branch body is not visible in this view.
if upperBounds[len(upperBounds)-1] == math.Inf(1) {
@ -50,23 +69,57 @@ func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64
} else {
customBounds = upperBounds
}
return upperBounds, &histogram.FloatHistogram{
return upperBounds, &histogram.Histogram{
Count: 0,
Sum: 0,
Schema: histogram.CustomBucketsSchema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: uint32(len(upperBounds))},
},
PositiveBuckets: make([]float64, len(upperBounds)),
PositiveBuckets: make([]int64, len(upperBounds)),
CustomValues: customBounds,
}
}
func ConvertHistogramWrapper(hist TempHistogram, upperBounds []float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram {
// ConvertHistogramWrapper converts the collated classic histogram data into a
// native histogram with custom buckets, preferring the integer representation.
//
// Exactly one of the two results is non-nil:
//   - an integer histogram derived from hBase when every bucket count and the
//     overall Count can be represented exactly as integers;
//   - otherwise a float histogram derived from fhBase.
//
// upperBounds is the list of bucket upper bounds that hBase and fhBase were
// created for.
func ConvertHistogramWrapper(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) {
	// The integer conversion stores Count via int64 and then uint64. A
	// fractional, negative, NaN/Inf or >= 2^63 Count would be truncated or
	// wrapped there, so such histograms must keep the float representation.
	if c := histogram.Count; c != math.Trunc(c) || c < 0 || c >= 1<<63 {
		return nil, convertFloatHistogramWrapper(histogram, upperBounds, histogram.BucketCounts, fhBase)
	}

	intBucketCounts, err := histogram.getIntBucketCounts()
	if err != nil {
		// At least one bucket count is not an integer: fall back to floats.
		return nil, convertFloatHistogramWrapper(histogram, upperBounds, histogram.BucketCounts, fhBase)
	}
	return convertIntHistogramWrapper(histogram, upperBounds, intBucketCounts, hBase), nil
}
// convertIntHistogramWrapper fills a copy of hBase with the data of the given
// classic histogram and returns the compacted result.
//
// bucketCounts holds the cumulative count per upper bound; a bound missing
// from the map is treated as a cumulative count of zero. The cumulative counts
// are first turned into per-bucket counts, and those are then delta-encoded
// into PositiveBuckets (each entry is the difference to the previous bucket).
// Count is taken from histogram.Count when that is non-zero, otherwise from
// the sum of the per-bucket counts.
func convertIntHistogramWrapper(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram {
	result := hBase.Copy()

	// Step 1: cumulative counts -> absolute per-bucket counts.
	perBucket := make([]int64, len(result.PositiveBuckets))
	var (
		cumulative int64
		observed   int64
	)
	for idx, bound := range upperBounds {
		// A missing key yields the zero value, i.e. a cumulative count of 0.
		next := bucketCounts[bound]
		perBucket[idx] = next - cumulative
		observed += perBucket[idx]
		cumulative = next
	}

	// Step 2: absolute per-bucket counts -> deltas between neighbours.
	result.PositiveBuckets[0] = perBucket[0]
	for idx := 1; idx < len(result.PositiveBuckets); idx++ {
		result.PositiveBuckets[idx] = perBucket[idx] - perBucket[idx-1]
	}

	// An explicitly provided count takes precedence over the computed one.
	if histogram.Count != 0 {
		observed = int64(histogram.Count)
	}
	result.Count = uint64(observed)
	result.Sum = histogram.Sum

	return result.Compact(0)
}
func convertFloatHistogramWrapper(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram {
fh := fhBase.Copy()
var prevCount, total float64
for i, le := range upperBounds {
currCount, exists := hist.BucketCounts[le]
currCount, exists := bucketCounts[le]
if !exists {
currCount = 0
}
@ -75,9 +128,9 @@ func ConvertHistogramWrapper(hist TempHistogram, upperBounds []float64, fhBase *
total += count
prevCount = currCount
}
fh.Sum = hist.Sum
if hist.Count != 0 {
total = hist.Count
fh.Sum = histogram.Sum
if histogram.Count != 0 {
total = histogram.Count
}
fh.Count = total
return fh.Compact(0)