model: move classic NHCB conversion into its own file (#15156)

* model: move classic to NHCB conversion into its own file

In preparation for #14978.

Author: Jeanette Tan <jeanette.tan@grafana.com>  2024-07-03 11:56:48
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
Co-authored-by: Jeanette Tan <jeanette.tan@grafana.com>
Co-authored-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* Better naming from review comment

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* Add doc strings.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

---------

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
Co-authored-by: Jeanette Tan <jeanette.tan@grafana.com>
This commit is contained in:
George Krajcsovits 2024-10-14 14:23:11 +02:00 committed by GitHub
parent c3b8582cf4
commit 522149a2ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 206 additions and 79 deletions

View file

@ -39,6 +39,7 @@ import (
"github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/promql/parser/posrange"
"github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/almost" "github.com/prometheus/prometheus/util/almost"
"github.com/prometheus/prometheus/util/convertnhcb"
"github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil" "github.com/prometheus/prometheus/util/testutil"
) )
@ -479,43 +480,22 @@ func (cmd *loadCmd) append(a storage.Appender) error {
return nil return nil
} }
func getHistogramMetricBase(m labels.Labels, suffix string) (labels.Labels, uint64) {
mName := m.Get(labels.MetricName)
baseM := labels.NewBuilder(m).
Set(labels.MetricName, strings.TrimSuffix(mName, suffix)).
Del(labels.BucketLabel).
Labels()
hash := baseM.Hash()
return baseM, hash
}
type tempHistogramWrapper struct { type tempHistogramWrapper struct {
metric labels.Labels metric labels.Labels
upperBounds []float64 upperBounds []float64
histogramByTs map[int64]tempHistogram histogramByTs map[int64]convertnhcb.TempHistogram
} }
func newTempHistogramWrapper() tempHistogramWrapper { func newTempHistogramWrapper() tempHistogramWrapper {
return tempHistogramWrapper{ return tempHistogramWrapper{
upperBounds: []float64{}, upperBounds: []float64{},
histogramByTs: map[int64]tempHistogram{}, histogramByTs: map[int64]convertnhcb.TempHistogram{},
} }
} }
type tempHistogram struct { func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*convertnhcb.TempHistogram, float64)) {
bucketCounts map[float64]float64 m2 := convertnhcb.GetHistogramMetricBase(m, suffix)
count float64 m2hash := m2.Hash()
sum float64
}
func newTempHistogram() tempHistogram {
return tempHistogram{
bucketCounts: map[float64]float64{},
}
}
func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*tempHistogram, float64)) {
m2, m2hash := getHistogramMetricBase(m, suffix)
histogramWrapper, exists := histogramMap[m2hash] histogramWrapper, exists := histogramMap[m2hash]
if !exists { if !exists {
histogramWrapper = newTempHistogramWrapper() histogramWrapper = newTempHistogramWrapper()
@ -530,7 +510,7 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap
} }
histogram, exists := histogramWrapper.histogramByTs[s.T] histogram, exists := histogramWrapper.histogramByTs[s.T]
if !exists { if !exists {
histogram = newTempHistogram() histogram = convertnhcb.NewTempHistogram()
} }
updateHistogram(&histogram, s.F) updateHistogram(&histogram, s.F)
histogramWrapper.histogramByTs[s.T] = histogram histogramWrapper.histogramByTs[s.T] = histogram
@ -538,34 +518,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap
histogramMap[m2hash] = histogramWrapper histogramMap[m2hash] = histogramWrapper
} }
func processUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) {
sort.Float64s(upperBounds0)
upperBounds := make([]float64, 0, len(upperBounds0))
prevLE := math.Inf(-1)
for _, le := range upperBounds0 {
if le != prevLE { // deduplicate
upperBounds = append(upperBounds, le)
prevLE = le
}
}
var customBounds []float64
if upperBounds[len(upperBounds)-1] == math.Inf(1) {
customBounds = upperBounds[:len(upperBounds)-1]
} else {
customBounds = upperBounds
}
return upperBounds, &histogram.FloatHistogram{
Count: 0,
Sum: 0,
Schema: histogram.CustomBucketsSchema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: uint32(len(upperBounds))},
},
PositiveBuckets: make([]float64, len(upperBounds)),
CustomValues: customBounds,
}
}
// If classic histograms are defined, convert them into native histograms with custom // If classic histograms are defined, convert them into native histograms with custom
// bounds and append the defined time series to the storage. // bounds and append the defined time series to the storage.
func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
@ -584,16 +536,16 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
} }
processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) { processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) {
histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le) histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le)
}, func(histogram *tempHistogram, f float64) { }, func(histogram *convertnhcb.TempHistogram, f float64) {
histogram.bucketCounts[le] = f histogram.BucketCounts[le] = f
}) })
case strings.HasSuffix(mName, "_count"): case strings.HasSuffix(mName, "_count"):
processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) {
histogram.count = f histogram.Count = f
}) })
case strings.HasSuffix(mName, "_sum"): case strings.HasSuffix(mName, "_sum"):
processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) {
histogram.sum = f histogram.Sum = f
}) })
} }
} }
@ -601,30 +553,21 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
// Convert the collated classic histogram data into native histograms // Convert the collated classic histogram data into native histograms
// with custom bounds and append them to the storage. // with custom bounds and append them to the storage.
for _, histogramWrapper := range histogramMap { for _, histogramWrapper := range histogramMap {
upperBounds, fhBase := processUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds) upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true)
fhBase := hBase.ToFloat(nil)
samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs))
for t, histogram := range histogramWrapper.histogramByTs { for t, histogram := range histogramWrapper.histogramByTs {
fh := fhBase.Copy() h, fh := convertnhcb.NewHistogram(histogram, upperBounds, hBase, fhBase)
var prevCount, total float64 if fh == nil {
for i, le := range upperBounds { if err := h.Validate(); err != nil {
currCount, exists := histogram.bucketCounts[le] return err
if !exists {
currCount = 0
} }
count := currCount - prevCount fh = h.ToFloat(nil)
fh.PositiveBuckets[i] = count
total += count
prevCount = currCount
} }
fh.Sum = histogram.sum if err := fh.Validate(); err != nil {
if histogram.count != 0 {
total = histogram.count
}
fh.Count = total
s := promql.Sample{T: t, H: fh.Compact(0)}
if err := s.H.Validate(); err != nil {
return err return err
} }
s := promql.Sample{T: t, H: fh}
samples = append(samples, s) samples = append(samples, s)
} }
sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T }) sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T })

View file

@ -0,0 +1,184 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package convertnhcb
import (
"fmt"
"math"
"sort"
"strings"
"github.com/grafana/regexp"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
)
var histogramNameSuffixReplacements = []struct {
pattern *regexp.Regexp
repl string
}{
{
pattern: regexp.MustCompile(`_bucket$`),
repl: "",
},
{
pattern: regexp.MustCompile(`_sum$`),
repl: "",
},
{
pattern: regexp.MustCompile(`_count$`),
repl: "",
},
}
// TempHistogram is used to collect information about classic histogram
// samples incrementally before creating a histogram.Histogram or
// histogram.FloatHistogram based on the values collected.
type TempHistogram struct {
BucketCounts map[float64]float64
Count float64
Sum float64
HasFloat bool
}
// NewTempHistogram creates a new TempHistogram to
// collect information about classic histogram samples.
func NewTempHistogram() TempHistogram {
return TempHistogram{
BucketCounts: map[float64]float64{},
}
}
func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) {
bucketCounts := map[float64]int64{}
for le, count := range h.BucketCounts {
intCount := int64(math.Round(count))
if float64(intCount) != count {
return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le)
}
bucketCounts[le] = intCount
}
return bucketCounts, nil
}
// ProcessUpperBoundsAndCreateBaseHistogram prepares an integer native
// histogram with custom buckets based on the provided upper bounds.
// Everything is set except the bucket counts.
// The sorted upper bounds are also returned.
func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) {
sort.Float64s(upperBounds0)
var upperBounds []float64
if needsDedup {
upperBounds = make([]float64, 0, len(upperBounds0))
prevLE := math.Inf(-1)
for _, le := range upperBounds0 {
if le != prevLE {
upperBounds = append(upperBounds, le)
prevLE = le
}
}
} else {
upperBounds = upperBounds0
}
var customBounds []float64
if upperBounds[len(upperBounds)-1] == math.Inf(1) {
customBounds = upperBounds[:len(upperBounds)-1]
} else {
customBounds = upperBounds
}
return upperBounds, &histogram.Histogram{
Count: 0,
Sum: 0,
Schema: histogram.CustomBucketsSchema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: uint32(len(upperBounds))},
},
PositiveBuckets: make([]int64, len(upperBounds)),
CustomValues: customBounds,
}
}
// NewHistogram fills the bucket counts in the provided histogram.Histogram
// or histogram.FloatHistogram based on the provided temporary histogram and
// upper bounds.
func NewHistogram(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) {
intBucketCounts, err := histogram.getIntBucketCounts()
if err != nil {
return nil, newFloatHistogram(histogram, upperBounds, histogram.BucketCounts, fhBase)
}
return newIntegerHistogram(histogram, upperBounds, intBucketCounts, hBase), nil
}
func newIntegerHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram {
h := hBase.Copy()
absBucketCounts := make([]int64, len(h.PositiveBuckets))
var prevCount, total int64
for i, le := range upperBounds {
currCount, exists := bucketCounts[le]
if !exists {
currCount = 0
}
count := currCount - prevCount
absBucketCounts[i] = count
total += count
prevCount = currCount
}
h.PositiveBuckets[0] = absBucketCounts[0]
for i := 1; i < len(h.PositiveBuckets); i++ {
h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1]
}
h.Sum = histogram.Sum
if histogram.Count != 0 {
total = int64(histogram.Count)
}
h.Count = uint64(total)
return h.Compact(0)
}
func newFloatHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram {
fh := fhBase.Copy()
var prevCount, total float64
for i, le := range upperBounds {
currCount, exists := bucketCounts[le]
if !exists {
currCount = 0
}
count := currCount - prevCount
fh.PositiveBuckets[i] = count
total += count
prevCount = currCount
}
fh.Sum = histogram.Sum
if histogram.Count != 0 {
total = histogram.Count
}
fh.Count = total
return fh.Compact(0)
}
func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels {
mName := m.Get(labels.MetricName)
return labels.NewBuilder(m).
Set(labels.MetricName, strings.TrimSuffix(mName, suffix)).
Del(labels.BucketLabel).
Labels()
}
func GetHistogramMetricBaseName(s string) string {
for _, rep := range histogramNameSuffixReplacements {
s = rep.pattern.ReplaceAllString(s, rep.repl)
}
return s
}