model: move classic NHCB conversion into its own file (#15156)

* model: move classic to NHCB conversion into its own file

In preparation for #14978.

Author: Jeanette Tan <jeanette.tan@grafana.com>  2024-07-03 11:56:48
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
Co-authored-by: Jeanette Tan <jeanette.tan@grafana.com>
Co-authored-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* Better naming from review comment

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* Add doc strings.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

---------

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
Co-authored-by: Jeanette Tan <jeanette.tan@grafana.com>
This commit is contained in:
George Krajcsovits 2024-10-14 14:23:11 +02:00 committed by GitHub
parent c3b8582cf4
commit 522149a2ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 206 additions and 79 deletions

View file

@ -39,6 +39,7 @@ import (
"github.com/prometheus/prometheus/promql/parser/posrange"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/almost"
"github.com/prometheus/prometheus/util/convertnhcb"
"github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil"
)
@ -479,43 +480,22 @@ func (cmd *loadCmd) append(a storage.Appender) error {
return nil
}
func getHistogramMetricBase(m labels.Labels, suffix string) (labels.Labels, uint64) {
mName := m.Get(labels.MetricName)
baseM := labels.NewBuilder(m).
Set(labels.MetricName, strings.TrimSuffix(mName, suffix)).
Del(labels.BucketLabel).
Labels()
hash := baseM.Hash()
return baseM, hash
}
type tempHistogramWrapper struct {
metric labels.Labels
upperBounds []float64
histogramByTs map[int64]tempHistogram
histogramByTs map[int64]convertnhcb.TempHistogram
}
func newTempHistogramWrapper() tempHistogramWrapper {
return tempHistogramWrapper{
upperBounds: []float64{},
histogramByTs: map[int64]tempHistogram{},
histogramByTs: map[int64]convertnhcb.TempHistogram{},
}
}
type tempHistogram struct {
bucketCounts map[float64]float64
count float64
sum float64
}
func newTempHistogram() tempHistogram {
return tempHistogram{
bucketCounts: map[float64]float64{},
}
}
func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*tempHistogram, float64)) {
m2, m2hash := getHistogramMetricBase(m, suffix)
func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*convertnhcb.TempHistogram, float64)) {
m2 := convertnhcb.GetHistogramMetricBase(m, suffix)
m2hash := m2.Hash()
histogramWrapper, exists := histogramMap[m2hash]
if !exists {
histogramWrapper = newTempHistogramWrapper()
@ -530,7 +510,7 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap
}
histogram, exists := histogramWrapper.histogramByTs[s.T]
if !exists {
histogram = newTempHistogram()
histogram = convertnhcb.NewTempHistogram()
}
updateHistogram(&histogram, s.F)
histogramWrapper.histogramByTs[s.T] = histogram
@ -538,34 +518,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap
histogramMap[m2hash] = histogramWrapper
}
func processUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) {
sort.Float64s(upperBounds0)
upperBounds := make([]float64, 0, len(upperBounds0))
prevLE := math.Inf(-1)
for _, le := range upperBounds0 {
if le != prevLE { // deduplicate
upperBounds = append(upperBounds, le)
prevLE = le
}
}
var customBounds []float64
if upperBounds[len(upperBounds)-1] == math.Inf(1) {
customBounds = upperBounds[:len(upperBounds)-1]
} else {
customBounds = upperBounds
}
return upperBounds, &histogram.FloatHistogram{
Count: 0,
Sum: 0,
Schema: histogram.CustomBucketsSchema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: uint32(len(upperBounds))},
},
PositiveBuckets: make([]float64, len(upperBounds)),
CustomValues: customBounds,
}
}
// If classic histograms are defined, convert them into native histograms with custom
// bounds and append the defined time series to the storage.
func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
@ -584,16 +536,16 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
}
processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) {
histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le)
}, func(histogram *tempHistogram, f float64) {
histogram.bucketCounts[le] = f
}, func(histogram *convertnhcb.TempHistogram, f float64) {
histogram.BucketCounts[le] = f
})
case strings.HasSuffix(mName, "_count"):
processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) {
histogram.count = f
processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) {
histogram.Count = f
})
case strings.HasSuffix(mName, "_sum"):
processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) {
histogram.sum = f
processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) {
histogram.Sum = f
})
}
}
@ -601,30 +553,21 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error {
// Convert the collated classic histogram data into native histograms
// with custom bounds and append them to the storage.
for _, histogramWrapper := range histogramMap {
upperBounds, fhBase := processUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds)
upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true)
fhBase := hBase.ToFloat(nil)
samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs))
for t, histogram := range histogramWrapper.histogramByTs {
fh := fhBase.Copy()
var prevCount, total float64
for i, le := range upperBounds {
currCount, exists := histogram.bucketCounts[le]
if !exists {
currCount = 0
h, fh := convertnhcb.NewHistogram(histogram, upperBounds, hBase, fhBase)
if fh == nil {
if err := h.Validate(); err != nil {
return err
}
count := currCount - prevCount
fh.PositiveBuckets[i] = count
total += count
prevCount = currCount
fh = h.ToFloat(nil)
}
fh.Sum = histogram.sum
if histogram.count != 0 {
total = histogram.count
}
fh.Count = total
s := promql.Sample{T: t, H: fh.Compact(0)}
if err := s.H.Validate(); err != nil {
if err := fh.Validate(); err != nil {
return err
}
s := promql.Sample{T: t, H: fh}
samples = append(samples, s)
}
sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T })

View file

@ -0,0 +1,184 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package convertnhcb
import (
"fmt"
"math"
"sort"
"strings"
"github.com/grafana/regexp"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
)
var histogramNameSuffixReplacements = []struct {
pattern *regexp.Regexp
repl string
}{
{
pattern: regexp.MustCompile(`_bucket$`),
repl: "",
},
{
pattern: regexp.MustCompile(`_sum$`),
repl: "",
},
{
pattern: regexp.MustCompile(`_count$`),
repl: "",
},
}
// TempHistogram is used to collect information about classic histogram
// samples incrementally before creating a histogram.Histogram or
// histogram.FloatHistogram based on the values collected.
type TempHistogram struct {
BucketCounts map[float64]float64
Count float64
Sum float64
HasFloat bool
}
// NewTempHistogram creates a new TempHistogram to
// collect information about classic histogram samples.
func NewTempHistogram() TempHistogram {
return TempHistogram{
BucketCounts: map[float64]float64{},
}
}
func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) {
bucketCounts := map[float64]int64{}
for le, count := range h.BucketCounts {
intCount := int64(math.Round(count))
if float64(intCount) != count {
return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le)
}
bucketCounts[le] = intCount
}
return bucketCounts, nil
}
// ProcessUpperBoundsAndCreateBaseHistogram prepares an integer native
// histogram with custom buckets based on the provided upper bounds.
// Everything is set except the bucket counts.
// The sorted upper bounds are also returned.
func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) {
sort.Float64s(upperBounds0)
var upperBounds []float64
if needsDedup {
upperBounds = make([]float64, 0, len(upperBounds0))
prevLE := math.Inf(-1)
for _, le := range upperBounds0 {
if le != prevLE {
upperBounds = append(upperBounds, le)
prevLE = le
}
}
} else {
upperBounds = upperBounds0
}
var customBounds []float64
if upperBounds[len(upperBounds)-1] == math.Inf(1) {
customBounds = upperBounds[:len(upperBounds)-1]
} else {
customBounds = upperBounds
}
return upperBounds, &histogram.Histogram{
Count: 0,
Sum: 0,
Schema: histogram.CustomBucketsSchema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: uint32(len(upperBounds))},
},
PositiveBuckets: make([]int64, len(upperBounds)),
CustomValues: customBounds,
}
}
// NewHistogram fills the bucket counts in the provided histogram.Histogram
// or histogram.FloatHistogram based on the provided temporary histogram and
// upper bounds.
func NewHistogram(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) {
intBucketCounts, err := histogram.getIntBucketCounts()
if err != nil {
return nil, newFloatHistogram(histogram, upperBounds, histogram.BucketCounts, fhBase)
}
return newIntegerHistogram(histogram, upperBounds, intBucketCounts, hBase), nil
}
func newIntegerHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram {
h := hBase.Copy()
absBucketCounts := make([]int64, len(h.PositiveBuckets))
var prevCount, total int64
for i, le := range upperBounds {
currCount, exists := bucketCounts[le]
if !exists {
currCount = 0
}
count := currCount - prevCount
absBucketCounts[i] = count
total += count
prevCount = currCount
}
h.PositiveBuckets[0] = absBucketCounts[0]
for i := 1; i < len(h.PositiveBuckets); i++ {
h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1]
}
h.Sum = histogram.Sum
if histogram.Count != 0 {
total = int64(histogram.Count)
}
h.Count = uint64(total)
return h.Compact(0)
}
func newFloatHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram {
fh := fhBase.Copy()
var prevCount, total float64
for i, le := range upperBounds {
currCount, exists := bucketCounts[le]
if !exists {
currCount = 0
}
count := currCount - prevCount
fh.PositiveBuckets[i] = count
total += count
prevCount = currCount
}
fh.Sum = histogram.Sum
if histogram.Count != 0 {
total = histogram.Count
}
fh.Count = total
return fh.Compact(0)
}
func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels {
mName := m.Get(labels.MetricName)
return labels.NewBuilder(m).
Set(labels.MetricName, strings.TrimSuffix(mName, suffix)).
Del(labels.BucketLabel).
Labels()
}
func GetHistogramMetricBaseName(s string) string {
for _, rep := range histogramNameSuffixReplacements {
s = rep.pattern.ReplaceAllString(s, rep.repl)
}
return s
}