prometheus/tsdb/chunkenc/histogram_meta.go
George Krajcsovits 00ab05c3b9
Native histograms: fix spurios counter reset when merging recoded chunk to normal chunk (#14513)
* chunkenc: allow missing empty buckets on histogram append

Allow appending to chunks when the histogram to be added is missing
some buckets, but the missing buckets are empty in the chunk.
For example bucket at index 5 is present in the chunk, but its value
is 0 and the new histogram doesn't have a bucket at index 5.

This fixes an issue of merging chunks where one chunk was recoded to
retroactively have some empty buckets in all the histograms and we are
merging in a histogram that doesn't have the empty bucket (because it
was not recoded yet).

The operation alters the histogram that is being added, however this has
already been the case when appending gauge histograms. Thus the test
TestHistogramSeriesToChunks in storage package is changed to explicitly
test what happened to the appended histogram - Compact(0) call is removed.

The new expandIntSpansAndBuckets and expandFloatSpansAndBuckets functions
are a merge of expandSpansForward and counterResetInAnyBucket and
counterResetInAnyFloatBucket.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
2024-08-01 09:22:32 +02:00

580 lines
17 KiB
Go

// Copyright 2021 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"math"
"github.com/prometheus/prometheus/model/histogram"
)
func writeHistogramChunkLayout(
b *bstream, schema int32, zeroThreshold float64,
positiveSpans, negativeSpans []histogram.Span, customValues []float64,
) {
putZeroThreshold(b, zeroThreshold)
putVarbitInt(b, int64(schema))
putHistogramChunkLayoutSpans(b, positiveSpans)
putHistogramChunkLayoutSpans(b, negativeSpans)
if histogram.IsCustomBucketsSchema(schema) {
putHistogramChunkLayoutCustomBounds(b, customValues)
}
}
func readHistogramChunkLayout(b *bstreamReader) (
schema int32, zeroThreshold float64,
positiveSpans, negativeSpans []histogram.Span,
customValues []float64,
err error,
) {
zeroThreshold, err = readZeroThreshold(b)
if err != nil {
return
}
v, err := readVarbitInt(b)
if err != nil {
return
}
schema = int32(v)
positiveSpans, err = readHistogramChunkLayoutSpans(b)
if err != nil {
return
}
negativeSpans, err = readHistogramChunkLayoutSpans(b)
if err != nil {
return
}
if histogram.IsCustomBucketsSchema(schema) {
customValues, err = readHistogramChunkLayoutCustomBounds(b)
if err != nil {
return
}
}
return
}
func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) {
putVarbitUint(b, uint64(len(spans)))
for _, s := range spans {
putVarbitUint(b, uint64(s.Length))
putVarbitInt(b, int64(s.Offset))
}
}
func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) {
var spans []histogram.Span
num, err := readVarbitUint(b)
if err != nil {
return nil, err
}
for i := 0; i < int(num); i++ {
length, err := readVarbitUint(b)
if err != nil {
return nil, err
}
offset, err := readVarbitInt(b)
if err != nil {
return nil, err
}
spans = append(spans, histogram.Span{
Length: uint32(length),
Offset: int32(offset),
})
}
return spans, nil
}
func putHistogramChunkLayoutCustomBounds(b *bstream, customValues []float64) {
putVarbitUint(b, uint64(len(customValues)))
for _, bound := range customValues {
putCustomBound(b, bound)
}
}
func readHistogramChunkLayoutCustomBounds(b *bstreamReader) ([]float64, error) {
var customValues []float64
num, err := readVarbitUint(b)
if err != nil {
return nil, err
}
for i := 0; i < int(num); i++ {
bound, err := readCustomBound(b)
if err != nil {
return nil, err
}
customValues = append(customValues, bound)
}
return customValues, nil
}
// putZeroThreshold writes the zero threshold to the bstream. It stores typical
// values in just one byte, but needs 9 bytes for other values. In detail:
// - If the threshold is 0, store a single zero byte.
// - If the threshold is a power of 2 between (and including) 2^-243 and 2^10,
// take the exponent from the IEEE 754 representation of the threshold, which
// covers a range between (and including) -242 and 11. (2^-243 is 0.5*2^-242
// in IEEE 754 representation, and 2^10 is 0.5*2^11.) Add 243 to the exponent
// and store the result (which will be between 1 and 254) as a single
// byte. Note that small powers of two are preferred values for the zero
// threshold. The default value for the zero threshold is 2^-128 (or
// 0.5*2^-127 in IEEE 754 representation) and will therefore be encoded as a
// single byte (with value 116).
// - In all other cases, store 255 as a single byte, followed by the 8 bytes of
// the threshold as a float64, i.e. taking 9 bytes in total.
func putZeroThreshold(b *bstream, threshold float64) {
if threshold == 0 {
b.writeByte(0)
return
}
frac, exp := math.Frexp(threshold)
if frac != 0.5 || exp < -242 || exp > 11 {
b.writeByte(255)
b.writeBits(math.Float64bits(threshold), 64)
return
}
b.writeByte(byte(exp + 243))
}
// readZeroThreshold reads the zero threshold written with putZeroThreshold.
func readZeroThreshold(br *bstreamReader) (float64, error) {
b, err := br.ReadByte()
if err != nil {
return 0, err
}
switch b {
case 0:
return 0, nil
case 255:
v, err := br.readBits(64)
if err != nil {
return 0, err
}
return math.Float64frombits(v), nil
default:
return math.Ldexp(0.5, int(b)-243), nil
}
}
// isWholeWhenMultiplied checks to see if the number when multiplied by 1000 can
// be converted into an integer without losing precision.
func isWholeWhenMultiplied(in float64) bool {
i := uint(math.Round(in * 1000))
out := float64(i) / 1000
return in == out
}
// putCustomBound writes a custom bound to the bstream. It stores values from
// 0 to 33554.430 (inclusive) that are multiples of 0.001 in unsigned varbit
// encoding of up to 4 bytes, but needs 1 bit + 8 bytes for other values like
// negative numbers, numbers greater than 33554.430, or numbers that are not
// a multiple of 0.001, on the assumption that they are less common. In detail:
// - Multiply the bound by 1000, without rounding.
// - If the multiplied bound is >= 0, <= 33554430 and a whole number,
// add 1 and store it in unsigned varbit encoding. All these numbers are
// greater than 0, so the leading bit of the varbit is always 1!
// - Otherwise, store a 0 bit, followed by the 8 bytes of the original
// bound as a float64.
//
// When reading the values, we can first decode a value as unsigned varbit,
// if it's 0, then we read the next 8 bytes as a float64, otherwise
// we can convert the value to a float64 by subtracting 1 and dividing by 1000.
func putCustomBound(b *bstream, f float64) {
tf := f * 1000
// 33554431-1 comes from the maximum that can be stored in a varbit in 4
// bytes, other values are stored in 8 bytes anyway.
if tf < 0 || tf > 33554430 || !isWholeWhenMultiplied(f) {
b.writeBit(zero)
b.writeBits(math.Float64bits(f), 64)
return
}
putVarbitUint(b, uint64(math.Round(tf))+1)
}
// readCustomBound reads the custom bound written with putCustomBound.
func readCustomBound(br *bstreamReader) (float64, error) {
b, err := readVarbitUint(br)
if err != nil {
return 0, err
}
switch b {
case 0:
v, err := br.readBits(64)
if err != nil {
return 0, err
}
return math.Float64frombits(v), nil
default:
return float64(b-1) / 1000, nil
}
}
type bucketIterator struct {
spans []histogram.Span
span int // Span position of last yielded bucket.
bucket int // Bucket position within span of last yielded bucket.
idx int // Bucket index (globally across all spans) of last yielded bucket.
}
func newBucketIterator(spans []histogram.Span) *bucketIterator {
b := bucketIterator{
spans: spans,
span: 0,
bucket: -1,
idx: -1,
}
if len(spans) > 0 {
b.idx += int(spans[0].Offset)
}
return &b
}
func (b *bucketIterator) Next() (int, bool) {
// We're already out of bounds.
if b.span >= len(b.spans) {
return 0, false
}
if b.bucket < int(b.spans[b.span].Length)-1 { // Try to move within same span.
b.bucket++
b.idx++
return b.idx, true
}
for b.span < len(b.spans)-1 { // Try to move from one span to the next.
b.span++
b.idx += int(b.spans[b.span].Offset + 1)
b.bucket = 0
if b.spans[b.span].Length == 0 {
b.idx--
continue
}
return b.idx, true
}
// We're out of options.
return 0, false
}
// An Insert describes how many new buckets have to be inserted before
// processing the pos'th bucket from the original slice.
type Insert struct {
pos int
num int
}
// Deprecated: expandSpansForward, use expandIntSpansAndBuckets or
// expandFloatSpansAndBuckets instead.
// expandSpansForward is left here for reference.
// expandSpansForward returns the inserts to expand the bucket spans 'a' so that
// they match the spans in 'b'. 'b' must cover the same or more buckets than
// 'a', otherwise the function will return false.
//
// Example:
//
// Let's say the old buckets look like this:
//
// span syntax: [offset, length]
// spans : [ 0 , 2 ] [2,1] [ 3 , 2 ] [3,1] [1,1]
// bucket idx : [0] [1] 2 3 [4] 5 6 7 [8] [9] 10 11 12 [13] 14 [15]
// raw values 6 3 3 2 4 5 1
// deltas 6 -3 0 -1 2 1 -4
//
// But now we introduce a new bucket layout. (Carefully chosen example where we
// have a span appended, one unchanged[*], one prepended, and two merge - in
// that order.)
//
// [*] unchanged in terms of which bucket indices they represent. but to achieve
// that, their offset needs to change if "disrupted" by spans changing ahead of
// them
//
// \/ this one is "unchanged"
// spans : [ 0 , 3 ] [1,1] [ 1 , 4 ] [ 3 , 3 ]
// bucket idx : [0] [1] [2] 3 [4] 5 [6] [7] [8] [9] 10 11 12 [13] [14] [15]
// raw values 6 3 0 3 0 0 2 4 5 0 1
// deltas 6 -3 -3 3 -3 0 2 2 1 -5 1
// delta mods: / \ / \ / \
//
// Note for histograms with delta-encoded buckets: Whenever any new buckets are
// introduced, the subsequent "old" bucket needs to readjust its delta to the
// new base of 0. Thus, for the caller who wants to transform the set of
// original deltas to a new set of deltas to match a new span layout that adds
// buckets, we simply need to generate a list of inserts.
//
// Note: Within expandSpansForward we don't have to worry about the changes to the
// spans themselves, thanks to the iterators we get to work with the more useful
// bucket indices (which of course directly correspond to the buckets we have to
// adjust).
func expandSpansForward(a, b []histogram.Span) (forward []Insert, ok bool) {
ai := newBucketIterator(a)
bi := newBucketIterator(b)
var inserts []Insert
// When inter.num becomes > 0, this becomes a valid insert that should
// be yielded when we finish a streak of new buckets.
var inter Insert
av, aOK := ai.Next()
bv, bOK := bi.Next()
loop:
for {
switch {
case aOK && bOK:
switch {
case av == bv: // Both have an identical value. move on!
// Finish WIP insert and reset.
if inter.num > 0 {
inserts = append(inserts, inter)
}
inter.num = 0
av, aOK = ai.Next()
bv, bOK = bi.Next()
inter.pos++
case av < bv: // b misses a value that is in a.
return inserts, false
case av > bv: // a misses a value that is in b. Forward b and recompare.
inter.num++
bv, bOK = bi.Next()
}
case aOK && !bOK: // b misses a value that is in a.
return inserts, false
case !aOK && bOK: // a misses a value that is in b. Forward b and recompare.
inter.num++
bv, bOK = bi.Next()
default: // Both iterators ran out. We're done.
if inter.num > 0 {
inserts = append(inserts, inter)
}
break loop
}
}
return inserts, true
}
// expandSpansBothWays is similar to expandSpansForward, but now b may also
// cover an entirely different set of buckets. The function returns the
// “forward” inserts to expand 'a' to also cover all the buckets exclusively
// covered by 'b', and it returns the “backward” inserts to expand 'b' to also
// cover all the buckets exclusively covered by 'a'.
func expandSpansBothWays(a, b []histogram.Span) (forward, backward []Insert, mergedSpans []histogram.Span) {
ai := newBucketIterator(a)
bi := newBucketIterator(b)
var fInserts, bInserts []Insert
var lastBucket int
addBucket := func(b int) {
offset := b - lastBucket - 1
if offset == 0 && len(mergedSpans) > 0 {
mergedSpans[len(mergedSpans)-1].Length++
} else {
if len(mergedSpans) == 0 {
offset++
}
mergedSpans = append(mergedSpans, histogram.Span{
Offset: int32(offset),
Length: 1,
})
}
lastBucket = b
}
// When fInter.num (or bInter.num, respectively) becomes > 0, this
// becomes a valid insert that should be yielded when we finish a streak
// of new buckets.
var fInter, bInter Insert
av, aOK := ai.Next()
bv, bOK := bi.Next()
loop:
for {
switch {
case aOK && bOK:
switch {
case av == bv: // Both have an identical value. move on!
// Finish WIP insert and reset.
if fInter.num > 0 {
fInserts = append(fInserts, fInter)
fInter.num = 0
}
if bInter.num > 0 {
bInserts = append(bInserts, bInter)
bInter.num = 0
}
addBucket(av)
av, aOK = ai.Next()
bv, bOK = bi.Next()
fInter.pos++
bInter.pos++
case av < bv: // b misses a value that is in a.
bInter.num++
// Collect the forward inserts before advancing
// the position of 'a'.
if fInter.num > 0 {
fInserts = append(fInserts, fInter)
fInter.num = 0
}
addBucket(av)
fInter.pos++
av, aOK = ai.Next()
case av > bv: // a misses a value that is in b. Forward b and recompare.
fInter.num++
// Collect the backward inserts before advancing the
// position of 'b'.
if bInter.num > 0 {
bInserts = append(bInserts, bInter)
bInter.num = 0
}
addBucket(bv)
bInter.pos++
bv, bOK = bi.Next()
}
case aOK && !bOK: // b misses a value that is in a.
bInter.num++
addBucket(av)
av, aOK = ai.Next()
case !aOK && bOK: // a misses a value that is in b. Forward b and recompare.
fInter.num++
addBucket(bv)
bv, bOK = bi.Next()
default: // Both iterators ran out. We're done.
if fInter.num > 0 {
fInserts = append(fInserts, fInter)
}
if bInter.num > 0 {
bInserts = append(bInserts, bInter)
}
break loop
}
}
return fInserts, bInserts, mergedSpans
}
type bucketValue interface {
int64 | float64
}
// insert merges 'in' with the provided inserts and writes them into 'out',
// which must already have the appropriate length. 'out' is also returned for
// convenience.
func insert[BV bucketValue](in, out []BV, inserts []Insert, deltas bool) []BV {
var (
oi int // Position in out.
v BV // The last value seen.
ii int // The next insert to process.
)
for i, d := range in {
if ii < len(inserts) && i == inserts[ii].pos {
// We have an insert!
// Add insert.num new delta values such that their
// bucket values equate 0. When deltas==false, it means
// that it is an absolute value. So we set it to 0
// directly.
if deltas {
out[oi] = -v
} else {
out[oi] = 0
}
oi++
for x := 1; x < inserts[ii].num; x++ {
out[oi] = 0
oi++
}
ii++
// Now save the value from the input. The delta value we
// should save is the original delta value + the last
// value of the point before the insert (to undo the
// delta that was introduced by the insert). When
// deltas==false, it means that it is an absolute value,
// so we set it directly to the value in the 'in' slice.
if deltas {
out[oi] = d + v
} else {
out[oi] = d
}
oi++
v = d + v
continue
}
// If there was no insert, the original delta is still valid.
out[oi] = d
oi++
v += d
}
switch ii {
case len(inserts):
// All inserts processed. Nothing more to do.
case len(inserts) - 1:
// One more insert to process at the end.
if deltas {
out[oi] = -v
} else {
out[oi] = 0
}
oi++
for x := 1; x < inserts[ii].num; x++ {
out[oi] = 0
oi++
}
default:
panic("unprocessed inserts left")
}
return out
}
// counterResetHint returns a CounterResetHint based on the CounterResetHeader
// and on the position into the chunk.
func counterResetHint(crh CounterResetHeader, numRead uint16) histogram.CounterResetHint {
switch {
case crh == GaugeType:
// A gauge histogram chunk only contains gauge histograms.
return histogram.GaugeType
case numRead > 1:
// In a counter histogram chunk, there will not be any counter
// resets after the first histogram.
return histogram.NotCounterReset
case crh == CounterReset:
// If the chunk was started because of a counter reset, we can
// safely return that hint. This histogram always has to be
// treated as a counter reset.
return histogram.CounterReset
default:
// Sadly, we have to return "unknown" as the hint for all other
// cases, even if we know that the chunk was started without a
// counter reset. But we cannot be sure that the previous chunk
// still exists in the TSDB, so we conservatively return
// "unknown". On the bright side, this case should be relatively
// rare.
//
// TODO(beorn7): Nevertheless, if the current chunk is in the
// middle of a block (not the first chunk in the block for this
// series), it's probably safe to assume that the previous chunk
// will exist in the TSDB for as long as the current chunk
// exist, and we could safely return
// "histogram.NotCounterReset". This needs some more work and
// might not be worth the effort and/or risk. To be vetted...
return histogram.UnknownCounterReset
}
}