Improve encoding of zero threshold

Signed-off-by: beorn7 <beorn@grafana.com>
2025-03-05 20:59:13 -08:00 · 2021-10-14 14:47:26 +02:00 · 2021-10-14 14:47:26 +02:00 · c5522677bf
parent 7093b089f2
commit c5522677bf
2 changed files with 63 additions and 46 deletions
--- a/tsdb/chunkenc/histogram_meta.go
+++ b/tsdb/chunkenc/histogram_meta.go
@ -14,24 +14,18 @@
 package chunkenc
 import (
 	"math"
 	"github.com/prometheus/prometheus/model/histogram"
 )
 // writeHistogramChunkLayout writes the histogram layout to b in a fixed order:
 // the schema (as a varbit int), the zero threshold, the positive spans, and
 // finally the negative spans.
 func writeHistogramChunkLayout(b *bstream, schema int32, zeroThreshold float64, positiveSpans, negativeSpans []histogram.Span) {
 	putVarbitInt(b, int64(schema))
-	putVarbitFloat(b, zeroThreshold)
+	putZeroThreshold(b, zeroThreshold)
 	putHistogramChunkLayoutSpans(b, positiveSpans)
 	putHistogramChunkLayoutSpans(b, negativeSpans)
 }
 // putHistogramChunkLayoutSpans writes the number of spans to b, followed by
 // the length and the offset of every span, in slice order.
 func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) {
 	n := len(spans)
 	putVarbitInt(b, int64(n))
 	for i := 0; i < n; i++ {
 		span := spans[i]
 		// The length is written unsigned, the offset signed.
 		putVarbitUint(b, uint64(span.Length))
 		putVarbitInt(b, int64(span.Offset))
 	}
 }
 func readHistogramChunkLayout(b *bstreamReader) (
 	schema int32, zeroThreshold float64,
 	positiveSpans, negativeSpans []histogram.Span,
@ -43,7 +37,7 @@ func readHistogramChunkLayout(b *bstreamReader) (
 	}
 	schema = int32(v)
-	zeroThreshold, err = readVarbitFloat(b)
+	zeroThreshold, err = readZeroThreshold(b)
 	if err != nil {
 		return
 	}
@ -61,6 +55,14 @@ func readHistogramChunkLayout(b *bstreamReader) (
 	return
 }
 // putHistogramChunkLayoutSpans writes the number of spans to b, followed by
 // the length and the offset of every span, in slice order.
 func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) {
 	n := len(spans)
 	putVarbitInt(b, int64(n))
 	for i := 0; i < n; i++ {
 		span := spans[i]
 		// The length is written unsigned, the offset signed.
 		putVarbitUint(b, uint64(span.Length))
 		putVarbitInt(b, int64(span.Offset))
 	}
 }
 func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) {
 	var spans []histogram.Span
 	num, err := readVarbitInt(b)
@ -87,6 +89,57 @@ func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) {
 	return spans, nil
 }
 // putZeroThreshold encodes the zero threshold into the bstream. Common values
 // take a single byte; everything else takes 9 bytes. The three cases are:
 //
 // * A threshold of 0 is stored as a single zero byte.
 //
 // * A threshold that is a power of 2 from 2^-243 up to and including 2^10 is
 //   stored as a single byte holding its exponent plus 243. The exponent is
 //   the one reported by math.Frexp, i.e. the threshold is expressed as
 //   0.5*2^exp, so the exponent ranges from -242 (2^-243 is 0.5*2^-242) to 11
 //   (2^10 is 0.5*2^11) and the stored byte ranges from 1 to 254. Small powers
 //   of two are the preferred values for the zero threshold. The default
 //   value, 2^-128 (or 0.5*2^-127), is therefore stored as the single byte
 //   116.
 //
 // * Any other threshold is stored as the marker byte 255 followed by the 8
 //   bytes of the threshold as a float64, i.e. 9 bytes in total.
 func putZeroThreshold(b *bstream, threshold float64) {
 	if threshold == 0 {
 		b.writeByte(0)
 		return
 	}
 	mantissa, exponent := math.Frexp(threshold)
 	if mantissa == 0.5 && exponent >= -242 && exponent <= 11 {
 		// Power of two within the one-byte range: store the shifted exponent.
 		b.writeByte(byte(exponent + 243))
 		return
 	}
 	// Fallback: marker byte followed by the raw float64 bit pattern.
 	b.writeByte(255)
 	b.writeBits(math.Float64bits(threshold), 64)
 }
 // readZeroThreshold reads the zero threshold written with putZeroThreshold.
 //
 // The first byte selects the encoding: 0 means a threshold of exactly 0, 255
 // means the following 64 bits hold the float64 bit pattern verbatim, and any
 // other value is the exponent of a power of two (for a fraction of 0.5, as
 // produced by math.Frexp) with 243 added to it. If reading from br fails, the
 // error is returned together with a zero threshold.
 func readZeroThreshold(br *bstreamReader) (float64, error) {
 	b, err := br.ReadByte()
 	if err != nil {
 		return 0, err
 	}
 	switch b {
 	case 0:
 		return 0, nil
 	case 255:
 		v, err := br.readBits(64)
 		if err != nil {
 			return 0, err
 		}
 		return math.Float64frombits(v), nil
 	default:
 		// Widen to int before subtracting. b is an unsigned byte, so b-243
 		// would wrap around for every b < 243 and produce a large positive
 		// exponent instead of the intended negative one (e.g. the byte 116
 		// would decode to 2^128 rather than 2^-128).
 		return math.Ldexp(0.5, int(b)-243), nil
 	}
 }
 type bucketIterator struct {
 	spans  []histogram.Span
 	span   int // Span position of last yielded bucket.
--- a/tsdb/chunkenc/varbit.go
+++ b/tsdb/chunkenc/varbit.go
@ -14,47 +14,11 @@
 package chunkenc
 import (
 	"math"
 	"math/bits"
 	"github.com/pkg/errors"
 )
 // putVarbitFloat writes a float64 using varbit encoding. The IEEE 754 bit
 // pattern of the value is reinterpreted as an int64 and written with
 // putVarbitInt.
 func putVarbitFloat(b *bstream, val float64) {
 	// TODO(beorn7): The resulting int64 here will almost never be a small
 	// integer. Thus, the varbit encoding doesn't really make sense
 	// here. This function is only used to encode the zero threshold in
 	// histograms. Based on that, here is an idea to improve the encoding:
 	//
 	// It is recommended to use (usually negative) powers of two as
 	// thresholds. The default value for the zero threshold is in fact
 	// 2^-128, or 0.5*2^-127, as it is represented by IEEE 754. It is
 	// therefore worth a try to test if the threshold is a power of 2 and
 	// then just store the exponent. 0 is also a common threshold for those
 	// use cases where only observations of precisely zero should go to the
 	// zero bucket. This results in the following proposal:
 	// - First we store 1 byte.
 	// - Iff that byte is 255 (all bits set), it is followed by a direct
 	//   8byte representation of the float.
 	// - If the byte is 0, the threshold is 0.
 	// - In all other cases, take the number represented by the byte,
 	//   subtract 246, and that's the exponent (i.e. between -245 and
 	//   +8, covering thresholds that are powers of 2 between 2^-246
 	//   to 128).
 	raw := math.Float64bits(val)
 	putVarbitInt(b, int64(raw))
 }
 // readVarbitFloat reads a float64 encoded with putVarbitFloat. It reads a
 // varbit int and reinterprets its bits as a float64. If reading fails, the
 // error is returned together with a zero value.
 func readVarbitFloat(b *bstreamReader) (float64, error) {
 	raw, err := readVarbitInt(b)
 	if err != nil {
 		return 0, err
 	}
 	// Reinterpret the integer's bit pattern as a float64.
 	asBits := uint64(raw)
 	return math.Float64frombits(asBits), nil
 }
 // putVarbitInt writes an int64 using varbit encoding with a bit bucketing
 // optimized for the dod's observed in histogram buckets, plus a few additional
 // buckets for large numbers.