From c5522677bf78399652836625ae10f419bc7bbc7b Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 14 Oct 2021 14:47:26 +0200 Subject: [PATCH] Improve encoding of zero threshold Signed-off-by: beorn7 --- tsdb/chunkenc/histogram_meta.go | 73 ++++++++++++++++++++++++++++----- tsdb/chunkenc/varbit.go | 36 ---------------- 2 files changed, 63 insertions(+), 46 deletions(-) diff --git a/tsdb/chunkenc/histogram_meta.go b/tsdb/chunkenc/histogram_meta.go index dd1d876d3f..17676ae2f4 100644 --- a/tsdb/chunkenc/histogram_meta.go +++ b/tsdb/chunkenc/histogram_meta.go @@ -14,24 +14,18 @@ package chunkenc import ( + "math" + "github.com/prometheus/prometheus/model/histogram" ) func writeHistogramChunkLayout(b *bstream, schema int32, zeroThreshold float64, positiveSpans, negativeSpans []histogram.Span) { putVarbitInt(b, int64(schema)) - putVarbitFloat(b, zeroThreshold) + putZeroThreshold(b, zeroThreshold) putHistogramChunkLayoutSpans(b, positiveSpans) putHistogramChunkLayoutSpans(b, negativeSpans) } -func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) { - putVarbitInt(b, int64(len(spans))) - for _, s := range spans { - putVarbitUint(b, uint64(s.Length)) - putVarbitInt(b, int64(s.Offset)) - } -} - func readHistogramChunkLayout(b *bstreamReader) ( schema int32, zeroThreshold float64, positiveSpans, negativeSpans []histogram.Span, @@ -43,7 +37,7 @@ func readHistogramChunkLayout(b *bstreamReader) ( } schema = int32(v) - zeroThreshold, err = readVarbitFloat(b) + zeroThreshold, err = readZeroThreshold(b) if err != nil { return } @@ -61,6 +55,14 @@ func readHistogramChunkLayout(b *bstreamReader) ( return } +func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) { + putVarbitInt(b, int64(len(spans))) + for _, s := range spans { + putVarbitUint(b, uint64(s.Length)) + putVarbitInt(b, int64(s.Offset)) + } +} + func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) { var spans []histogram.Span num, err := readVarbitInt(b) @@ -87,6 +89,57 
@@ func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) {
 	return spans, nil
 }
 
+// putZeroThreshold writes the zero threshold to the bstream. It stores typical
+// values in just one byte, but needs 9 bytes for other values. In detail:
+//
+// * If the threshold is 0, store a single zero byte.
+//
+// * If the threshold is a power of 2 between (and including) 2^-243 and 2^10,
+//   take the exponent from the IEEE 754 representation of the threshold, which
+//   covers a range between (and including) -242 and 11. (2^-243 is 0.5*2^-242
+//   in IEEE 754 representation, and 2^10 is 0.5*2^11.) Add 243 to the exponent
+//   and store the result (which will be between 1 and 254) as a single
+//   byte. Note that small powers of two are preferred values for the zero
+//   threshold. The default value for the zero threshold is 2^-128 (or
+//   0.5*2^-127 in IEEE 754 representation) and will therefore be encoded as a
+//   single byte (with value 116).
+//
+// * In all other cases, store 255 as a single byte, followed by the 8 bytes of
+//   the threshold as a float64, i.e. taking 9 bytes in total.
+func putZeroThreshold(b *bstream, threshold float64) {
+	if threshold == 0 {
+		b.writeByte(0)
+		return
+	}
+	frac, exp := math.Frexp(threshold)
+	if frac != 0.5 || exp < -242 || exp > 11 {
+		b.writeByte(255)
+		b.writeBits(math.Float64bits(threshold), 64)
+		return
+	}
+	b.writeByte(byte(exp + 243))
+}
+
+// readZeroThreshold reads the zero threshold written with putZeroThreshold.
+func readZeroThreshold(br *bstreamReader) (float64, error) {
+	b, err := br.ReadByte()
+	if err != nil {
+		return 0, err
+	}
+	switch b {
+	case 0:
+		return 0, nil
+	case 255:
+		v, err := br.readBits(64)
+		if err != nil {
+			return 0, err
+		}
+		return math.Float64frombits(v), nil
+	default:
+		return math.Ldexp(0.5, int(b)-243), nil // Widen to int first; byte subtraction would wrap around.
+	}
+}
+
 type bucketIterator struct {
 	spans []histogram.Span
 	span  int // Span position of last yielded bucket.
diff --git a/tsdb/chunkenc/varbit.go b/tsdb/chunkenc/varbit.go index c17600e4ad..4220819b91 100644 --- a/tsdb/chunkenc/varbit.go +++ b/tsdb/chunkenc/varbit.go @@ -14,47 +14,11 @@ package chunkenc import ( - "math" "math/bits" "github.com/pkg/errors" ) -// putVarbitFloat writes a float64 using varbit encoding. It does so by -// converting the underlying bits into an int64. -func putVarbitFloat(b *bstream, val float64) { - // TODO(beorn7): The resulting int64 here will almost never be a small - // integer. Thus, the varbit encoding doesn't really make sense - // here. This function is only used to encode the zero threshold in - // histograms. Based on that, here is an idea to improve the encoding: - // - // It is recommended to use (usually negative) powers of two as - // threshoulds. The default value for the zero threshald is in fact - // 2^-128, or 0.5*2^-127, as it is represented by IEEE 754. It is - // therefore worth a try to test if the threshold is a power of 2 and - // then just store the exponent. 0 is also a commen threshold for those - // use cases where only observations of precisely zero should go to the - // zero bucket. This results in the following proposal: - // - First we store 1 byte. - // - Iff that byte is 255 (all bits set), it is followed by a direct - // 8byte representation of the float. - // - If the byte is 0, the threshold is 0. - // - In all other cases, take the number represented by the byte, - // subtract 246, and that's the exponent (i.e. between -245 and - // +8, covering thresholds that are powers of 2 between 2^-246 - // to 128). 
- putVarbitInt(b, int64(math.Float64bits(val))) -} - -// readVarbitFloat reads a float64 encoded with putVarbitFloat -func readVarbitFloat(b *bstreamReader) (float64, error) { - val, err := readVarbitInt(b) - if err != nil { - return 0, err - } - return math.Float64frombits(uint64(val)), nil -} - // putVarbitInt writes an int64 using varbit encoding with a bit bucketing // optimized for the dod's observed in histogram buckets, plus a few additional // buckets for large numbers.