Improve encoding of zero threshold

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2021-10-14 14:47:26 +02:00
parent 7093b089f2
commit c5522677bf
2 changed files with 63 additions and 46 deletions

View file

@ -14,24 +14,18 @@
package chunkenc
import (
"math"
"github.com/prometheus/prometheus/model/histogram"
)
// writeHistogramChunkLayout writes the layout of a histogram chunk to the
// bstream in a fixed order: the schema (via putVarbitInt), the zero threshold,
// the positive spans, and finally the negative spans.
//
// NOTE(review): the zero threshold is handed to both putVarbitFloat and
// putZeroThreshold below, so it ends up in the stream twice, in two different
// encodings. This looks like the previous and the new call sitting side by
// side; confirm that only one of them is meant to remain and that it matches
// what the reader of the layout expects.
func writeHistogramChunkLayout(b *bstream, schema int32, zeroThreshold float64, positiveSpans, negativeSpans []histogram.Span) {
putVarbitInt(b, int64(schema))
putVarbitFloat(b, zeroThreshold)
putZeroThreshold(b, zeroThreshold)
putHistogramChunkLayoutSpans(b, positiveSpans)
putHistogramChunkLayoutSpans(b, negativeSpans)
}
// putHistogramChunkLayoutSpans writes a list of spans to the bstream. The
// number of spans comes first (via putVarbitInt), followed by each span in
// slice order: its Length via putVarbitUint, then its Offset via putVarbitInt.
func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) {
	count := len(spans)
	putVarbitInt(b, int64(count))
	for i := 0; i < count; i++ {
		putVarbitUint(b, uint64(spans[i].Length))
		putVarbitInt(b, int64(spans[i].Offset))
	}
}
func readHistogramChunkLayout(b *bstreamReader) (
schema int32, zeroThreshold float64,
positiveSpans, negativeSpans []histogram.Span,
@ -43,7 +37,7 @@ func readHistogramChunkLayout(b *bstreamReader) (
}
schema = int32(v)
zeroThreshold, err = readVarbitFloat(b)
zeroThreshold, err = readZeroThreshold(b)
if err != nil {
return
}
@ -61,6 +55,14 @@ func readHistogramChunkLayout(b *bstreamReader) (
return
}
// putHistogramChunkLayoutSpans writes a list of spans to the bstream. The
// number of spans comes first (via putVarbitInt), followed by each span in
// slice order: its Length via putVarbitUint, then its Offset via putVarbitInt.
func putHistogramChunkLayoutSpans(b *bstream, spans []histogram.Span) {
	count := len(spans)
	putVarbitInt(b, int64(count))
	for i := 0; i < count; i++ {
		putVarbitUint(b, uint64(spans[i].Length))
		putVarbitInt(b, int64(spans[i].Offset))
	}
}
func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) {
var spans []histogram.Span
num, err := readVarbitInt(b)
@ -87,6 +89,57 @@ func readHistogramChunkLayoutSpans(b *bstreamReader) ([]histogram.Span, error) {
return spans, nil
}
// putZeroThreshold serializes the zero threshold of a histogram into the
// bstream. Typical values take a single byte, everything else takes nine:
//
//   - A threshold of exactly 0 is written as the single byte 0.
//
//   - A threshold that is a power of two from 2^-243 up to and including 2^10
//     is written as one byte derived from its exponent. math.Frexp normalizes
//     such a value to 0.5*2^exp, so exp runs from -242 (for 2^-243) to 11 (for
//     2^10). The stored byte is exp+243, i.e. a value between 1 and 254. Small
//     powers of two are the preferred zero thresholds; the default, 2^-128 (or
//     0.5*2^-127), is stored as the single byte 116.
//
//   - Any other threshold is written as the marker byte 255 followed by the 8
//     bytes of its float64 bit pattern, 9 bytes in total.
func putZeroThreshold(b *bstream, threshold float64) {
	if threshold == 0 {
		b.writeByte(0)
		return
	}

	mantissa, exponent := math.Frexp(threshold)
	isCompactPowerOfTwo := mantissa == 0.5 && exponent >= -242 && exponent <= 11
	if isCompactPowerOfTwo {
		b.writeByte(byte(exponent + 243))
		return
	}

	// Fallback: marker byte plus the raw IEEE 754 representation.
	b.writeByte(255)
	b.writeBits(math.Float64bits(threshold), 64)
}
// readZeroThreshold reads the zero threshold written with putZeroThreshold.
//
// The first byte selects the encoding:
//
//   - 0 means the threshold is exactly 0.
//
//   - 255 means the next 64 bits hold the float64 bit pattern of the threshold.
//
//   - Any other value v (1 to 254) encodes the power of two 0.5*2^(v-243).
//
// If reading from br fails, the error is returned together with a threshold
// of 0.
func readZeroThreshold(br *bstreamReader) (float64, error) {
	b, err := br.ReadByte()
	if err != nil {
		return 0, err
	}
	switch b {
	case 0:
		return 0, nil
	case 255:
		v, err := br.readBits(64)
		if err != nil {
			return 0, err
		}
		return math.Float64frombits(v), nil
	default:
		// Widen to int before removing the offset. Subtracting in byte
		// arithmetic wraps around for every b below 243, which would turn
		// the negative exponents (including the one of the default
		// threshold 2^-128, stored as 116) into large positive ones.
		return math.Ldexp(0.5, int(b)-243), nil
	}
}
type bucketIterator struct {
spans []histogram.Span
span int // Span position of last yielded bucket.

View file

@ -14,47 +14,11 @@
package chunkenc
import (
"math"
"math/bits"
"github.com/pkg/errors"
)
// putVarbitFloat encodes a float64 into the bstream by reinterpreting its bit
// pattern as an int64 and writing that with putVarbitInt.
func putVarbitFloat(b *bstream, val float64) {
	// TODO(beorn7): The int64 obtained here will almost never be a small
	// integer, so the varbit encoding doesn't really pay off. This function
	// is only used to encode the zero threshold in histograms. Based on
	// that, here is an idea to improve the encoding:
	//
	// It is recommended to use (usually negative) powers of two as
	// thresholds. The default value for the zero threshold is in fact
	// 2^-128, or 0.5*2^-127, as it is represented by IEEE 754. It is
	// therefore worth a try to test if the threshold is a power of 2 and
	// then just store the exponent. 0 is also a common threshold for those
	// use cases where only observations of precisely zero should go to the
	// zero bucket. This results in the following proposal:
	// - First we store 1 byte.
	// - Iff that byte is 255 (all bits set), it is followed by a direct
	//   8-byte representation of the float.
	// - If the byte is 0, the threshold is 0.
	// - In all other cases, take the number represented by the byte,
	//   subtract 246, and that's the exponent (i.e. between -245 and
	//   +8, covering thresholds that are powers of 2 between 2^-246
	//   to 128).
	bitPattern := math.Float64bits(val)
	putVarbitInt(b, int64(bitPattern))
}
// readVarbitFloat decodes a float64 that was written with putVarbitFloat: it
// reads an int64 via readVarbitInt and reinterprets its bits as a float64. If
// the read fails, the error is returned together with a value of 0.
func readVarbitFloat(b *bstreamReader) (float64, error) {
	encoded, err := readVarbitInt(b)
	if err != nil {
		return 0, err
	}
	bitPattern := uint64(encoded)
	return math.Float64frombits(bitPattern), nil
}
// putVarbitInt writes an int64 using varbit encoding with a bit bucketing
// optimized for the dod's observed in histogram buckets, plus a few additional
// buckets for large numbers.