[nhcb branch] Use single bit to differentiate between optimized bounds and floats (#13828)

* Use single bit to differentiate between optimized bounds and floats

Use a single bit to decide what kind of data to read/write.
This reduces the storage needed for floats from 72 bits to 65 bits, and
integers are now stored in 5 to 32 bits instead of 16.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Jeanette Tan <jeanette.tan@grafana.com>
This commit is contained in:
George Krajcsovits 2024-03-27 18:40:59 +01:00 committed by GitHub
parent dc7b282d39
commit 4eab18abd6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 20 additions and 29 deletions

View file

@ -113,16 +113,6 @@ func (b *bstream) writeBits(u uint64, nbits int) {
}
}
// putUvarint is a wrapper for the standard library's binary.PutUvarint to
// make it work with our bstream. It encodes x as an unsigned varint and
// writes the encoded bytes to the stream one at a time with writeByte.
func (b *bstream) putUvarint(x uint64) {
	// binary.PutUvarint panics if the buffer is too small. Size the scratch
	// buffer for the largest possible uint64 encoding rather than 2 bytes,
	// which only fits values below 1<<14. A fixed-size array also avoids a
	// heap allocation per call.
	var buf [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(buf[:], x)
	for _, c := range buf[:n] {
		b.writeByte(c)
	}
}
type bstreamReader struct {
stream []byte
streamOffset int // The offset from which read the next byte from the stream.
@ -267,9 +257,3 @@ func (b *bstreamReader) loadNextBuffer(nbits uint8) bool {
return true
}
// readUvarint reads one unsigned varint from the stream. It is a thin
// adapter that hands the reader to the standard library's
// binary.ReadUvarint and returns its result unchanged.
func (b *bstreamReader) readUvarint() (uint64, error) {
	val, err := binary.ReadUvarint(b)
	return val, err
}

View file

@ -183,29 +183,36 @@ func isWholeWhenMultiplied(in float64) bool {
return in == out
}
// putCustomBound writes the custom bound to the bstream. It stores values from 0 to
// 16.382 (inclusive) that are multiples of 0.001 in an unsigned var int of up to 2 bytes,
// but needs 1 bit + 8 bytes for other values like negative numbers, numbers greater than
// 16.382, or numbers that are not a multiple of 0.001, on the assumption that they are
// less common. In detail:
// putCustomBound writes a custom bound to the bstream. It stores values from
// 0 to 33554.430 (inclusive) that are multiples of 0.001 in unsigned varbit
// encoding of up to 4 bytes, but needs 1 bit + 8 bytes for other values like
// negative numbers, numbers greater than 33554.430, or numbers that are not
// a multiple of 0.001, on the assumption that they are less common. In detail:
// - Multiply the bound by 1000, without rounding.
// - If the multiplied bound is >= 0, <= 16382 and a whole number, store it as an
// unsigned var int.
// - Otherwise, store 0 as an unsigned var int, followed by the 8 bytes of the original
// - If the multiplied bound is >= 0, <= 33554430 and a whole number,
// add 1 and store it in unsigned varbit encoding. All these numbers are
// greater than 0, so the leading bit of the varbit is always 1!
// - Otherwise, store a 0 bit, followed by the 8 bytes of the original
// bound as a float64.
//
// When reading the values, we first decode a value as unsigned varbit.
// If it is 0, we read the next 8 bytes as a float64; otherwise we convert
// the value to a float64 by subtracting 1 and dividing by 1000.
func putCustomBound(b *bstream, f float64) {
tf := f * 1000
if tf < 0 || tf > 16382 || !isWholeWhenMultiplied(f) {
b.putUvarint(0)
// 33554431-1 comes from the maximum that can be stored in a varbit in 4
// bytes, other values are stored in 8 bytes anyway.
if tf < 0 || tf > 33554430 || !isWholeWhenMultiplied(f) {
// A single 0 bit marks that the raw float64 bits of the bound follow.
b.writeBit(zero)
b.writeBits(math.Float64bits(f), 64)
return
}
b.putUvarint(uint64(math.Round(tf) + 1))
// Add 1 so the encoded value is never 0; a decoded 0 means a float64 follows.
putVarbitUint(b, uint64(math.Round(tf))+1)
}
// readCustomBound reads the custom bound written with putCustomBound.
func readCustomBound(br *bstreamReader) (float64, error) {
b, err := br.readUvarint()
b, err := readVarbitUint(br)
if err != nil {
return 0, err
}

View file

@ -427,7 +427,7 @@ func TestWriteReadHistogramChunkLayout(t *testing.T) {
schema: histogram.CustomBucketsSchema,
positiveSpans: []histogram.Span{{Offset: -4, Length: 3}, {Offset: 2, Length: 42}},
negativeSpans: nil,
customValues: []float64{-5, -2.5, 0, 0.1, 0.25, 0.5, 1, 2, 5, 10, 25, 50, 100, 255, 500, 1000},
customValues: []float64{-5, -2.5, 0, 0.1, 0.25, 0.5, 1, 2, 5, 10, 25, 50, 100, 255, 500, 1000, 50000, 1e7},
},
{
schema: histogram.CustomBucketsSchema,