mirror of https://github.com/prometheus/prometheus.git

Fix precision loss.

Large delta values often imply a difference between a large base value and a similarly large sample value, so reconstructing the sample as base plus delta can carry a huge precision error. Since large delta values need 8 bytes anyway, we are not even saving memory. As a solution, always save the absolute value rather than a delta once 8 bytes would be needed for the delta. Timestamps are then saved as 8-byte integers, while values are always saved as float64 in that case.

Change-Id: I01100d600515e16df58ce508b50982ffd762cc49
This commit is contained in:
parent dc2e463a97
commit d742edfe0d

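To illustrate the precision loss described in the commit message (a minimal standalone sketch with made-up numbers, not code from this commit):

package main

import "fmt"

func main() {
    // Made-up numbers: reconstructing a sample as base + delta when the
    // delta is large and of opposite sign to the base.
    base := 1e16 // large base value of the chunk
    value := 1.0 // actual sample value to store

    delta := value - base    // huge delta; the 1.0 is rounded away in float64
    restored := base + delta // what decoding base + delta hands back

    fmt.Println(restored) // prints 0, not 1 -- the sample is lost entirely
    fmt.Println(value)    // storing the absolute float64 instead keeps it exact
}
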
@@ -60,7 +60,7 @@ func chunkType(c chunk) byte {
 func chunkForType(chunkType byte) chunk {
     switch chunkType {
     case 0:
-        return newDeltaEncodedChunk(1, 1, false)
+        return newDeltaEncodedChunk(d1, d0, true)
     default:
         panic("unknown chunk type")
     }

@@ -41,6 +41,11 @@ const (
     deltaHeaderBufLenOffset = 19
 )
 
+// A deltaEncodedChunk adaptively stores sample timestamps and values with a
+// delta encoding of various types (int, float) and bit width. However, once 8
+// bytes would be needed to encode a delta value, a fall-back to the absolute
+// numbers happens (so that timestamps are saved directly as int64 and values as
+// float64).
 type deltaEncodedChunk struct {
     buf []byte
 }

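The d0, d1, d2, d4, and d8 identifiers used throughout this diff are values of the deltaBytes type declared elsewhere in the file; a plausible sketch of that declaration, assumed here for context and not part of this commit:

type deltaBytes byte

const (
    d0 deltaBytes = 0
    d1 deltaBytes = 1
    d2 deltaBytes = 2
    d4 deltaBytes = 4
    d8 deltaBytes = 8
)
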
@@ -51,7 +56,7 @@ func newDeltaEncodedChunk(tb, vb deltaBytes, isInt bool) *deltaEncodedChunk {
 
     buf[deltaHeaderTimeBytesOffset] = byte(tb)
     buf[deltaHeaderValueBytesOffset] = byte(vb)
-    if isInt {
+    if vb < d8 && isInt { // Only use int for fewer than 8 value delta bytes.
         buf[deltaHeaderIsIntOffset] = 1
     } else {
         buf[deltaHeaderIsIntOffset] = 0

@@ -77,35 +82,35 @@ func (c *deltaEncodedChunk) clone() chunk {
 }
 
 func neededDeltaBytes(deltaT clientmodel.Timestamp, deltaV clientmodel.SampleValue, isInt bool) (dtb, dvb deltaBytes) {
-    dtb = 1
+    dtb = d1
     if deltaT >= 256 {
-        dtb = 2
+        dtb = d2
     }
     if deltaT >= 256*256 {
-        dtb = 4
+        dtb = d4
     }
     if deltaT >= 256*256*256*256 {
-        dtb = 8
+        dtb = d8
     }
 
     if isInt {
-        dvb = 0
+        dvb = d0
         if deltaV != 0 {
-            dvb = 1
+            dvb = d1
         }
         if deltaV < -(256/2) || deltaV > (256/2)-1 {
-            dvb = 2
+            dvb = d2
         }
         if deltaV < -(256*256/2) || deltaV > (256*256/2)-1 {
-            dvb = 4
+            dvb = d4
         }
         if deltaV < -(256*256*256*256/2) || deltaV > (256*256*256*256/2)-1 {
-            dvb = 8
+            dvb = d8
         }
     } else {
-        dvb = 4
+        dvb = d4
         if clientmodel.SampleValue(float32(deltaV)) != deltaV {
-            dvb = 8
+            dvb = d8
         }
     }
     return dtb, dvb

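As a quick illustration of the width selection above, a simplified standalone restatement of the time-delta part (illustration only, not the function from this commit):

package main

import "fmt"

// pickTimeBytes mirrors the thresholds of neededDeltaBytes for time deltas:
// 1 byte below 256, 2 below 256^2, 4 below 256^4, otherwise 8.
func pickTimeBytes(deltaT uint64) int {
    switch {
    case deltaT < 256:
        return 1
    case deltaT < 256*256:
        return 2
    case deltaT < 256*256*256*256:
        return 4
    default:
        return 8
    }
}

func main() {
    fmt.Println(pickTimeBytes(200))     // 1
    fmt.Println(pickTimeBytes(70000))   // 4 (does not fit into 2 bytes)
    fmt.Println(pickTimeBytes(1 << 40)) // 8: the case that now switches to absolute encoding
}
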
@@ -149,8 +154,7 @@ func (c *deltaEncodedChunk) add(s *metric.SamplePair) chunks {
     sampleSize := c.sampleSize()
 
     // Do we generally have space for another sample in this chunk? If not,
-    // overflow into a new one. We assume that if we have seen floating point
-    // values once, the series will most likely contain floats in the future.
+    // overflow into a new one.
     if remainingBytes < sampleSize {
         //fmt.Println("overflow")
         overflowChunks := c.newFollowupChunk().add(s)

@@ -159,6 +163,8 @@ func (c *deltaEncodedChunk) add(s *metric.SamplePair) chunks {
 
     dt := s.Timestamp - c.baseTime()
     dv := s.Value - c.baseValue()
+    tb := c.timeBytes()
+    vb := c.valueBytes()
 
     // If the new sample is incompatible with the current encoding, reencode the
     // existing chunk data into new chunk(s).

@@ -166,61 +172,65 @@ func (c *deltaEncodedChunk) add(s *metric.SamplePair) chunks {
     // int->float.
     // TODO: compare speed with Math.Modf.
     if c.isInt() && clientmodel.SampleValue(int64(dv)) != dv {
-        //fmt.Println("int->float", len(c.buf), cap(c.buf))
-        return transcodeAndAdd(newDeltaEncodedChunk(c.timeBytes(), d4, false), c, s)
+        //fmt.Println("int->float", len(c.buf), cap(c.buf), dv)
+        return transcodeAndAdd(newDeltaEncodedChunk(tb, d4, false), c, s)
     }
     // float32->float64.
-    if !c.isInt() && c.valueBytes() == d4 && clientmodel.SampleValue(float32(dv)) != dv {
+    if !c.isInt() && vb == d4 && clientmodel.SampleValue(float32(dv)) != dv {
         //fmt.Println("float32->float64", float32(dv), dv, len(c.buf), cap(c.buf))
-        return transcodeAndAdd(newDeltaEncodedChunk(c.timeBytes(), d8, false), c, s)
+        return transcodeAndAdd(newDeltaEncodedChunk(tb, d8, false), c, s)
     }
-    // More bytes per sample.
-    if dtb, dvb := neededDeltaBytes(dt, dv, c.isInt()); dtb > c.timeBytes() || dvb > c.valueBytes() {
-        //fmt.Printf("transcoding T: %v->%v, V: %v->%v, I: %v; len %v, cap %v\n", c.timeBytes(), dtb, c.valueBytes(), dvb, c.isInt(), len(c.buf), cap(c.buf))
-        dtb = max(dtb, c.timeBytes())
-        dvb = max(dvb, c.valueBytes())
-        return transcodeAndAdd(newDeltaEncodedChunk(dtb, dvb, c.isInt()), c, s)
+    if tb < d8 || vb < d8 {
+        // Maybe more bytes per sample.
+        if ntb, nvb := neededDeltaBytes(dt, dv, c.isInt()); ntb > tb || nvb > vb {
+            //fmt.Printf("transcoding T: %v->%v, V: %v->%v, I: %v; len %v, cap %v\n", tb, ntb, vb, nvb, c.isInt(), len(c.buf), cap(c.buf))
+            ntb = max(ntb, tb)
+            nvb = max(nvb, vb)
+            return transcodeAndAdd(newDeltaEncodedChunk(ntb, nvb, c.isInt()), c, s)
+        }
     }
 
     offset := len(c.buf)
     c.buf = c.buf[:offset+sampleSize]
 
-    switch c.timeBytes() {
-    case 1:
+    switch tb {
+    case d1:
         c.buf[offset] = byte(dt)
-    case 2:
+    case d2:
         binary.LittleEndian.PutUint16(c.buf[offset:], uint16(dt))
-    case 4:
+    case d4:
         binary.LittleEndian.PutUint32(c.buf[offset:], uint32(dt))
-    case 8:
-        binary.LittleEndian.PutUint64(c.buf[offset:], uint64(dt))
+    case d8:
+        // Store the absolute value (no delta) in case of d8.
+        binary.LittleEndian.PutUint64(c.buf[offset:], uint64(s.Timestamp))
     default:
         panic("invalid number of bytes for time delta")
     }
 
-    offset += int(c.timeBytes())
+    offset += int(tb)
 
     if c.isInt() {
-        switch c.valueBytes() {
-        case 0:
+        switch vb {
+        case d0:
             // No-op. Constant value is stored as base value.
-        case 1:
+        case d1:
             c.buf[offset] = byte(dv)
-        case 2:
+        case d2:
             binary.LittleEndian.PutUint16(c.buf[offset:], uint16(dv))
-        case 4:
+        case d4:
             binary.LittleEndian.PutUint32(c.buf[offset:], uint32(dv))
-        case 8:
-            binary.LittleEndian.PutUint64(c.buf[offset:], uint64(dv))
+        // d8 must not happen. Those samples are encoded as float64.
         default:
-            panic("Invalid number of bytes for integer delta")
+            panic("invalid number of bytes for integer delta")
         }
     } else {
-        switch c.valueBytes() {
-        case 4:
+        switch vb {
+        case d4:
             binary.LittleEndian.PutUint32(c.buf[offset:], math.Float32bits(float32(dv)))
-        case 8:
-            binary.LittleEndian.PutUint64(c.buf[offset:], math.Float64bits(float64(dv)))
+        case d8:
+            // Store the absolute value (no delta) in case of d8.
+            binary.LittleEndian.PutUint64(c.buf[offset:], math.Float64bits(float64(s.Value)))
         default:
-            panic("Invalid number of bytes for floating point delta")
+            panic("invalid number of bytes for floating point delta")
         }
     }
     return chunks{c}

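The d8 branches above are the core of the fix: once 8 bytes are needed, the encoder writes the absolute timestamp (and, for floats, the absolute float64 value) instead of a delta, so the decoder in the next hunk must not add the base back. A minimal standalone sketch of that round trip (made-up timestamp, little-endian layout as in the diff):

package main

import (
    "encoding/binary"
    "fmt"
)

func main() {
    // Made-up millisecond timestamp; in the d8 case the absolute value is
    // written, so decoding reads it back as-is, without adding a base time.
    const ts = uint64(1414141414141)

    buf := make([]byte, 8)
    binary.LittleEndian.PutUint64(buf, ts) // encode: absolute value, no delta
    decoded := binary.LittleEndian.Uint64(buf)

    fmt.Println(decoded == ts) // true -- nothing lost, no base needed
}
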
@@ -258,49 +268,52 @@ func (c *deltaEncodedChunk) values() <-chan *metric.SamplePair {
 func (c *deltaEncodedChunk) valueAtIndex(idx int) *metric.SamplePair {
     offset := deltaHeaderBytes + idx*c.sampleSize()
 
-    var dt uint64
+    var ts clientmodel.Timestamp
     switch c.timeBytes() {
-    case 1:
-        dt = uint64(uint8(c.buf[offset]))
-    case 2:
-        dt = uint64(binary.LittleEndian.Uint16(c.buf[offset:]))
-    case 4:
-        dt = uint64(binary.LittleEndian.Uint32(c.buf[offset:]))
-    case 8:
-        dt = uint64(binary.LittleEndian.Uint64(c.buf[offset:]))
+    case d1:
+        ts = c.baseTime() + clientmodel.Timestamp(uint8(c.buf[offset]))
+    case d2:
+        ts = c.baseTime() + clientmodel.Timestamp(binary.LittleEndian.Uint16(c.buf[offset:]))
+    case d4:
+        ts = c.baseTime() + clientmodel.Timestamp(binary.LittleEndian.Uint32(c.buf[offset:]))
+    case d8:
+        // Take absolute value for d8.
+        ts = clientmodel.Timestamp(binary.LittleEndian.Uint64(c.buf[offset:]))
     default:
         panic("Invalid number of bytes for time delta")
     }
 
     offset += int(c.timeBytes())
 
-    var dv clientmodel.SampleValue
+    var v clientmodel.SampleValue
     if c.isInt() {
         switch c.valueBytes() {
-        case 0:
-            dv = clientmodel.SampleValue(0)
-        case 1:
-            dv = clientmodel.SampleValue(int8(c.buf[offset]))
-        case 2:
-            dv = clientmodel.SampleValue(int16(binary.LittleEndian.Uint16(c.buf[offset:])))
-        case 4:
-            dv = clientmodel.SampleValue(int32(binary.LittleEndian.Uint32(c.buf[offset:])))
-        case 8:
-            dv = clientmodel.SampleValue(int64(binary.LittleEndian.Uint64(c.buf[offset:])))
+        case d0:
+            v = c.baseValue()
+        case d1:
+            v = c.baseValue() + clientmodel.SampleValue(int8(c.buf[offset]))
+        case d2:
+            v = c.baseValue() + clientmodel.SampleValue(int16(binary.LittleEndian.Uint16(c.buf[offset:])))
+        case d4:
+            v = c.baseValue() + clientmodel.SampleValue(int32(binary.LittleEndian.Uint32(c.buf[offset:])))
+        // No d8 for ints.
         default:
             panic("Invalid number of bytes for integer delta")
         }
     } else {
         switch c.valueBytes() {
-        case 4:
-            dv = clientmodel.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(c.buf[offset:])))
-        case 8:
-            dv = clientmodel.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(c.buf[offset:])))
+        case d4:
+            v = c.baseValue() + clientmodel.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(c.buf[offset:])))
+        case d8:
+            // Take absolute value for d8.
+            v = clientmodel.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(c.buf[offset:])))
         default:
             panic("Invalid number of bytes for floating point delta")
         }
     }
     return &metric.SamplePair{
-        Timestamp: c.baseTime() + clientmodel.Timestamp(dt),
-        Value:     c.baseValue() + dv,
+        Timestamp: ts,
+        Value:     v,
     }
 }

@@ -244,7 +244,7 @@ func TestPersistChunk(t *testing.T) {
 
     for fp, expectedChunks := range fpToChunks {
         indexes := make([]int, 0, len(expectedChunks))
-        for i, _ := range expectedChunks {
+        for i := range expectedChunks {
             indexes = append(indexes, i)
         }
         actualChunks, err := p.LoadChunks(fp, indexes)

@@ -324,9 +324,8 @@ func createRandomSamples(r *rand.Rand) clientmodel.Samples {
             },
         },
         { // Integer with int deltas of various byte length.
-            // TODO: Using larger ints yields even worse results. Improve!
            createValue: func() clientmodel.SampleValue {
-                return clientmodel.SampleValue(r.Int31() - 1<<30)
+                return clientmodel.SampleValue(r.Int63() - 1<<62)
            },
            applyDelta: []deltaApplier{
                func(v clientmodel.SampleValue) clientmodel.SampleValue {

@@ -435,8 +434,7 @@ func verifyStorage(t *testing.T, s Storage, samples clientmodel.Samples, r *rand
        }
        want := float64(sample.Value)
        got := float64(found[0].Value)
-        // TODO: 0.01 is a horribly large deviation. Improve!
-        if want != got && (want == 0. || math.Abs(want-got)/want > 0.01) {
+        if want != got && (want == 0. || math.Abs(want-got)/want > 0.000001) {
            t.Errorf("Value mismatch, want %f, got %f.", want, got)
            result = false
        }

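The tightened tolerance reflects that values are now reproduced from absolute float64s rather than lossy deltas; a small sketch of the relative-error check with made-up numbers (not from the test suite):

package main

import (
    "fmt"
    "math"
)

func main() {
    // Made-up values: a large stored sample and the value read back.
    want := 1e15
    got := 1e15 + 0.125 // one float64 step away at this magnitude

    relErr := math.Abs(want-got) / want
    fmt.Println(relErr < 0.000001) // true -- comfortably within the new tolerance
}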