Merge pull request #9047 from Dieterbe/sparsehistogram_dieter

Sparsehistogram: SHS chunk recording and head cutting to head block
This commit is contained in:
Björn Rabenstein 2021-07-05 15:07:25 +02:00 committed by GitHub
commit 9cd0297fda
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 48 deletions

View file

@ -17,6 +17,18 @@ import (
"math" "math"
) )
// SparseHistogram encodes a sparse histogram
// full details: https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit#
// the most tricky bit is how bucket indices represent real bucket boundaries
//
// an example for schema 0 (which doubles the size of consecutive buckets):
//
// buckets syntax (LE,GE) (-2,-1) (-1,-0.5) (-0.5,-0.25) ... (-0.001-0.001) ... (0.25-0.5)(0.5-1) (1-2) ....
// ^
// zero bucket (here width a width of 0.001) ZB
// pos bucket idx ... -1 0 1 2 3
// neg bucket idx 3 2 1 0 -1 ...
// actively used bucket indices themselves are represented by the spans
type SparseHistogram struct { type SparseHistogram struct {
Count, ZeroCount uint64 Count, ZeroCount uint64
Sum, ZeroThreshold float64 Sum, ZeroThreshold float64

View file

@ -126,7 +126,7 @@ func (c *HistoChunk) Appender() (Appender, error) {
return nil, err return nil, err
} }
a := &histoAppender{ a := &HistoAppender{
b: &c.b, b: &c.b,
schema: it.schema, schema: it.schema,
@ -192,7 +192,7 @@ func (c *HistoChunk) Iterator(it Iterator) Iterator {
return c.iterator(it) return c.iterator(it)
} }
type histoAppender struct { type HistoAppender struct {
b *bstream b *bstream
// Metadata: // Metadata:
@ -230,12 +230,36 @@ func putUvarint(b *bstream, buf []byte, x uint64) {
} }
} }
func (a *histoAppender) Append(int64, float64) {} func (a *HistoAppender) Append(int64, float64) {}
// Appendable returns whether the chunk can be appended to, and if so
// whether any recoding needs to happen using the provided interjections
// (in case of any new buckets, positive or negative range, respectively)
// The chunk is not appendable if:
// * the schema has changed
// * the zerobucket threshold has changed
// * any buckets disappeared
func (a *HistoAppender) Appendable(h histogram.SparseHistogram) ([]interjection, []interjection, bool) {
// TODO zerothreshold
if h.Schema != a.schema {
return nil, nil, false
}
posInterjections, ok := compareSpans(a.posSpans, h.PositiveSpans)
if !ok {
return nil, nil, false
}
negInterjections, ok := compareSpans(a.negSpans, h.NegativeSpans)
if !ok {
return nil, nil, false
}
return posInterjections, negInterjections, ok
}
// AppendHistogram appends a SparseHistogram to the chunk. We assume the // AppendHistogram appends a SparseHistogram to the chunk. We assume the
// histogram is properly structured. E.g. that the number of pos/neg buckets // histogram is properly structured. E.g. that the number of pos/neg buckets
// used corresponds to the number conveyed by the pos/neg span structures. // used corresponds to the number conveyed by the pos/neg span structures.
func (a *histoAppender) AppendHistogram(t int64, h histogram.SparseHistogram) { // callers must call Appendable() first and act accordingly!
func (a *HistoAppender) AppendHistogram(t int64, h histogram.SparseHistogram) {
var tDelta, cntDelta, zcntDelta int64 var tDelta, cntDelta, zcntDelta int64
num := binary.BigEndian.Uint16(a.b.bytes()) num := binary.BigEndian.Uint16(a.b.bytes())
@ -265,19 +289,6 @@ func (a *histoAppender) AppendHistogram(t int64, h histogram.SparseHistogram) {
putVarint(a.b, a.buf64, buck) putVarint(a.b, a.buf64, buck)
} }
case 1: case 1:
// TODO if zerobucket thresh or schema is different, we should create a new chunk
posInterjections, _ := compareSpans(a.posSpans, h.PositiveSpans)
//if !ok {
// TODO Ganesh this is when we know buckets have dis-appeared and we should create a new chunk instead
//}
negInterjections, _ := compareSpans(a.negSpans, h.NegativeSpans)
//if !ok {
// TODO Ganesh this is when we know buckets have dis-appeared and we should create a new chunk instead
//}
if len(posInterjections) > 0 || len(negInterjections) > 0 {
// new buckets have appeared. we need to recode all prior histograms within the chunk before we can process this one.
a.recode(posInterjections, negInterjections, h.PositiveSpans, h.NegativeSpans)
}
tDelta = t - a.t tDelta = t - a.t
cntDelta = int64(h.Count) - int64(a.cnt) cntDelta = int64(h.Count) - int64(a.cnt)
@ -300,19 +311,6 @@ func (a *histoAppender) AppendHistogram(t int64, h histogram.SparseHistogram) {
a.negbucketsDelta[i] = delta a.negbucketsDelta[i] = delta
} }
default: default:
// TODO if zerobucket thresh or schema is different, we should create a new chunk
posInterjections, _ := compareSpans(a.posSpans, h.PositiveSpans)
//if !ok {
// TODO Ganesh this is when we know buckets have dis-appeared and we should create a new chunk instead
//}
negInterjections, _ := compareSpans(a.negSpans, h.NegativeSpans)
//if !ok {
// TODO Ganesh this is when we know buckets have dis-appeared and we should create a new chunk instead
//}
if len(posInterjections) > 0 || len(negInterjections) > 0 {
// new buckets have appeared. we need to recode all prior histograms within the chunk before we can process this one.
a.recode(posInterjections, negInterjections, h.PositiveSpans, h.NegativeSpans)
}
tDelta = t - a.t tDelta = t - a.t
cntDelta = int64(h.Count) - int64(a.cnt) cntDelta = int64(h.Count) - int64(a.cnt)
zcntDelta = int64(h.ZeroCount) - int64(a.zcnt) zcntDelta = int64(h.ZeroCount) - int64(a.zcnt)
@ -357,13 +355,14 @@ func (a *histoAppender) AppendHistogram(t int64, h histogram.SparseHistogram) {
} }
// recode converts the current chunk to accommodate an expansion of the set of // Recode converts the current chunk to accommodate an expansion of the set of
// (positive and/or negative) buckets used, according to the provided interjections, resulting in // (positive and/or negative) buckets used, according to the provided interjections, resulting in
// the honoring of the provided new posSpans and negSpans // the honoring of the provided new posSpans and negSpans
// note: the decode-recode can probably be done more efficiently, but that's for a future optimization // note: the decode-recode can probably be done more efficiently, but that's for a future optimization
func (a *histoAppender) recode(posInterjections, negInterjections []interjection, posSpans, negSpans []histogram.Span) { func (a *HistoAppender) Recode(posInterjections, negInterjections []interjection, posSpans, negSpans []histogram.Span) (Chunk, Appender) {
it := newHistoIterator(a.b.bytes()) it := newHistoIterator(a.b.bytes())
app, err := NewHistoChunk().Appender() hc := NewHistoChunk()
app, err := hc.Appender()
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -381,20 +380,13 @@ func (a *histoAppender) recode(posInterjections, negInterjections []interjection
if len(negInterjections) > 0 { if len(negInterjections) > 0 {
hOld.NegativeBuckets = interject(hOld.NegativeBuckets, negbuckets, negInterjections) hOld.NegativeBuckets = interject(hOld.NegativeBuckets, negbuckets, negInterjections)
} }
// there is no risk of infinite recursion here as all histograms get appended with the same schema (number of buckets)
app.AppendHistogram(tOld, hOld) app.AppendHistogram(tOld, hOld)
} }
// adopt the new appender into ourselves return hc, app
// we skip porting some fields like schema, t, cnt and zcnt, sum because they didn't change between our old chunk and the recoded one
app2 := app.(*histoAppender)
a.b = app2.b
a.posSpans, a.negSpans = posSpans, negSpans
a.posbuckets, a.negbuckets = app2.posbuckets, app2.negbuckets
a.posbucketsDelta, a.negbucketsDelta = app2.posbucketsDelta, app2.negbucketsDelta
} }
func (a *histoAppender) writeSumDelta(v float64) { func (a *HistoAppender) writeSumDelta(v float64) {
vDelta := math.Float64bits(v) ^ math.Float64bits(a.sum) vDelta := math.Float64bits(v) ^ math.Float64bits(a.sum)
if vDelta == 0 { if vDelta == 0 {
@ -434,11 +426,11 @@ type histoIterator struct {
numTotal uint16 numTotal uint16
numRead uint16 numRead uint16
// Meta // Metadata:
schema int32 schema int32
posSpans, negSpans []histogram.Span posSpans, negSpans []histogram.Span
// for the fields that are tracked as dod's // For the fields that are tracked as dod's.
t int64 t int64
cnt, zcnt uint64 cnt, zcnt uint64
tDelta, cntDelta, zcntDelta int64 tDelta, cntDelta, zcntDelta int64
@ -446,7 +438,7 @@ type histoIterator struct {
posbuckets, negbuckets []int64 posbuckets, negbuckets []int64
posbucketsDelta, negbucketsDelta []int64 posbucketsDelta, negbucketsDelta []int64
// for the fields that are gorilla xor coded // The sum is Gorilla xor encoded.
sum float64 sum float64
leading uint8 leading uint8
trailing uint8 trailing uint8
@ -640,6 +632,7 @@ func (it *histoIterator) Next() bool {
it.negbuckets[i] = it.negbuckets[i] + delta it.negbuckets[i] = it.negbuckets[i] + delta
} }
it.numRead++
return true return true
} }
@ -692,6 +685,7 @@ func (it *histoIterator) Next() bool {
it.negbuckets[i] = it.negbuckets[i] + it.negbucketsDelta[i] it.negbuckets[i] = it.negbuckets[i] + it.negbucketsDelta[i]
} }
it.numRead++
return true return true
} }
@ -760,6 +754,5 @@ func (it *histoIterator) readSum() bool {
it.sum = math.Float64frombits(vbits) it.sum = math.Float64frombits(vbits)
} }
it.numRead++
return true return true
} }

View file

@ -191,7 +191,7 @@ func TestHistoChunkBucketChanges(t *testing.T) {
// TODO is this okay? // TODO is this okay?
// the appender can rewrite its own bytes slice but it is not able to update the HistoChunk, so our histochunk is outdated until we update it manually // the appender can rewrite its own bytes slice but it is not able to update the HistoChunk, so our histochunk is outdated until we update it manually
c.b = *(app.(*histoAppender).b) c.b = *(app.(*HistoAppender).b)
require.Equal(t, 2, c.NumSamples()) require.Equal(t, 2, c.NumSamples())
// because the 2nd histogram has expanded buckets, we should expect all histograms (in particular the first) // because the 2nd histogram has expanded buckets, we should expect all histograms (in particular the first)

View file

@ -150,7 +150,7 @@ type xorAppender struct {
} }
func (a *xorAppender) AppendHistogram(t int64, h histogram.SparseHistogram) { func (a *xorAppender) AppendHistogram(t int64, h histogram.SparseHistogram) {
panic("cannot call xorAppender.AppendHistogram().") //panic("cannot call xorAppender.AppendHistogram().")
} }
func (a *xorAppender) Append(t int64, v float64) { func (a *xorAppender) Append(t int64, v float64) {

View file

@ -2624,6 +2624,24 @@ func (s *memSeries) appendHistogram(t int64, sh histogram.SparseHistogram, appen
return sampleInOrder, chunkCreated return sampleInOrder, chunkCreated
} }
if !chunkCreated {
// Head controls the execution of recoding, so that we own the proper chunk reference afterwards
app, _ := s.app.(*chunkenc.HistoAppender)
posInterjections, negInterjections, ok := app.Appendable(sh)
// we have 3 cases here
// !ok -> we need to cut a new chunk
// ok but we have interjections -> existing chunk needs recoding before we can append our histogram
// ok and no interjections -> chunk is ready to support our histogram
if !ok {
c = s.cutNewHeadChunk(t, chunkenc.EncSHS, chunkDiskMapper)
chunkCreated = true
} else if len(posInterjections) > 0 || len(negInterjections) > 0 {
// new buckets have appeared. we need to recode all prior histograms within the chunk before we can process this one.
s.headChunk.chunk, s.app = app.Recode(posInterjections, negInterjections, sh.PositiveSpans, sh.NegativeSpans)
}
}
s.app.AppendHistogram(t, sh) s.app.AppendHistogram(t, sh)
c.maxTime = t c.maxTime = t