Query histograms from TSDB and unit test for append+query (#9022)

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
This commit is contained in:
Ganesh Vernekar 2021-06-30 20:18:13 +05:30 committed by GitHub
parent 4d27816ea5
commit f4d3af73f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 142 additions and 8 deletions

View file

@ -300,6 +300,10 @@ func (ssi *storageSeriesIterator) AtHistogram() (int64, histogram.SparseHistogra
return 0, histogram.SparseHistogram{}
}
func (ssi *storageSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (ssi *storageSeriesIterator) Next() bool {
ssi.curr++
return ssi.curr < len(ssi.points)

View file

@ -202,6 +202,10 @@ func (it *sampleRingIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
}
func (it *sampleRingIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (r *sampleRing) at(i int) (int64, float64) {
j := (r.f + i) % len(r.buf)
s := r.buf[j]

View file

@ -18,6 +18,7 @@ import (
"testing"
"github.com/prometheus/prometheus/pkg/histogram"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/stretchr/testify/require"
)
@ -198,6 +199,9 @@ func (m *mockSeriesIterator) At() (int64, float64) { return m.at() }
func (m *mockSeriesIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
}
func (m *mockSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (m *mockSeriesIterator) Next() bool { return m.next() }
func (m *mockSeriesIterator) Err() error { return m.err() }
@ -219,6 +223,10 @@ func (it *fakeSeriesIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return it.idx * it.step, histogram.SparseHistogram{} // value doesn't matter
}
func (it *fakeSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (it *fakeSeriesIterator) Next() bool {
it.idx++
return it.idx < it.nsamples

View file

@ -489,6 +489,13 @@ func (c *chainSampleIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return c.curr.AtHistogram()
}
func (c *chainSampleIterator) ChunkEncoding() chunkenc.Encoding {
if c.curr == nil {
panic("chainSampleIterator.ChunkEncoding() called before first .Next() or after .Next() returned false.")
}
return c.curr.ChunkEncoding()
}
func (c *chainSampleIterator) Next() bool {
if c.h == nil {
c.h = samplesIteratorHeap{}

View file

@ -373,6 +373,10 @@ func (c *concreteSeriesIterator) AtHistogram() (int64, histogram.SparseHistogram
return 0, histogram.SparseHistogram{}
}
func (c *concreteSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
// Next implements storage.SeriesIterator.
func (c *concreteSeriesIterator) Next() bool {
c.cur++

View file

@ -95,6 +95,10 @@ func (it *listSeriesIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
}
func (it *listSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (it *listSeriesIterator) Next() bool {
it.idx++
return it.idx < it.samples.Len()

View file

@ -103,6 +103,8 @@ type Iterator interface {
// Err returns the current error. It should be used only after iterator is
// exhausted, that is `Next` or `Seek` returns false.
Err() error
// ChunkEncoding returns the encoding of the chunk that it is iterating.
ChunkEncoding() Encoding
}
// NewNopIterator returns a new chunk iterator that does not hold any data.
@ -119,6 +121,7 @@ func (nopIterator) AtHistogram() (int64, histogram.SparseHistogram) {
}
func (nopIterator) Next() bool { return false }
func (nopIterator) Err() error { return nil }
func (nopIterator) ChunkEncoding() Encoding { return EncNone }
// Pool is used to create and reuse chunk references to avoid allocations.
type Pool interface {

View file

@ -403,6 +403,10 @@ func (it *histoIterator) At() (int64, float64) {
panic("cannot call histoIterator.At().")
}
func (it *histoIterator) ChunkEncoding() Encoding {
return EncSHS
}
func (it *histoIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return it.t, histogram.SparseHistogram{
Count: it.cnt,

View file

@ -281,6 +281,10 @@ func (it *xorIterator) AtHistogram() (int64, histogram.SparseHistogram) {
panic("cannot call xorIterator.AtHistogram().")
}
func (it *xorIterator) ChunkEncoding() Encoding {
return EncXOR
}
func (it *xorIterator) Err() error {
return it.err
}

View file

@ -2678,7 +2678,7 @@ func computeChunkEndTime(start, cur, max int64) int64 {
// iterator returns a chunk iterator.
// It is unsafe to call this concurrently with s.append(...) without holding the series lock.
func (s *memSeries) iterator(id int, isoState *isolationState, chunkDiskMapper *chunks.ChunkDiskMapper, it chunkenc.Iterator) chunkenc.Iterator {
func (s *memSeries) iterator(id int, isoState *isolationState, chunkDiskMapper *chunks.ChunkDiskMapper, it chunkenc.Iterator) (ttt chunkenc.Iterator) {
c, garbageCollect, err := s.chunk(id, chunkDiskMapper)
// TODO(fabxc): Work around! An error will be returns when a querier have retrieved a pointer to a
// series's chunk, which got then garbage collected before it got
@ -2837,7 +2837,7 @@ func (it *memSafeIterator) Next() bool {
return false
}
it.i++
if it.total-it.i > 4 {
if it.Iterator.ChunkEncoding() == chunkenc.EncSHS || it.total-it.i > 4 {
return it.Iterator.Next()
}
return true

View file

@ -31,6 +31,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/pkg/exemplar"
"github.com/prometheus/prometheus/pkg/histogram"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
@ -2177,3 +2178,67 @@ func TestMemSafeIteratorSeekIntoBuffer(t *testing.T) {
ok = it.Seek(7)
require.False(t, ok)
}
func TestAppendHistogram(t *testing.T) {
l := labels.Labels{{Name: "a", Value: "b"}}
for _, numHistograms := range []int{1, 10, 150, 200, 250, 300} {
t.Run(fmt.Sprintf("%d", numHistograms), func(t *testing.T) {
head, _ := newTestHead(t, 1000, false)
t.Cleanup(func() {
require.NoError(t, head.Close())
})
require.NoError(t, head.Init(0))
app := head.Appender(context.Background())
type timedHist struct {
t int64
h histogram.SparseHistogram
}
expHists := make([]timedHist, 0, numHistograms)
for i, h := range generateHistograms(numHistograms) {
_, err := app.AppendHistogram(0, l, int64(i), h)
require.NoError(t, err)
expHists = append(expHists, timedHist{int64(i), h})
}
require.NoError(t, app.Commit())
q, err := NewBlockQuerier(head, head.MinTime(), head.MaxTime())
require.NoError(t, err)
ss := q.Select(false, nil, labels.MustNewMatcher(labels.MatchEqual, "a", "b"))
require.True(t, ss.Next())
s := ss.At()
require.False(t, ss.Next())
it := s.Iterator()
actHists := make([]timedHist, 0, len(expHists))
for it.Next() {
t, h := it.AtHistogram()
actHists = append(actHists, timedHist{t, h.Copy()})
}
require.Equal(t, expHists, actHists)
})
}
}
func generateHistograms(n int) (r []histogram.SparseHistogram) {
for i := 0; i < n; i++ {
r = append(r, histogram.SparseHistogram{
Count: 5 + uint64(i*4),
ZeroCount: 2 + uint64(i),
Sum: 18.4 * float64(i+1),
Schema: 1,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{int64(i + 1), 1, -1, 0},
NegativeBuckets: []int64{},
})
}
return r
}

View file

@ -633,7 +633,10 @@ func (p *populateWithDelSeriesIterator) Seek(t int64) bool {
func (p *populateWithDelSeriesIterator) At() (int64, float64) { return p.curr.At() }
func (p *populateWithDelSeriesIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
return p.curr.AtHistogram()
}
func (p *populateWithDelSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return p.curr.ChunkEncoding()
}
func (p *populateWithDelSeriesIterator) Err() error {
@ -823,7 +826,12 @@ func (it *DeletedIterator) At() (int64, float64) {
}
func (it *DeletedIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
t, h := it.Iter.AtHistogram()
return t, h
}
func (it *DeletedIterator) ChunkEncoding() chunkenc.Encoding {
return it.Iter.ChunkEncoding()
}
func (it *DeletedIterator) Seek(t int64) bool {
@ -835,7 +843,12 @@ func (it *DeletedIterator) Seek(t int64) bool {
}
// Now double check if the entry falls into a deleted interval.
ts, _ := it.At()
var ts int64
if it.ChunkEncoding() == chunkenc.EncSHS {
ts, _ = it.AtHistogram()
} else {
ts, _ = it.At()
}
for _, itv := range it.Intervals {
if ts < itv.Mint {
return true
@ -857,7 +870,12 @@ func (it *DeletedIterator) Seek(t int64) bool {
func (it *DeletedIterator) Next() bool {
Outer:
for it.Iter.Next() {
ts, _ := it.Iter.At()
var ts int64
if it.ChunkEncoding() == chunkenc.EncSHS {
ts, _ = it.AtHistogram()
} else {
ts, _ = it.At()
}
for _, tr := range it.Intervals {
if tr.InBounds(ts) {

View file

@ -164,6 +164,10 @@ func (it *sampleRingIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
}
func (it *sampleRingIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (r *sampleRing) at(i int) (int64, float64) {
j := (r.f + i) % len(r.buf)
s := r.buf[j]

View file

@ -19,6 +19,7 @@ import (
"testing"
"github.com/prometheus/prometheus/pkg/histogram"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/stretchr/testify/require"
)
@ -155,6 +156,10 @@ func (it *listSeriesIterator) AtHistogram() (int64, histogram.SparseHistogram) {
return 0, histogram.SparseHistogram{}
}
func (it *listSeriesIterator) ChunkEncoding() chunkenc.Encoding {
return chunkenc.EncXOR
}
func (it *listSeriesIterator) Next() bool {
it.idx++
return it.idx < len(it.list)