Compress the series chunk details in index.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
Goutham Veeramachaneni 2017-09-08 00:35:28 +05:30
parent 0db4c227b7
commit afaf12fe45
GPG key ID: F1C217E8E9023CAD
3 changed files with 66 additions and 30 deletions


@@ -82,28 +82,37 @@ The file offset to the beginning of a series serves as the series' ID in all sub
Every series entry first holds its number of labels, followed by tuples of symbol table references that contain the label name and value. The label pairs are lexicographically sorted.
After the labels, the number of indexed chunks is encoded, followed by a sequence of metadata entries containing the chunk's minimum and maximum timestamp and a reference to its position in the chunk file. Holding the time range data in the index allows dropping chunks irrelevant to queried time ranges without accessing them directly.
The mint of the first chunk is stored as-is and its maxt is stored as a delta to that mint; for every following chunk, mint is encoded as a delta to the previous chunk's maxt and maxt as a delta to its own mint. Similarly, the reference of the first chunk is stored as-is and each following reference is stored as a delta to the previous one.
```
┌─────────────────────────────────────────────────────────┐
│ len <uvarint>
├─────────────────────────────────────────────────────────┤
│ ┌──────────────────┬──────────────────────────────────┐ │
│ │ │ ┌──────────────────────────┐ │ │
│ │ │ │ ref(l_i.name) <uvarint> │ │ │
│ │ #labels │ ├──────────────────────────┤ ... │ │
│ │ <uvarint> │ │ ref(l_i.value) <uvarint> │ │ │
│ │ │ └──────────────────────────┘ │ │
│ ├──────────────────┼──────────────────────────────────┤ │
│ │ │ ┌──────────────────────────┐ │ │
│ │ │ │ c_i.mint <varint> │ │ │
│ │ │ ├──────────────────────────┤ │ │
│ │ #chunks │ │ c_i.maxt <varint> │ │ │
│ │ <uvarint> │ ├──────────────────────────┤ ... │ │
│ │ │ │ ref(c_i.data) <uvarint> │ │ │
│ │ │ └──────────────────────────┘ │ │
│ └──────────────────┴──────────────────────────────────┘ │
├─────────────────────────────────────────────────────────┤
│ CRC32 <4b>
└─────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────┐
│ len <uvarint>
├─────────────────────────────────────────────────────────────────────────┤
│ ┌──────────────────┬──────────────────────────────────────────────────┐ │
│ │ │ ┌──────────────────────────────────────────┐ │ │
│ │ │ │ ref(l_i.name) <uvarint> │ │ │
│ │ #labels │ ├──────────────────────────────────────────┤ ... │ │
│ │ <uvarint> │ │ ref(l_i.value) <uvarint> │ │ │
│ │ │ └──────────────────────────────────────────┘ │ │
│ ├──────────────────┼──────────────────────────────────────────────────┤ │
│ │ │ ┌──────────────────────────────────────────┐ │ │
│ │ │ │ c_0.mint <varint> │ │ │
│ │ │ ├──────────────────────────────────────────┤ │ │
│ │ │ │ c_0.maxt - c_0.mint <uvarint> │ │ │
│ │ │ ├──────────────────────────────────────────┤ │ │
│ │ │ │ ref(c_0.data) <uvarint> │ │ │
│ │ #chunks │ └──────────────────────────────────────────┘ │ │
│ │ <uvarint> │ ┌──────────────────────────────────────────┐ │ │
│ │ │ │ c_i.mint - c_i-1.maxt <uvarint> │ │ │
│ │ │ ├──────────────────────────────────────────┤ │ │
│ │ │ │ c_i.maxt - c_i.mint <uvarint> │ │ │
│ │ │ ├──────────────────────────────────────────┤ ... │ │
│ │ │ │ ref(c_i.data) - ref(c_i-1.data) <varint> │ │ │
│ │ │ └──────────────────────────────────────────┘ │ │
│ └──────────────────┴──────────────────────────────────────────────────┘ │
├─────────────────────────────────────────────────────────────────────────┤
│ CRC32 <4b>
└─────────────────────────────────────────────────────────────────────────┘
```
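To make the new layout concrete, here is a rough, standalone Go sketch of the chunk-meta encoding shown in the new diagram. It is not the tsdb code itself (the real writer uses the internal encbuf type and also emits the labels and the trailing CRC32); the chunkMeta type, the encodeChunkMetas function and the sample values are made up for illustration.

```go
// Hypothetical sketch of the per-series chunk-meta delta encoding,
// using encoding/binary varints instead of tsdb's internal encbuf.
package main

import (
	"encoding/binary"
	"fmt"
)

type chunkMeta struct {
	Ref     uint64 // position of the chunk in the chunk file
	MinTime int64  // mint
	MaxTime int64  // maxt
}

func encodeChunkMetas(chks []chunkMeta) []byte {
	var buf []byte
	tmp := make([]byte, binary.MaxVarintLen64)

	putUvarint := func(v uint64) { buf = append(buf, tmp[:binary.PutUvarint(tmp, v)]...) }
	putVarint := func(v int64) { buf = append(buf, tmp[:binary.PutVarint(tmp, v)]...) }

	putUvarint(uint64(len(chks))) // #chunks
	if len(chks) == 0 {
		return buf
	}

	c0 := chks[0]
	putVarint(c0.MinTime)                       // first mint stored as-is
	putUvarint(uint64(c0.MaxTime - c0.MinTime)) // maxt as delta to its own mint
	putUvarint(c0.Ref)                          // first ref stored as-is

	t0, ref0 := c0.MaxTime, int64(c0.Ref)
	for _, c := range chks[1:] {
		putUvarint(uint64(c.MinTime - t0))        // mint as delta to previous maxt
		putUvarint(uint64(c.MaxTime - c.MinTime)) // maxt as delta to its own mint
		putVarint(int64(c.Ref) - ref0)            // ref as delta to previous ref
		t0, ref0 = c.MaxTime, int64(c.Ref)
	}
	return buf
}

func main() {
	metas := []chunkMeta{
		{Ref: 8, MinTime: 0, MaxTime: 999},
		{Ref: 157, MinTime: 1000, MaxTime: 1999},
		{Ref: 301, MinTime: 2000, MaxTime: 2999},
	}
	fmt.Printf("encoded %d chunk metas into %d bytes\n", len(metas), len(encodeChunkMetas(metas)))
}
```

Because a series' chunks are laid out in increasing time order and written sequentially to the chunk file, the mint/maxt and ref deltas are small, so the uvarint/varint encoding typically needs only a byte or two per field instead of a full absolute timestamp or file offset.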


@@ -433,8 +433,6 @@ func TestDB_e2e(t *testing.T) {
 			mint := rand.Int63n(300)
 			maxt := mint + rand.Int63n(timeInterval*int64(numDatapoints))
-			t.Logf("run query %s, [%d, %d]", qry.ms, mint, maxt)
 			expected := map[string][]sample{}
 			// Build the mockSeriesSet.


@@ -292,10 +292,22 @@ func (w *indexWriter) AddSeries(ref uint64, lset labels.Labels, chunks ...ChunkM
 	w.buf2.putUvarint(len(chunks))
-	for _, c := range chunks {
+	if len(chunks) > 0 {
+		c := chunks[0]
 		w.buf2.putVarint64(c.MinTime)
-		w.buf2.putVarint64(c.MaxTime)
+		w.buf2.putUvarint64(uint64(c.MaxTime - c.MinTime))
 		w.buf2.putUvarint64(c.Ref)
+		t0 := c.MaxTime
+		ref0 := int64(c.Ref)
+		for _, c := range chunks[1:] {
+			w.buf2.putUvarint64(uint64(c.MinTime - t0))
+			w.buf2.putUvarint64(uint64(c.MaxTime - c.MinTime))
+			t0 = c.MaxTime
+			w.buf2.putVarint64(int64(c.Ref) - ref0)
+			ref0 = int64(c.Ref)
+		}
 	}
 	w.buf1.reset()
@@ -775,17 +787,34 @@ func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta)
 	// Read the chunks meta data.
 	k = int(d2.uvarint())
-	for i := 0; i < k; i++ {
-		mint := d2.varint64()
-		maxt := d2.varint64()
-		off := d2.uvarint64()
+	if k == 0 {
+		return nil
+	}
+	t0 := d2.varint64()
+	maxt := int64(d2.uvarint64()) + t0
+	ref0 := int64(d2.uvarint64())
+	*chks = append(*chks, ChunkMeta{
+		Ref:     uint64(ref0),
+		MinTime: t0,
+		MaxTime: maxt,
+	})
+	t0 = maxt
+	for i := 1; i < k; i++ {
+		mint := int64(d2.uvarint64()) + t0
+		maxt := int64(d2.uvarint64()) + mint
+		ref0 += d2.varint64()
+		t0 = maxt
 		if d2.err() != nil {
 			return errors.Wrapf(d2.err(), "read meta for chunk %d", i)
 		}
 		*chks = append(*chks, ChunkMeta{
-			Ref:     off,
+			Ref:     uint64(ref0),
 			MinTime: mint,
 			MaxTime: maxt,
 		})
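For completeness, here is a matching decoder for the hypothetical sketch shown after the format diagram above, mirroring what the reader now does with its internal decbuf. It reuses the chunkMeta type and pairs with encodeChunkMetas from that sketch, additionally needs the bytes import, and drops the per-chunk error handling the real reader keeps.

```go
// Decodes the output of encodeChunkMetas from the sketch above.
// Error handling is omitted for brevity.
func decodeChunkMetas(b []byte) []chunkMeta {
	r := bytes.NewReader(b)
	uv := func() uint64 { v, _ := binary.ReadUvarint(r); return v }
	sv := func() int64 { v, _ := binary.ReadVarint(r); return v }

	k := int(uv()) // #chunks
	if k == 0 {
		return nil
	}
	chks := make([]chunkMeta, 0, k)

	// First chunk: absolute mint and ref, maxt relative to mint.
	mint := sv()
	maxt := int64(uv()) + mint
	ref := int64(uv())
	chks = append(chks, chunkMeta{Ref: uint64(ref), MinTime: mint, MaxTime: maxt})

	// Remaining chunks: mint relative to the previous maxt,
	// maxt relative to its own mint, ref relative to the previous ref.
	for i := 1; i < k; i++ {
		mint = int64(uv()) + maxt
		maxt = int64(uv()) + mint
		ref += sv()
		chks = append(chks, chunkMeta{Ref: uint64(ref), MinTime: mint, MaxTime: maxt})
	}
	return chks
}
```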