Compress the series chunk details in index.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
2025-03-05 20:59:13 -08:00 · 2017-09-08 00:35:28 +05:30 · 2017-09-08 00:35:28 +05:30 · afaf12fe45
parent 0db4c227b7
commit afaf12fe45
3 changed files with 66 additions and 30 deletions
--- a/Documentation/format/index.md
+++ b/Documentation/format/index.md
@@ -82,28 +82,37 @@ The file offset to the beginning of a series serves as the series' ID in all sub
 Every series entry first holds its number of labels, followed by tuples of symbol table references that contain the label name and value. The label pairs are lexicographically sorted.  
 After the labels, the number of indexed chunks is encoded, followed by a sequence of metadata entries containing the chunks minimum and maximum timestamp and a reference to its position in the chunk file. Holding the time range data in the index allows dropping chunks irrelevant to queried time ranges without accessing them directly.

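+To save space, chunk metadata is delta-encoded. The mint of the first chunk is stored as-is; for every following chunk, mint is stored as a delta to the maxt of the previous chunk. The maxt of every chunk is stored as a delta to that chunk's own mint. Similarly, the ref of the first chunk is stored in full and each subsequent ref is stored as a delta to the previous ref.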
+
 ```
-┌─────────────────────────────────────────────────────────┐
+┌─────────────────────────────────────────────────────────────────────────┐
 │ len <uvarint>                                                           │
-├─────────────────────────────────────────────────────────┤
-│ ┌──────────────────┬──────────────────────────────────┐ │
-│ │                  │ ┌──────────────────────────┐     │ │
+├─────────────────────────────────────────────────────────────────────────┤
+│ ┌──────────────────┬──────────────────────────────────────────────────┐ │
+│ │                  │ ┌──────────────────────────────────────────┐     │ │
 │ │                  │ │ ref(l_i.name) <uvarint>                  │     │ │
-│ │     #labels      │ ├──────────────────────────┤ ... │ │
+│ │     #labels      │ ├──────────────────────────────────────────┤ ... │ │
 │ │    <uvarint>     │ │ ref(l_i.value) <uvarint>                 │     │ │
-│ │                  │ └──────────────────────────┘     │ │
-│ ├──────────────────┼──────────────────────────────────┤ │
-│ │                  │ ┌──────────────────────────┐     │ │
-│ │                  │ │ c_i.mint <varint>        │     │ │
-│ │                  │ ├──────────────────────────┤     │ │
-│ │      #chunks     │ │ c_i.maxt <varint>        │     │ │
-│ │     <uvarint>    │ ├──────────────────────────┤ ... │ │
-│ │                  │ │ ref(c_i.data) <uvarint>  │     │ │
-│ │                  │ └──────────────────────────┘     │ │
-│ └──────────────────┴──────────────────────────────────┘ │
-├─────────────────────────────────────────────────────────┤
+│ │                  │ └──────────────────────────────────────────┘     │ │
+│ ├──────────────────┼──────────────────────────────────────────────────┤ │
+│ │                  │ ┌──────────────────────────────────────────┐     │ │
+│ │                  │ │ c_0.mint <varint>                        │     │ │
+│ │                  │ ├──────────────────────────────────────────┤     │ │
+│ │                  │ │ c_0.maxt - c_0.mint <uvarint>            │     │ │
+│ │                  │ ├──────────────────────────────────────────┤     │ │
+│ │                  │ │ ref(c_0.data) <uvarint>                  │     │ │
+│ │      #chunks     │ └──────────────────────────────────────────┘     │ │
+│ │     <uvarint>    │ ┌──────────────────────────────────────────┐     │ │
+│ │                  │ │ c_i.mint - c_i-1.maxt <uvarint>          │     │ │
+│ │                  │ ├──────────────────────────────────────────┤     │ │
+│ │                  │ │ c_i.maxt - c_i.mint <uvarint>            │     │ │
+│ │                  │ ├──────────────────────────────────────────┤ ... │ │
+│ │                  │ │ ref(c_i.data) - ref(c_i-1.data) <varint> │     │ │
+│ │                  │ └──────────────────────────────────────────┘     │ │
+│ └──────────────────┴──────────────────────────────────────────────────┘ │
+├─────────────────────────────────────────────────────────────────────────┤
 │ CRC32 <4b>                                                              │
-└─────────────────────────────────────────────────────────┘
+└─────────────────────────────────────────────────────────────────────────┘
 ```


--- a/db_test.go
+++ b/db_test.go
@@ -433,8 +433,6 @@ func TestDB_e2e(t *testing.T) {
 			mint := rand.Int63n(300)
 			maxt := mint + rand.Int63n(timeInterval*int64(numDatapoints))

-			t.Logf("run query %s, [%d, %d]", qry.ms, mint, maxt)
-
 			expected := map[string][]sample{}

 			// Build the mockSeriesSet.
--- a/index.go
+++ b/index.go
@@ -292,10 +292,22 @@ func (w *indexWriter) AddSeries(ref uint64, lset labels.Labels, chunks ...ChunkM

 	w.buf2.putUvarint(len(chunks))

-	for _, c := range chunks {
+	if len(chunks) > 0 {
+		c := chunks[0]
 		w.buf2.putVarint64(c.MinTime)
-		w.buf2.putVarint64(c.MaxTime)
+		w.buf2.putUvarint64(uint64(c.MaxTime - c.MinTime))
 		w.buf2.putUvarint64(c.Ref)
+		t0 := c.MaxTime
+		ref0 := int64(c.Ref)
+
+		for _, c := range chunks[1:] {
+			w.buf2.putUvarint64(uint64(c.MinTime - t0))
+			w.buf2.putUvarint64(uint64(c.MaxTime - c.MinTime))
+			t0 = c.MaxTime
+
+			w.buf2.putVarint64(int64(c.Ref) - ref0)
+			ref0 = int64(c.Ref)
+		}
 	}

 	w.buf1.reset()
@@ -775,17 +787,34 @@ func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta)
 	// Read the chunks meta data.
 	k = int(d2.uvarint())

-	for i := 0; i < k; i++ {
-		mint := d2.varint64()
-		maxt := d2.varint64()
-		off := d2.uvarint64()
+	if k == 0 {
+		return nil
+	}
+
+	t0 := d2.varint64()
+	maxt := int64(d2.uvarint64()) + t0
+	ref0 := int64(d2.uvarint64())
+
+	*chks = append(*chks, ChunkMeta{
+		Ref:     uint64(ref0),
+		MinTime: t0,
+		MaxTime: maxt,
+	})
+	t0 = maxt
+
+	for i := 1; i < k; i++ {
+		mint := int64(d2.uvarint64()) + t0
+		maxt := int64(d2.uvarint64()) + mint
+
+		ref0 += d2.varint64()
+		t0 = maxt

 		if d2.err() != nil {
 			return errors.Wrapf(d2.err(), "read meta for chunk %d", i)
 		}

 		*chks = append(*chks, ChunkMeta{
-			Ref:     off,
+			Ref:     uint64(ref0),
 			MinTime: mint,
 			MaxTime: maxt,
 		})