Add padding between fixed-sized index sections

2025-03-05 20:59:13 -08:00 · 2017-04-28 14:28:25 +02:00 · 2017-04-28 14:28:25 +02:00 · 2032a11d98
parent 34ba92eeeb
commit 2032a11d98
3 changed files with 33 additions and 14 deletions
--- a/Documentation/format/chunks.md
+++ b/Documentation/format/chunks.md
@ -5,11 +5,11 @@ The following describes the format of a single chunks file, which is created in
 Chunks in the files are referenced from the index by the in-file offset in the 4 LSBs and the segment sequence number in the higher 4 MSBs.

 ```
-┌────────────────────────────────────────┬─────────────────────┐
+┌────────────────────────────────────────┬──────────────────────┐
 │ magic(0x85BD40DD) <4 byte>             │ version(1) <1 byte>  │
-├────────────────────────────────────────┴─────────────────────┤
-│ ┌──────────────┬───────────────────┬──────┬────────────────┐ │
-│ │ len <varint> │ encoding <1 byte> │ data │ CRC32 <4 byte> │ │
-│ └──────────────┴───────────────────┴──────┴────────────────┘ │
-└──────────────────────────────────────────────────────────────┘
+├────────────────────────────────────────┴──────────────────────┤
+│ ┌───────────────┬───────────────────┬──────┬────────────────┐ │
+│ │ len <uvarint> │ encoding <1 byte> │ data │ CRC32 <4 byte> │ │
+│ └───────────────┴───────────────────┴──────┴────────────────┘ │
+└───────────────────────────────────────────────────────────────┘
 ```
--- a/Documentation/format/index.md
+++ b/Documentation/format/index.md
@ -33,6 +33,10 @@ It is terminated by a table of contents which serves as an entry point into the
 └──────────────────────────────────────────────────┘
 ```

+When the index is written, an arbitrary number of padding bytes may be added between the main sections outlined above. When sequentially scanning through the file, any zero bytes after a section's specified length must be skipped.
+
+Most of the sections described below start with a `len` field. It always specifies the number of bytes that follow the `len` field itself, up to but not including the trailing CRC32 checksum. The checksum is always calculated over those `len` bytes.
+

 ### Symbol Table

@ -41,8 +45,6 @@ The symbol table holds a sorted list of deduplicated strings that occurred in la
 The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes.
 Strings are referenced by pointing to the beginning of their length field. The strings are sorted in lexicographically ascending order.

-The full list of strings is validated with a CRC32 checksum.
-
 ```
 ┌────────────────────┬─────────────────────┐
 │ len <4b>           │ #symbols <4b>       │
@ -81,11 +83,10 @@ The file offset to the beginning of a series serves as the series' ID in all sub

 Every series entry first holds its number of labels, followed by tuples of symbol table references that represent label name and value. The label pairs are lexicographically sorted.  
 After the labels, the number of indexed chunks is encoded, followed by a sequence of metadata entries containing the chunk's minimum and maximum timestamps and a reference to its position in the chunk file. Holding the time range data in the index allows dropping chunks irrelevant to queried time ranges without accessing them directly.  
-The series entry is prefixed with its length and terminated by a CRC32 checksum over its contents.

 ```
 ┌─────────────────────────────────────────────────────────┐
-│ len <varint>                                            │
+│ len <uvarint>                                           │
 ├─────────────────────────────────────────────────────────┤
 │ ┌──────────────────┬──────────────────────────────────┐ │
 │ │                  │ ┌──────────────────────────┐     │ │
--- a/index.go
+++ b/index.go
@ -176,7 +176,7 @@ func (w *indexWriter) write(bufs ...[]byte) error {
 			return err
 		}
 		// For now the index file must not grow beyond 4GiB. Some of the fixed-sized
-		// offset references in v1 are only 4 byte large.
+		// offset references in v1 are only 4 bytes large.
 		// Once we move to compressed/varint representations in those areas, this limitation
 		// can be lifted.
 		if w.pos > math.MaxUint32 {
@ -186,6 +186,15 @@ func (w *indexWriter) write(bufs ...[]byte) error {
 	return nil
 }

+// addPadding writes zero bytes until w.pos is a multiple of n; it writes nothing (and returns nil) when w.pos is already aligned.
+func (w *indexWriter) addPadding(n int) error {
+	p := int(w.pos) % n // bytes past the last n-aligned offset; 0 means already aligned
+	if p == 0 {
+		return nil
+	}
+	return errors.Wrap(w.write(make([]byte, n-p)), "add padding")
+}
+
 // ensureStage handles transitions between write stages and ensures that IndexWriter
 // methods are called in an order valid for the implementation.
 func (w *indexWriter) ensureStage(s indexWriterStage) error {
@ -353,6 +362,11 @@ func (w *indexWriter) WriteLabelIndex(names []string, values []string) error {
 	}
 	sort.Sort(valt)

+	// Align beginning to 4 bytes for more efficient index list scans.
+	if err := w.addPadding(4); err != nil {
+		return err
+	}
+
 	w.labelIndexes = append(w.labelIndexes, hashEntry{
 		keys:   names,
 		offset: w.pos,
@ -418,6 +432,11 @@ func (w *indexWriter) WritePostings(name, value string, it Postings) error {
 		return errors.Wrap(err, "ensure stage")
 	}

+	// Align beginning to 4 bytes for more efficient postings list scans.
+	if err := w.addPadding(4); err != nil {
+		return err
+	}
+
 	w.postings = append(w.postings, hashEntry{
 		keys:   []string{name, value},
 		offset: w.pos,
@ -816,7 +835,6 @@ type serializedStringTuples struct {
 }

 func (t *serializedStringTuples) Len() int {
-	// TODO(fabxc): Cache this?
 	return len(t.b) / (4 * t.l)
 }