From 2032a11d981212f6f2b54cab608b0396237ec52b Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Fri, 28 Apr 2017 14:28:25 +0200 Subject: [PATCH] Add padding between fixed-sized index sections --- Documentation/format/chunks.md | 14 +++++++------- Documentation/format/index.md | 11 ++++++----- index.go | 22 ++++++++++++++++++++-- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/Documentation/format/chunks.md b/Documentation/format/chunks.md index c8acf6f28..51ac1242c 100644 --- a/Documentation/format/chunks.md +++ b/Documentation/format/chunks.md @@ -5,11 +5,11 @@ The following describes the format of a single chunks file, which is created in Chunks in the files are referenced from the index by the in-file offset in the 4 LSB and the segment sequence number in the bigher 4 MSBs. ``` -┌────────────────────────────────────────┬─────────────────────┐ -│ magic(0x85BD40DD) <4 byte> │ version(1) <1 byte> │ -├────────────────────────────────────────┴─────────────────────┤ -│ ┌──────────────┬───────────────────┬──────┬────────────────┐ │ -│ │ len │ encoding <1 byte> │ data │ CRC32 <4 byte> │ │ -│ └──────────────┴───────────────────┴──────┴────────────────┘ │ -└──────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────┬──────────────────────┐ +│ magic(0x85BD40DD) <4 byte> │ version(1) <1 byte> │ +├────────────────────────────────────────┴──────────────────────┤ +│ ┌───────────────┬───────────────────┬──────┬────────────────┐ │ +│ │ len │ encoding <1 byte> │ data │ CRC32 <4 byte> │ │ +│ └───────────────┴───────────────────┴──────┴────────────────┘ │ +└───────────────────────────────────────────────────────────────┘ ``` diff --git a/Documentation/format/index.md b/Documentation/format/index.md index 2dc1be6ba..848187a0b 100644 --- a/Documentation/format/index.md +++ b/Documentation/format/index.md @@ -5,7 +5,7 @@ It is terminated by a table of contents which serves as an entry point into the ``` 
┌────────────────────────────┬─────────────────────┐ -│ magic(0xBAAAD700) <4b> │ version(1) <1 byte> │ +│ magic(0xBAAAD700) <4b> │ version(1) <1 byte> │ ├────────────────────────────┴─────────────────────┤ │ ┌──────────────────────────────────────────────┐ │ │ │ Symbol Table │ │ @@ -33,6 +33,10 @@ It is terminated by a table of contents which serves as an entry point into the └──────────────────────────────────────────────────┘ ``` +When the index is written, an arbitrary number of padding bytes may be added between the main sections outlined above. When sequentially scanning through the file, any zero bytes after a section's specified length must be skipped. + +Most of the sections described below start with a `len` field. It always specifies the number of bytes that follow it, up to the trailing CRC32 checksum. The checksum is always calculated over those `len` bytes. + ### Symbol Table @@ -41,8 +45,6 @@ The symbol table holds a sorted list of deduplicated strings that occurred in la The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. Strings are referenced by pointing to the beginning of their length field. The strings are sorted in lexicographically ascending order. -The full list of strings is validated with a CRC32 checksum. - ``` ┌────────────────────┬─────────────────────┐ │ len <4b> │ #symbols <4b> │ @@ -81,11 +83,10 @@ The file offset to the beginning of a series serves as the series' ID in all sub Every series entry first holds its number of labels, followed by tuples of symbol table references that resemble label name and value. The label pairs are lexicographically sorted. After the labels, the number of indexed chunks is encoded, followed by a sequence of metadata entries containing the chunks minimum and maximum timestamp and a reference to its position in the chunk file. 
Holding the time range data in the index allows dropping chunks irrelevant to queried time ranges without accessing them directly. -The series entry is prefixed with its length and terminated by a CRC32 checksum over its contents. ``` ┌─────────────────────────────────────────────────────────┐ -│ len │ +│ len │ ├─────────────────────────────────────────────────────────┤ │ ┌──────────────────┬──────────────────────────────────┐ │ │ │ │ ┌──────────────────────────┐ │ │ diff --git a/index.go b/index.go index 3f1682539..c0e96381f 100644 --- a/index.go +++ b/index.go @@ -176,7 +176,7 @@ func (w *indexWriter) write(bufs ...[]byte) error { return err } // For now the index file must not grow beyond 4GiB. Some of the fixed-sized - // offset references in v1 are only 4 byte large. + // offset references in v1 are only 4 bytes large. // Once we move to compressed/varint representations in those areas, this limitation // can be lifted. if w.pos > math.MaxUint32 { @@ -186,6 +186,15 @@ func (w *indexWriter) write(bufs ...[]byte) error { return nil } +// addPadding adds zero byte padding until the file size is a multiple of n; it writes nothing if the size is already aligned. +func (w *indexWriter) addPadding(n int) error { + p := int(w.pos) % n + if p == 0 { + return nil + } + return errors.Wrap(w.write(make([]byte, n-p)), "add padding") +} + // ensureStage handles transitions between write stages and ensures that IndexWriter // methods are called in an order valid for the implementation. func (w *indexWriter) ensureStage(s indexWriterStage) error { @@ -353,6 +362,11 @@ func (w *indexWriter) WriteLabelIndex(names []string, values []string) error { } sort.Sort(valt) + // Align beginning to 4 bytes for more efficient index list scans. 
+ if err := w.addPadding(4); err != nil { + return err + } + w.labelIndexes = append(w.labelIndexes, hashEntry{ keys: names, offset: w.pos, @@ -418,6 +432,11 @@ func (w *indexWriter) WritePostings(name, value string, it Postings) error { return errors.Wrap(err, "ensure stage") } + // Align beginning to 4 bytes for more efficient postings list scans. + if err := w.addPadding(4); err != nil { + return err + } + w.postings = append(w.postings, hashEntry{ keys: []string{name, value}, offset: w.pos, @@ -816,7 +835,6 @@ type serializedStringTuples struct { } func (t *serializedStringTuples) Len() int { - // TODO(fabxc): Cache this? return len(t.b) / (4 * t.l) }