chunks: don't mutate in bstream iterator

This replaces mutation of underlying bytes in the iterated slice
with a shift counter, which is used when reading the head byte.
This avoids having to copy the entire slice for every new iterator.
This commit is contained in:
Fabian Reinartz 2016-11-30 19:08:28 +01:00
parent f392c01c12
commit c9ee572709
2 changed files with 55 additions and 64 deletions

View file

@ -1,18 +1,14 @@
package chunks
import (
"bytes"
"encoding/binary"
"io"
)
import "io"
// bstream is a stream of bits
type bstream struct {
// the data stream
stream []byte
// how many bits are valid in current byte
count uint8
count uint8 // how many bits are valid in current byte
shift uint8 // pos of next bit in current byte
}
func newBReader(b []byte) *bstream {
@ -41,7 +37,6 @@ const (
)
func (b *bstream) writeBit(bit bit) {
if b.count == 0 {
b.stream = append(b.stream, 0)
b.count = 8
@ -57,7 +52,6 @@ func (b *bstream) writeBit(bit bit) {
}
func (b *bstream) writeByte(byt byte) {
if b.count == 0 {
b.stream = append(b.stream, 0)
b.count = 8
@ -89,14 +83,22 @@ func (b *bstream) writeBits(u uint64, nbits int) {
}
}
func (b *bstream) readBit() (bit, error) {
// headByte returns the first byte of the stream shifted left by the
// number of bits already consumed from it (b.shift), so the next unread
// bit sits in the most significant position. This replaces the old
// approach of destructively shifting b.stream[0] in place.
func (b *bstream) headByte() byte {
return b.stream[0] << b.shift
}
// advance moves the reader to the next byte of the stream and resets
// the bit shift, since no bits of the new head byte have been consumed yet.
// Note: callers must still check len(b.stream) afterwards; advancing past
// the last byte leaves an empty slice.
func (b *bstream) advance() {
b.stream = b.stream[1:]
b.shift = 0
}
func (b *bstream) readBit() (bit, error) {
if len(b.stream) == 0 {
return false, io.EOF
}
if b.count == 0 {
b.stream = b.stream[1:]
b.advance()
// did we just run out of stuff to read?
if len(b.stream) == 0 {
return false, io.EOF
@ -104,20 +106,19 @@ func (b *bstream) readBit() (bit, error) {
b.count = 8
}
d := b.headByte() & 0x80
b.count--
d := b.stream[0] & 0x80
b.stream[0] <<= 1
b.shift++
return d != 0, nil
}
func (b *bstream) readByte() (byte, error) {
if len(b.stream) == 0 {
return 0, io.EOF
}
if b.count == 0 {
b.stream = b.stream[1:]
b.advance()
if len(b.stream) == 0 {
return 0, io.EOF
@ -128,18 +129,19 @@ func (b *bstream) readByte() (byte, error) {
if b.count == 8 {
b.count = 0
return b.stream[0], nil
return b.headByte(), nil
}
byt := b.stream[0]
b.stream = b.stream[1:]
byt := b.headByte()
b.advance()
if len(b.stream) == 0 {
return 0, io.EOF
}
// We just advanced the stream and can assume the shift to be 0.
byt |= b.stream[0] >> b.count
b.stream[0] <<= (8 - b.count)
b.shift = 8 - b.count
return byt, nil
}
@ -162,9 +164,9 @@ func (b *bstream) readBits(nbits int) (uint64, error) {
}
if nbits > int(b.count) {
u = (u << uint(b.count)) | uint64(b.stream[0]>>(8-b.count))
u = (u << uint(b.count)) | uint64(b.headByte()>>(8-b.count))
nbits -= int(b.count)
b.stream = b.stream[1:]
b.advance()
if len(b.stream) == 0 {
return 0, io.EOF
@ -172,37 +174,8 @@ func (b *bstream) readBits(nbits int) (uint64, error) {
b.count = 8
}
u = (u << uint(nbits)) | uint64(b.stream[0]>>(8-uint(nbits)))
b.stream[0] <<= uint(nbits)
u = (u << uint(nbits)) | uint64(b.headByte()>>(8-uint(nbits)))
b.shift = b.shift + uint8(nbits)
b.count -= uint8(nbits)
return u, nil
}
// MarshalBinary implements the encoding.BinaryMarshaler interface
func (b *bstream) MarshalBinary() ([]byte, error) {
buf := new(bytes.Buffer)
err := binary.Write(buf, binary.BigEndian, b.count)
if err != nil {
return nil, err
}
err = binary.Write(buf, binary.BigEndian, b.stream)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface
func (b *bstream) UnmarshalBinary(bIn []byte) error {
buf := bytes.NewReader(bIn)
err := binary.Read(buf, binary.BigEndian, &b.count)
if err != nil {
return err
}
b.stream = make([]byte, buf.Len())
err = binary.Read(buf, binary.BigEndian, &b.stream)
if err != nil {
return err
}
return nil
}

View file

@ -20,18 +20,20 @@ func TestChunk(t *testing.T) {
EncXOR: func(sz int) Chunk { return NewXORChunk(sz) },
} {
t.Run(fmt.Sprintf("%s", enc), func(t *testing.T) {
for range make([]struct{}, 10000) {
c := nc(rand.Intn(512))
testChunk(t, c)
for range make([]struct{}, 3000) {
c := nc(rand.Intn(1024))
if err := testChunk(c); err != nil {
t.Fatal(err)
}
}
})
}
}
func testChunk(t *testing.T, c Chunk) {
func testChunk(c Chunk) error {
app, err := c.Appender()
if err != nil {
t.Fatal(err)
return err
}
var exp []pair
@ -39,18 +41,31 @@ func testChunk(t *testing.T, c Chunk) {
ts = int64(1234123324)
v = 1243535.123
)
for {
i := 0
for i < 3 {
ts += int64(rand.Intn(10000) + 1)
v = rand.Float64()
// v += float64(100)
err := app.Append(ts, v)
// Start with a new appender every 10th sample. This emulates starting
// appending to a partially filled chunk.
if i%10 == 0 {
app, err = c.Appender()
if err != nil {
return err
}
}
err = app.Append(ts, v)
if err != nil {
if err == ErrChunkFull {
break
}
t.Fatal(err)
return err
}
exp = append(exp, pair{t: ts, v: v})
i++
// fmt.Println("appended", len(c.Bytes()), c.Bytes())
}
it := c.Iterator()
@ -60,11 +75,12 @@ func testChunk(t *testing.T, c Chunk) {
res = append(res, pair{t: ts, v: v})
}
if it.Err() != nil {
t.Fatal(it.Err())
return it.Err()
}
if !reflect.DeepEqual(exp, res) {
t.Fatalf("unexpected result\n\ngot: %v\n\nexp: %v", res, exp)
return fmt.Errorf("unexpected result\n\ngot: %v\n\nexp: %v", res, exp)
}
return nil
}
func benchmarkIterator(b *testing.B, newChunk func(int) Chunk) {
@ -75,7 +91,8 @@ func benchmarkIterator(b *testing.B, newChunk func(int) Chunk) {
var exp []pair
for i := 0; i < b.N; i++ {
t += int64(rand.Intn(10000) + 1)
v = rand.Float64()
// v = rand.Float64()
v += float64(100)
exp = append(exp, pair{t: t, v: v})
}
@ -140,7 +157,8 @@ func benchmarkAppender(b *testing.B, newChunk func(int) Chunk) {
var exp []pair
for i := 0; i < b.N; i++ {
t += int64(rand.Intn(10000) + 1)
v = rand.Float64()
// v = rand.Float64()
v += float64(100)
exp = append(exp, pair{t: t, v: v})
}