From b8159563419c0e87b61f4d3f6482bf898d14dfa1 Mon Sep 17 00:00:00 2001
From: Dan Milstein <milstein.dan@gmail.com>
Date: Thu, 25 Aug 2016 11:16:11 -0400
Subject: [PATCH 1/4] Catch errors when unmarshalling delta/doubleDelta encoded
 chunks

This is (hopefully) a fix for #1653

Specifically, this makes it so that if the length for the stored
delta/doubleDelta is somehow corrupted to be too small, the attempt to
unmarshal will return an error.

The current (broken) behavior is to return a malformed chunk, which can
then lead to a panic when there is an attempt to read header values.

The referenced issue proposed creating chunks with a minimum length -- I
instead opted to just error on the attempt to unmarshal, since I'm not
clear on how it could be safe to proceed when the length is
incorrect/unknown.

The issue also talked about possibly "quarantining series", but I don't
know the surrounding code well enough to understand how to make that
happen.
---
 storage/local/delta.go       |   6 +++
 storage/local/delta_test.go  | 101 +++++++++++++++++++++++++++++++++++
 storage/local/doubledelta.go |   7 +++
 3 files changed, 114 insertions(+)
 create mode 100644 storage/local/delta_test.go

diff --git a/storage/local/delta.go b/storage/local/delta.go
index 0ab2f0d9da..2fb7d3d764 100644
--- a/storage/local/delta.go
+++ b/storage/local/delta.go
@@ -242,6 +242,9 @@ func (c *deltaEncodedChunk) unmarshal(r io.Reader) error {
 	if int(l) > cap(*c) {
 		return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
 	}
+	if int(l) < deltaHeaderBytes {
+		return fmt.Errorf("chunk length less than header size: %d < %d", l, deltaHeaderBytes)
+	}
 	*c = (*c)[:l]
 	return nil
 }
@@ -254,6 +257,9 @@ func (c *deltaEncodedChunk) unmarshalFromBuf(buf []byte) error {
 	if int(l) > cap(*c) {
 		return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
 	}
+	if int(l) < deltaHeaderBytes {
+		return fmt.Errorf("chunk length less than header size: %d < %d", l, deltaHeaderBytes)
+	}
 	*c = (*c)[:l]
 	return nil
 }
diff --git a/storage/local/delta_test.go b/storage/local/delta_test.go
new file mode 100644
index 0000000000..8ad9985dff
--- /dev/null
+++ b/storage/local/delta_test.go
@@ -0,0 +1,101 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Note: this file has tests for code in both delta.go and doubledelta.go --
+// it may make sense to split those out later, but given that the tests are
+// near-identical and share a helper, this feels simpler for now.
+
+package local
+
+import (
+	"bytes"
+	"encoding/binary"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/common/model"
+)
+
+func verifyUnmarshallingError(t *testing.T, err error, typ string, badLen int) {
+
+	if err == nil {
+		t.Errorf("Failed to obtain an error when unmarshalling %s with corrupt length of %d", typ, badLen)
+		return
+	}
+
+	expectedStr := "header size"
+	if !strings.Contains(err.Error(), expectedStr) {
+		t.Errorf(
+			"'%s' not present in error when unmarshalling %s with corrupt length %d: '%s'",
+			expectedStr,
+			typ,
+			badLen,
+			err.Error())
+	}
+}
+
+func TestUnmarshalingCorruptedDeltaReturnsAnError(t *testing.T) {
+	dec := newDeltaEncodedChunk(d1, d4, false, chunkLen)
+
+	cs, err := dec.add(model.SamplePair{
+		Timestamp: model.Now(),
+		Value:     model.SampleValue(100),
+	})
+	if err != nil {
+		t.Fatalf("Couldn't add sample to empty deltaEncodedChunk: %s", err)
+	}
+
+	buf := make([]byte, chunkLen)
+
+	cs[0].marshalToBuf(buf)
+
+	// Corrupt the length to be every possible too-small value
+	for i := 0; i < deltaHeaderBytes; i++ {
+		binary.LittleEndian.PutUint16(buf[deltaHeaderBufLenOffset:], uint16(i))
+
+		err = cs[0].unmarshalFromBuf(buf)
+		verifyUnmarshallingError(t, err, "deltaEncodedChunk (from buf)", i)
+
+		err = cs[0].unmarshal(bytes.NewBuffer(buf))
+		verifyUnmarshallingError(t, err, "deltaEncodedChunk (from Reader)", i)
+	}
+}
+
+func TestUnmarshalingCorruptedDoubleDeltaReturnsAnError(t *testing.T) {
+	ddec := newDoubleDeltaEncodedChunk(d1, d4, false, chunkLen)
+
+	cs, err := ddec.add(model.SamplePair{
+		Timestamp: model.Now(),
+		Value:     model.SampleValue(100),
+	})
+	if err != nil {
+		t.Fatalf("Couldn't add sample to empty doubleDeltaEncodedChunk: %s", err)
+	}
+
+	buf := make([]byte, chunkLen)
+
+	cs[0].marshalToBuf(buf)
+
+	// Corrupt the length to be every possible too-small value
+	for i := 0; i < doubleDeltaHeaderBytes; i++ {
+
+		binary.LittleEndian.PutUint16(buf[doubleDeltaHeaderBufLenOffset:], uint16(i))
+
+		err = cs[0].unmarshalFromBuf(buf)
+		verifyUnmarshallingError(t, err, "doubleDeltaEncodedChunk (from buf)", i)
+
+		err = cs[0].unmarshal(bytes.NewBuffer(buf))
+		verifyUnmarshallingError(t, err, "doubleDeltaEncodedChunk (from Reader)", i)
+	}
+
+}
diff --git a/storage/local/doubledelta.go b/storage/local/doubledelta.go
index 7e7eed9e31..2922600dcd 100644
--- a/storage/local/doubledelta.go
+++ b/storage/local/doubledelta.go
@@ -250,6 +250,10 @@ func (c *doubleDeltaEncodedChunk) unmarshal(r io.Reader) error {
 	if int(l) > cap(*c) {
 		return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
 	}
+	if int(l) < doubleDeltaHeaderBytes {
+		return fmt.Errorf("chunk length less than header size: %d < %d", l, doubleDeltaHeaderBytes)
+	}
+
 	*c = (*c)[:l]
 	return nil
 }
@@ -262,6 +266,9 @@ func (c *doubleDeltaEncodedChunk) unmarshalFromBuf(buf []byte) error {
 	if int(l) > cap(*c) {
 		return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
 	}
+	if int(l) < doubleDeltaHeaderBytes {
+		return fmt.Errorf("chunk length less than header size: %d < %d", l, doubleDeltaHeaderBytes)
+	}
 	*c = (*c)[:l]
 	return nil
 }

From f50f656a667ccaacf1e2af1b532abc08a361a3af Mon Sep 17 00:00:00 2001
From: Dan Milstein <milstein.dan@gmail.com>
Date: Mon, 29 Aug 2016 11:22:12 -0400
Subject: [PATCH 2/4] Fix double-delta unmarshaling to respect actual min
 header size

Turns out its valid to have an overall chunk which is smaller than the
full doubleDeltaHeaderBytes size -- if it has a single sample, it
doesn't fill the whole header.  Updated unmarshalling check to respect
this.
---
 storage/local/delta_test.go  |  2 +-
 storage/local/doubledelta.go | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/storage/local/delta_test.go b/storage/local/delta_test.go
index 8ad9985dff..b5d2a2cf34 100644
--- a/storage/local/delta_test.go
+++ b/storage/local/delta_test.go
@@ -87,7 +87,7 @@ func TestUnmarshalingCorruptedDoubleDeltaReturnsAnError(t *testing.T) {
 	cs[0].marshalToBuf(buf)
 
 	// Corrupt the length to be every possible too-small value
-	for i := 0; i < doubleDeltaHeaderBytes; i++ {
+	for i := 0; i < doubleDeltaHeaderMinBytes; i++ {
 
 		binary.LittleEndian.PutUint16(buf[doubleDeltaHeaderBufLenOffset:], uint16(i))
 
diff --git a/storage/local/doubledelta.go b/storage/local/doubledelta.go
index 2922600dcd..41a1471d6c 100644
--- a/storage/local/doubledelta.go
+++ b/storage/local/doubledelta.go
@@ -33,7 +33,8 @@ import (
 // - base time delta:          8 bytes
 // - base value delta:         8 bytes
 const (
-	doubleDeltaHeaderBytes = 37
+	doubleDeltaHeaderBytes    = 37
+	doubleDeltaHeaderMinBytes = 21 // header isn't full for chunk w/ one sample
 
 	doubleDeltaHeaderBufLenOffset         = 0
 	doubleDeltaHeaderTimeBytesOffset      = 2
@@ -250,8 +251,8 @@ func (c *doubleDeltaEncodedChunk) unmarshal(r io.Reader) error {
 	if int(l) > cap(*c) {
 		return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
 	}
-	if int(l) < doubleDeltaHeaderBytes {
-		return fmt.Errorf("chunk length less than header size: %d < %d", l, doubleDeltaHeaderBytes)
+	if int(l) < doubleDeltaHeaderMinBytes {
+		return fmt.Errorf("chunk length less than header size: %d < %d", l, doubleDeltaHeaderMinBytes)
 	}
 
 	*c = (*c)[:l]
@@ -266,8 +267,8 @@ func (c *doubleDeltaEncodedChunk) unmarshalFromBuf(buf []byte) error {
 	if int(l) > cap(*c) {
 		return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
 	}
-	if int(l) < doubleDeltaHeaderBytes {
-		return fmt.Errorf("chunk length less than header size: %d < %d", l, doubleDeltaHeaderBytes)
+	if int(l) < doubleDeltaHeaderMinBytes {
+		return fmt.Errorf("chunk length less than header size: %d < %d", l, doubleDeltaHeaderMinBytes)
 	}
 	*c = (*c)[:l]
 	return nil

From ac8788aca61fb655447ac056b228bfd3e3fc7b23 Mon Sep 17 00:00:00 2001
From: Dan Milstein <milstein.dan@gmail.com>
Date: Mon, 29 Aug 2016 11:58:31 -0400
Subject: [PATCH 3/4] Convert to table-driven test and inline helper func

---
 storage/local/delta_test.go | 131 +++++++++++++++++++-----------------
 1 file changed, 68 insertions(+), 63 deletions(-)

diff --git a/storage/local/delta_test.go b/storage/local/delta_test.go
index b5d2a2cf34..bd395f5b05 100644
--- a/storage/local/delta_test.go
+++ b/storage/local/delta_test.go
@@ -26,76 +26,81 @@ import (
 	"github.com/prometheus/common/model"
 )
 
-func verifyUnmarshallingError(t *testing.T, err error, typ string, badLen int) {
-
-	if err == nil {
-		t.Errorf("Failed to obtain an error when unmarshalling %s with corrupt length of %d", typ, badLen)
-		return
-	}
-
-	expectedStr := "header size"
-	if !strings.Contains(err.Error(), expectedStr) {
-		t.Errorf(
-			"'%s' not present in error when unmarshalling %s with corrupt length %d: '%s'",
-			expectedStr,
-			typ,
-			badLen,
-			err.Error())
-	}
-}
-
 func TestUnmarshalingCorruptedDeltaReturnsAnError(t *testing.T) {
-	dec := newDeltaEncodedChunk(d1, d4, false, chunkLen)
 
-	cs, err := dec.add(model.SamplePair{
-		Timestamp: model.Now(),
-		Value:     model.SampleValue(100),
-	})
-	if err != nil {
-		t.Fatalf("Couldn't add sample to empty deltaEncodedChunk: %s", err)
+	var verifyUnmarshallingError = func(
+		err error,
+		chunkTypeName string,
+		unmarshalMethod string,
+		badLen int) {
+
+		if err == nil {
+			t.Errorf("Failed to obtain an error when unmarshalling %s (from %s) with corrupt length of %d", chunkTypeName, unmarshalMethod, badLen)
+			return
+		}
+
+		expectedStr := "header size"
+		if !strings.Contains(err.Error(), expectedStr) {
+			t.Errorf(
+				"'%s' not present in error when unmarshalling %s (from %s) with corrupt length %d: '%s'",
+				expectedStr,
+				chunkTypeName,
+				unmarshalMethod,
+				badLen,
+				err.Error())
+		}
 	}
 
-	buf := make([]byte, chunkLen)
+	cases := []struct {
+		chunkTypeName    string
+		chunkConstructor func(deltaBytes, deltaBytes, bool, int) chunk
+		minHeaderLen     int
+		chunkLenPos      int
+	}{
+		{
+			"deltaEncodedChunk",
+			// Hrm. Compiler didn't like a function which returned
+			// *deltaEncodedChunk for the chunkConstructor field. This is ugly,
+			// but I dunno how to make the compiler happy here.
+			func(a, b deltaBytes, c bool, d int) chunk {
+				return newDeltaEncodedChunk(a, b, c, d)
+			},
+			deltaHeaderBytes,
+			deltaHeaderBufLenOffset,
+		},
+		{
+			"doubleDeltaEncodedChunk",
+			func(a, b deltaBytes, c bool, d int) chunk {
+				return newDoubleDeltaEncodedChunk(a, b, c, d)
+			},
+			doubleDeltaHeaderMinBytes,
+			doubleDeltaHeaderBufLenOffset,
+		},
+	}
+	for _, c := range cases {
+		chunk := c.chunkConstructor(d1, d4, false, chunkLen)
 
-	cs[0].marshalToBuf(buf)
+		cs, err := chunk.add(model.SamplePair{
+			Timestamp: model.Now(),
+			Value:     model.SampleValue(100),
+		})
+		if err != nil {
+			t.Fatalf("Couldn't add sample to empty %s: %s", c.chunkTypeName, err)
+		}
 
-	// Corrupt the length to be every possible too-small value
-	for i := 0; i < deltaHeaderBytes; i++ {
-		binary.LittleEndian.PutUint16(buf[deltaHeaderBufLenOffset:], uint16(i))
+		buf := make([]byte, chunkLen)
 
-		err = cs[0].unmarshalFromBuf(buf)
-		verifyUnmarshallingError(t, err, "deltaEncodedChunk (from buf)", i)
+		cs[0].marshalToBuf(buf)
 
-		err = cs[0].unmarshal(bytes.NewBuffer(buf))
-		verifyUnmarshallingError(t, err, "deltaEncodedChunk (from Reader)", i)
+		// Corrupt the length to be every possible too-small value
+		for i := 0; i < c.minHeaderLen; i++ {
+			binary.LittleEndian.PutUint16(buf[c.chunkLenPos:], uint16(i))
+
+			err = cs[0].unmarshalFromBuf(buf)
+			verifyUnmarshallingError(err, c.chunkTypeName, "buf", i)
+
+			err = cs[0].unmarshal(bytes.NewBuffer(buf))
+			verifyUnmarshallingError(err, c.chunkTypeName, "Reader", i)
+		}
 	}
 }
-
-func TestUnmarshalingCorruptedDoubleDeltaReturnsAnError(t *testing.T) {
-	ddec := newDoubleDeltaEncodedChunk(d1, d4, false, chunkLen)
-
-	cs, err := ddec.add(model.SamplePair{
-		Timestamp: model.Now(),
-		Value:     model.SampleValue(100),
-	})
-	if err != nil {
-		t.Fatalf("Couldn't add sample to empty doubleDeltaEncodedChunk: %s", err)
-	}
-
-	buf := make([]byte, chunkLen)
-
-	cs[0].marshalToBuf(buf)
-
-	// Corrupt the length to be every possible too-small value
-	for i := 0; i < doubleDeltaHeaderMinBytes; i++ {
-
-		binary.LittleEndian.PutUint16(buf[doubleDeltaHeaderBufLenOffset:], uint16(i))
-
-		err = cs[0].unmarshalFromBuf(buf)
-		verifyUnmarshallingError(t, err, "doubleDeltaEncodedChunk (from buf)", i)
-
-		err = cs[0].unmarshal(bytes.NewBuffer(buf))
-		verifyUnmarshallingError(t, err, "doubleDeltaEncodedChunk (from Reader)", i)
-	}
-
-}

From ec064c96f682fb2335e20d56ecb8a3f7da2419f5 Mon Sep 17 00:00:00 2001
From: Dan Milstein <milstein.dan@gmail.com>
Date: Tue, 30 Aug 2016 07:54:40 -0400
Subject: [PATCH 4/4] Add field names to table-driven test fixtures

---
 storage/local/delta_test.go | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/storage/local/delta_test.go b/storage/local/delta_test.go
index bd395f5b05..7ad083d62a 100644
--- a/storage/local/delta_test.go
+++ b/storage/local/delta_test.go
@@ -58,23 +58,20 @@ func TestUnmarshalingCorruptedDeltaReturnsAnError(t *testing.T) {
 		chunkLenPos      int
 	}{
 		{
-			"deltaEncodedChunk",
-			// Hrm. Compiler didn't like a function which returned
-			// *deltaEncodedChunk for the chunkConstructor field. This is ugly,
-			// but I dunno how to make the compiler happy here.
-			func(a, b deltaBytes, c bool, d int) chunk {
+			chunkTypeName: "deltaEncodedChunk",
+			chunkConstructor: func(a, b deltaBytes, c bool, d int) chunk {
 				return newDeltaEncodedChunk(a, b, c, d)
 			},
-			deltaHeaderBytes,
-			deltaHeaderBufLenOffset,
+			minHeaderLen: deltaHeaderBytes,
+			chunkLenPos:  deltaHeaderBufLenOffset,
 		},
 		{
-			"doubleDeltaEncodedChunk",
-			func(a, b deltaBytes, c bool, d int) chunk {
+			chunkTypeName: "doubleDeltaEncodedChunk",
+			chunkConstructor: func(a, b deltaBytes, c bool, d int) chunk {
 				return newDoubleDeltaEncodedChunk(a, b, c, d)
 			},
-			doubleDeltaHeaderMinBytes,
-			doubleDeltaHeaderBufLenOffset,
+			minHeaderLen: doubleDeltaHeaderMinBytes,
+			chunkLenPos:  doubleDeltaHeaderBufLenOffset,
 		},
 	}
 	for _, c := range cases {