2015-01-21 11:07:45 -08:00
|
|
|
// Copyright 2014 The Prometheus Authors
|
2014-09-19 09:18:44 -07:00
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2014-09-16 06:47:24 -07:00
|
|
|
package local
|
2014-06-06 02:55:53 -07:00
|
|
|
|
|
|
|
import (
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
"bufio"
|
|
|
|
"errors"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2014-09-23 10:21:10 -07:00
|
|
|
"reflect"
|
2015-04-14 01:43:09 -07:00
|
|
|
"sync"
|
2014-06-06 02:55:53 -07:00
|
|
|
"testing"
|
2015-07-13 12:12:27 -07:00
|
|
|
"time"
|
2014-06-06 02:55:53 -07:00
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
"github.com/prometheus/common/model"
|
2014-06-06 02:55:53 -07:00
|
|
|
|
2014-09-23 10:21:10 -07:00
|
|
|
"github.com/prometheus/prometheus/storage/local/codable"
|
2014-09-14 06:33:56 -07:00
|
|
|
"github.com/prometheus/prometheus/storage/local/index"
|
2015-05-29 04:30:30 -07:00
|
|
|
"github.com/prometheus/prometheus/util/testutil"
|
2014-06-06 02:55:53 -07:00
|
|
|
)
|
|
|
|
|
2014-10-28 11:01:41 -07:00
|
|
|
var (
|
2015-08-20 08:18:46 -07:00
|
|
|
m1 = model.Metric{"label": "value1"}
|
|
|
|
m2 = model.Metric{"label": "value2"}
|
|
|
|
m3 = model.Metric{"label": "value3"}
|
|
|
|
m4 = model.Metric{"label": "value4"}
|
|
|
|
m5 = model.Metric{"label": "value5"}
|
2014-10-28 11:01:41 -07:00
|
|
|
)
|
|
|
|
|
2015-05-28 11:58:38 -07:00
|
|
|
func newTestPersistence(t *testing.T, encoding chunkEncoding) (*persistence, testutil.Closer) {
|
2015-06-15 03:49:28 -07:00
|
|
|
DefaultChunkEncoding = encoding
|
2015-05-28 11:58:38 -07:00
|
|
|
dir := testutil.NewTemporaryDirectory("test_persistence", t)
|
2016-01-11 07:42:10 -08:00
|
|
|
p, err := newPersistence(dir.Path(), false, false, func() bool { return false }, 0.1)
|
2014-06-06 02:55:53 -07:00
|
|
|
if err != nil {
|
2014-08-13 08:13:28 -07:00
|
|
|
dir.Close()
|
2014-06-06 02:55:53 -07:00
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-05-18 10:26:28 -07:00
|
|
|
go p.run()
|
2015-05-28 11:58:38 -07:00
|
|
|
return p, testutil.NewCallbackCloser(func() {
|
2014-10-07 10:11:24 -07:00
|
|
|
p.close()
|
2014-08-21 13:06:11 -07:00
|
|
|
dir.Close()
|
|
|
|
})
|
2014-08-12 08:46:46 -07:00
|
|
|
}
|
2014-08-13 08:13:28 -07:00
|
|
|
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func buildTestChunks(t *testing.T, encoding chunkEncoding) map[model.Fingerprint][]chunk {
|
2015-08-20 08:18:46 -07:00
|
|
|
fps := model.Fingerprints{
|
2015-05-05 09:17:51 -07:00
|
|
|
m1.FastFingerprint(),
|
|
|
|
m2.FastFingerprint(),
|
|
|
|
m3.FastFingerprint(),
|
2014-08-13 08:13:28 -07:00
|
|
|
}
|
2015-08-20 08:18:46 -07:00
|
|
|
fpToChunks := map[model.Fingerprint][]chunk{}
|
2014-08-13 08:13:28 -07:00
|
|
|
|
|
|
|
for _, fp := range fps {
|
2014-10-15 06:53:05 -07:00
|
|
|
fpToChunks[fp] = make([]chunk, 0, 10)
|
2014-08-13 08:13:28 -07:00
|
|
|
for i := 0; i < 10; i++ {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
ch, err := newChunkForEncoding(encoding)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
chs, err := ch.add(model.SamplePair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Timestamp: model.Time(i),
|
|
|
|
Value: model.SampleValue(fp),
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
fpToChunks[fp] = append(fpToChunks[fp], chs[0])
|
2014-08-13 08:13:28 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return fpToChunks
|
|
|
|
}
|
|
|
|
|
|
|
|
func chunksEqual(c1, c2 chunk) bool {
|
2016-03-07 10:50:13 -08:00
|
|
|
it1 := c1.newIterator()
|
|
|
|
it2 := c2.newIterator()
|
|
|
|
for it1.scan() && it2.scan() {
|
|
|
|
if !(it1.value() == it2.value()) {
|
2014-08-13 08:13:28 -07:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
2016-03-07 10:50:13 -08:00
|
|
|
return it1.err() == nil && it2.err() == nil
|
2014-08-13 08:13:28 -07:00
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
func testPersistLoadDropChunks(t *testing.T, encoding chunkEncoding) {
|
|
|
|
p, closer := newTestPersistence(t, encoding)
|
2014-08-13 08:13:28 -07:00
|
|
|
defer closer.Close()
|
|
|
|
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
fpToChunks := buildTestChunks(t, encoding)
|
2014-08-13 08:13:28 -07:00
|
|
|
|
|
|
|
for fp, chunks := range fpToChunks {
|
2015-03-08 18:33:10 -07:00
|
|
|
firstTimeNotDropped, offset, numDropped, allDropped, err :=
|
2015-08-20 08:18:46 -07:00
|
|
|
p.dropAndPersistChunks(fp, model.Earliest, chunks)
|
2015-03-08 18:33:10 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-08-20 08:18:46 -07:00
|
|
|
if got, want := firstTimeNotDropped, model.Time(0); got != want {
|
2015-03-08 18:33:10 -07:00
|
|
|
t.Errorf("Want firstTimeNotDropped %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if got, want := offset, 0; got != want {
|
|
|
|
t.Errorf("Want offset %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if got, want := numDropped, 0; got != want {
|
|
|
|
t.Errorf("Want numDropped %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("All dropped.")
|
2014-08-13 08:13:28 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for fp, expectedChunks := range fpToChunks {
|
|
|
|
indexes := make([]int, 0, len(expectedChunks))
|
2014-08-19 09:14:44 -07:00
|
|
|
for i := range expectedChunks {
|
2014-08-13 08:13:28 -07:00
|
|
|
indexes = append(indexes, i)
|
|
|
|
}
|
2014-10-27 12:40:48 -07:00
|
|
|
actualChunks, err := p.loadChunks(fp, indexes, 0)
|
2014-08-13 08:13:28 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
for _, i := range indexes {
|
|
|
|
if !chunksEqual(expectedChunks[i], actualChunks[i]) {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Errorf("%d. Chunks not equal.", i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Load all chunk descs.
|
2015-07-06 16:10:14 -07:00
|
|
|
actualChunkDescs, err := p.loadChunkDescs(fp, 0)
|
2014-10-28 11:01:41 -07:00
|
|
|
if len(actualChunkDescs) != 10 {
|
|
|
|
t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 10)
|
|
|
|
}
|
|
|
|
for i, cd := range actualChunkDescs {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
lastTime, err := cd.lastTime()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if cd.firstTime() != model.Time(i) || lastTime != model.Time(i) {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Errorf(
|
|
|
|
"Want ts=%v, got firstTime=%v, lastTime=%v.",
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
i, cd.firstTime(), lastTime,
|
2014-10-28 11:01:41 -07:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
// Load chunk descs partially.
|
|
|
|
actualChunkDescs, err = p.loadChunkDescs(fp, 5)
|
|
|
|
if len(actualChunkDescs) != 5 {
|
|
|
|
t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 5)
|
|
|
|
}
|
|
|
|
for i, cd := range actualChunkDescs {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
lastTime, err := cd.lastTime()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if cd.firstTime() != model.Time(i) || lastTime != model.Time(i) {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Errorf(
|
|
|
|
"Want ts=%v, got firstTime=%v, lastTime=%v.",
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
i, cd.firstTime(), lastTime,
|
2014-10-28 11:01:41 -07:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Drop half of the chunks.
|
|
|
|
for fp, expectedChunks := range fpToChunks {
|
2015-03-08 18:33:10 -07:00
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 5, nil)
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
if offset != 5 {
|
|
|
|
t.Errorf("want offset 5, got %d", offset)
|
|
|
|
}
|
2014-11-10 09:22:08 -08:00
|
|
|
if firstTime != 5 {
|
|
|
|
t.Errorf("want first time 5, got %d", firstTime)
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
if numDropped != 5 {
|
|
|
|
t.Errorf("want 5 dropped chunks, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
indexes := make([]int, 5)
|
|
|
|
for i := range indexes {
|
|
|
|
indexes[i] = i
|
|
|
|
}
|
|
|
|
actualChunks, err := p.loadChunks(fp, indexes, 0)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
for _, i := range indexes {
|
|
|
|
if !chunksEqual(expectedChunks[i+5], actualChunks[i]) {
|
|
|
|
t.Errorf("%d. Chunks not equal.", i)
|
2014-08-13 08:13:28 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
// Drop all the chunks.
|
|
|
|
for fp := range fpToChunks {
|
2015-03-08 18:33:10 -07:00
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 100, nil)
|
2014-11-10 09:22:08 -08:00
|
|
|
if firstTime != 0 {
|
|
|
|
t.Errorf("want first time 0, got %d", firstTime)
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
if offset != 0 {
|
|
|
|
t.Errorf("want offset 0, got %d", offset)
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
if numDropped != 5 {
|
|
|
|
t.Errorf("want 5 dropped chunks, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if !allDropped {
|
|
|
|
t.Error("not all chunks dropped")
|
|
|
|
}
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
// Re-add first two of the chunks.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTimeNotDropped, offset, numDropped, allDropped, err :=
|
2015-08-20 08:18:46 -07:00
|
|
|
p.dropAndPersistChunks(fp, model.Earliest, chunks[:2])
|
2015-03-08 18:33:10 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-08-20 08:18:46 -07:00
|
|
|
if got, want := firstTimeNotDropped, model.Time(0); got != want {
|
2015-03-08 18:33:10 -07:00
|
|
|
t.Errorf("Want firstTimeNotDropped %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if got, want := offset, 0; got != want {
|
|
|
|
t.Errorf("Want offset %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if got, want := numDropped, 0; got != want {
|
|
|
|
t.Errorf("Want numDropped %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("All dropped.")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Drop the first of the chunks while adding two more.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 1, chunks[2:4])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 1 {
|
|
|
|
t.Errorf("want offset 1, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 1 {
|
|
|
|
t.Errorf("want first time 1, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 1 {
|
|
|
|
t.Errorf("want 1 dropped chunk, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
wantChunks := chunks[1:4]
|
|
|
|
indexes := make([]int, len(wantChunks))
|
|
|
|
for i := range indexes {
|
|
|
|
indexes[i] = i
|
|
|
|
}
|
|
|
|
gotChunks, err := p.loadChunks(fp, indexes, 0)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
for i, wantChunk := range wantChunks {
|
|
|
|
if !chunksEqual(wantChunk, gotChunks[i]) {
|
|
|
|
t.Errorf("%d. Chunks not equal.", i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Drop all the chunks while adding two more.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 4, chunks[4:6])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 0 {
|
|
|
|
t.Errorf("want offset 0, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 4 {
|
|
|
|
t.Errorf("want first time 4, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 3 {
|
|
|
|
t.Errorf("want 3 dropped chunks, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
wantChunks := chunks[4:6]
|
|
|
|
indexes := make([]int, len(wantChunks))
|
|
|
|
for i := range indexes {
|
|
|
|
indexes[i] = i
|
|
|
|
}
|
|
|
|
gotChunks, err := p.loadChunks(fp, indexes, 0)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
for i, wantChunk := range wantChunks {
|
|
|
|
if !chunksEqual(wantChunk, gotChunks[i]) {
|
|
|
|
t.Errorf("%d. Chunks not equal.", i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// While adding two more, drop all but one of the added ones.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 7, chunks[6:8])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 0 {
|
|
|
|
t.Errorf("want offset 0, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 7 {
|
|
|
|
t.Errorf("want first time 7, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 3 {
|
|
|
|
t.Errorf("want 3 dropped chunks, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
wantChunks := chunks[7:8]
|
|
|
|
indexes := make([]int, len(wantChunks))
|
|
|
|
for i := range indexes {
|
|
|
|
indexes[i] = i
|
|
|
|
}
|
|
|
|
gotChunks, err := p.loadChunks(fp, indexes, 0)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
for i, wantChunk := range wantChunks {
|
|
|
|
if !chunksEqual(wantChunk, gotChunks[i]) {
|
|
|
|
t.Errorf("%d. Chunks not equal.", i)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// While adding two more, drop all chunks including the added ones.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 10, chunks[8:])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 0 {
|
|
|
|
t.Errorf("want offset 0, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 0 {
|
|
|
|
t.Errorf("want first time 0, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 3 {
|
|
|
|
t.Errorf("want 3 dropped chunks, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if !allDropped {
|
|
|
|
t.Error("not all chunks dropped")
|
|
|
|
}
|
|
|
|
}
|
2016-01-11 07:42:10 -08:00
|
|
|
// Now set minShrinkRatio to 0.25 and play with it.
|
|
|
|
p.minShrinkRatio = 0.25
|
|
|
|
// Re-add 8 chunks.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTimeNotDropped, offset, numDropped, allDropped, err :=
|
|
|
|
p.dropAndPersistChunks(fp, model.Earliest, chunks[:8])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if got, want := firstTimeNotDropped, model.Time(0); got != want {
|
|
|
|
t.Errorf("Want firstTimeNotDropped %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if got, want := offset, 0; got != want {
|
|
|
|
t.Errorf("Want offset %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if got, want := numDropped, 0; got != want {
|
|
|
|
t.Errorf("Want numDropped %v, got %v.", got, want)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("All dropped.")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Drop only the first chunk should not happen, but persistence should still work.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 1, chunks[8:9])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 8 {
|
|
|
|
t.Errorf("want offset 8, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 0 {
|
|
|
|
t.Errorf("want first time 0, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 0 {
|
|
|
|
t.Errorf("want 0 dropped chunk, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Drop only the first two chunks should not happen, either.
|
|
|
|
for fp := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 2, nil)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 0 {
|
|
|
|
t.Errorf("want offset 0, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 0 {
|
|
|
|
t.Errorf("want first time 0, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 0 {
|
|
|
|
t.Errorf("want 0 dropped chunk, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Drop the first three chunks should finally work.
|
|
|
|
for fp, chunks := range fpToChunks {
|
|
|
|
firstTime, offset, numDropped, allDropped, err := p.dropAndPersistChunks(fp, 3, chunks[9:])
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if offset != 6 {
|
|
|
|
t.Errorf("want offset 6, got %d", offset)
|
|
|
|
}
|
|
|
|
if firstTime != 3 {
|
|
|
|
t.Errorf("want first time 3, got %d", firstTime)
|
|
|
|
}
|
|
|
|
if numDropped != 3 {
|
|
|
|
t.Errorf("want 3 dropped chunk, got %v", numDropped)
|
|
|
|
}
|
|
|
|
if allDropped {
|
|
|
|
t.Error("all chunks dropped")
|
|
|
|
}
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
func TestPersistLoadDropChunksType0(t *testing.T) {
|
|
|
|
testPersistLoadDropChunks(t, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestPersistLoadDropChunksType1(t *testing.T) {
|
|
|
|
testPersistLoadDropChunks(t, 1)
|
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
func testCheckpointAndLoadSeriesMapAndHeads(t *testing.T, encoding chunkEncoding) {
|
|
|
|
p, closer := newTestPersistence(t, encoding)
|
2014-10-28 11:01:41 -07:00
|
|
|
defer closer.Close()
|
|
|
|
|
|
|
|
fpLocker := newFingerprintLocker(10)
|
|
|
|
sm := newSeriesMap()
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
s1, _ := newMemorySeries(m1, nil, time.Time{})
|
|
|
|
s2, _ := newMemorySeries(m2, nil, time.Time{})
|
|
|
|
s3, _ := newMemorySeries(m3, nil, time.Time{})
|
|
|
|
s4, _ := newMemorySeries(m4, nil, time.Time{})
|
|
|
|
s5, _ := newMemorySeries(m5, nil, time.Time{})
|
|
|
|
s1.add(model.SamplePair{Timestamp: 1, Value: 3.14})
|
|
|
|
s3.add(model.SamplePair{Timestamp: 2, Value: 2.7})
|
2015-03-08 18:33:10 -07:00
|
|
|
s3.headChunkClosed = true
|
|
|
|
s3.persistWatermark = 1
|
|
|
|
for i := 0; i < 10000; i++ {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
s4.add(model.SamplePair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Timestamp: model.Time(i),
|
|
|
|
Value: model.SampleValue(i) / 2,
|
2015-03-08 18:33:10 -07:00
|
|
|
})
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
s5.add(model.SamplePair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Timestamp: model.Time(i),
|
|
|
|
Value: model.SampleValue(i * i),
|
2015-03-08 18:33:10 -07:00
|
|
|
})
|
|
|
|
}
|
|
|
|
s5.persistWatermark = 3
|
|
|
|
chunkCountS4 := len(s4.chunkDescs)
|
|
|
|
chunkCountS5 := len(s5.chunkDescs)
|
2015-05-05 09:17:51 -07:00
|
|
|
sm.put(m1.FastFingerprint(), s1)
|
|
|
|
sm.put(m2.FastFingerprint(), s2)
|
|
|
|
sm.put(m3.FastFingerprint(), s3)
|
|
|
|
sm.put(m4.FastFingerprint(), s4)
|
|
|
|
sm.put(m5.FastFingerprint(), s5)
|
2014-10-28 11:01:41 -07:00
|
|
|
|
|
|
|
if err := p.checkpointSeriesMapAndHeads(sm, fpLocker); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
2015-03-08 18:33:10 -07:00
|
|
|
loadedSM, _, err := p.loadSeriesMapAndHeads()
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
if loadedSM.length() != 4 {
|
|
|
|
t.Errorf("want 4 series in map, got %d", loadedSM.length())
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
2015-05-05 09:17:51 -07:00
|
|
|
if loadedS1, ok := loadedSM.get(m1.FastFingerprint()); ok {
|
2014-10-28 11:01:41 -07:00
|
|
|
if !reflect.DeepEqual(loadedS1.metric, m1) {
|
|
|
|
t.Errorf("want metric %v, got %v", m1, loadedS1.metric)
|
|
|
|
}
|
2015-05-20 10:13:06 -07:00
|
|
|
if !reflect.DeepEqual(loadedS1.head().c, s1.head().c) {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Error("head chunks differ")
|
|
|
|
}
|
|
|
|
if loadedS1.chunkDescsOffset != 0 {
|
|
|
|
t.Errorf("want chunkDescsOffset 0, got %d", loadedS1.chunkDescsOffset)
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
if loadedS1.headChunkClosed {
|
|
|
|
t.Error("headChunkClosed is true")
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
2016-02-24 04:58:34 -08:00
|
|
|
if loadedS1.head().chunkFirstTime != 1 {
|
|
|
|
t.Errorf("want chunkFirstTime in head chunk to be 1, got %d", loadedS1.head().chunkFirstTime)
|
|
|
|
}
|
|
|
|
if loadedS1.head().chunkLastTime != model.Earliest {
|
|
|
|
t.Error("want chunkLastTime in head chunk to be unset")
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
} else {
|
|
|
|
t.Errorf("couldn't find %v in loaded map", m1)
|
|
|
|
}
|
2015-05-05 09:17:51 -07:00
|
|
|
if loadedS3, ok := loadedSM.get(m3.FastFingerprint()); ok {
|
2014-10-28 11:01:41 -07:00
|
|
|
if !reflect.DeepEqual(loadedS3.metric, m3) {
|
|
|
|
t.Errorf("want metric %v, got %v", m3, loadedS3.metric)
|
|
|
|
}
|
2015-05-20 10:13:06 -07:00
|
|
|
if loadedS3.head().c != nil {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Error("head chunk not evicted")
|
|
|
|
}
|
2015-07-13 12:12:27 -07:00
|
|
|
if loadedS3.chunkDescsOffset != 0 {
|
|
|
|
t.Errorf("want chunkDescsOffset 0, got %d", loadedS3.chunkDescsOffset)
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
if !loadedS3.headChunkClosed {
|
|
|
|
t.Error("headChunkClosed is false")
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
2016-02-24 04:58:34 -08:00
|
|
|
if loadedS3.head().chunkFirstTime != 2 {
|
|
|
|
t.Errorf("want chunkFirstTime in head chunk to be 2, got %d", loadedS3.head().chunkFirstTime)
|
|
|
|
}
|
|
|
|
if loadedS3.head().chunkLastTime != 2 {
|
|
|
|
t.Errorf("want chunkLastTime in head chunk to be 2, got %d", loadedS3.head().chunkLastTime)
|
|
|
|
}
|
2014-10-28 11:01:41 -07:00
|
|
|
} else {
|
2015-03-08 18:33:10 -07:00
|
|
|
t.Errorf("couldn't find %v in loaded map", m3)
|
|
|
|
}
|
2015-05-05 09:17:51 -07:00
|
|
|
if loadedS4, ok := loadedSM.get(m4.FastFingerprint()); ok {
|
2015-03-08 18:33:10 -07:00
|
|
|
if !reflect.DeepEqual(loadedS4.metric, m4) {
|
|
|
|
t.Errorf("want metric %v, got %v", m4, loadedS4.metric)
|
|
|
|
}
|
|
|
|
if got, want := len(loadedS4.chunkDescs), chunkCountS4; got != want {
|
|
|
|
t.Errorf("got %d chunkDescs, want %d", got, want)
|
|
|
|
}
|
|
|
|
if got, want := loadedS4.persistWatermark, 0; got != want {
|
|
|
|
t.Errorf("got persistWatermark %d, want %d", got, want)
|
|
|
|
}
|
|
|
|
if loadedS4.chunkDescs[2].isEvicted() {
|
|
|
|
t.Error("3rd chunk evicted")
|
|
|
|
}
|
|
|
|
if loadedS4.chunkDescs[3].isEvicted() {
|
|
|
|
t.Error("4th chunk evicted")
|
|
|
|
}
|
|
|
|
if loadedS4.chunkDescsOffset != 0 {
|
|
|
|
t.Errorf("want chunkDescsOffset 0, got %d", loadedS4.chunkDescsOffset)
|
|
|
|
}
|
|
|
|
if loadedS4.headChunkClosed {
|
|
|
|
t.Error("headChunkClosed is true")
|
|
|
|
}
|
2016-02-24 04:58:34 -08:00
|
|
|
for i, cd := range loadedS4.chunkDescs {
|
|
|
|
if cd.chunkFirstTime != cd.c.firstTime() {
|
|
|
|
t.Errorf(
|
|
|
|
"chunkDesc[%d]: chunkFirstTime not consistent with chunk, want %d, got %d",
|
|
|
|
i, cd.c.firstTime(), cd.chunkFirstTime,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
if i == len(loadedS4.chunkDescs)-1 {
|
|
|
|
// Head chunk.
|
|
|
|
if cd.chunkLastTime != model.Earliest {
|
|
|
|
t.Error("want chunkLastTime in head chunk to be unset")
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
lastTime, err := cd.c.newIterator().lastTimestamp()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if cd.chunkLastTime != lastTime {
|
2016-02-24 04:58:34 -08:00
|
|
|
t.Errorf(
|
|
|
|
"chunkDesc[%d]: chunkLastTime not consistent with chunk, want %d, got %d",
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
i, lastTime, cd.chunkLastTime,
|
2016-02-24 04:58:34 -08:00
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
} else {
|
|
|
|
t.Errorf("couldn't find %v in loaded map", m4)
|
|
|
|
}
|
2015-05-05 09:17:51 -07:00
|
|
|
if loadedS5, ok := loadedSM.get(m5.FastFingerprint()); ok {
|
2015-03-08 18:33:10 -07:00
|
|
|
if !reflect.DeepEqual(loadedS5.metric, m5) {
|
|
|
|
t.Errorf("want metric %v, got %v", m5, loadedS5.metric)
|
|
|
|
}
|
|
|
|
if got, want := len(loadedS5.chunkDescs), chunkCountS5; got != want {
|
|
|
|
t.Errorf("got %d chunkDescs, want %d", got, want)
|
|
|
|
}
|
|
|
|
if got, want := loadedS5.persistWatermark, 3; got != want {
|
|
|
|
t.Errorf("got persistWatermark %d, want %d", got, want)
|
|
|
|
}
|
|
|
|
if !loadedS5.chunkDescs[2].isEvicted() {
|
|
|
|
t.Error("3rd chunk not evicted")
|
|
|
|
}
|
|
|
|
if loadedS5.chunkDescs[3].isEvicted() {
|
|
|
|
t.Error("4th chunk evicted")
|
|
|
|
}
|
|
|
|
if loadedS5.chunkDescsOffset != 0 {
|
|
|
|
t.Errorf("want chunkDescsOffset 0, got %d", loadedS5.chunkDescsOffset)
|
|
|
|
}
|
|
|
|
if loadedS5.headChunkClosed {
|
|
|
|
t.Error("headChunkClosed is true")
|
|
|
|
}
|
2016-02-24 04:58:34 -08:00
|
|
|
for i, cd := range loadedS5.chunkDescs {
|
|
|
|
if i < 3 {
|
|
|
|
// Evicted chunks.
|
|
|
|
if cd.chunkFirstTime == model.Earliest {
|
|
|
|
t.Errorf("chunkDesc[%d]: chunkLastTime not set", i)
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if cd.chunkFirstTime != cd.c.firstTime() {
|
|
|
|
t.Errorf(
|
|
|
|
"chunkDesc[%d]: chunkFirstTime not consistent with chunk, want %d, got %d",
|
|
|
|
i, cd.c.firstTime(), cd.chunkFirstTime,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
if i == len(loadedS5.chunkDescs)-1 {
|
|
|
|
// Head chunk.
|
|
|
|
if cd.chunkLastTime != model.Earliest {
|
|
|
|
t.Error("want chunkLastTime in head chunk to be unset")
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
lastTime, err := cd.c.newIterator().lastTimestamp()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if cd.chunkLastTime != lastTime {
|
2016-02-24 04:58:34 -08:00
|
|
|
t.Errorf(
|
|
|
|
"chunkDesc[%d]: chunkLastTime not consistent with chunk, want %d, got %d",
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
i, cd.chunkLastTime, lastTime,
|
2016-02-24 04:58:34 -08:00
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2015-03-08 18:33:10 -07:00
|
|
|
} else {
|
|
|
|
t.Errorf("couldn't find %v in loaded map", m5)
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
func TestCheckpointAndLoadSeriesMapAndHeadsChunkType0(t *testing.T) {
|
|
|
|
testCheckpointAndLoadSeriesMapAndHeads(t, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestCheckpointAndLoadSeriesMapAndHeadsChunkType1(t *testing.T) {
|
|
|
|
testCheckpointAndLoadSeriesMapAndHeads(t, 1)
|
|
|
|
}
|
|
|
|
|
2016-03-12 12:34:51 -08:00
|
|
|
func TestCheckpointAndLoadSeriesMapAndHeadsChunkType2(t *testing.T) {
|
|
|
|
testCheckpointAndLoadSeriesMapAndHeads(t, 2)
|
|
|
|
}
|
|
|
|
|
2015-05-06 07:53:12 -07:00
|
|
|
func TestCheckpointAndLoadFPMappings(t *testing.T) {
|
|
|
|
p, closer := newTestPersistence(t, 1)
|
|
|
|
defer closer.Close()
|
|
|
|
|
|
|
|
in := fpMappings{
|
2015-08-20 08:18:46 -07:00
|
|
|
1: map[string]model.Fingerprint{
|
2015-05-06 07:53:12 -07:00
|
|
|
"foo": 1,
|
|
|
|
"bar": 2,
|
|
|
|
},
|
2015-08-20 08:18:46 -07:00
|
|
|
3: map[string]model.Fingerprint{
|
2015-05-06 07:53:12 -07:00
|
|
|
"baz": 4,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := p.checkpointFPMappings(in); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
out, fp, err := p.loadFPMappings()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2015-08-20 08:18:46 -07:00
|
|
|
if got, want := fp, model.Fingerprint(4); got != want {
|
2015-05-06 07:53:12 -07:00
|
|
|
t.Errorf("got highest FP %v, want %v", got, want)
|
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(in, out) {
|
|
|
|
t.Errorf("got collision map %v, want %v", out, in)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
func testFingerprintsModifiedBefore(t *testing.T, encoding chunkEncoding) {
|
2015-03-13 07:49:07 -07:00
|
|
|
p, closer := newTestPersistence(t, encoding)
|
2014-10-28 11:01:41 -07:00
|
|
|
defer closer.Close()
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
m1 := model.Metric{"n1": "v1"}
|
|
|
|
m2 := model.Metric{"n2": "v2"}
|
|
|
|
m3 := model.Metric{"n1": "v2"}
|
2014-10-28 11:01:41 -07:00
|
|
|
p.archiveMetric(1, m1, 2, 4)
|
|
|
|
p.archiveMetric(2, m2, 1, 6)
|
|
|
|
p.archiveMetric(3, m3, 5, 5)
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
expectedFPs := map[model.Time][]model.Fingerprint{
|
2014-10-28 11:01:41 -07:00
|
|
|
0: {},
|
|
|
|
1: {},
|
|
|
|
2: {2},
|
|
|
|
3: {1, 2},
|
|
|
|
4: {1, 2},
|
|
|
|
5: {1, 2},
|
|
|
|
6: {1, 2, 3},
|
|
|
|
}
|
|
|
|
|
|
|
|
for ts, want := range expectedFPs {
|
2015-05-20 10:13:06 -07:00
|
|
|
got, err := p.fingerprintsModifiedBefore(ts)
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(want, got) {
|
|
|
|
t.Errorf("timestamp: %v, want FPs %v, got %v", ts, want, got)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-13 12:12:27 -07:00
|
|
|
unarchived, err := p.unarchiveMetric(1)
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if !unarchived {
|
2015-07-13 12:12:27 -07:00
|
|
|
t.Error("expected actual unarchival")
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
2015-07-13 12:12:27 -07:00
|
|
|
unarchived, err = p.unarchiveMetric(1)
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if unarchived {
|
2015-07-13 12:12:27 -07:00
|
|
|
t.Error("expected no unarchival")
|
2014-10-28 11:01:41 -07:00
|
|
|
}
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
expectedFPs = map[model.Time][]model.Fingerprint{
|
2014-10-28 11:01:41 -07:00
|
|
|
0: {},
|
|
|
|
1: {},
|
|
|
|
2: {2},
|
|
|
|
3: {2},
|
|
|
|
4: {2},
|
|
|
|
5: {2},
|
|
|
|
6: {2, 3},
|
|
|
|
}
|
|
|
|
|
|
|
|
for ts, want := range expectedFPs {
|
2015-05-20 10:13:06 -07:00
|
|
|
got, err := p.fingerprintsModifiedBefore(ts)
|
2014-10-28 11:01:41 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(want, got) {
|
|
|
|
t.Errorf("timestamp: %v, want FPs %v, got %v", ts, want, got)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
func TestFingerprintsModifiedBeforeChunkType0(t *testing.T) {
|
|
|
|
testFingerprintsModifiedBefore(t, 0)
|
2015-03-04 04:40:18 -08:00
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
func TestFingerprintsModifiedBeforeChunkType1(t *testing.T) {
|
|
|
|
testFingerprintsModifiedBefore(t, 1)
|
2015-03-04 04:40:18 -08:00
|
|
|
}
|
|
|
|
|
2016-03-12 12:34:51 -08:00
|
|
|
func TestFingerprintsModifiedBeforeChunkType2(t *testing.T) {
|
|
|
|
testFingerprintsModifiedBefore(t, 2)
|
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
func testDropArchivedMetric(t *testing.T, encoding chunkEncoding) {
|
|
|
|
p, closer := newTestPersistence(t, encoding)
|
2014-10-28 11:01:41 -07:00
|
|
|
defer closer.Close()
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
m1 := model.Metric{"n1": "v1"}
|
|
|
|
m2 := model.Metric{"n2": "v2"}
|
2014-10-28 11:01:41 -07:00
|
|
|
p.archiveMetric(1, m1, 2, 4)
|
|
|
|
p.archiveMetric(2, m2, 1, 6)
|
|
|
|
p.indexMetric(1, m1)
|
|
|
|
p.indexMetric(2, m2)
|
|
|
|
p.waitForIndexing()
|
|
|
|
|
2016-03-09 09:56:30 -08:00
|
|
|
outFPs := p.fingerprintsForLabelPair(model.LabelPair{Name: "n1", Value: "v1"})
|
2015-08-20 08:18:46 -07:00
|
|
|
want := model.Fingerprints{1}
|
2014-10-28 11:01:41 -07:00
|
|
|
if !reflect.DeepEqual(outFPs, want) {
|
|
|
|
t.Errorf("want %#v, got %#v", want, outFPs)
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
outFPs = p.fingerprintsForLabelPair(model.LabelPair{Name: "n2", Value: "v2"})
|
2015-08-20 08:18:46 -07:00
|
|
|
want = model.Fingerprints{2}
|
2014-10-28 11:01:41 -07:00
|
|
|
if !reflect.DeepEqual(outFPs, want) {
|
|
|
|
t.Errorf("want %#v, got %#v", want, outFPs)
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
if archived, _, _ := p.hasArchivedMetric(1); !archived {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Error("want FP 1 archived")
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
if archived, _, _ := p.hasArchivedMetric(2); !archived {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Error("want FP 2 archived")
|
|
|
|
}
|
|
|
|
|
2016-03-09 09:56:30 -08:00
|
|
|
if err := p.purgeArchivedMetric(1); err != nil {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
if err := p.purgeArchivedMetric(3); err != nil {
|
2015-02-26 06:19:44 -08:00
|
|
|
// Purging something that has not beet archived is not an error.
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
p.waitForIndexing()
|
|
|
|
|
2016-03-09 09:56:30 -08:00
|
|
|
outFPs = p.fingerprintsForLabelPair(model.LabelPair{Name: "n1", Value: "v1"})
|
2014-10-28 11:01:41 -07:00
|
|
|
want = nil
|
|
|
|
if !reflect.DeepEqual(outFPs, want) {
|
|
|
|
t.Errorf("want %#v, got %#v", want, outFPs)
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
outFPs = p.fingerprintsForLabelPair(model.LabelPair{Name: "n2", Value: "v2"})
|
2015-08-20 08:18:46 -07:00
|
|
|
want = model.Fingerprints{2}
|
2014-10-28 11:01:41 -07:00
|
|
|
if !reflect.DeepEqual(outFPs, want) {
|
|
|
|
t.Errorf("want %#v, got %#v", want, outFPs)
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
if archived, _, _ := p.hasArchivedMetric(1); archived {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Error("want FP 1 not archived")
|
|
|
|
}
|
2016-03-09 09:56:30 -08:00
|
|
|
if archived, _, _ := p.hasArchivedMetric(2); !archived {
|
2014-10-28 11:01:41 -07:00
|
|
|
t.Error("want FP 2 archived")
|
|
|
|
}
|
2014-08-13 08:13:28 -07:00
|
|
|
}
|
2014-09-14 06:33:56 -07:00
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
func TestDropArchivedMetricChunkType0(t *testing.T) {
|
|
|
|
testDropArchivedMetric(t, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestDropArchivedMetricChunkType1(t *testing.T) {
|
|
|
|
testDropArchivedMetric(t, 1)
|
|
|
|
}
|
|
|
|
|
2016-03-12 12:34:51 -08:00
|
|
|
func TestDropArchivedMetricChunkType2(t *testing.T) {
|
|
|
|
testDropArchivedMetric(t, 2)
|
|
|
|
}
|
|
|
|
|
2014-09-14 06:33:56 -07:00
|
|
|
type incrementalBatch struct {
|
|
|
|
fpToMetric index.FingerprintMetricMapping
|
|
|
|
expectedLnToLvs index.LabelNameLabelValuesMapping
|
|
|
|
expectedLpToFps index.LabelPairFingerprintsMapping
|
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
func testIndexing(t *testing.T, encoding chunkEncoding) {
|
2014-09-14 06:33:56 -07:00
|
|
|
batches := []incrementalBatch{
|
|
|
|
{
|
|
|
|
fpToMetric: index.FingerprintMetricMapping{
|
|
|
|
0: {
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: "metric_0",
|
|
|
|
"label_1": "value_1",
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
1: {
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: "metric_0",
|
|
|
|
"label_2": "value_2",
|
|
|
|
"label_3": "value_3",
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
2: {
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: "metric_1",
|
|
|
|
"label_1": "value_2",
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
},
|
|
|
|
expectedLnToLvs: index.LabelNameLabelValuesMapping{
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: codable.LabelValueSet{
|
2014-09-23 10:21:10 -07:00
|
|
|
"metric_0": struct{}{},
|
|
|
|
"metric_1": struct{}{},
|
|
|
|
},
|
|
|
|
"label_1": codable.LabelValueSet{
|
|
|
|
"value_1": struct{}{},
|
|
|
|
"value_2": struct{}{},
|
|
|
|
},
|
|
|
|
"label_2": codable.LabelValueSet{
|
|
|
|
"value_2": struct{}{},
|
|
|
|
},
|
|
|
|
"label_3": codable.LabelValueSet{
|
|
|
|
"value_3": struct{}{},
|
|
|
|
},
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
expectedLpToFps: index.LabelPairFingerprintsMapping{
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Name: model.MetricNameLabel,
|
2014-09-14 06:33:56 -07:00
|
|
|
Value: "metric_0",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{0: struct{}{}, 1: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Name: model.MetricNameLabel,
|
2014-09-14 06:33:56 -07:00
|
|
|
Value: "metric_1",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{2: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_1",
|
|
|
|
Value: "value_1",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{0: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_1",
|
|
|
|
Value: "value_2",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{2: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_2",
|
|
|
|
Value: "value_2",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{1: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_3",
|
|
|
|
Value: "value_3",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{1: struct{}{}},
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
}, {
|
|
|
|
fpToMetric: index.FingerprintMetricMapping{
|
|
|
|
3: {
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: "metric_0",
|
|
|
|
"label_1": "value_3",
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
4: {
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: "metric_2",
|
|
|
|
"label_2": "value_2",
|
|
|
|
"label_3": "value_1",
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
5: {
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: "metric_1",
|
|
|
|
"label_1": "value_3",
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
},
|
|
|
|
expectedLnToLvs: index.LabelNameLabelValuesMapping{
|
2015-08-20 08:18:46 -07:00
|
|
|
model.MetricNameLabel: codable.LabelValueSet{
|
2014-09-23 10:21:10 -07:00
|
|
|
"metric_0": struct{}{},
|
|
|
|
"metric_1": struct{}{},
|
|
|
|
"metric_2": struct{}{},
|
|
|
|
},
|
|
|
|
"label_1": codable.LabelValueSet{
|
|
|
|
"value_1": struct{}{},
|
|
|
|
"value_2": struct{}{},
|
|
|
|
"value_3": struct{}{},
|
|
|
|
},
|
|
|
|
"label_2": codable.LabelValueSet{
|
|
|
|
"value_2": struct{}{},
|
|
|
|
},
|
|
|
|
"label_3": codable.LabelValueSet{
|
|
|
|
"value_1": struct{}{},
|
|
|
|
"value_3": struct{}{},
|
|
|
|
},
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
expectedLpToFps: index.LabelPairFingerprintsMapping{
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Name: model.MetricNameLabel,
|
2014-09-14 06:33:56 -07:00
|
|
|
Value: "metric_0",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{0: struct{}{}, 1: struct{}{}, 3: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Name: model.MetricNameLabel,
|
2014-09-14 06:33:56 -07:00
|
|
|
Value: "metric_1",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{2: struct{}{}, 5: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2015-08-20 08:18:46 -07:00
|
|
|
Name: model.MetricNameLabel,
|
2014-09-14 06:33:56 -07:00
|
|
|
Value: "metric_2",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{4: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_1",
|
|
|
|
Value: "value_1",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{0: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_1",
|
|
|
|
Value: "value_2",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{2: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_1",
|
|
|
|
Value: "value_3",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{3: struct{}{}, 5: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_2",
|
|
|
|
Value: "value_2",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{1: struct{}{}, 4: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_3",
|
|
|
|
Value: "value_1",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{4: struct{}{}},
|
2015-08-22 04:32:13 -07:00
|
|
|
model.LabelPair{
|
2014-09-14 06:33:56 -07:00
|
|
|
Name: "label_3",
|
|
|
|
Value: "value_3",
|
2014-09-23 10:21:10 -07:00
|
|
|
}: codable.FingerprintSet{1: struct{}{}},
|
2014-09-14 06:33:56 -07:00
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
p, closer := newTestPersistence(t, encoding)
|
2014-09-14 06:33:56 -07:00
|
|
|
defer closer.Close()
|
|
|
|
|
|
|
|
indexedFpsToMetrics := index.FingerprintMetricMapping{}
|
|
|
|
for i, b := range batches {
|
|
|
|
for fp, m := range b.fpToMetric {
|
2014-10-28 11:01:41 -07:00
|
|
|
p.indexMetric(fp, m)
|
2016-03-09 09:56:30 -08:00
|
|
|
p.archiveMetric(fp, m, 1, 2)
|
2014-09-14 06:33:56 -07:00
|
|
|
indexedFpsToMetrics[fp] = m
|
|
|
|
}
|
2014-10-07 10:11:24 -07:00
|
|
|
verifyIndexedState(i, t, b, indexedFpsToMetrics, p)
|
2014-09-14 06:33:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
for i := len(batches) - 1; i >= 0; i-- {
|
|
|
|
b := batches[i]
|
2014-10-07 10:11:24 -07:00
|
|
|
verifyIndexedState(i, t, batches[i], indexedFpsToMetrics, p)
|
2014-09-14 06:33:56 -07:00
|
|
|
for fp, m := range b.fpToMetric {
|
2014-10-28 11:01:41 -07:00
|
|
|
p.unindexMetric(fp, m)
|
2015-07-13 12:12:27 -07:00
|
|
|
unarchived, err := p.unarchiveMetric(fp)
|
2014-09-14 06:33:56 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if !unarchived {
|
|
|
|
t.Errorf("%d. metric not unarchived", i)
|
|
|
|
}
|
|
|
|
delete(indexedFpsToMetrics, fp)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
func TestIndexingChunkType0(t *testing.T) {
|
2015-03-06 03:53:00 -08:00
|
|
|
testIndexing(t, 0)
|
2015-03-04 04:40:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestIndexingChunkType1(t *testing.T) {
|
|
|
|
testIndexing(t, 1)
|
|
|
|
}
|
|
|
|
|
2016-03-12 12:34:51 -08:00
|
|
|
func TestIndexingChunkType2(t *testing.T) {
|
|
|
|
testIndexing(t, 2)
|
|
|
|
}
|
|
|
|
|
2014-10-07 10:11:24 -07:00
|
|
|
func verifyIndexedState(i int, t *testing.T, b incrementalBatch, indexedFpsToMetrics index.FingerprintMetricMapping, p *persistence) {
|
|
|
|
p.waitForIndexing()
|
2014-09-14 06:33:56 -07:00
|
|
|
for fp, m := range indexedFpsToMetrics {
|
|
|
|
// Compare archived metrics with input metrics.
|
2015-05-20 10:13:06 -07:00
|
|
|
mOut, err := p.archivedMetric(fp)
|
2014-09-14 06:33:56 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if !mOut.Equal(m) {
|
|
|
|
t.Errorf("%d. %v: Got: %s; want %s", i, fp, mOut, m)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check that archived metrics are in membership index.
|
2016-03-09 09:56:30 -08:00
|
|
|
has, first, last := p.hasArchivedMetric(fp)
|
2014-09-14 06:33:56 -07:00
|
|
|
if !has {
|
|
|
|
t.Errorf("%d. fingerprint %v not found", i, fp)
|
|
|
|
}
|
|
|
|
if first != 1 || last != 2 {
|
|
|
|
t.Errorf(
|
|
|
|
"%d. %v: Got first: %d, last %d; want first: %d, last %d",
|
|
|
|
i, fp, first, last, 1, 2,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compare label name -> label values mappings.
|
|
|
|
for ln, lvs := range b.expectedLnToLvs {
|
2016-03-09 09:56:30 -08:00
|
|
|
outLvs := p.labelValuesForLabelName(ln)
|
2014-09-14 06:33:56 -07:00
|
|
|
|
2014-09-23 10:21:10 -07:00
|
|
|
outSet := codable.LabelValueSet{}
|
|
|
|
for _, lv := range outLvs {
|
|
|
|
outSet[lv] = struct{}{}
|
2014-09-14 06:33:56 -07:00
|
|
|
}
|
2014-09-23 10:21:10 -07:00
|
|
|
|
|
|
|
if !reflect.DeepEqual(lvs, outSet) {
|
|
|
|
t.Errorf("%d. label values don't match. Got: %v; want %v", i, outSet, lvs)
|
2014-09-14 06:33:56 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compare label pair -> fingerprints mappings.
|
|
|
|
for lp, fps := range b.expectedLpToFps {
|
2016-03-09 09:56:30 -08:00
|
|
|
outFPs := p.fingerprintsForLabelPair(lp)
|
2014-09-14 06:33:56 -07:00
|
|
|
|
2014-09-23 10:21:10 -07:00
|
|
|
outSet := codable.FingerprintSet{}
|
2014-10-28 11:01:41 -07:00
|
|
|
for _, fp := range outFPs {
|
2014-09-23 10:21:10 -07:00
|
|
|
outSet[fp] = struct{}{}
|
2014-09-14 06:33:56 -07:00
|
|
|
}
|
2014-09-23 10:21:10 -07:00
|
|
|
|
|
|
|
if !reflect.DeepEqual(fps, outSet) {
|
|
|
|
t.Errorf("%d. %v: fingerprints don't match. Got: %v; want %v", i, lp, outSet, fps)
|
2014-09-14 06:33:56 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-03-19 11:28:21 -07:00
|
|
|
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into returned errors. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func TestQuranatineSeriesFile(t *testing.T) {
|
|
|
|
p, closer := newTestPersistence(t, 1)
|
|
|
|
defer closer.Close()
|
|
|
|
|
|
|
|
verify := func(fp model.Fingerprint, seriesFileShouldExist bool, contentHintFile ...string) {
|
|
|
|
var (
|
|
|
|
fpStr = fp.String()
|
|
|
|
originalFile = p.fileNameForFingerprint(fp)
|
|
|
|
quarantinedFile = filepath.Join(p.basePath, "orphaned", fpStr[0:seriesDirNameLen], fpStr[seriesDirNameLen:]+seriesFileSuffix)
|
|
|
|
hintFile = filepath.Join(p.basePath, "orphaned", fpStr[0:seriesDirNameLen], fpStr[seriesDirNameLen:]+hintFileSuffix)
|
|
|
|
)
|
|
|
|
if _, err := os.Stat(originalFile); !os.IsNotExist(err) {
|
|
|
|
t.Errorf("Expected file %q to not exist.", originalFile)
|
|
|
|
}
|
|
|
|
if _, err := os.Stat(quarantinedFile); (os.IsNotExist(err) && seriesFileShouldExist) || (err == nil && !seriesFileShouldExist) {
|
|
|
|
t.Errorf("Unexpected state of quarantined file %q. Expected it to exist: %t. os.Stat returned: %s.", quarantinedFile, seriesFileShouldExist, err)
|
|
|
|
}
|
|
|
|
f, err := os.Open(hintFile)
|
|
|
|
defer f.Close()
|
|
|
|
if err != nil {
|
|
|
|
t.Errorf("Could not open hint file %q: %s", hintFile, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
|
|
for _, want := range contentHintFile {
|
|
|
|
if !scanner.Scan() {
|
|
|
|
t.Errorf("Unexpected end of hint file %q.", hintFile)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
got := scanner.Text()
|
|
|
|
if want != got {
|
|
|
|
t.Errorf("Want hint line %q, got %q.", want, got)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if scanner.Scan() {
|
|
|
|
t.Errorf("Unexpected spurious content in hint file %q: %q", hintFile, scanner.Text())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(0, nil, nil); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
verify(0, false, "[UNKNOWN METRIC]", "[UNKNOWN REASON]")
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(
|
|
|
|
1, errors.New("file does not exist"),
|
|
|
|
nil,
|
|
|
|
); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
verify(1, false, "[UNKNOWN METRIC]", "file does not exist")
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(
|
|
|
|
2, errors.New("file does not exist"),
|
|
|
|
model.Metric{"foo": "bar", "dings": "bums"},
|
|
|
|
); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
verify(2, false, `{dings="bums", foo="bar"}`, "file does not exist")
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(
|
|
|
|
3, nil,
|
|
|
|
model.Metric{"foo": "bar", "dings": "bums"},
|
|
|
|
); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
verify(3, false, `{dings="bums", foo="bar"}`, "[UNKNOWN REASON]")
|
|
|
|
|
|
|
|
err := os.Mkdir(filepath.Join(p.basePath, "00"), os.ModePerm)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
f, err := os.Create(p.fileNameForFingerprint(4))
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
f.Close()
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(
|
|
|
|
4, errors.New("file exists"),
|
|
|
|
model.Metric{"sound": "cloud"},
|
|
|
|
); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
verify(4, true, `{sound="cloud"}`, "file exists")
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(4, nil, nil); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
// Overwrites hint file but leaves series file intact.
|
|
|
|
verify(4, true, "[UNKNOWN METRIC]", "[UNKNOWN REASON]")
|
|
|
|
|
|
|
|
if err := p.quarantineSeriesFile(
|
|
|
|
4, errors.New("file exists"),
|
|
|
|
model.Metric{"sound": "cloud"},
|
|
|
|
); err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
// Overwrites everything.
|
|
|
|
verify(4, true, `{sound="cloud"}`, "file exists")
|
|
|
|
}
|
|
|
|
|
2015-03-19 11:28:21 -07:00
|
|
|
// fpStrings holds the string-encoded fingerprints that the benchmarks in this
// file parse with model.FingerprintFromString and load from the "fixtures"
// base path.
var fpStrings = []string{"b004b821ca50ba26", "b037c21e884e4fc5", "b037de1e884e5469"}
|
|
|
|
|
|
|
|
func BenchmarkLoadChunksSequentially(b *testing.B) {
|
|
|
|
p := persistence{
|
|
|
|
basePath: "fixtures",
|
2015-04-14 01:43:09 -07:00
|
|
|
bufPool: sync.Pool{New: func() interface{} { return make([]byte, 0, 3*chunkLenWithHeader) }},
|
2015-03-19 11:28:21 -07:00
|
|
|
}
|
|
|
|
sequentialIndexes := make([]int, 47)
|
|
|
|
for i := range sequentialIndexes {
|
|
|
|
sequentialIndexes[i] = i
|
|
|
|
}
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
var fp model.Fingerprint
|
2015-03-19 11:28:21 -07:00
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
for _, s := range fpStrings {
|
2015-08-20 08:18:46 -07:00
|
|
|
fp, _ = model.FingerprintFromString(s)
|
2015-03-19 11:28:21 -07:00
|
|
|
cds, err := p.loadChunks(fp, sequentialIndexes, 0)
|
|
|
|
if err != nil {
|
|
|
|
b.Error(err)
|
|
|
|
}
|
|
|
|
if len(cds) == 0 {
|
|
|
|
b.Error("could not read any chunks")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkLoadChunksRandomly(b *testing.B) {
|
|
|
|
p := persistence{
|
|
|
|
basePath: "fixtures",
|
2015-04-14 01:43:09 -07:00
|
|
|
bufPool: sync.Pool{New: func() interface{} { return make([]byte, 0, 3*chunkLenWithHeader) }},
|
2015-03-19 11:28:21 -07:00
|
|
|
}
|
|
|
|
randomIndexes := []int{1, 5, 6, 8, 11, 14, 18, 23, 29, 33, 42, 46}
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
var fp model.Fingerprint
|
2015-03-19 11:28:21 -07:00
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
for _, s := range fpStrings {
|
2015-08-20 08:18:46 -07:00
|
|
|
fp, _ = model.FingerprintFromString(s)
|
2015-03-19 11:28:21 -07:00
|
|
|
cds, err := p.loadChunks(fp, randomIndexes, 0)
|
|
|
|
if err != nil {
|
|
|
|
b.Error(err)
|
|
|
|
}
|
|
|
|
if len(cds) == 0 {
|
|
|
|
b.Error("could not read any chunks")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkLoadChunkDescs(b *testing.B) {
|
|
|
|
p := persistence{
|
|
|
|
basePath: "fixtures",
|
|
|
|
}
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
var fp model.Fingerprint
|
2015-03-19 11:28:21 -07:00
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
for _, s := range fpStrings {
|
2015-08-20 08:18:46 -07:00
|
|
|
fp, _ = model.FingerprintFromString(s)
|
2015-07-06 16:10:14 -07:00
|
|
|
cds, err := p.loadChunkDescs(fp, 0)
|
2015-03-19 11:28:21 -07:00
|
|
|
if err != nil {
|
|
|
|
b.Error(err)
|
|
|
|
}
|
|
|
|
if len(cds) == 0 {
|
|
|
|
b.Error("could not read any chunk descs")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|