2015-01-21 11:07:45 -08:00
|
|
|
// Copyright 2014 The Prometheus Authors
|
2014-09-19 09:18:44 -07:00
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2014-09-16 06:47:24 -07:00
|
|
|
package local
|
2014-06-06 02:55:53 -07:00
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/binary"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"math"
|
|
|
|
"sort"
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
"github.com/prometheus/common/model"
|
2014-06-06 02:55:53 -07:00
|
|
|
|
|
|
|
"github.com/prometheus/prometheus/storage/metric"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Layout of the 21-byte header at the start of a delta-encoded chunk:
//
//	offset  0, 1 byte:  number of bytes used per time delta
//	offset  1, 1 byte:  number of bytes used per value delta
//	offset  2, 1 byte:  is-integer flag
//	offset  3, 8 bytes: base time
//	offset 11, 8 bytes: base value
//	offset 19, 2 bytes: used buffer bytes
//
// Each offset is derived from the previous field's offset plus that field's
// width, so the constants cannot drift apart.
const (
	deltaHeaderTimeBytesOffset  = 0
	deltaHeaderValueBytesOffset = deltaHeaderTimeBytesOffset + 1
	deltaHeaderIsIntOffset      = deltaHeaderValueBytesOffset + 1
	deltaHeaderBaseTimeOffset   = deltaHeaderIsIntOffset + 1
	deltaHeaderBaseValueOffset  = deltaHeaderBaseTimeOffset + 8
	deltaHeaderBufLenOffset     = deltaHeaderBaseValueOffset + 8

	// Total header size: the 2-byte used-length field is the last field.
	deltaHeaderBytes = deltaHeaderBufLenOffset + 2
)
|
|
|
|
|
2014-08-19 09:14:44 -07:00
|
|
|
// A deltaEncodedChunk adaptively stores sample timestamps and values with a
|
2015-03-04 04:40:18 -08:00
|
|
|
// delta encoding of various types (int, float) and bit widths. However, once 8
|
2014-08-19 09:14:44 -07:00
|
|
|
// bytes would be needed to encode a delta value, a fall-back to the absolute
|
|
|
|
// numbers happens (so that timestamps are saved directly as int64 and values as
|
2014-09-16 06:47:24 -07:00
|
|
|
// float64). It implements the chunk interface.
|
2015-03-04 04:40:18 -08:00
|
|
|
type deltaEncodedChunk []byte // Raw bytes: the header first, then the fixed-size encoded samples.
|
2014-06-06 02:55:53 -07:00
|
|
|
|
2014-09-16 06:47:24 -07:00
|
|
|
// newDeltaEncodedChunk returns a newly allocated deltaEncodedChunk.
|
2015-03-04 04:40:18 -08:00
|
|
|
func newDeltaEncodedChunk(tb, vb deltaBytes, isInt bool, length int) *deltaEncodedChunk {
|
|
|
|
if tb < 1 {
|
|
|
|
panic("need at least 1 time delta byte")
|
|
|
|
}
|
|
|
|
if length < deltaHeaderBytes+16 {
|
|
|
|
panic(fmt.Errorf(
|
|
|
|
"chunk length %d bytes is insufficient, need at least %d",
|
|
|
|
length, deltaHeaderBytes+16,
|
|
|
|
))
|
|
|
|
}
|
|
|
|
c := make(deltaEncodedChunk, deltaHeaderIsIntOffset+1, length)
|
2014-06-06 02:55:53 -07:00
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
c[deltaHeaderTimeBytesOffset] = byte(tb)
|
|
|
|
c[deltaHeaderValueBytesOffset] = byte(vb)
|
2014-08-19 09:14:44 -07:00
|
|
|
if vb < d8 && isInt { // Only use int for fewer than 8 value delta bytes.
|
2015-03-04 04:40:18 -08:00
|
|
|
c[deltaHeaderIsIntOffset] = 1
|
2014-06-06 02:55:53 -07:00
|
|
|
} else {
|
2015-03-04 04:40:18 -08:00
|
|
|
c[deltaHeaderIsIntOffset] = 0
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
return &c
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2014-09-16 06:47:24 -07:00
|
|
|
// add implements chunk.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (c deltaEncodedChunk) add(s model.SamplePair) ([]chunk, error) {
|
2015-03-06 03:53:00 -08:00
|
|
|
if c.len() == 0 {
|
2015-03-04 04:40:18 -08:00
|
|
|
c = c[:deltaHeaderBytes]
|
|
|
|
binary.LittleEndian.PutUint64(c[deltaHeaderBaseTimeOffset:], uint64(s.Timestamp))
|
|
|
|
binary.LittleEndian.PutUint64(c[deltaHeaderBaseValueOffset:], math.Float64bits(float64(s.Value)))
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
remainingBytes := cap(c) - len(c)
|
2014-06-06 02:55:53 -07:00
|
|
|
sampleSize := c.sampleSize()
|
|
|
|
|
|
|
|
// Do we generally have space for another sample in this chunk? If not,
|
2014-08-19 09:14:44 -07:00
|
|
|
// overflow into a new one.
|
2014-06-06 02:55:53 -07:00
|
|
|
if remainingBytes < sampleSize {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
overflowChunks, err := newChunk().add(s)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return []chunk{&c, overflowChunks[0]}, nil
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-03-06 07:03:03 -08:00
|
|
|
baseValue := c.baseValue()
|
2014-06-06 02:55:53 -07:00
|
|
|
dt := s.Timestamp - c.baseTime()
|
2015-07-13 12:12:27 -07:00
|
|
|
if dt < 0 {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return nil, fmt.Errorf("time delta is less than zero: %v", dt)
|
2015-07-13 12:12:27 -07:00
|
|
|
}
|
|
|
|
|
2015-03-06 07:03:03 -08:00
|
|
|
dv := s.Value - baseValue
|
2014-08-19 09:14:44 -07:00
|
|
|
tb := c.timeBytes()
|
|
|
|
vb := c.valueBytes()
|
2015-03-13 07:49:07 -07:00
|
|
|
isInt := c.isInt()
|
2014-06-06 02:55:53 -07:00
|
|
|
|
|
|
|
// If the new sample is incompatible with the current encoding, reencode the
|
|
|
|
// existing chunk data into new chunk(s).
|
2015-03-06 07:03:03 -08:00
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
ntb, nvb, nInt := tb, vb, isInt
|
|
|
|
if isInt && !isInt64(dv) {
|
|
|
|
// int->float.
|
|
|
|
nvb = d4
|
|
|
|
nInt = false
|
2015-08-20 08:18:46 -07:00
|
|
|
} else if !isInt && vb == d4 && baseValue+model.SampleValue(float32(dv)) != s.Value {
|
2015-03-13 07:49:07 -07:00
|
|
|
// float32->float64.
|
|
|
|
nvb = d8
|
|
|
|
} else {
|
|
|
|
if tb < d8 {
|
|
|
|
// Maybe more bytes for timestamp.
|
|
|
|
ntb = max(tb, bytesNeededForUnsignedTimestampDelta(dt))
|
|
|
|
}
|
|
|
|
if c.isInt() && vb < d8 {
|
|
|
|
// Maybe more bytes for sample value.
|
|
|
|
nvb = max(vb, bytesNeededForIntegerSampleValueDelta(dv))
|
|
|
|
}
|
2015-03-06 07:03:03 -08:00
|
|
|
}
|
2015-03-13 07:49:07 -07:00
|
|
|
if tb != ntb || vb != nvb || isInt != nInt {
|
|
|
|
if len(c)*2 < cap(c) {
|
|
|
|
return transcodeAndAdd(newDeltaEncodedChunk(ntb, nvb, nInt, cap(c)), &c, s)
|
|
|
|
}
|
|
|
|
// Chunk is already half full. Better create a new one and save the transcoding efforts.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
overflowChunks, err := newChunk().add(s)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return []chunk{&c, overflowChunks[0]}, nil
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
2015-03-13 07:49:07 -07:00
|
|
|
|
2015-03-04 04:40:18 -08:00
|
|
|
offset := len(c)
|
|
|
|
c = c[:offset+sampleSize]
|
2014-06-06 02:55:53 -07:00
|
|
|
|
2014-08-19 09:14:44 -07:00
|
|
|
switch tb {
|
|
|
|
case d1:
|
2015-03-04 04:40:18 -08:00
|
|
|
c[offset] = byte(dt)
|
2014-08-19 09:14:44 -07:00
|
|
|
case d2:
|
2015-03-04 04:40:18 -08:00
|
|
|
binary.LittleEndian.PutUint16(c[offset:], uint16(dt))
|
2014-08-19 09:14:44 -07:00
|
|
|
case d4:
|
2015-03-04 04:40:18 -08:00
|
|
|
binary.LittleEndian.PutUint32(c[offset:], uint32(dt))
|
2014-08-19 09:14:44 -07:00
|
|
|
case d8:
|
|
|
|
// Store the absolute value (no delta) in case of d8.
|
2015-03-04 04:40:18 -08:00
|
|
|
binary.LittleEndian.PutUint64(c[offset:], uint64(s.Timestamp))
|
2014-08-19 09:14:44 -07:00
|
|
|
default:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return nil, fmt.Errorf("invalid number of bytes for time delta: %d", tb)
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2014-08-19 09:14:44 -07:00
|
|
|
offset += int(tb)
|
2014-06-06 02:55:53 -07:00
|
|
|
|
|
|
|
if c.isInt() {
|
2014-08-19 09:14:44 -07:00
|
|
|
switch vb {
|
|
|
|
case d0:
|
2014-06-06 02:55:53 -07:00
|
|
|
// No-op. Constant value is stored as base value.
|
2014-08-19 09:14:44 -07:00
|
|
|
case d1:
|
2015-07-13 02:19:11 -07:00
|
|
|
c[offset] = byte(int8(dv))
|
2014-08-19 09:14:44 -07:00
|
|
|
case d2:
|
2015-07-13 02:19:11 -07:00
|
|
|
binary.LittleEndian.PutUint16(c[offset:], uint16(int16(dv)))
|
2014-08-19 09:14:44 -07:00
|
|
|
case d4:
|
2015-07-13 02:19:11 -07:00
|
|
|
binary.LittleEndian.PutUint32(c[offset:], uint32(int32(dv)))
|
2014-08-19 09:14:44 -07:00
|
|
|
// d8 must not happen. Those samples are encoded as float64.
|
2014-06-06 02:55:53 -07:00
|
|
|
default:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return nil, fmt.Errorf("invalid number of bytes for integer delta: %d", vb)
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
} else {
|
2014-08-19 09:14:44 -07:00
|
|
|
switch vb {
|
|
|
|
case d4:
|
2015-03-04 04:40:18 -08:00
|
|
|
binary.LittleEndian.PutUint32(c[offset:], math.Float32bits(float32(dv)))
|
2014-08-19 09:14:44 -07:00
|
|
|
case d8:
|
|
|
|
// Store the absolute value (no delta) in case of d8.
|
2015-03-04 04:40:18 -08:00
|
|
|
binary.LittleEndian.PutUint64(c[offset:], math.Float64bits(float64(s.Value)))
|
2014-06-06 02:55:53 -07:00
|
|
|
default:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return nil, fmt.Errorf("invalid number of bytes for floating point delta: %d", vb)
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return []chunk{&c}, nil
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
// clone implements chunk.
|
|
|
|
func (c deltaEncodedChunk) clone() chunk {
|
|
|
|
clone := make(deltaEncodedChunk, len(c), cap(c))
|
|
|
|
copy(clone, c)
|
|
|
|
return &clone
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
// firstTime implements chunk.
|
2015-08-20 08:18:46 -07:00
|
|
|
func (c deltaEncodedChunk) firstTime() model.Time {
|
2015-04-14 04:46:38 -07:00
|
|
|
return c.baseTime()
|
2015-03-13 07:49:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// newIterator implements chunk.
|
|
|
|
func (c *deltaEncodedChunk) newIterator() chunkIterator {
|
|
|
|
return &deltaEncodedChunkIterator{
|
2015-04-14 04:46:38 -07:00
|
|
|
c: *c,
|
|
|
|
len: c.len(),
|
|
|
|
baseT: c.baseTime(),
|
|
|
|
baseV: c.baseValue(),
|
|
|
|
tBytes: c.timeBytes(),
|
|
|
|
vBytes: c.valueBytes(),
|
|
|
|
isInt: c.isInt(),
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
2015-03-13 07:49:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// marshal implements chunk.
|
|
|
|
func (c deltaEncodedChunk) marshal(w io.Writer) error {
|
|
|
|
if len(c) > math.MaxUint16 {
|
|
|
|
panic("chunk buffer length would overflow a 16 bit uint.")
|
|
|
|
}
|
|
|
|
binary.LittleEndian.PutUint16(c[deltaHeaderBufLenOffset:], uint16(len(c)))
|
|
|
|
|
|
|
|
n, err := w.Write(c[:cap(c)])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if n != cap(c) {
|
2016-01-25 07:36:36 -08:00
|
|
|
return fmt.Errorf("wanted to write %d bytes, wrote %d", cap(c), n)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// marshalToBuf implements chunk.
|
|
|
|
func (c deltaEncodedChunk) marshalToBuf(buf []byte) error {
|
|
|
|
if len(c) > math.MaxUint16 {
|
|
|
|
panic("chunk buffer length would overflow a 16 bit uint")
|
|
|
|
}
|
|
|
|
binary.LittleEndian.PutUint16(c[deltaHeaderBufLenOffset:], uint16(len(c)))
|
|
|
|
|
|
|
|
n := copy(buf, c)
|
|
|
|
if n != len(c) {
|
|
|
|
return fmt.Errorf("wanted to copy %d bytes to buffer, copied %d", len(c), n)
|
2015-03-13 07:49:07 -07:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// unmarshal implements chunk.
|
|
|
|
func (c *deltaEncodedChunk) unmarshal(r io.Reader) error {
|
|
|
|
*c = (*c)[:cap(*c)]
|
2015-04-13 11:20:26 -07:00
|
|
|
if _, err := io.ReadFull(r, *c); err != nil {
|
|
|
|
return err
|
2015-03-13 07:49:07 -07:00
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
l := binary.LittleEndian.Uint16((*c)[deltaHeaderBufLenOffset:])
|
|
|
|
if int(l) > cap(*c) {
|
|
|
|
return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
|
|
|
|
}
|
|
|
|
*c = (*c)[:l]
|
2015-03-13 07:49:07 -07:00
|
|
|
return nil
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-04-13 11:20:26 -07:00
|
|
|
// unmarshalFromBuf implements chunk.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (c *deltaEncodedChunk) unmarshalFromBuf(buf []byte) error {
|
2015-04-13 11:20:26 -07:00
|
|
|
*c = (*c)[:cap(*c)]
|
|
|
|
copy(*c, buf)
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
l := binary.LittleEndian.Uint16((*c)[deltaHeaderBufLenOffset:])
|
|
|
|
if int(l) > cap(*c) {
|
|
|
|
return fmt.Errorf("chunk length exceeded during unmarshaling: %d", l)
|
|
|
|
}
|
|
|
|
*c = (*c)[:l]
|
|
|
|
return nil
|
2015-04-13 11:20:26 -07:00
|
|
|
}
|
|
|
|
|
2015-03-13 07:49:07 -07:00
|
|
|
// encoding implements chunk.
|
|
|
|
func (c deltaEncodedChunk) encoding() chunkEncoding { return delta }
|
|
|
|
|
|
|
|
func (c deltaEncodedChunk) timeBytes() deltaBytes {
|
|
|
|
return deltaBytes(c[deltaHeaderTimeBytesOffset])
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c deltaEncodedChunk) valueBytes() deltaBytes {
|
|
|
|
return deltaBytes(c[deltaHeaderValueBytesOffset])
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c deltaEncodedChunk) isInt() bool {
|
|
|
|
return c[deltaHeaderIsIntOffset] == 1
|
|
|
|
}
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
func (c deltaEncodedChunk) baseTime() model.Time {
|
|
|
|
return model.Time(binary.LittleEndian.Uint64(c[deltaHeaderBaseTimeOffset:]))
|
2015-03-13 07:49:07 -07:00
|
|
|
}
|
|
|
|
|
2015-08-20 08:18:46 -07:00
|
|
|
func (c deltaEncodedChunk) baseValue() model.SampleValue {
|
|
|
|
return model.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(c[deltaHeaderBaseValueOffset:])))
|
2015-03-13 07:49:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c deltaEncodedChunk) sampleSize() int {
|
|
|
|
return int(c.timeBytes() + c.valueBytes())
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c deltaEncodedChunk) len() int {
|
|
|
|
if len(c) < deltaHeaderBytes {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return (len(c) - deltaHeaderBytes) / c.sampleSize()
|
|
|
|
}
|
|
|
|
|
2014-09-16 06:47:24 -07:00
|
|
|
// deltaEncodedChunkIterator implements chunkIterator.
|
2014-06-06 02:55:53 -07:00
|
|
|
type deltaEncodedChunkIterator struct {
|
2015-04-14 04:46:38 -07:00
|
|
|
c deltaEncodedChunk
|
|
|
|
len int
|
2015-08-20 08:18:46 -07:00
|
|
|
baseT model.Time
|
|
|
|
baseV model.SampleValue
|
2015-04-14 04:46:38 -07:00
|
|
|
tBytes, vBytes deltaBytes
|
|
|
|
isInt bool
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-04-14 04:46:38 -07:00
|
|
|
// length implements chunkIterator.
|
|
|
|
func (it *deltaEncodedChunkIterator) length() int { return it.len }
|
|
|
|
|
Streamline series iterator creation
This will fix issue #1035 and will also help to make issue #1264 less
bad.
The fundamental problem in the current code:
In the preload phase, we quite accurately determine which chunks will
be used for the query being executed. However, in the subsequent step
of creating series iterators, the created iterators are referencing
_all_ in-memory chunks in their series, even the un-pinned ones. In
iterator creation, we copy a pointer to each in-memory chunk of a
series into the iterator. While this creates a certain amount of
allocation churn, the worst thing about it is that copying the chunk
pointer out of the chunkDesc requires a mutex acquisition. (Remember
that the iterator will also reference un-pinned chunks, so we need to
acquire the mutex to protect against concurrent eviction.) The worst
case happens if a series doesn't even contain any relevant samples for
the query time range. We notice that during preloading but then we
will still create a series iterator for it. But even for series that
do contain relevant samples, the overhead is quite bad for instant
queries that retrieve a single sample from each series, but still go
through all the effort of series iterator creation. All of that is
particularly bad if a series has many in-memory chunks.
This commit addresses the problem from two sides:
First, it merges preloading and iterator creation into one step,
i.e. the preload call returns an iterator for exactly the preloaded
chunks.
Second, the required mutex acquisition in chunkDesc has been greatly
reduced. That was enabled by a side effect of the first step, which is
that the iterator is only referencing pinned chunks, so there is no
risk of concurrent eviction anymore, and chunks can be accessed
without mutex acquisition.
To simplify the code changes for the above, the long-planned change of
ValueAtTime to ValueAtOrBefore time was performed at the same
time. (It should have been done first, but it kind of accidentally
happened while I was in the middle of writing the series iterator
changes. Sorry for that.) So far, we actively filtered the up to two
values that were returned by ValueAtTime, i.e. we invested work to
retrieve up to two values, and then we invested more work to throw one
of them away.
The SeriesIterator.BoundaryValues method can be removed once #1401 is
fixed. But I really didn't want to load even more changes into this
PR.
Benchmarks:
The BenchmarkFuzz.* benchmarks run 83% faster (i.e. about six times
faster) and allocate 95% fewer bytes. The reason for that is that the
benchmark reads one sample after another from the time series and
creates a new series iterator for each sample read.
To find out how much these improvements matter in practice, I have
mirrored a beefy Prometheus server at SoundCloud that suffers from
both issues #1035 and #1264. To reach steady state that would be
comparable, the server needs to run for 15d. So far, it has run for
1d. The test server currently has only half as many memory time series
and 60% of the memory chunks the main server has. The 90th percentile
rule evaluation cycle time is ~11s on the main server and only ~3s on
the test server. However, these numbers might get much closer over
time.
In addition to performance improvements, this commit removes about 150
LOC.
2016-02-16 09:47:50 -08:00
|
|
|
// valueAtOrBeforeTime implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) valueAtOrBeforeTime(t model.Time) (model.SamplePair, error) {
|
|
|
|
var lastErr error
|
2015-04-14 04:46:38 -07:00
|
|
|
i := sort.Search(it.len, func(i int) bool {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
ts, err := it.timestampAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
lastErr = err
|
|
|
|
}
|
|
|
|
return ts.After(t)
|
2014-06-06 02:55:53 -07:00
|
|
|
})
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
if i == 0 || lastErr != nil {
|
|
|
|
return ZeroSamplePair, lastErr
|
|
|
|
}
|
|
|
|
ts, err := it.timestampAtIndex(i - 1)
|
|
|
|
if err != nil {
|
|
|
|
return ZeroSamplePair, err
|
Streamline series iterator creation
This will fix issue #1035 and will also help to make issue #1264 less
bad.
The fundamental problem in the current code:
In the preload phase, we quite accurately determine which chunks will
be used for the query being executed. However, in the subsequent step
of creating series iterators, the created iterators are referencing
_all_ in-memory chunks in their series, even the un-pinned ones. In
iterator creation, we copy a pointer to each in-memory chunk of a
series into the iterator. While this creates a certain amount of
allocation churn, the worst thing about it is that copying the chunk
pointer out of the chunkDesc requires a mutex acquisition. (Remember
that the iterator will also reference un-pinned chunks, so we need to
acquire the mutex to protect against concurrent eviction.) The worst
case happens if a series doesn't even contain any relevant samples for
the query time range. We notice that during preloading but then we
will still create a series iterator for it. But even for series that
do contain relevant samples, the overhead is quite bad for instant
queries that retrieve a single sample from each series, but still go
through all the effort of series iterator creation. All of that is
particularly bad if a series has many in-memory chunks.
This commit addresses the problem from two sides:
First, it merges preloading and iterator creation into one step,
i.e. the preload call returns an iterator for exactly the preloaded
chunks.
Second, the required mutex acquisition in chunkDesc has been greatly
reduced. That was enabled by a side effect of the first step, which is
that the iterator is only referencing pinned chunks, so there is no
risk of concurrent eviction anymore, and chunks can be accessed
without mutex acquisition.
To simplify the code changes for the above, the long-planned change of
ValueAtTime to ValueAtOrBeforeTime was performed at the same
time. (It should have been done first, but it kind of accidentally
happened while I was in the middle of writing the series iterator
changes. Sorry for that.) So far, we actively filtered the up to two
values that were returned by ValueAtTime, i.e. we invested work to
retrieve up to two values, and then we invested more work to throw one
of them away.
The SeriesIterator.BoundaryValues method can be removed once #1401 is
fixed. But I really didn't want to load even more changes into this
PR.
Benchmarks:
The BenchmarkFuzz.* benchmarks run 83% faster (i.e. about six times
faster) and allocate 95% fewer bytes. The reason for that is that the
benchmark reads one sample after another from the time series and
creates a new series iterator for each sample read.
To find out how much these improvements matter in practice, I have
mirrored a beefy Prometheus server at SoundCloud that suffers from
both issues #1035 and #1264. To reach steady state that would be
comparable, the server needs to run for 15d. So far, it has run for
1d. The test server currently has only half as many memory time series
and 60% of the memory chunks the main server has. The 90th percentile
rule evaluation cycle time is ~11s on the main server and only ~3s on
the test server. However, these numbers might get much closer over
time.
In addition to performance improvements, this commit removes about 150
LOC.
2016-02-16 09:47:50 -08:00
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
v, err := it.sampleValueAtIndex(i - 1)
|
|
|
|
if err != nil {
|
|
|
|
return ZeroSamplePair, err
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return model.SamplePair{Timestamp: ts, Value: v}, nil
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
// rangeValues implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) rangeValues(in metric.Interval) ([]model.SamplePair, error) {
|
|
|
|
var lastErr error
|
|
|
|
|
2015-04-14 04:46:38 -07:00
|
|
|
oldest := sort.Search(it.len, func(i int) bool {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
t, err := it.timestampAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
lastErr = err
|
|
|
|
}
|
|
|
|
return !t.Before(in.OldestInclusive)
|
2014-06-06 02:55:53 -07:00
|
|
|
})
|
|
|
|
|
2015-04-14 04:46:38 -07:00
|
|
|
newest := sort.Search(it.len, func(i int) bool {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
t, err := it.timestampAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
lastErr = err
|
|
|
|
}
|
|
|
|
return t.After(in.NewestInclusive)
|
2014-06-06 02:55:53 -07:00
|
|
|
})
|
|
|
|
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
if oldest == it.len || lastErr != nil {
|
|
|
|
return nil, lastErr
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2015-08-22 05:52:35 -07:00
|
|
|
result := make([]model.SamplePair, 0, newest-oldest)
|
2014-06-06 02:55:53 -07:00
|
|
|
for i := oldest; i < newest; i++ {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
t, err := it.timestampAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
v, err := it.sampleValueAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
result = append(result, model.SamplePair{Timestamp: t, Value: v})
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return result, nil
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|
|
|
|
|
2014-09-16 06:47:24 -07:00
|
|
|
// contains implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) contains(t model.Time) (bool, error) {
|
|
|
|
lastT, err := it.timestampAtIndex(it.len - 1)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
return !t.Before(it.baseT) && !t.After(lastT), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// values implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) values() <-chan struct {
|
|
|
|
model.SamplePair
|
|
|
|
error
|
|
|
|
} {
|
|
|
|
valuesChan := make(chan struct {
|
|
|
|
model.SamplePair
|
|
|
|
error
|
|
|
|
})
|
2015-04-14 04:46:38 -07:00
|
|
|
go func() {
|
|
|
|
for i := 0; i < it.len; i++ {
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
t, err := it.timestampAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
valuesChan <- struct {
|
|
|
|
model.SamplePair
|
|
|
|
error
|
|
|
|
}{ZeroSamplePair, err}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
v, err := it.sampleValueAtIndex(i)
|
|
|
|
if err != nil {
|
|
|
|
valuesChan <- struct {
|
|
|
|
model.SamplePair
|
|
|
|
error
|
|
|
|
}{ZeroSamplePair, err}
|
|
|
|
break
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
valuesChan <- struct {
|
|
|
|
model.SamplePair
|
|
|
|
error
|
|
|
|
}{model.SamplePair{Timestamp: t, Value: v}, nil}
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
|
|
|
close(valuesChan)
|
|
|
|
}()
|
|
|
|
return valuesChan
|
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
// timestampAtIndex implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite a
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) timestampAtIndex(idx int) (model.Time, error) {
|
2015-04-14 04:46:38 -07:00
|
|
|
offset := deltaHeaderBytes + idx*int(it.tBytes+it.vBytes)
|
|
|
|
|
|
|
|
switch it.tBytes {
|
|
|
|
case d1:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseT + model.Time(uint8(it.c[offset])), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d2:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseT + model.Time(binary.LittleEndian.Uint16(it.c[offset:])), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d4:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseT + model.Time(binary.LittleEndian.Uint32(it.c[offset:])), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d8:
|
|
|
|
// Take absolute value for d8.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return model.Time(binary.LittleEndian.Uint64(it.c[offset:])), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
default:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return 0, fmt.Errorf("invalid number of bytes for time delta: %d", it.tBytes)
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
// lastTimestamp implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite
a lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) lastTimestamp() (model.Time, error) {
|
2015-05-20 10:13:06 -07:00
|
|
|
return it.timestampAtIndex(it.len - 1)
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
// sampleValueAtIndex implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite
a lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) sampleValueAtIndex(idx int) (model.SampleValue, error) {
|
2015-04-14 04:46:38 -07:00
|
|
|
offset := deltaHeaderBytes + idx*int(it.tBytes+it.vBytes) + int(it.tBytes)
|
|
|
|
|
|
|
|
if it.isInt {
|
|
|
|
switch it.vBytes {
|
|
|
|
case d0:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseV, nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d1:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseV + model.SampleValue(int8(it.c[offset])), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d2:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseV + model.SampleValue(int16(binary.LittleEndian.Uint16(it.c[offset:]))), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d4:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseV + model.SampleValue(int32(binary.LittleEndian.Uint32(it.c[offset:]))), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
// No d8 for ints.
|
|
|
|
default:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return 0, fmt.Errorf("invalid number of bytes for integer delta: %d", it.vBytes)
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch it.vBytes {
|
|
|
|
case d4:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return it.baseV + model.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(it.c[offset:]))), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
case d8:
|
|
|
|
// Take absolute value for d8.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return model.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(it.c[offset:]))), nil
|
2015-04-14 04:46:38 -07:00
|
|
|
default:
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollute the function signatures quite
lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, whith the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
return 0, fmt.Errorf("invalid number of bytes for floating point delta: %d", it.vBytes)
|
2015-04-14 04:46:38 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-20 10:13:06 -07:00
|
|
|
// lastSampleValue implements chunkIterator.
|
Handle errors caused by data corruption more gracefully
This requires all the panic calls upon unexpected data to be converted
into errors returned. This pollutes the function signatures quite
a lot. Well, this is Go...
The ideas behind this are the following:
- panic only if it's a programming error. Data corruptions happen, and
they are not programming errors.
- If we detect a data corruption, we "quarantine" the series,
essentially removing it from the database and putting its data into
a separate directory for forensics.
- Failure during writing to a series file is not considered corruption
automatically. It will call setDirty, though, so that a
crashrecovery upon the next restart will commence and check for
that.
- Series quarantining and setDirty calls are logged and counted in
metrics, but are hidden from the user of the interfaces in
interface.go, with the notable exception of Append(). The reasoning
is that we treat corruption by removing the corrupted series, i.e. a
query for it will return no results on its next call anyway, so
return no results right now. In the case of Append(), we want to
tell the user that no data has been appended, though.
Minor side effects:
- Now consistently using filepath.* instead of path.*.
- Introduced structured logging where I touched it. This makes things
less consistent, but a complete change to structured logging would
be out of scope for this PR.
2016-02-25 03:23:42 -08:00
|
|
|
func (it *deltaEncodedChunkIterator) lastSampleValue() (model.SampleValue, error) {
|
2015-05-20 10:13:06 -07:00
|
|
|
return it.sampleValueAtIndex(it.len - 1)
|
2014-06-06 02:55:53 -07:00
|
|
|
}
|