mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-11 08:04:04 -08:00
c13b1ecfe9
This finally extracts all the common code of the two chunk iterators into one. Any future chunk encodings with fast access by index can use the same iterator by simply providing an indexAccessor. Other future chunk encodings without fast index access (like Gorilla-style) can still implement the chunkIterator interface as usual.
445 lines
15 KiB
Go
445 lines
15 KiB
Go
// Copyright 2014 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package local
|
|
|
|
import (
|
|
"container/list"
|
|
"fmt"
|
|
"io"
|
|
"sort"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"github.com/prometheus/common/model"
|
|
|
|
"github.com/prometheus/prometheus/storage/metric"
|
|
)
|
|
|
|
// The DefaultChunkEncoding can be changed via a flag.
|
|
var DefaultChunkEncoding = doubleDelta
|
|
|
|
type chunkEncoding byte
|
|
|
|
// String implements flag.Value.
|
|
func (ce chunkEncoding) String() string {
|
|
return fmt.Sprintf("%d", ce)
|
|
}
|
|
|
|
// Set implements flag.Value.
|
|
func (ce *chunkEncoding) Set(s string) error {
|
|
switch s {
|
|
case "0":
|
|
*ce = delta
|
|
case "1":
|
|
*ce = doubleDelta
|
|
default:
|
|
return fmt.Errorf("invalid chunk encoding: %s", s)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
const (
|
|
delta chunkEncoding = iota
|
|
doubleDelta
|
|
)
|
|
|
|
// chunkDesc contains meta-data for a chunk. Pay special attention to the
|
|
// documented requirements for calling its methods concurrently (WRT pinning and
|
|
// locking). The doc comments spell out the requirements for each method, but
|
|
// here is an overview and general explanation:
|
|
//
|
|
// Everything that changes the pinning of the underlying chunk or deals with its
|
|
// eviction is protected by a mutex. This affects the following methods: pin,
|
|
// unpin, refCount, isEvicted, maybeEvict. These methods can be called at any
|
|
// time without further prerequisites.
|
|
//
|
|
// Another group of methods acts on (or sets) the underlying chunk. These
|
|
// methods involve no locking. They may only be called if the caller has pinned
|
|
// the chunk (to guarantee the chunk is not evicted concurrently). Also, the
|
|
// caller must make sure nobody else will call these methods concurrently,
|
|
// either by holding the sole reference to the chunkDesc (usually during loading
|
|
// or creation) or by locking the fingerprint of the series the chunkDesc
|
|
// belongs to. The affected methods are: add, maybePopulateLastTime, setChunk.
|
|
//
|
|
// Finally, there are the special cases firstTime and lastTime. lastTime requires
|
|
// to have locked the fingerprint of the series but the chunk does not need to
|
|
// be pinned. That's because the chunkLastTime field in chunkDesc gets populated
|
|
// upon completion of the chunk (when it is still pinned, and which happens
|
|
// while the series's fingerprint is locked). Once that has happened, calling
|
|
// lastTime does not require the chunk to be loaded anymore. Before that has
|
|
// happened, the chunk is pinned anyway. The chunkFirstTime field in chunkDesc
|
|
// is populated upon creation of a chunkDesc, so it is alway safe to call
|
|
// firstTime. The firstTime method is arguably not needed and only there for
|
|
// consistency with lastTime.
|
|
type chunkDesc struct {
|
|
sync.Mutex // Protects pinning.
|
|
c chunk // nil if chunk is evicted.
|
|
rCnt int
|
|
chunkFirstTime model.Time // Populated at creation. Immutable.
|
|
chunkLastTime model.Time // Populated on closing of the chunk, model.Earliest if unset.
|
|
|
|
// evictListElement is nil if the chunk is not in the evict list.
|
|
// evictListElement is _not_ protected by the chunkDesc mutex.
|
|
// It must only be touched by the evict list handler in memorySeriesStorage.
|
|
evictListElement *list.Element
|
|
}
|
|
|
|
// newChunkDesc creates a new chunkDesc pointing to the provided chunk. The
|
|
// provided chunk is assumed to be not persisted yet. Therefore, the refCount of
|
|
// the new chunkDesc is 1 (preventing eviction prior to persisting).
|
|
func newChunkDesc(c chunk, firstTime model.Time) *chunkDesc {
|
|
chunkOps.WithLabelValues(createAndPin).Inc()
|
|
atomic.AddInt64(&numMemChunks, 1)
|
|
numMemChunkDescs.Inc()
|
|
return &chunkDesc{
|
|
c: c,
|
|
rCnt: 1,
|
|
chunkFirstTime: firstTime,
|
|
chunkLastTime: model.Earliest,
|
|
}
|
|
}
|
|
|
|
// add adds a sample pair to the underlying chunk. For safe concurrent access,
|
|
// The chunk must be pinned, and the caller must have locked the fingerprint of
|
|
// the series.
|
|
func (cd *chunkDesc) add(s model.SamplePair) ([]chunk, error) {
|
|
return cd.c.add(s)
|
|
}
|
|
|
|
// pin increments the refCount by one. Upon increment from 0 to 1, this
|
|
// chunkDesc is removed from the evict list. To enable the latter, the
|
|
// evictRequests channel has to be provided. This method can be called
|
|
// concurrently at any time.
|
|
func (cd *chunkDesc) pin(evictRequests chan<- evictRequest) {
|
|
cd.Lock()
|
|
defer cd.Unlock()
|
|
|
|
if cd.rCnt == 0 {
|
|
// Remove ourselves from the evict list.
|
|
evictRequests <- evictRequest{cd, false}
|
|
}
|
|
cd.rCnt++
|
|
}
|
|
|
|
// unpin decrements the refCount by one. Upon decrement from 1 to 0, this
|
|
// chunkDesc is added to the evict list. To enable the latter, the evictRequests
|
|
// channel has to be provided. This method can be called concurrently at any
|
|
// time.
|
|
func (cd *chunkDesc) unpin(evictRequests chan<- evictRequest) {
|
|
cd.Lock()
|
|
defer cd.Unlock()
|
|
|
|
if cd.rCnt == 0 {
|
|
panic("cannot unpin already unpinned chunk")
|
|
}
|
|
cd.rCnt--
|
|
if cd.rCnt == 0 {
|
|
// Add ourselves to the back of the evict list.
|
|
evictRequests <- evictRequest{cd, true}
|
|
}
|
|
}
|
|
|
|
// refCount returns the number of pins. This method can be called concurrently
|
|
// at any time.
|
|
func (cd *chunkDesc) refCount() int {
|
|
cd.Lock()
|
|
defer cd.Unlock()
|
|
|
|
return cd.rCnt
|
|
}
|
|
|
|
// firstTime returns the timestamp of the first sample in the chunk. This method
|
|
// can be called concurrently at any time. It only returns the immutable
|
|
// cd.chunkFirstTime without any locking. Arguably, this method is
|
|
// useless. However, it provides consistency with the lastTime method.
|
|
func (cd *chunkDesc) firstTime() model.Time {
|
|
return cd.chunkFirstTime
|
|
}
|
|
|
|
// lastTime returns the timestamp of the last sample in the chunk. For safe
|
|
// concurrent access, this method requires the fingerprint of the time series to
|
|
// be locked.
|
|
func (cd *chunkDesc) lastTime() (model.Time, error) {
|
|
if cd.chunkLastTime != model.Earliest || cd.c == nil {
|
|
return cd.chunkLastTime, nil
|
|
}
|
|
return cd.c.newIterator().lastTimestamp()
|
|
}
|
|
|
|
// maybePopulateLastTime populates the chunkLastTime from the underlying chunk
|
|
// if it has not yet happened. Call this method directly after having added the
|
|
// last sample to a chunk or after closing a head chunk due to age. For safe
|
|
// concurrent access, the chunk must be pinned, and the caller must have locked
|
|
// the fingerprint of the series.
|
|
func (cd *chunkDesc) maybePopulateLastTime() error {
|
|
if cd.chunkLastTime == model.Earliest && cd.c != nil {
|
|
t, err := cd.c.newIterator().lastTimestamp()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cd.chunkLastTime = t
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// isEvicted returns whether the chunk is evicted. For safe concurrent access,
|
|
// the caller must have locked the fingerprint of the series.
|
|
func (cd *chunkDesc) isEvicted() bool {
|
|
// Locking required here because we do not want the caller to force
|
|
// pinning the chunk first, so it could be evicted while this method is
|
|
// called.
|
|
cd.Lock()
|
|
defer cd.Unlock()
|
|
|
|
return cd.c == nil
|
|
}
|
|
|
|
// setChunk sets the underlying chunk. The caller must have locked the
|
|
// fingerprint of the series and must have "pre-pinned" the chunk (i.e. first
|
|
// call pin and then set the chunk).
|
|
func (cd *chunkDesc) setChunk(c chunk) {
|
|
if cd.c != nil {
|
|
panic("chunk already set")
|
|
}
|
|
cd.c = c
|
|
}
|
|
|
|
// maybeEvict evicts the chunk if the refCount is 0. It returns whether the chunk
|
|
// is now evicted, which includes the case that the chunk was evicted even
|
|
// before this method was called. It can be called concurrently at any time.
|
|
func (cd *chunkDesc) maybeEvict() bool {
|
|
cd.Lock()
|
|
defer cd.Unlock()
|
|
|
|
if cd.c == nil {
|
|
return true
|
|
}
|
|
if cd.rCnt != 0 {
|
|
return false
|
|
}
|
|
if cd.chunkLastTime == model.Earliest {
|
|
// This must never happen.
|
|
panic("chunkLastTime not populated for evicted chunk")
|
|
}
|
|
cd.c = nil
|
|
chunkOps.WithLabelValues(evict).Inc()
|
|
atomic.AddInt64(&numMemChunks, -1)
|
|
return true
|
|
}
|
|
|
|
// chunk is the interface for all chunks. Chunks are generally not
|
|
// goroutine-safe.
|
|
type chunk interface {
|
|
// add adds a SamplePair to the chunks, performs any necessary
|
|
// re-encoding, and adds any necessary overflow chunks. It returns the
|
|
// new version of the original chunk, followed by overflow chunks, if
|
|
// any. The first chunk returned might be the same as the original one
|
|
// or a newly allocated version. In any case, take the returned chunk as
|
|
// the relevant one and discard the original chunk.
|
|
add(sample model.SamplePair) ([]chunk, error)
|
|
clone() chunk
|
|
firstTime() model.Time
|
|
newIterator() chunkIterator
|
|
marshal(io.Writer) error
|
|
marshalToBuf([]byte) error
|
|
unmarshal(io.Reader) error
|
|
unmarshalFromBuf([]byte) error
|
|
encoding() chunkEncoding
|
|
}
|
|
|
|
// A chunkIterator enables efficient access to the content of a chunk. It is
|
|
// generally not safe to use a chunkIterator concurrently with or after chunk
|
|
// mutation. The error returned by any of the methods is always the last error
|
|
// encountered by the iterator, i.e. once an error has been encountered, no
|
|
// method will ever return a nil error again. In general, errors signal data
|
|
// corruption in the chunk and require quarantining.
|
|
type chunkIterator interface {
|
|
// Gets the last timestamp in the chunk.
|
|
lastTimestamp() (model.Time, error)
|
|
// Gets the value that is closest before the given time. In case a value
|
|
// exists at precisely the given time, that value is returned. If no
|
|
// applicable value exists, ZeroSamplePair is returned.
|
|
valueAtOrBeforeTime(model.Time) (model.SamplePair, error)
|
|
// Gets all values contained within a given interval.
|
|
rangeValues(metric.Interval) ([]model.SamplePair, error)
|
|
// Whether a given timestamp is contained between first and last value
|
|
// in the chunk.
|
|
contains(model.Time) (bool, error)
|
|
// scan, value, and err implement a bufio.Scanner-like interface. The
|
|
// scan method advances the iterator to the next value in the chunk and
|
|
// returns true if that worked. In that case, the value method will
|
|
// return the sample pair the iterator has advanced to. If the scan
|
|
// method returns false, either an error has occured or the end of the
|
|
// chunk has been reached. In the former case, the err method will
|
|
// return the error. In the latter case, the err method will return nil.
|
|
// Upon creation, the iterator is at position "minus one". After the
|
|
// first scan call, value will return the 1st value in the
|
|
// chunk. valueAtOrBeforeTime and rangeValues all modify the iterator
|
|
// position, too. They behave as if their return values were retrieved
|
|
// after a scan call, i.e. calling the value or err methods after a call
|
|
// to those methods will retrieve the same return value again (or the
|
|
// last value in the range in case of rangeValues), and subsequent scan
|
|
// calls will advance the iterator from there.
|
|
scan() bool
|
|
value() model.SamplePair
|
|
err() error
|
|
}
|
|
|
|
func transcodeAndAdd(dst chunk, src chunk, s model.SamplePair) ([]chunk, error) {
|
|
chunkOps.WithLabelValues(transcode).Inc()
|
|
|
|
var (
|
|
head = dst
|
|
body, newChunks []chunk
|
|
err error
|
|
)
|
|
|
|
it := src.newIterator()
|
|
for it.scan() {
|
|
if newChunks, err = head.add(it.value()); err != nil {
|
|
return nil, err
|
|
}
|
|
body = append(body, newChunks[:len(newChunks)-1]...)
|
|
head = newChunks[len(newChunks)-1]
|
|
}
|
|
if it.err() != nil {
|
|
return nil, it.err()
|
|
}
|
|
|
|
if newChunks, err = head.add(s); err != nil {
|
|
return nil, err
|
|
}
|
|
return append(body, newChunks...), nil
|
|
}
|
|
|
|
// newChunk creates a new chunk according to the encoding set by the
|
|
// defaultChunkEncoding flag.
|
|
func newChunk() chunk {
|
|
chunk, err := newChunkForEncoding(DefaultChunkEncoding)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return chunk
|
|
}
|
|
|
|
func newChunkForEncoding(encoding chunkEncoding) (chunk, error) {
|
|
switch encoding {
|
|
case delta:
|
|
return newDeltaEncodedChunk(d1, d0, true, chunkLen), nil
|
|
case doubleDelta:
|
|
return newDoubleDeltaEncodedChunk(d1, d0, true, chunkLen), nil
|
|
default:
|
|
return nil, fmt.Errorf("unknown chunk encoding: %v", encoding)
|
|
}
|
|
}
|
|
|
|
// indexAccessor allows accesses to samples by index.
|
|
type indexAccessor interface {
|
|
timestampAtIndex(int) model.Time
|
|
sampleValueAtIndex(int) model.SampleValue
|
|
err() error
|
|
}
|
|
|
|
// indexAccessingChunkIterator is a chunk iterator for chunks for which an
|
|
// indexAccessor implementation exists.
|
|
type indexAccessingChunkIterator struct {
|
|
len int
|
|
pos int
|
|
lastValue model.SamplePair
|
|
acc indexAccessor
|
|
}
|
|
|
|
func newIndexAccessingChunkIterator(len int, acc indexAccessor) *indexAccessingChunkIterator {
|
|
return &indexAccessingChunkIterator{
|
|
len: len,
|
|
pos: -1,
|
|
lastValue: ZeroSamplePair,
|
|
acc: acc,
|
|
}
|
|
}
|
|
|
|
// lastTimestamp implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) lastTimestamp() (model.Time, error) {
|
|
return it.acc.timestampAtIndex(it.len - 1), it.acc.err()
|
|
}
|
|
|
|
// valueAtOrBeforeTime implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) valueAtOrBeforeTime(t model.Time) (model.SamplePair, error) {
|
|
i := sort.Search(it.len, func(i int) bool {
|
|
return it.acc.timestampAtIndex(i).After(t)
|
|
})
|
|
if i == 0 || it.acc.err() != nil {
|
|
return ZeroSamplePair, it.acc.err()
|
|
}
|
|
it.pos = i - 1
|
|
it.lastValue = model.SamplePair{
|
|
Timestamp: it.acc.timestampAtIndex(i - 1),
|
|
Value: it.acc.sampleValueAtIndex(i - 1),
|
|
}
|
|
return it.lastValue, it.acc.err()
|
|
}
|
|
|
|
// rangeValues implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) rangeValues(in metric.Interval) ([]model.SamplePair, error) {
|
|
oldest := sort.Search(it.len, func(i int) bool {
|
|
return !it.acc.timestampAtIndex(i).Before(in.OldestInclusive)
|
|
})
|
|
newest := sort.Search(it.len, func(i int) bool {
|
|
return it.acc.timestampAtIndex(i).After(in.NewestInclusive)
|
|
})
|
|
if oldest == it.len || it.acc.err() != nil {
|
|
return nil, it.acc.err()
|
|
}
|
|
|
|
result := make([]model.SamplePair, 0, newest-oldest)
|
|
for i := oldest; i < newest; i++ {
|
|
it.pos = i
|
|
it.lastValue = model.SamplePair{
|
|
Timestamp: it.acc.timestampAtIndex(i),
|
|
Value: it.acc.sampleValueAtIndex(i),
|
|
}
|
|
result = append(result, it.lastValue)
|
|
}
|
|
return result, it.acc.err()
|
|
}
|
|
|
|
// contains implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) contains(t model.Time) (bool, error) {
|
|
return !t.Before(it.acc.timestampAtIndex(0)) &&
|
|
!t.After(it.acc.timestampAtIndex(it.len-1)), it.acc.err()
|
|
}
|
|
|
|
// scan implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) scan() bool {
|
|
it.pos++
|
|
if it.pos >= it.len {
|
|
return false
|
|
}
|
|
it.lastValue = model.SamplePair{
|
|
Timestamp: it.acc.timestampAtIndex(it.pos),
|
|
Value: it.acc.sampleValueAtIndex(it.pos),
|
|
}
|
|
return it.acc.err() == nil
|
|
}
|
|
|
|
// value implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) value() model.SamplePair {
|
|
return it.lastValue
|
|
}
|
|
|
|
// err implements chunkIterator.
|
|
func (it *indexAccessingChunkIterator) err() error {
|
|
return it.acc.err()
|
|
}
|