mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-17 19:14:04 -08:00
c1b669bf9b
* Introduce out-of-order TSDB support This implementation is based on this design doc: https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing This commit adds support to accept out-of-order ("OOO") samples into the TSDB up to a configurable time allowance. If OOO is enabled, overlapping queries are automatically enabled. Most of the additions have been borrowed from https://github.com/grafana/mimir-prometheus/ Here is the list of the original commits cherry-picked from mimir-prometheus into this branch: -4b2198d7ec
-2836e5513f
-00b379c3a5
-ff0dc75758
-a632c73352
-c6f3d4ab33
-5e8406a1d4
-abde1e0ba1
-e70e769889
-df59320886
Co-authored-by: Jesus Vazquez <jesus.vazquez@grafana.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Dieter Plaetinck <dieter@grafana.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * gofumpt files Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Add license header to missing files Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix OOO tests due to existing chunk disk mapper implementation Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix truncate int overflow Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Add Sync method to the WAL and update tests Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * remove useless sync Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Update minOOOTime after truncating Head * Update minOOOTime after truncating Head Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Add a unit test Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Load OutOfOrderTimeWindow only once per appender Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix OOO Head LabelValues and PostingsForMatchers Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix replay of OOO mmap chunks Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Remove unnecessary err check Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Prevent panic with ApplyConfig Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Run OOO compaction after restart if there is OOO data from WBL Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Apply Bartek's suggestions Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com> Signed-off-by: Jesus Vazquez 
<jesus.vazquez@grafana.com> * Refactor OOO compaction Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Address comments and TODOs - Added a comment explaining why we need the allow overlapping compaction toggle - Clarified TSDBConfig OutOfOrderTimeWindow doc - Added an owner to all the TODOs in the code Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Run go format Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix remaining review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix tests Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Change wbl reference when truncating ooo in TestHeadMinOOOTimeUpdate Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix TestWBLAndMmapReplay test failure on windows Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Address most of the feedback Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Refactor the block meta for out of order Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix windows error Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Dieter Plaetinck <dieter@grafana.com> Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
211 lines
5.7 KiB
Go
211 lines
5.7 KiB
Go
// Copyright 2017 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package chunkenc
|
|
|
|
import (
|
|
"math"
|
|
"sync"
|
|
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// Encoding is the identifier for a chunk encoding.
|
|
type Encoding uint8
|
|
|
|
func (e Encoding) String() string {
|
|
switch e {
|
|
case EncNone:
|
|
return "none"
|
|
case EncXOR:
|
|
return "XOR"
|
|
}
|
|
return "<unknown>"
|
|
}
|
|
|
|
// The different available chunk encodings.
|
|
const (
|
|
EncNone Encoding = iota
|
|
EncXOR
|
|
)
|
|
|
|
// Chunk encodings for out-of-order chunks.
|
|
// These encodings must be only used by the Head block for its internal bookkeeping.
|
|
const (
|
|
OutOfOrderMask = 0b10000000
|
|
EncOOOXOR = EncXOR | OutOfOrderMask
|
|
)
|
|
|
|
func IsOutOfOrderChunk(e Encoding) bool {
|
|
return (e & OutOfOrderMask) != 0
|
|
}
|
|
|
|
func IsValidEncoding(e Encoding) bool {
|
|
return e == EncXOR || e == EncOOOXOR
|
|
}
|
|
|
|
// Chunk holds a sequence of sample pairs that can be iterated over and appended to.
|
|
type Chunk interface {
|
|
// Bytes returns the underlying byte slice of the chunk.
|
|
Bytes() []byte
|
|
|
|
// Encoding returns the encoding type of the chunk.
|
|
Encoding() Encoding
|
|
|
|
// Appender returns an appender to append samples to the chunk.
|
|
Appender() (Appender, error)
|
|
|
|
// The iterator passed as argument is for re-use.
|
|
// Depending on implementation, the iterator can
|
|
// be re-used or a new iterator can be allocated.
|
|
Iterator(Iterator) Iterator
|
|
|
|
// NumSamples returns the number of samples in the chunk.
|
|
NumSamples() int
|
|
|
|
// Compact is called whenever a chunk is expected to be complete (no more
|
|
// samples appended) and the underlying implementation can eventually
|
|
// optimize the chunk.
|
|
// There's no strong guarantee that no samples will be appended once
|
|
// Compact() is called. Implementing this function is optional.
|
|
Compact()
|
|
}
|
|
|
|
// Appender is the write side of a chunk: it adds sample pairs to it.
type Appender interface {
	// Append adds one sample pair, passed as an int64 and a float64, to the
	// chunk. The pair is presumably (timestamp, value), matching Iterator.At.
	Append(t int64, v float64)
}
|
|
|
|
// Iterator is a simple iterator that can only get the next value.
|
|
// Iterator iterates over the samples of a time series, in timestamp-increasing order.
|
|
type Iterator interface {
|
|
// Next advances the iterator by one.
|
|
Next() bool
|
|
// Seek advances the iterator forward to the first sample with the timestamp equal or greater than t.
|
|
// If current sample found by previous `Next` or `Seek` operation already has this property, Seek has no effect.
|
|
// Seek returns true, if such sample exists, false otherwise.
|
|
// Iterator is exhausted when the Seek returns false.
|
|
Seek(t int64) bool
|
|
// At returns the current timestamp/value pair.
|
|
// Before the iterator has advanced At behaviour is unspecified.
|
|
At() (int64, float64)
|
|
// Err returns the current error. It should be used only after iterator is
|
|
// exhausted, that is `Next` or `Seek` returns false.
|
|
Err() error
|
|
}
|
|
|
|
// MockSeriesIterator returns an iterator for a mock series with custom timeStamps and values.
|
|
func MockSeriesIterator(timestamps []int64, values []float64) Iterator {
|
|
return &mockSeriesIterator{
|
|
timeStamps: timestamps,
|
|
values: values,
|
|
currIndex: 0,
|
|
}
|
|
}
|
|
|
|
type mockSeriesIterator struct {
|
|
timeStamps []int64
|
|
values []float64
|
|
currIndex int
|
|
}
|
|
|
|
func (it *mockSeriesIterator) Seek(int64) bool { return false }
|
|
func (it *mockSeriesIterator) At() (int64, float64) {
|
|
return it.timeStamps[it.currIndex], it.values[it.currIndex]
|
|
}
|
|
|
|
func (it *mockSeriesIterator) Next() bool {
|
|
if it.currIndex < len(it.timeStamps)-1 {
|
|
it.currIndex++
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
func (it *mockSeriesIterator) Err() error { return nil }
|
|
|
|
// NewNopIterator returns a new chunk iterator that does not hold any data.
|
|
func NewNopIterator() Iterator {
|
|
return nopIterator{}
|
|
}
|
|
|
|
type nopIterator struct{}
|
|
|
|
func (nopIterator) Seek(int64) bool { return false }
|
|
func (nopIterator) At() (int64, float64) { return math.MinInt64, 0 }
|
|
func (nopIterator) Next() bool { return false }
|
|
func (nopIterator) Err() error { return nil }
|
|
|
|
// Pool is used to create and reuse chunk references to avoid allocations.
|
|
type Pool interface {
|
|
Put(Chunk) error
|
|
Get(e Encoding, b []byte) (Chunk, error)
|
|
}
|
|
|
|
// pool is a memory pool of chunk objects.
type pool struct {
	// xor is the sync.Pool backing chunks of the XOR encodings.
	xor sync.Pool
}
|
|
|
|
// NewPool returns a new pool.
|
|
func NewPool() Pool {
|
|
return &pool{
|
|
xor: sync.Pool{
|
|
New: func() interface{} {
|
|
return &XORChunk{b: bstream{}}
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (p *pool) Get(e Encoding, b []byte) (Chunk, error) {
|
|
switch e {
|
|
case EncXOR, EncOOOXOR:
|
|
c := p.xor.Get().(*XORChunk)
|
|
c.b.stream = b
|
|
c.b.count = 0
|
|
return c, nil
|
|
}
|
|
return nil, errors.Errorf("invalid chunk encoding %q", e)
|
|
}
|
|
|
|
func (p *pool) Put(c Chunk) error {
|
|
switch c.Encoding() {
|
|
case EncXOR, EncOOOXOR:
|
|
xc, ok := c.(*XORChunk)
|
|
// This may happen often with wrapped chunks. Nothing we can really do about
|
|
// it but returning an error would cause a lot of allocations again. Thus,
|
|
// we just skip it.
|
|
if !ok {
|
|
return nil
|
|
}
|
|
xc.b.stream = nil
|
|
xc.b.count = 0
|
|
p.xor.Put(c)
|
|
default:
|
|
return errors.Errorf("invalid chunk encoding %q", c.Encoding())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// FromData returns a chunk from a byte slice of chunk data.
|
|
// This is there so that users of the library can easily create chunks from
|
|
// bytes.
|
|
func FromData(e Encoding, d []byte) (Chunk, error) {
|
|
switch e {
|
|
case EncXOR, EncOOOXOR:
|
|
return &XORChunk{b: bstream{count: 0, stream: d}}, nil
|
|
}
|
|
return nil, errors.Errorf("invalid chunk encoding %q", e)
|
|
}
|