mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-10 13:27:27 -08:00
c1b669bf9b
* Introduce out-of-order TSDB support This implementation is based on this design doc: https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing This commit adds support to accept out-of-order ("OOO") samples into the TSDB up to a configurable time allowance. If OOO is enabled, overlapping queries are automatically enabled. Most of the additions have been borrowed from https://github.com/grafana/mimir-prometheus/ Here is the list of the original commits cherry picked from mimir-prometheus into this branch: -4b2198d7ec
-2836e5513f
-00b379c3a5
-ff0dc75758
-a632c73352
-c6f3d4ab33
-5e8406a1d4
-abde1e0ba1
-e70e769889
-df59320886
Co-authored-by: Jesus Vazquez <jesus.vazquez@grafana.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Dieter Plaetinck <dieter@grafana.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * gofumpt files Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Add license header to missing files Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix OOO tests due to existing chunk disk mapper implementation Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix truncate int overflow Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Add Sync method to the WAL and update tests Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * remove useless sync Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Update minOOOTime after truncating Head * Update minOOOTime after truncating Head Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Add a unit test Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Load OutOfOrderTimeWindow only once per appender Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix OOO Head LabelValues and PostingsForMatchers Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix replay of OOO mmap chunks Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Remove unnecessary err check Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Prevent panic with ApplyConfig Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Run OOO compaction after restart if there is OOO data from WBL Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Apply Bartek's suggestions Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com> Signed-off-by: Jesus Vazquez 
<jesus.vazquez@grafana.com> * Refactor OOO compaction Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Address comments and TODOs - Added a comment explaining why we need the allow overlapping compaction toggle - Clarified TSDBConfig OutOfOrderTimeWindow doc - Added an owner to all the TODOs in the code Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Run go format Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix remaining review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix tests Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Change wbl reference when truncating ooo in TestHeadMinOOOTimeUpdate Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix TestWBLAndMmapReplay test failure on windows Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Address most of the feedback Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Refactor the block meta for out of order Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix windows error Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Dieter Plaetinck <dieter@grafana.com> Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
160 lines
4.2 KiB
Go
160 lines
4.2 KiB
Go
// Copyright 2022 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package tsdb
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
|
|
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
|
"github.com/prometheus/prometheus/tsdb/tombstones"
|
|
)
|
|
|
|
// OOOChunk maintains samples in time-ascending order.
|
|
// Inserts for timestamps already seen, are dropped.
|
|
// Samples are stored uncompressed to allow easy sorting.
|
|
// Perhaps we can be more efficient later.
|
|
type OOOChunk struct {
|
|
samples []sample
|
|
}
|
|
|
|
func NewOOOChunk() *OOOChunk {
|
|
return &OOOChunk{samples: make([]sample, 0, 4)}
|
|
}
|
|
|
|
// Insert inserts the sample such that order is maintained.
|
|
// Returns false if insert was not possible due to the same timestamp already existing.
|
|
func (o *OOOChunk) Insert(t int64, v float64) bool {
|
|
// Find index of sample we should replace.
|
|
i := sort.Search(len(o.samples), func(i int) bool { return o.samples[i].t >= t })
|
|
|
|
if i >= len(o.samples) {
|
|
// none found. append it at the end
|
|
o.samples = append(o.samples, sample{t, v})
|
|
return true
|
|
}
|
|
|
|
if o.samples[i].t == t {
|
|
return false
|
|
}
|
|
|
|
// Expand length by 1 to make room. use a zero sample, we will overwrite it anyway.
|
|
o.samples = append(o.samples, sample{})
|
|
copy(o.samples[i+1:], o.samples[i:])
|
|
o.samples[i] = sample{t, v}
|
|
|
|
return true
|
|
}
|
|
|
|
func (o *OOOChunk) NumSamples() int {
|
|
return len(o.samples)
|
|
}
|
|
|
|
func (o *OOOChunk) ToXOR() (*chunkenc.XORChunk, error) {
|
|
x := chunkenc.NewXORChunk()
|
|
app, err := x.Appender()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, s := range o.samples {
|
|
app.Append(s.t, s.v)
|
|
}
|
|
return x, nil
|
|
}
|
|
|
|
func (o *OOOChunk) ToXORBetweenTimestamps(mint, maxt int64) (*chunkenc.XORChunk, error) {
|
|
x := chunkenc.NewXORChunk()
|
|
app, err := x.Appender()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, s := range o.samples {
|
|
if s.t < mint {
|
|
continue
|
|
}
|
|
if s.t > maxt {
|
|
break
|
|
}
|
|
app.Append(s.t, s.v)
|
|
}
|
|
return x, nil
|
|
}
|
|
|
|
// Compile-time check that *OOORangeHead implements BlockReader.
var _ BlockReader = (*OOORangeHead)(nil)
|
|
|
|
// OOORangeHead allows querying Head out of order samples via BlockReader
|
|
// interface implementation.
|
|
type OOORangeHead struct {
|
|
head *Head
|
|
// mint and maxt are tracked because when a query is handled we only want
|
|
// the timerange of the query and having preexisting pointers to the first
|
|
// and last timestamp help with that.
|
|
mint, maxt int64
|
|
}
|
|
|
|
func NewOOORangeHead(head *Head, mint, maxt int64) *OOORangeHead {
|
|
return &OOORangeHead{
|
|
head: head,
|
|
mint: mint,
|
|
maxt: maxt,
|
|
}
|
|
}
|
|
|
|
func (oh *OOORangeHead) Index() (IndexReader, error) {
|
|
return NewOOOHeadIndexReader(oh.head, oh.mint, oh.maxt), nil
|
|
}
|
|
|
|
func (oh *OOORangeHead) Chunks() (ChunkReader, error) {
|
|
return NewOOOHeadChunkReader(oh.head, oh.mint, oh.maxt), nil
|
|
}
|
|
|
|
func (oh *OOORangeHead) Tombstones() (tombstones.Reader, error) {
|
|
// As stated in the design doc https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing
|
|
// Tombstones are not supported for out of order metrics.
|
|
return tombstones.NewMemTombstones(), nil
|
|
}
|
|
|
|
func (oh *OOORangeHead) Meta() BlockMeta {
|
|
var id [16]byte
|
|
copy(id[:], "____ooo_head____")
|
|
return BlockMeta{
|
|
MinTime: oh.mint,
|
|
MaxTime: oh.maxt,
|
|
ULID: id,
|
|
Stats: BlockStats{
|
|
NumSeries: oh.head.NumSeries(),
|
|
},
|
|
}
|
|
}
|
|
|
|
// Size returns the size taken by the Head block.
|
|
func (oh *OOORangeHead) Size() int64 {
|
|
return oh.head.Size()
|
|
}
|
|
|
|
// String returns an human readable representation of the out of order range
|
|
// head. It's important to keep this function in order to avoid the struct dump
|
|
// when the head is stringified in errors or logs.
|
|
func (oh *OOORangeHead) String() string {
|
|
return fmt.Sprintf("ooo range head (mint: %d, maxt: %d)", oh.MinTime(), oh.MaxTime())
|
|
}
|
|
|
|
func (oh *OOORangeHead) MinTime() int64 {
|
|
return oh.mint
|
|
}
|
|
|
|
func (oh *OOORangeHead) MaxTime() int64 {
|
|
return oh.maxt
|
|
}
|