mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-10 13:27:27 -08:00
c1b669bf9b
* Introduce out-of-order TSDB support This implementation is based on this design doc: https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing This commit adds support to accept out-of-order ("OOO") sample into the TSDB up to a configurable time allowance. If OOO is enabled, overlapping querying are automatically enabled. Most of the additions have been borrowed from https://github.com/grafana/mimir-prometheus/ Here is the list ist of the original commits cherry picked from mimir-prometheus into this branch: -4b2198d7ec
-2836e5513f
-00b379c3a5
-ff0dc75758
-a632c73352
-c6f3d4ab33
-5e8406a1d4
-abde1e0ba1
-e70e769889
-df59320886
Co-authored-by: Jesus Vazquez <jesus.vazquez@grafana.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Dieter Plaetinck <dieter@grafana.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * gofumpt files Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Add license header to missing files Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix OOO tests due to existing chunk disk mapper implementation Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix truncate int overflow Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Add Sync method to the WAL and update tests Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * remove useless sync Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Update minOOOTime after truncating Head * Update minOOOTime after truncating Head Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Add a unit test Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Load OutOfOrderTimeWindow only once per appender Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix OOO Head LabelValues and PostingsForMatchers Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix replay of OOO mmap chunks Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Remove unnecessary err check Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Prevent panic with ApplyConfig Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Run OOO compaction after restart if there is OOO data from WBL Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Apply Bartek's suggestions Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Refactor OOO compaction Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Address comments and TODOs - Added a comment explaining why we need the allow overlapping compaction toggle - Clarified TSDBConfig OutOfOrderTimeWindow doc - Added an owner to all the TODOs in the code Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Run go format Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix remaining review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix tests Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Change wbl reference when truncating ooo in TestHeadMinOOOTimeUpdate Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> * Fix TestWBLAndMmapReplay test failure on windows Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Address most of the feedback Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Refactor the block meta for out of order Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix windows error Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Jesus Vazquez <jesus.vazquez@grafana.com> Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Dieter Plaetinck <dieter@grafana.com> Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
122 lines
3.8 KiB
Go
122 lines
3.8 KiB
Go
// Copyright 2020 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package tsdb
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
"os"
|
|
|
|
"github.com/go-kit/log"
|
|
"github.com/go-kit/log/level"
|
|
"github.com/oklog/ulid"
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/prometheus/prometheus/model/timestamp"
|
|
"github.com/prometheus/prometheus/storage"
|
|
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
|
)
|
|
|
|
// BlockWriter is a block writer that allows appending and flushing series to disk.
|
|
type BlockWriter struct {
|
|
logger log.Logger
|
|
destinationDir string
|
|
|
|
head *Head
|
|
blockSize int64 // in ms
|
|
chunkDir string
|
|
}
|
|
|
|
// ErrNoSeriesAppended is returned if the series count is zero while flushing blocks.
|
|
var ErrNoSeriesAppended = errors.New("no series appended, aborting")
|
|
|
|
// NewBlockWriter create a new block writer.
|
|
//
|
|
// The returned writer accumulates all the series in the Head block until `Flush` is called.
|
|
//
|
|
// Note that the writer will not check if the target directory exists or
|
|
// contains anything at all. It is the caller's responsibility to
|
|
// ensure that the resulting blocks do not overlap etc.
|
|
// Writer ensures the block flush is atomic (via rename).
|
|
func NewBlockWriter(logger log.Logger, dir string, blockSize int64) (*BlockWriter, error) {
|
|
w := &BlockWriter{
|
|
logger: logger,
|
|
destinationDir: dir,
|
|
blockSize: blockSize,
|
|
}
|
|
if err := w.initHead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return w, nil
|
|
}
|
|
|
|
// initHead creates and initialises a new TSDB head.
|
|
func (w *BlockWriter) initHead() error {
|
|
chunkDir, err := os.MkdirTemp(os.TempDir(), "head")
|
|
if err != nil {
|
|
return errors.Wrap(err, "create temp dir")
|
|
}
|
|
w.chunkDir = chunkDir
|
|
opts := DefaultHeadOptions()
|
|
opts.ChunkRange = w.blockSize
|
|
opts.ChunkDirRoot = w.chunkDir
|
|
h, err := NewHead(nil, w.logger, nil, nil, opts, NewHeadStats())
|
|
if err != nil {
|
|
return errors.Wrap(err, "tsdb.NewHead")
|
|
}
|
|
|
|
w.head = h
|
|
return w.head.Init(math.MinInt64)
|
|
}
|
|
|
|
// Appender returns a new appender on the database.
|
|
// Appender can't be called concurrently. However, the returned Appender can safely be used concurrently.
|
|
func (w *BlockWriter) Appender(ctx context.Context) storage.Appender {
|
|
return w.head.Appender(ctx)
|
|
}
|
|
|
|
// Flush implements the Writer interface. This is where actual block writing
|
|
// happens. After flush completes, no writes can be done.
|
|
func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) {
|
|
mint := w.head.MinTime()
|
|
// Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime).
|
|
// Because of this block intervals are always +1 than the total samples it includes.
|
|
maxt := w.head.MaxTime() + 1
|
|
level.Info(w.logger).Log("msg", "flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt))
|
|
|
|
compactor, err := NewLeveledCompactor(ctx,
|
|
nil,
|
|
w.logger,
|
|
[]int64{w.blockSize},
|
|
chunkenc.NewPool(), nil)
|
|
if err != nil {
|
|
return ulid.ULID{}, errors.Wrap(err, "create leveled compactor")
|
|
}
|
|
id, err := compactor.Write(w.destinationDir, w.head, mint, maxt, nil)
|
|
if err != nil {
|
|
return ulid.ULID{}, errors.Wrap(err, "compactor write")
|
|
}
|
|
|
|
return id, nil
|
|
}
|
|
|
|
func (w *BlockWriter) Close() error {
|
|
defer func() {
|
|
if err := os.RemoveAll(w.chunkDir); err != nil {
|
|
level.Error(w.logger).Log("msg", "error in deleting BlockWriter files", "err", err)
|
|
}
|
|
}()
|
|
return w.head.Close()
|
|
}
|