mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-26 22:19:40 -08:00
06d54fcc6c
Simple follow-up to #13620. Modify `tsdb.PostingsForMatchers` to use the optimized tsdb.IndexReader.PostingsForLabelMatching method also for inverse matching. Introduce method `PostingsForAllLabelValues`, to avoid changing the existing method. The performance is much improved for a subset of the cases; there are up to ~60% CPU gains and ~12.5% reduction in memory usage. Remove `TestReader_InversePostingsForMatcherHonorsContextCancel` since `inversePostingsForMatcher` only passes `ctx` to `IndexReader` implementations now. Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
749 lines
22 KiB
Go
749 lines
22 KiB
Go
// Copyright 2017 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package tsdb
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"slices"
|
|
"sync"
|
|
|
|
"github.com/oklog/ulid"
|
|
|
|
"github.com/prometheus/common/promslog"
|
|
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/prometheus/prometheus/storage"
|
|
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
|
"github.com/prometheus/prometheus/tsdb/chunks"
|
|
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
|
|
"github.com/prometheus/prometheus/tsdb/fileutil"
|
|
"github.com/prometheus/prometheus/tsdb/index"
|
|
"github.com/prometheus/prometheus/tsdb/tombstones"
|
|
)
|
|
|
|
// IndexWriter serializes the index for a block of series data.
|
|
// The methods must be called in the order they are specified in.
|
|
type IndexWriter interface {
|
|
// AddSymbol registers a single symbol.
|
|
// Symbols must be registered in sorted order.
|
|
AddSymbol(sym string) error
|
|
|
|
// AddSeries populates the index writer with a series and its offsets
|
|
// of chunks that the index can reference.
|
|
// Implementations may require series to be insert in strictly increasing order by
|
|
// their labels. The reference numbers are used to resolve entries in postings lists
|
|
// that are added later.
|
|
AddSeries(ref storage.SeriesRef, l labels.Labels, chunks ...chunks.Meta) error
|
|
|
|
// Close writes any finalization and closes the resources associated with
|
|
// the underlying writer.
|
|
Close() error
|
|
}
|
|
|
|
// IndexReader provides reading access of serialized index data.
|
|
type IndexReader interface {
|
|
// Symbols return an iterator over sorted string symbols that may occur in
|
|
// series' labels and indices. It is not safe to use the returned strings
|
|
// beyond the lifetime of the index reader.
|
|
Symbols() index.StringIter
|
|
|
|
// SortedLabelValues returns sorted possible label values.
|
|
SortedLabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error)
|
|
|
|
// LabelValues returns possible label values which may not be sorted.
|
|
LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error)
|
|
|
|
// Postings returns the postings list iterator for the label pairs.
|
|
// The Postings here contain the offsets to the series inside the index.
|
|
// Found IDs are not strictly required to point to a valid Series, e.g.
|
|
// during background garbage collections.
|
|
Postings(ctx context.Context, name string, values ...string) (index.Postings, error)
|
|
|
|
// PostingsForLabelMatching returns a sorted iterator over postings having a label with the given name and a value for which match returns true.
|
|
// If no postings are found having at least one matching label, an empty iterator is returned.
|
|
PostingsForLabelMatching(ctx context.Context, name string, match func(value string) bool) index.Postings
|
|
|
|
// PostingsForAllLabelValues returns a sorted iterator over all postings having a label with the given name.
|
|
// If no postings are found with the label in question, an empty iterator is returned.
|
|
PostingsForAllLabelValues(ctx context.Context, name string) index.Postings
|
|
|
|
// SortedPostings returns a postings list that is reordered to be sorted
|
|
// by the label set of the underlying series.
|
|
SortedPostings(index.Postings) index.Postings
|
|
|
|
// ShardedPostings returns a postings list filtered by the provided shardIndex
|
|
// out of shardCount. For a given posting, its shard MUST be computed hashing
|
|
// the series labels mod shardCount, using a hash function which is consistent over time.
|
|
ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings
|
|
|
|
// Series populates the given builder and chunk metas for the series identified
|
|
// by the reference.
|
|
// Returns storage.ErrNotFound if the ref does not resolve to a known series.
|
|
Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error
|
|
|
|
// LabelNames returns all the unique label names present in the index in sorted order.
|
|
LabelNames(ctx context.Context, matchers ...*labels.Matcher) ([]string, error)
|
|
|
|
// LabelValueFor returns label value for the given label name in the series referred to by ID.
|
|
// If the series couldn't be found or the series doesn't have the requested label a
|
|
// storage.ErrNotFound is returned as error.
|
|
LabelValueFor(ctx context.Context, id storage.SeriesRef, label string) (string, error)
|
|
|
|
// LabelNamesFor returns all the label names for the series referred to by the postings.
|
|
// The names returned are sorted.
|
|
LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error)
|
|
|
|
// Close releases the underlying resources of the reader.
|
|
Close() error
|
|
}
|
|
|
|
// ChunkWriter serializes a time block of chunked series data.
|
|
type ChunkWriter interface {
|
|
// WriteChunks writes several chunks. The Chunk field of the ChunkMetas
|
|
// must be populated.
|
|
// After returning successfully, the Ref fields in the ChunkMetas
|
|
// are set and can be used to retrieve the chunks from the written data.
|
|
WriteChunks(chunks ...chunks.Meta) error
|
|
|
|
// Close writes any required finalization and closes the resources
|
|
// associated with the underlying writer.
|
|
Close() error
|
|
}
|
|
|
|
// ChunkReader provides reading access of serialized time series data.
|
|
type ChunkReader interface {
|
|
// ChunkOrIterable returns the series data for the given chunks.Meta.
|
|
// Either a single chunk will be returned, or an iterable.
|
|
// A single chunk should be returned if chunks.Meta maps to a chunk that
|
|
// already exists and doesn't need modifications.
|
|
// An iterable should be returned if chunks.Meta maps to a subset of the
|
|
// samples in a stored chunk, or multiple chunks. (E.g. OOOHeadChunkReader
|
|
// could return an iterable where multiple histogram samples have counter
|
|
// resets. There can only be one counter reset per histogram chunk so
|
|
// multiple chunks would be created from the iterable in this case.)
|
|
// Only one of chunk or iterable should be returned. In some cases you may
|
|
// always expect a chunk to be returned. You can check that iterable is nil
|
|
// in those cases.
|
|
ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error)
|
|
|
|
// Close releases all underlying resources of the reader.
|
|
Close() error
|
|
}
|
|
|
|
// BlockReader provides reading access to a data block.
|
|
type BlockReader interface {
|
|
// Index returns an IndexReader over the block's data.
|
|
Index() (IndexReader, error)
|
|
|
|
// Chunks returns a ChunkReader over the block's data.
|
|
Chunks() (ChunkReader, error)
|
|
|
|
// Tombstones returns a tombstones.Reader over the block's deleted data.
|
|
Tombstones() (tombstones.Reader, error)
|
|
|
|
// Meta provides meta information about the block reader.
|
|
Meta() BlockMeta
|
|
|
|
// Size returns the number of bytes that the block takes up on disk.
|
|
Size() int64
|
|
}
|
|
|
|
// BlockMeta provides meta information about a block.
|
|
type BlockMeta struct {
|
|
// Unique identifier for the block and its contents. Changes on compaction.
|
|
ULID ulid.ULID `json:"ulid"`
|
|
|
|
// MinTime and MaxTime specify the time range all samples
|
|
// in the block are in.
|
|
MinTime int64 `json:"minTime"`
|
|
MaxTime int64 `json:"maxTime"`
|
|
|
|
// Stats about the contents of the block.
|
|
Stats BlockStats `json:"stats,omitempty"`
|
|
|
|
// Information on compactions the block was created from.
|
|
Compaction BlockMetaCompaction `json:"compaction"`
|
|
|
|
// Version of the index format.
|
|
Version int `json:"version"`
|
|
}
|
|
|
|
// BlockStats holds statistics about the contents of a block.
// Every counter is omitted from the JSON encoding when it is zero.
type BlockStats struct {
	NumSamples    uint64 `json:"numSamples,omitempty"`
	NumSeries     uint64 `json:"numSeries,omitempty"`
	NumChunks     uint64 `json:"numChunks,omitempty"`
	NumTombstones uint64 `json:"numTombstones,omitempty"`
}
|
|
|
|
// BlockDesc describes a block by ULID and time range.
|
|
type BlockDesc struct {
|
|
ULID ulid.ULID `json:"ulid"`
|
|
MinTime int64 `json:"minTime"`
|
|
MaxTime int64 `json:"maxTime"`
|
|
}
|
|
|
|
// BlockMetaCompaction holds information about compactions a block went through.
|
|
type BlockMetaCompaction struct {
|
|
// Maximum number of compaction cycles any source block has
|
|
// gone through.
|
|
Level int `json:"level"`
|
|
// ULIDs of all source head blocks that went into the block.
|
|
Sources []ulid.ULID `json:"sources,omitempty"`
|
|
// Indicates that during compaction it resulted in a block without any samples
|
|
// so it should be deleted on the next reloadBlocks.
|
|
Deletable bool `json:"deletable,omitempty"`
|
|
// Short descriptions of the direct blocks that were used to create
|
|
// this block.
|
|
Parents []BlockDesc `json:"parents,omitempty"`
|
|
Failed bool `json:"failed,omitempty"`
|
|
// Additional information about the compaction, for example, block created from out-of-order chunks.
|
|
Hints []string `json:"hints,omitempty"`
|
|
}
|
|
|
|
func (bm *BlockMetaCompaction) SetOutOfOrder() {
|
|
if bm.containsHint(CompactionHintFromOutOfOrder) {
|
|
return
|
|
}
|
|
bm.Hints = append(bm.Hints, CompactionHintFromOutOfOrder)
|
|
slices.Sort(bm.Hints)
|
|
}
|
|
|
|
func (bm *BlockMetaCompaction) FromOutOfOrder() bool {
|
|
return bm.containsHint(CompactionHintFromOutOfOrder)
|
|
}
|
|
|
|
func (bm *BlockMetaCompaction) containsHint(hint string) bool {
|
|
for _, h := range bm.Hints {
|
|
if h == hint {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
const (
	// indexFilename is the name of the index file inside a block directory.
	indexFilename = "index"
	// metaFilename is the name of the JSON meta file inside a block directory.
	metaFilename = "meta.json"
	// metaVersion1 is the only meta file version that readMetaFile accepts
	// and the version that writeMetaFile stamps.
	metaVersion1 = 1

	// CompactionHintFromOutOfOrder is a hint noting that the block
	// was created from out-of-order chunks.
	CompactionHintFromOutOfOrder = "from-out-of-order"
)
|
|
|
|
// chunkDir returns the path of the "chunks" directory located inside dir.
func chunkDir(dir string) string {
	const chunksDirname = "chunks"
	return filepath.Join(dir, chunksDirname)
}
|
|
|
|
func readMetaFile(dir string) (*BlockMeta, int64, error) {
|
|
b, err := os.ReadFile(filepath.Join(dir, metaFilename))
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
var m BlockMeta
|
|
|
|
if err := json.Unmarshal(b, &m); err != nil {
|
|
return nil, 0, err
|
|
}
|
|
if m.Version != metaVersion1 {
|
|
return nil, 0, fmt.Errorf("unexpected meta file version %d", m.Version)
|
|
}
|
|
|
|
return &m, int64(len(b)), nil
|
|
}
|
|
|
|
func writeMetaFile(logger *slog.Logger, dir string, meta *BlockMeta) (int64, error) {
|
|
meta.Version = metaVersion1
|
|
|
|
// Make any changes to the file appear atomic.
|
|
path := filepath.Join(dir, metaFilename)
|
|
tmp := path + ".tmp"
|
|
defer func() {
|
|
if err := os.RemoveAll(tmp); err != nil {
|
|
logger.Error("remove tmp file", "err", err.Error())
|
|
}
|
|
}()
|
|
|
|
f, err := os.Create(tmp)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
jsonMeta, err := json.MarshalIndent(meta, "", "\t")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
n, err := f.Write(jsonMeta)
|
|
if err != nil {
|
|
return 0, tsdb_errors.NewMulti(err, f.Close()).Err()
|
|
}
|
|
|
|
// Force the kernel to persist the file on disk to avoid data loss if the host crashes.
|
|
if err := f.Sync(); err != nil {
|
|
return 0, tsdb_errors.NewMulti(err, f.Close()).Err()
|
|
}
|
|
if err := f.Close(); err != nil {
|
|
return 0, err
|
|
}
|
|
return int64(n), fileutil.Replace(tmp, path)
|
|
}
|
|
|
|
// Block represents a directory of time series data covering a continuous time range.
|
|
type Block struct {
|
|
mtx sync.RWMutex
|
|
closing bool
|
|
pendingReaders sync.WaitGroup
|
|
|
|
dir string
|
|
meta BlockMeta
|
|
|
|
// Symbol Table Size in bytes.
|
|
// We maintain this variable to avoid recalculation every time.
|
|
symbolTableSize uint64
|
|
|
|
chunkr ChunkReader
|
|
indexr IndexReader
|
|
tombstones tombstones.Reader
|
|
|
|
logger *slog.Logger
|
|
|
|
numBytesChunks int64
|
|
numBytesIndex int64
|
|
numBytesTombstone int64
|
|
numBytesMeta int64
|
|
}
|
|
|
|
// OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used
|
|
// to instantiate chunk structs.
|
|
func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory) (pb *Block, err error) {
|
|
if logger == nil {
|
|
logger = promslog.NewNopLogger()
|
|
}
|
|
var closers []io.Closer
|
|
defer func() {
|
|
if err != nil {
|
|
err = tsdb_errors.NewMulti(err, tsdb_errors.CloseAll(closers)).Err()
|
|
}
|
|
}()
|
|
meta, sizeMeta, err := readMetaFile(dir)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cr, err := chunks.NewDirReader(chunkDir(dir), pool)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
closers = append(closers, cr)
|
|
|
|
decoder := index.DecodePostingsRaw
|
|
if postingsDecoderFactory != nil {
|
|
decoder = postingsDecoderFactory(meta)
|
|
}
|
|
ir, err := index.NewFileReader(filepath.Join(dir, indexFilename), decoder)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
closers = append(closers, ir)
|
|
|
|
tr, sizeTomb, err := tombstones.ReadTombstones(dir)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
closers = append(closers, tr)
|
|
|
|
pb = &Block{
|
|
dir: dir,
|
|
meta: *meta,
|
|
chunkr: cr,
|
|
indexr: ir,
|
|
tombstones: tr,
|
|
symbolTableSize: ir.SymbolTableSize(),
|
|
logger: logger,
|
|
numBytesChunks: cr.Size(),
|
|
numBytesIndex: ir.Size(),
|
|
numBytesTombstone: sizeTomb,
|
|
numBytesMeta: sizeMeta,
|
|
}
|
|
return pb, nil
|
|
}
|
|
|
|
// Close closes the on-disk block. It blocks as long as there are readers reading from the block.
|
|
func (pb *Block) Close() error {
|
|
pb.mtx.Lock()
|
|
pb.closing = true
|
|
pb.mtx.Unlock()
|
|
|
|
pb.pendingReaders.Wait()
|
|
|
|
return tsdb_errors.NewMulti(
|
|
pb.chunkr.Close(),
|
|
pb.indexr.Close(),
|
|
pb.tombstones.Close(),
|
|
).Err()
|
|
}
|
|
|
|
func (pb *Block) String() string {
|
|
return pb.meta.ULID.String()
|
|
}
|
|
|
|
// Dir returns the directory of the block.
|
|
func (pb *Block) Dir() string { return pb.dir }
|
|
|
|
// Meta returns meta information about the block.
|
|
func (pb *Block) Meta() BlockMeta { return pb.meta }
|
|
|
|
// MinTime returns the min time of the meta.
|
|
func (pb *Block) MinTime() int64 { return pb.meta.MinTime }
|
|
|
|
// MaxTime returns the max time of the meta.
|
|
func (pb *Block) MaxTime() int64 { return pb.meta.MaxTime }
|
|
|
|
// Size returns the number of bytes that the block takes up.
|
|
func (pb *Block) Size() int64 {
|
|
return pb.numBytesChunks + pb.numBytesIndex + pb.numBytesTombstone + pb.numBytesMeta
|
|
}
|
|
|
|
// ErrClosing is the error returned when an operation is attempted on a block
// that is in the process of being closed.
var ErrClosing = errors.New("block is closing")
|
|
|
|
func (pb *Block) startRead() error {
|
|
pb.mtx.RLock()
|
|
defer pb.mtx.RUnlock()
|
|
|
|
if pb.closing {
|
|
return ErrClosing
|
|
}
|
|
pb.pendingReaders.Add(1)
|
|
return nil
|
|
}
|
|
|
|
// Index returns a new IndexReader against the block data.
|
|
func (pb *Block) Index() (IndexReader, error) {
|
|
if err := pb.startRead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return blockIndexReader{ir: pb.indexr, b: pb}, nil
|
|
}
|
|
|
|
// Chunks returns a new ChunkReader against the block data.
|
|
func (pb *Block) Chunks() (ChunkReader, error) {
|
|
if err := pb.startRead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return blockChunkReader{ChunkReader: pb.chunkr, b: pb}, nil
|
|
}
|
|
|
|
// Tombstones returns a new TombstoneReader against the block data.
|
|
func (pb *Block) Tombstones() (tombstones.Reader, error) {
|
|
if err := pb.startRead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return blockTombstoneReader{Reader: pb.tombstones, b: pb}, nil
|
|
}
|
|
|
|
// GetSymbolTableSize returns the Symbol Table Size in the index of this block.
|
|
func (pb *Block) GetSymbolTableSize() uint64 {
|
|
return pb.symbolTableSize
|
|
}
|
|
|
|
func (pb *Block) setCompactionFailed() error {
|
|
pb.meta.Compaction.Failed = true
|
|
n, err := writeMetaFile(pb.logger, pb.dir, &pb.meta)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.numBytesMeta = n
|
|
return nil
|
|
}
|
|
|
|
type blockIndexReader struct {
|
|
ir IndexReader
|
|
b *Block
|
|
}
|
|
|
|
func (r blockIndexReader) Symbols() index.StringIter {
|
|
return r.ir.Symbols()
|
|
}
|
|
|
|
func (r blockIndexReader) SortedLabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
|
|
var st []string
|
|
var err error
|
|
|
|
if len(matchers) == 0 {
|
|
st, err = r.ir.SortedLabelValues(ctx, name)
|
|
} else {
|
|
st, err = r.LabelValues(ctx, name, matchers...)
|
|
if err == nil {
|
|
slices.Sort(st)
|
|
}
|
|
}
|
|
if err != nil {
|
|
return st, fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err)
|
|
}
|
|
return st, nil
|
|
}
|
|
|
|
func (r blockIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
|
|
if len(matchers) == 0 {
|
|
st, err := r.ir.LabelValues(ctx, name)
|
|
if err != nil {
|
|
return st, fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err)
|
|
}
|
|
return st, nil
|
|
}
|
|
|
|
return labelValuesWithMatchers(ctx, r.ir, name, matchers...)
|
|
}
|
|
|
|
func (r blockIndexReader) LabelNames(ctx context.Context, matchers ...*labels.Matcher) ([]string, error) {
|
|
if len(matchers) == 0 {
|
|
return r.b.LabelNames(ctx)
|
|
}
|
|
|
|
return labelNamesWithMatchers(ctx, r.ir, matchers...)
|
|
}
|
|
|
|
func (r blockIndexReader) Postings(ctx context.Context, name string, values ...string) (index.Postings, error) {
|
|
p, err := r.ir.Postings(ctx, name, values...)
|
|
if err != nil {
|
|
return p, fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err)
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
func (r blockIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings {
|
|
return r.ir.PostingsForLabelMatching(ctx, name, match)
|
|
}
|
|
|
|
func (r blockIndexReader) PostingsForAllLabelValues(ctx context.Context, name string) index.Postings {
|
|
return r.ir.PostingsForAllLabelValues(ctx, name)
|
|
}
|
|
|
|
func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings {
|
|
return r.ir.SortedPostings(p)
|
|
}
|
|
|
|
func (r blockIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings {
|
|
return r.ir.ShardedPostings(p, shardIndex, shardCount)
|
|
}
|
|
|
|
func (r blockIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
|
|
if err := r.ir.Series(ref, builder, chks); err != nil {
|
|
return fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r blockIndexReader) Close() error {
|
|
r.b.pendingReaders.Done()
|
|
return nil
|
|
}
|
|
|
|
// LabelValueFor returns label value for the given label name in the series referred to by ID.
|
|
func (r blockIndexReader) LabelValueFor(ctx context.Context, id storage.SeriesRef, label string) (string, error) {
|
|
return r.ir.LabelValueFor(ctx, id, label)
|
|
}
|
|
|
|
// LabelNamesFor returns all the label names for the series referred to by the postings.
|
|
// The names returned are sorted.
|
|
func (r blockIndexReader) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) {
|
|
return r.ir.LabelNamesFor(ctx, postings)
|
|
}
|
|
|
|
type blockTombstoneReader struct {
|
|
tombstones.Reader
|
|
b *Block
|
|
}
|
|
|
|
func (r blockTombstoneReader) Close() error {
|
|
r.b.pendingReaders.Done()
|
|
return nil
|
|
}
|
|
|
|
type blockChunkReader struct {
|
|
ChunkReader
|
|
b *Block
|
|
}
|
|
|
|
func (r blockChunkReader) Close() error {
|
|
r.b.pendingReaders.Done()
|
|
return nil
|
|
}
|
|
|
|
// Delete matching series between mint and maxt in the block.
|
|
func (pb *Block) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Matcher) error {
|
|
pb.mtx.Lock()
|
|
defer pb.mtx.Unlock()
|
|
|
|
if pb.closing {
|
|
return ErrClosing
|
|
}
|
|
|
|
p, err := PostingsForMatchers(ctx, pb.indexr, ms...)
|
|
if err != nil {
|
|
return fmt.Errorf("select series: %w", err)
|
|
}
|
|
|
|
ir := pb.indexr
|
|
|
|
// Choose only valid postings which have chunks in the time-range.
|
|
stones := tombstones.NewMemTombstones()
|
|
|
|
var chks []chunks.Meta
|
|
var builder labels.ScratchBuilder
|
|
|
|
Outer:
|
|
for p.Next() {
|
|
err := ir.Series(p.At(), &builder, &chks)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, chk := range chks {
|
|
if chk.OverlapsClosedInterval(mint, maxt) {
|
|
// Delete only until the current values and not beyond.
|
|
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime)
|
|
stones.AddInterval(p.At(), tombstones.Interval{Mint: tmin, Maxt: tmax})
|
|
continue Outer
|
|
}
|
|
}
|
|
}
|
|
|
|
if p.Err() != nil {
|
|
return p.Err()
|
|
}
|
|
|
|
err = pb.tombstones.Iter(func(id storage.SeriesRef, ivs tombstones.Intervals) error {
|
|
for _, iv := range ivs {
|
|
stones.AddInterval(id, iv)
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.tombstones = stones
|
|
pb.meta.Stats.NumTombstones = pb.tombstones.Total()
|
|
|
|
n, err := tombstones.WriteFile(pb.logger, pb.dir, pb.tombstones)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.numBytesTombstone = n
|
|
n, err = writeMetaFile(pb.logger, pb.dir, &pb.meta)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.numBytesMeta = n
|
|
return nil
|
|
}
|
|
|
|
// CleanTombstones will remove the tombstones and rewrite the block (only if there are any tombstones).
|
|
// If there was a rewrite, then it returns the ULID of new blocks written, else nil.
|
|
// If a resultant block is empty (tombstones covered the whole block), then it returns an empty slice.
|
|
// It returns a boolean indicating if the parent block can be deleted safely of not.
|
|
func (pb *Block) CleanTombstones(dest string, c Compactor) ([]ulid.ULID, bool, error) {
|
|
numStones := 0
|
|
|
|
if err := pb.tombstones.Iter(func(id storage.SeriesRef, ivs tombstones.Intervals) error {
|
|
numStones += len(ivs)
|
|
return nil
|
|
}); err != nil {
|
|
// This should never happen, as the iteration function only returns nil.
|
|
panic(err)
|
|
}
|
|
if numStones == 0 {
|
|
return nil, false, nil
|
|
}
|
|
|
|
meta := pb.Meta()
|
|
uids, err := c.Write(dest, pb, pb.meta.MinTime, pb.meta.MaxTime, &meta)
|
|
if err != nil {
|
|
return nil, false, err
|
|
}
|
|
|
|
return uids, true, nil
|
|
}
|
|
|
|
// Snapshot creates snapshot of the block into dir.
|
|
func (pb *Block) Snapshot(dir string) error {
|
|
blockDir := filepath.Join(dir, pb.meta.ULID.String())
|
|
if err := os.MkdirAll(blockDir, 0o777); err != nil {
|
|
return fmt.Errorf("create snapshot block dir: %w", err)
|
|
}
|
|
|
|
chunksDir := chunkDir(blockDir)
|
|
if err := os.MkdirAll(chunksDir, 0o777); err != nil {
|
|
return fmt.Errorf("create snapshot chunk dir: %w", err)
|
|
}
|
|
|
|
// Hardlink meta, index and tombstones
|
|
for _, fname := range []string{
|
|
metaFilename,
|
|
indexFilename,
|
|
tombstones.TombstonesFilename,
|
|
} {
|
|
if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil {
|
|
return fmt.Errorf("create snapshot %s: %w", fname, err)
|
|
}
|
|
}
|
|
|
|
// Hardlink the chunks
|
|
curChunkDir := chunkDir(pb.dir)
|
|
files, err := os.ReadDir(curChunkDir)
|
|
if err != nil {
|
|
return fmt.Errorf("ReadDir the current chunk dir: %w", err)
|
|
}
|
|
|
|
for _, f := range files {
|
|
err := os.Link(filepath.Join(curChunkDir, f.Name()), filepath.Join(chunksDir, f.Name()))
|
|
if err != nil {
|
|
return fmt.Errorf("hardlink a chunk: %w", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// OverlapsClosedInterval returns true if the block overlaps [mint, maxt].
|
|
func (pb *Block) OverlapsClosedInterval(mint, maxt int64) bool {
|
|
// The block itself is a half-open interval
|
|
// [pb.meta.MinTime, pb.meta.MaxTime).
|
|
return pb.meta.MinTime <= maxt && mint < pb.meta.MaxTime
|
|
}
|
|
|
|
// LabelNames returns all the unique label names present in the Block in sorted order.
|
|
func (pb *Block) LabelNames(ctx context.Context) ([]string, error) {
|
|
return pb.indexr.LabelNames(ctx)
|
|
}
|
|
|
|
// clampInterval restricts the interval [a, b] to the bounds [mint, maxt]:
// the returned lower bound is never below mint and the returned upper bound
// is never above maxt. The two bounds are clamped independently of each other.
func clampInterval(a, b, mint, maxt int64) (int64, int64) {
	lo, hi := a, b
	if mint > lo {
		lo = mint
	}
	if maxt < hi {
		hi = maxt
	}
	return lo, hi
}
|