// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package tsdb import ( "context" "errors" "fmt" "math" "slices" "github.com/oklog/ulid" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/tombstones" "github.com/prometheus/prometheus/util/annotations" ) // checkContextEveryNIterations is used in some tight loops to check if the context is done. const checkContextEveryNIterations = 100 type blockBaseQuerier struct { blockID ulid.ULID index IndexReader chunks ChunkReader tombstones tombstones.Reader closed bool mint, maxt int64 } func newBlockBaseQuerier(b BlockReader, mint, maxt int64) (*blockBaseQuerier, error) { indexr, err := b.Index() if err != nil { return nil, fmt.Errorf("open index reader: %w", err) } chunkr, err := b.Chunks() if err != nil { indexr.Close() return nil, fmt.Errorf("open chunk reader: %w", err) } tombsr, err := b.Tombstones() if err != nil { indexr.Close() chunkr.Close() return nil, fmt.Errorf("open tombstone reader: %w", err) } if tombsr == nil { tombsr = tombstones.NewMemTombstones() } return &blockBaseQuerier{ blockID: b.Meta().ULID, mint: mint, maxt: maxt, index: indexr, chunks: chunkr, tombstones: tombsr, }, nil } func (q *blockBaseQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { res, err := q.index.SortedLabelValues(ctx, name, matchers...) return res, nil, err } func (q *blockBaseQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { res, err := q.index.LabelNames(ctx, matchers...) return res, nil, err } func (q *blockBaseQuerier) Close() error { if q.closed { return errors.New("block querier already closed") } errs := tsdb_errors.NewMulti( q.index.Close(), q.chunks.Close(), q.tombstones.Close(), ) q.closed = true return errs.Err() } type blockQuerier struct { *blockBaseQuerier } // NewBlockQuerier returns a querier against the block reader and requested min and max time range. func NewBlockQuerier(b BlockReader, mint, maxt int64) (storage.Querier, error) { q, err := newBlockBaseQuerier(b, mint, maxt) if err != nil { return nil, err } return &blockQuerier{blockBaseQuerier: q}, nil } func (q *blockQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet { mint := q.mint maxt := q.maxt disableTrimming := false sharded := hints != nil && hints.ShardCount > 0 p, err := PostingsForMatchers(ctx, q.index, ms...) if err != nil { return storage.ErrSeriesSet(err) } if sharded { p = q.index.ShardedPostings(p, hints.ShardIndex, hints.ShardCount) } if sortSeries { p = q.index.SortedPostings(p) } if hints != nil { mint = hints.Start maxt = hints.End disableTrimming = hints.DisableTrimming if hints.Func == "series" { // When you're only looking up metadata (for example series API), you don't need to load any chunks. return newBlockSeriesSet(q.index, newNopChunkReader(), q.tombstones, p, mint, maxt, disableTrimming) } } return newBlockSeriesSet(q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming) } // blockChunkQuerier provides chunk querying access to a single block database. type blockChunkQuerier struct { *blockBaseQuerier } // NewBlockChunkQuerier returns a chunk querier against the block reader and requested min and max time range. func NewBlockChunkQuerier(b BlockReader, mint, maxt int64) (storage.ChunkQuerier, error) { q, err := newBlockBaseQuerier(b, mint, maxt) if err != nil { return nil, err } return &blockChunkQuerier{blockBaseQuerier: q}, nil } func (q *blockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.ChunkSeriesSet { mint := q.mint maxt := q.maxt disableTrimming := false sharded := hints != nil && hints.ShardCount > 0 if hints != nil { mint = hints.Start maxt = hints.End disableTrimming = hints.DisableTrimming } p, err := PostingsForMatchers(ctx, q.index, ms...) if err != nil { return storage.ErrChunkSeriesSet(err) } if sharded { p = q.index.ShardedPostings(p, hints.ShardIndex, hints.ShardCount) } if sortSeries { p = q.index.SortedPostings(p) } return NewBlockChunkSeriesSet(q.blockID, q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming) } // PostingsForMatchers assembles a single postings iterator against the index reader // based on the given matchers. The resulting postings are not ordered by series. func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) { var its, notIts []index.Postings // See which label must be non-empty. // Optimization for case like {l=~".", l!="1"}. labelMustBeSet := make(map[string]bool, len(ms)) for _, m := range ms { if !m.Matches("") { labelMustBeSet[m.Name] = true } } isSubtractingMatcher := func(m *labels.Matcher) bool { if !labelMustBeSet[m.Name] { return true } return (m.Type == labels.MatchNotEqual || m.Type == labels.MatchNotRegexp) && m.Matches("") } hasSubtractingMatchers, hasIntersectingMatchers := false, false for _, m := range ms { if isSubtractingMatcher(m) { hasSubtractingMatchers = true } else { hasIntersectingMatchers = true } } if hasSubtractingMatchers && !hasIntersectingMatchers { // If there's nothing to subtract from, add in everything and remove the notIts later. // We prefer to get AllPostings so that the base of subtraction (i.e. allPostings) // doesn't include series that may be added to the index reader during this function call. k, v := index.AllPostingsKey() allPostings, err := ix.Postings(ctx, k, v) if err != nil { return nil, err } its = append(its, allPostings) } // Sort matchers to have the intersecting matchers first. // This way the base for subtraction is smaller and // there is no chance that the set we subtract from // contains postings of series that didn't exist when // we constructed the set we subtract by. slices.SortStableFunc(ms, func(i, j *labels.Matcher) int { if !isSubtractingMatcher(i) && isSubtractingMatcher(j) { return -1 } return +1 }) for _, m := range ms { if ctx.Err() != nil { return nil, ctx.Err() } switch { case m.Name == "" && m.Value == "": // Special-case for AllPostings, used in tests at least. k, v := index.AllPostingsKey() allPostings, err := ix.Postings(ctx, k, v) if err != nil { return nil, err } its = append(its, allPostings) case labelMustBeSet[m.Name]: // If this matcher must be non-empty, we can be smarter. matchesEmpty := m.Matches("") isNot := m.Type == labels.MatchNotEqual || m.Type == labels.MatchNotRegexp switch { case isNot && matchesEmpty: // l!="foo" // If the label can't be empty and is a Not and the inner matcher // doesn't match empty, then subtract it out at the end. inverse, err := m.Inverse() if err != nil { return nil, err } it, err := postingsForMatcher(ctx, ix, inverse) if err != nil { return nil, err } notIts = append(notIts, it) case isNot && !matchesEmpty: // l!="" // If the label can't be empty and is a Not, but the inner matcher can // be empty we need to use inversePostingsForMatcher. inverse, err := m.Inverse() if err != nil { return nil, err } it, err := inversePostingsForMatcher(ctx, ix, inverse) if err != nil { return nil, err } if index.IsEmptyPostingsType(it) { return index.EmptyPostings(), nil } its = append(its, it) default: // l="a" // Non-Not matcher, use normal postingsForMatcher. it, err := postingsForMatcher(ctx, ix, m) if err != nil { return nil, err } if index.IsEmptyPostingsType(it) { return index.EmptyPostings(), nil } its = append(its, it) } default: // l="" // If the matchers for a labelname selects an empty value, it selects all // the series which don't have the label name set too. See: // https://github.com/prometheus/prometheus/issues/3575 and // https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555 it, err := inversePostingsForMatcher(ctx, ix, m) if err != nil { return nil, err } notIts = append(notIts, it) } } it := index.Intersect(its...) for _, n := range notIts { it = index.Without(it, n) } return it, nil } func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher) (index.Postings, error) { // This method will not return postings for missing labels. // Fast-path for equal matching. if m.Type == labels.MatchEqual { return ix.Postings(ctx, m.Name, m.Value) } // Fast-path for set matching. if m.Type == labels.MatchRegexp { setMatches := m.SetMatches() if len(setMatches) > 0 { return ix.Postings(ctx, m.Name, setMatches...) } } it := ix.PostingsForLabelMatching(ctx, m.Name, m.Matches) return it, it.Err() } // inversePostingsForMatcher returns the postings for the series with the label name set but not matching the matcher. func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher) (index.Postings, error) { // Fast-path for MatchNotRegexp matching. // Inverse of a MatchNotRegexp is MatchRegexp (double negation). // Fast-path for set matching. if m.Type == labels.MatchNotRegexp { setMatches := m.SetMatches() if len(setMatches) > 0 { return ix.Postings(ctx, m.Name, setMatches...) } } // Fast-path for MatchNotEqual matching. // Inverse of a MatchNotEqual is MatchEqual (double negation). if m.Type == labels.MatchNotEqual { return ix.Postings(ctx, m.Name, m.Value) } vals, err := ix.LabelValues(ctx, m.Name) if err != nil { return nil, err } res := vals[:0] // If the match before inversion was !="" or !~"", we just want all the values. if m.Value == "" && (m.Type == labels.MatchRegexp || m.Type == labels.MatchEqual) { res = vals } else { count := 1 for _, val := range vals { if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { return nil, ctx.Err() } count++ if !m.Matches(val) { res = append(res, val) } } } return ix.Postings(ctx, m.Name, res...) } func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, matchers ...*labels.Matcher) ([]string, error) { allValues, err := r.LabelValues(ctx, name) if err != nil { return nil, fmt.Errorf("fetching values of label %s: %w", name, err) } // If we have a matcher for the label name, we can filter out values that don't match // before we fetch postings. This is especially useful for labels with many values. // e.g. __name__ with a selector like {__name__="xyz"} hasMatchersForOtherLabels := false for _, m := range matchers { if m.Name != name { hasMatchersForOtherLabels = true continue } // re-use the allValues slice to avoid allocations // this is safe because the iteration is always ahead of the append filteredValues := allValues[:0] count := 1 for _, v := range allValues { if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { return nil, ctx.Err() } count++ if m.Matches(v) { filteredValues = append(filteredValues, v) } } allValues = filteredValues } if len(allValues) == 0 { return nil, nil } // If we don't have any matchers for other labels, then we're done. if !hasMatchersForOtherLabels { return allValues, nil } p, err := PostingsForMatchers(ctx, r, matchers...) if err != nil { return nil, fmt.Errorf("fetching postings for matchers: %w", err) } valuesPostings := make([]index.Postings, len(allValues)) for i, value := range allValues { valuesPostings[i], err = r.Postings(ctx, name, value) if err != nil { return nil, fmt.Errorf("fetching postings for %s=%q: %w", name, value, err) } } indexes, err := index.FindIntersectingPostings(p, valuesPostings) if err != nil { return nil, fmt.Errorf("intersecting postings: %w", err) } values := make([]string, 0, len(indexes)) for _, idx := range indexes { values = append(values, allValues[idx]) } return values, nil } func labelNamesWithMatchers(ctx context.Context, r IndexReader, matchers ...*labels.Matcher) ([]string, error) { p, err := PostingsForMatchers(ctx, r, matchers...) if err != nil { return nil, err } return r.LabelNamesFor(ctx, p) } // seriesData, used inside other iterators, are updated when we move from one series to another. type seriesData struct { chks []chunks.Meta intervals tombstones.Intervals labels labels.Labels } // Labels implements part of storage.Series and storage.ChunkSeries. func (s *seriesData) Labels() labels.Labels { return s.labels } // blockBaseSeriesSet allows to iterate over all series in the single block. // Iterated series are trimmed with given min and max time as well as tombstones. // See newBlockSeriesSet and NewBlockChunkSeriesSet to use it for either sample or chunk iterating. type blockBaseSeriesSet struct { blockID ulid.ULID p index.Postings index IndexReader chunks ChunkReader tombstones tombstones.Reader mint, maxt int64 disableTrimming bool curr seriesData bufChks []chunks.Meta builder labels.ScratchBuilder err error } func (b *blockBaseSeriesSet) Next() bool { for b.p.Next() { if err := b.index.Series(b.p.At(), &b.builder, &b.bufChks); err != nil { // Postings may be stale. Skip if no underlying series exists. if errors.Is(err, storage.ErrNotFound) { continue } b.err = fmt.Errorf("get series %d: %w", b.p.At(), err) return false } if len(b.bufChks) == 0 { continue } intervals, err := b.tombstones.Get(b.p.At()) if err != nil { b.err = fmt.Errorf("get tombstones: %w", err) return false } // NOTE: // * block time range is half-open: [meta.MinTime, meta.MaxTime). // * chunks are both closed: [chk.MinTime, chk.MaxTime]. // * requested time ranges are closed: [req.Start, req.End]. var trimFront, trimBack bool // Copy chunks as iterables are reusable. // Count those in range to size allocation (roughly - ignoring tombstones). nChks := 0 for _, chk := range b.bufChks { if !(chk.MaxTime < b.mint || chk.MinTime > b.maxt) { nChks++ } } chks := make([]chunks.Meta, 0, nChks) // Prefilter chunks and pick those which are not entirely deleted or totally outside of the requested range. for _, chk := range b.bufChks { if chk.MaxTime < b.mint { continue } if chk.MinTime > b.maxt { continue } if (tombstones.Interval{Mint: chk.MinTime, Maxt: chk.MaxTime}.IsSubrange(intervals)) { continue } chks = append(chks, chk) // If still not entirely deleted, check if trim is needed based on requested time range. if !b.disableTrimming { if chk.MinTime < b.mint { trimFront = true } if chk.MaxTime > b.maxt { trimBack = true } } } if len(chks) == 0 { continue } if trimFront { intervals = intervals.Add(tombstones.Interval{Mint: math.MinInt64, Maxt: b.mint - 1}) } if trimBack { intervals = intervals.Add(tombstones.Interval{Mint: b.maxt + 1, Maxt: math.MaxInt64}) } b.curr.labels = b.builder.Labels() b.curr.chks = chks b.curr.intervals = intervals return true } return false } func (b *blockBaseSeriesSet) Err() error { if b.err != nil { return b.err } return b.p.Err() } func (b *blockBaseSeriesSet) Warnings() annotations.Annotations { return nil } // populateWithDelGenericSeriesIterator allows to iterate over given chunk // metas. In each iteration it ensures that chunks are trimmed based on given // tombstones interval if any. // // populateWithDelGenericSeriesIterator assumes that chunks that would be fully // removed by intervals are filtered out in previous phase. // // On each iteration currMeta is available. If currDelIter is not nil, it // means that the chunk in currMeta is invalid and a chunk rewrite is needed, // for which currDelIter should be used. type populateWithDelGenericSeriesIterator struct { blockID ulid.ULID cr ChunkReader // metas are expected to be sorted by minTime and should be related to // the same, single series. // It's possible for a single chunks.Meta to refer to multiple chunks. // cr.ChunkOrIterator() would return an iterable and a nil chunk in this // case. metas []chunks.Meta i int // Index into metas; -1 if not started yet. err error bufIter DeletedIterator // Retained for memory re-use. currDelIter may point here. intervals tombstones.Intervals currDelIter chunkenc.Iterator // currMeta is the current chunks.Meta from metas. currMeta.Chunk is set to // the chunk returned from cr.ChunkOrIterable(). As that can return a nil // chunk, currMeta.Chunk is not always guaranteed to be set. currMeta chunks.Meta } func (p *populateWithDelGenericSeriesIterator) reset(blockID ulid.ULID, cr ChunkReader, chks []chunks.Meta, intervals tombstones.Intervals) { p.blockID = blockID p.cr = cr p.metas = chks p.i = -1 p.err = nil // Note we don't touch p.bufIter.Iter; it is holding on to an iterator we might reuse in next(). p.bufIter.Intervals = p.bufIter.Intervals[:0] p.intervals = intervals p.currDelIter = nil p.currMeta = chunks.Meta{} } // If copyHeadChunk is true, then the head chunk (i.e. the in-memory chunk of the TSDB) // is deep copied to avoid races between reads and copying chunk bytes. // However, if the deletion intervals overlaps with the head chunk, then the head chunk is // not copied irrespective of copyHeadChunk because it will be re-encoded later anyway. func (p *populateWithDelGenericSeriesIterator) next(copyHeadChunk bool) bool { if p.err != nil || p.i >= len(p.metas)-1 { return false } p.i++ p.currMeta = p.metas[p.i] p.bufIter.Intervals = p.bufIter.Intervals[:0] for _, interval := range p.intervals { if p.currMeta.OverlapsClosedInterval(interval.Mint, interval.Maxt) { p.bufIter.Intervals = p.bufIter.Intervals.Add(interval) } } hcr, ok := p.cr.(*headChunkReader) var iterable chunkenc.Iterable if ok && copyHeadChunk && len(p.bufIter.Intervals) == 0 { // ChunkWithCopy will copy the head chunk. var maxt int64 p.currMeta.Chunk, maxt, p.err = hcr.ChunkWithCopy(p.currMeta) // For the in-memory head chunk the index reader sets maxt as MaxInt64. We fix it here. p.currMeta.MaxTime = maxt } else { p.currMeta.Chunk, iterable, p.err = p.cr.ChunkOrIterable(p.currMeta) } if p.err != nil { p.err = fmt.Errorf("cannot populate chunk %d from block %s: %w", p.currMeta.Ref, p.blockID.String(), p.err) return false } // Use the single chunk if possible. if p.currMeta.Chunk != nil { if len(p.bufIter.Intervals) == 0 { // If there is no overlap with deletion intervals and a single chunk is // returned, we can take chunk as it is. p.currDelIter = nil return true } // Otherwise we need to iterate over the samples in the single chunk // and create new chunks. p.bufIter.Iter = p.currMeta.Chunk.Iterator(p.bufIter.Iter) p.currDelIter = &p.bufIter return true } // Otherwise, use the iterable to create an iterator. p.bufIter.Iter = iterable.Iterator(p.bufIter.Iter) p.currDelIter = &p.bufIter return true } func (p *populateWithDelGenericSeriesIterator) Err() error { return p.err } type blockSeriesEntry struct { chunks ChunkReader blockID ulid.ULID seriesData } func (s *blockSeriesEntry) Iterator(it chunkenc.Iterator) chunkenc.Iterator { pi, ok := it.(*populateWithDelSeriesIterator) if !ok { pi = &populateWithDelSeriesIterator{} } pi.reset(s.blockID, s.chunks, s.chks, s.intervals) return pi } type chunkSeriesEntry struct { chunks ChunkReader blockID ulid.ULID seriesData } func (s *chunkSeriesEntry) Iterator(it chunks.Iterator) chunks.Iterator { pi, ok := it.(*populateWithDelChunkSeriesIterator) if !ok { pi = &populateWithDelChunkSeriesIterator{} } pi.reset(s.blockID, s.chunks, s.chks, s.intervals) return pi } // populateWithDelSeriesIterator allows to iterate over samples for the single series. type populateWithDelSeriesIterator struct { populateWithDelGenericSeriesIterator curr chunkenc.Iterator } func (p *populateWithDelSeriesIterator) reset(blockID ulid.ULID, cr ChunkReader, chks []chunks.Meta, intervals tombstones.Intervals) { p.populateWithDelGenericSeriesIterator.reset(blockID, cr, chks, intervals) p.curr = nil } func (p *populateWithDelSeriesIterator) Next() chunkenc.ValueType { if p.curr != nil { if valueType := p.curr.Next(); valueType != chunkenc.ValNone { return valueType } } for p.next(false) { if p.currDelIter != nil { p.curr = p.currDelIter } else { p.curr = p.currMeta.Chunk.Iterator(p.curr) } if valueType := p.curr.Next(); valueType != chunkenc.ValNone { return valueType } } return chunkenc.ValNone } func (p *populateWithDelSeriesIterator) Seek(t int64) chunkenc.ValueType { if p.curr != nil { if valueType := p.curr.Seek(t); valueType != chunkenc.ValNone { return valueType } } for p.Next() != chunkenc.ValNone { if valueType := p.curr.Seek(t); valueType != chunkenc.ValNone { return valueType } } return chunkenc.ValNone } func (p *populateWithDelSeriesIterator) At() (int64, float64) { return p.curr.At() } func (p *populateWithDelSeriesIterator) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { return p.curr.AtHistogram(h) } func (p *populateWithDelSeriesIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { return p.curr.AtFloatHistogram(fh) } func (p *populateWithDelSeriesIterator) AtT() int64 { return p.curr.AtT() } func (p *populateWithDelSeriesIterator) Err() error { if err := p.populateWithDelGenericSeriesIterator.Err(); err != nil { return err } if p.curr != nil { return p.curr.Err() } return nil } type populateWithDelChunkSeriesIterator struct { populateWithDelGenericSeriesIterator // currMetaWithChunk is current meta with its chunk field set. This meta // is guaranteed to map to a single chunk. This differs from // populateWithDelGenericSeriesIterator.currMeta as that // could refer to multiple chunks. currMetaWithChunk chunks.Meta // chunksFromIterable stores the chunks created from iterating through // the iterable returned by cr.ChunkOrIterable() (with deleted samples // removed). chunksFromIterable []chunks.Meta chunksFromIterableIdx int } func (p *populateWithDelChunkSeriesIterator) reset(blockID ulid.ULID, cr ChunkReader, chks []chunks.Meta, intervals tombstones.Intervals) { p.populateWithDelGenericSeriesIterator.reset(blockID, cr, chks, intervals) p.currMetaWithChunk = chunks.Meta{} p.chunksFromIterable = p.chunksFromIterable[:0] p.chunksFromIterableIdx = -1 } func (p *populateWithDelChunkSeriesIterator) Next() bool { if p.currMeta.Chunk == nil { // If we've been creating chunks from the iterable, check if there are // any more chunks to iterate through. if p.chunksFromIterableIdx < len(p.chunksFromIterable)-1 { p.chunksFromIterableIdx++ p.currMetaWithChunk = p.chunksFromIterable[p.chunksFromIterableIdx] return true } } // Move to the next chunk/deletion iterator. // This is a for loop as if the current p.currDelIter returns no samples // (which means a chunk won't be created), there still might be more // samples/chunks from the rest of p.metas. for p.next(true) { if p.currDelIter == nil { p.currMetaWithChunk = p.currMeta return true } if p.currMeta.Chunk != nil { // If ChunkOrIterable() returned a non-nil chunk, the samples in // p.currDelIter will only form one chunk, as the only change // p.currDelIter might make is deleting some samples. if p.populateCurrForSingleChunk() { return true } } else { // If ChunkOrIterable() returned an iterable, multiple chunks may be // created from the samples in p.currDelIter. if p.populateChunksFromIterable() { return true } } } return false } // populateCurrForSingleChunk sets the fields within p.currMetaWithChunk. This // should be called if the samples in p.currDelIter only form one chunk. func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool { valueType := p.currDelIter.Next() if valueType == chunkenc.ValNone { if err := p.currDelIter.Err(); err != nil { p.err = fmt.Errorf("iterate chunk while re-encoding: %w", err) } return false } p.currMetaWithChunk.MinTime = p.currDelIter.AtT() // Re-encode the chunk if iterator is provided. This means that it has // some samples to be deleted or chunk is opened. var ( newChunk chunkenc.Chunk app chunkenc.Appender t int64 err error ) switch valueType { case chunkenc.ValHistogram: newChunk = chunkenc.NewHistogramChunk() if app, err = newChunk.Appender(); err != nil { break } for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() { if vt != chunkenc.ValHistogram { err = fmt.Errorf("found value type %v in histogram chunk", vt) break } var h *histogram.Histogram t, h = p.currDelIter.AtHistogram(nil) _, _, app, err = app.AppendHistogram(nil, t, h, true) if err != nil { break } } case chunkenc.ValFloat: newChunk = chunkenc.NewXORChunk() if app, err = newChunk.Appender(); err != nil { break } for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() { if vt != chunkenc.ValFloat { err = fmt.Errorf("found value type %v in float chunk", vt) break } var v float64 t, v = p.currDelIter.At() app.Append(t, v) } case chunkenc.ValFloatHistogram: newChunk = chunkenc.NewFloatHistogramChunk() if app, err = newChunk.Appender(); err != nil { break } for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() { if vt != chunkenc.ValFloatHistogram { err = fmt.Errorf("found value type %v in histogram chunk", vt) break } var h *histogram.FloatHistogram t, h = p.currDelIter.AtFloatHistogram(nil) _, _, app, err = app.AppendFloatHistogram(nil, t, h, true) if err != nil { break } } default: err = fmt.Errorf("populateCurrForSingleChunk: value type %v unsupported", valueType) } if err != nil { p.err = fmt.Errorf("iterate chunk while re-encoding: %w", err) return false } if err := p.currDelIter.Err(); err != nil { p.err = fmt.Errorf("iterate chunk while re-encoding: %w", err) return false } p.currMetaWithChunk.Chunk = newChunk p.currMetaWithChunk.MaxTime = t return true } // populateChunksFromIterable reads the samples from currDelIter to create // chunks for chunksFromIterable. It also sets p.currMetaWithChunk to the first // chunk. func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { p.chunksFromIterable = p.chunksFromIterable[:0] p.chunksFromIterableIdx = -1 firstValueType := p.currDelIter.Next() if firstValueType == chunkenc.ValNone { if err := p.currDelIter.Err(); err != nil { p.err = fmt.Errorf("populateChunksFromIterable: no samples could be read: %w", err) return false } return false } var ( // t is the timestamp for the current sample. t int64 cmint int64 cmaxt int64 currentChunk chunkenc.Chunk app chunkenc.Appender newChunk chunkenc.Chunk recoded bool err error ) prevValueType := chunkenc.ValNone for currentValueType := firstValueType; currentValueType != chunkenc.ValNone; currentValueType = p.currDelIter.Next() { // Check if the encoding has changed (i.e. we need to create a new // chunk as chunks can't have multiple encoding types). // For the first sample, the following condition will always be true as // ValNoneNone != ValFloat | ValHistogram | ValFloatHistogram. if currentValueType != prevValueType { if prevValueType != chunkenc.ValNone { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) } cmint = p.currDelIter.AtT() if currentChunk, err = currentValueType.NewChunk(); err != nil { break } if app, err = currentChunk.Appender(); err != nil { break } } switch currentValueType { case chunkenc.ValFloat: { var v float64 t, v = p.currDelIter.At() app.Append(t, v) } case chunkenc.ValHistogram: { var v *histogram.Histogram t, v = p.currDelIter.AtHistogram(nil) // No need to set prevApp as AppendHistogram will set the // counter reset header for the appender that's returned. newChunk, recoded, app, err = app.AppendHistogram(nil, t, v, false) } case chunkenc.ValFloatHistogram: { var v *histogram.FloatHistogram t, v = p.currDelIter.AtFloatHistogram(nil) // No need to set prevApp as AppendHistogram will set the // counter reset header for the appender that's returned. newChunk, recoded, app, err = app.AppendFloatHistogram(nil, t, v, false) } } if err != nil { break } if newChunk != nil { if !recoded { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) } currentChunk = newChunk cmint = t } cmaxt = t prevValueType = currentValueType } if err != nil { p.err = fmt.Errorf("populateChunksFromIterable: error when writing new chunks: %w", err) return false } if err = p.currDelIter.Err(); err != nil { p.err = fmt.Errorf("populateChunksFromIterable: currDelIter error when writing new chunks: %w", err) return false } if prevValueType != chunkenc.ValNone { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) } if len(p.chunksFromIterable) == 0 { return false } p.currMetaWithChunk = p.chunksFromIterable[0] p.chunksFromIterableIdx = 0 return true } func (p *populateWithDelChunkSeriesIterator) At() chunks.Meta { return p.currMetaWithChunk } // blockSeriesSet allows to iterate over sorted, populated series with applied tombstones. // Series with all deleted chunks are still present as Series with no samples. // Samples from chunks are also trimmed to requested min and max time. type blockSeriesSet struct { blockBaseSeriesSet } func newBlockSeriesSet(i IndexReader, c ChunkReader, t tombstones.Reader, p index.Postings, mint, maxt int64, disableTrimming bool) storage.SeriesSet { return &blockSeriesSet{ blockBaseSeriesSet{ index: i, chunks: c, tombstones: t, p: p, mint: mint, maxt: maxt, disableTrimming: disableTrimming, }, } } func (b *blockSeriesSet) At() storage.Series { // At can be looped over before iterating, so save the current values locally. return &blockSeriesEntry{ chunks: b.chunks, blockID: b.blockID, seriesData: b.curr, } } // blockChunkSeriesSet allows to iterate over sorted, populated series with applied tombstones. // Series with all deleted chunks are still present as Labelled iterator with no chunks. // Chunks are also trimmed to requested [min and max] (keeping samples with min and max timestamps). type blockChunkSeriesSet struct { blockBaseSeriesSet } func NewBlockChunkSeriesSet(id ulid.ULID, i IndexReader, c ChunkReader, t tombstones.Reader, p index.Postings, mint, maxt int64, disableTrimming bool) storage.ChunkSeriesSet { return &blockChunkSeriesSet{ blockBaseSeriesSet{ blockID: id, index: i, chunks: c, tombstones: t, p: p, mint: mint, maxt: maxt, disableTrimming: disableTrimming, }, } } func (b *blockChunkSeriesSet) At() storage.ChunkSeries { // At can be looped over before iterating, so save the current values locally. return &chunkSeriesEntry{ chunks: b.chunks, blockID: b.blockID, seriesData: b.curr, } } // NewMergedStringIter returns string iterator that allows to merge symbols on demand and stream result. func NewMergedStringIter(a, b index.StringIter) index.StringIter { return &mergedStringIter{a: a, b: b, aok: a.Next(), bok: b.Next()} } type mergedStringIter struct { a index.StringIter b index.StringIter aok, bok bool cur string err error } func (m *mergedStringIter) Next() bool { if (!m.aok && !m.bok) || (m.Err() != nil) { return false } switch { case !m.aok: m.cur = m.b.At() m.bok = m.b.Next() m.err = m.b.Err() case !m.bok: m.cur = m.a.At() m.aok = m.a.Next() m.err = m.a.Err() case m.b.At() > m.a.At(): m.cur = m.a.At() m.aok = m.a.Next() m.err = m.a.Err() case m.a.At() > m.b.At(): m.cur = m.b.At() m.bok = m.b.Next() m.err = m.b.Err() default: // Equal. m.cur = m.b.At() m.aok = m.a.Next() m.err = m.a.Err() m.bok = m.b.Next() if m.err == nil { m.err = m.b.Err() } } return true } func (m mergedStringIter) At() string { return m.cur } func (m mergedStringIter) Err() error { return m.err } // DeletedIterator wraps chunk Iterator and makes sure any deleted metrics are not returned. type DeletedIterator struct { // Iter is an Iterator to be wrapped. Iter chunkenc.Iterator // Intervals are the deletion intervals. Intervals tombstones.Intervals } func (it *DeletedIterator) At() (int64, float64) { return it.Iter.At() } func (it *DeletedIterator) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { t, h := it.Iter.AtHistogram(h) return t, h } func (it *DeletedIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { t, h := it.Iter.AtFloatHistogram(fh) return t, h } func (it *DeletedIterator) AtT() int64 { return it.Iter.AtT() } func (it *DeletedIterator) Seek(t int64) chunkenc.ValueType { if it.Iter.Err() != nil { return chunkenc.ValNone } valueType := it.Iter.Seek(t) if valueType == chunkenc.ValNone { return chunkenc.ValNone } // Now double check if the entry falls into a deleted interval. ts := it.AtT() for _, itv := range it.Intervals { if ts < itv.Mint { return valueType } if ts > itv.Maxt { it.Intervals = it.Intervals[1:] continue } // We're in the middle of an interval, we can now call Next(). return it.Next() } // The timestamp is greater than all the deleted intervals. return valueType } func (it *DeletedIterator) Next() chunkenc.ValueType { Outer: for valueType := it.Iter.Next(); valueType != chunkenc.ValNone; valueType = it.Iter.Next() { ts := it.AtT() for _, tr := range it.Intervals { if tr.InBounds(ts) { continue Outer } if ts <= tr.Maxt { return valueType } it.Intervals = it.Intervals[1:] } return valueType } return chunkenc.ValNone } func (it *DeletedIterator) Err() error { return it.Iter.Err() } type nopChunkReader struct { emptyChunk chunkenc.Chunk } func newNopChunkReader() ChunkReader { return nopChunkReader{ emptyChunk: chunkenc.NewXORChunk(), } } func (cr nopChunkReader) ChunkOrIterable(chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) { return cr.emptyChunk, nil, nil } func (cr nopChunkReader) Close() error { return nil }