prometheus/storage/tsdb/tsdb.go
Thibault Chataigner bf4a279a91 Remote storage reads based on oldest timestamp in primary storage (#3129)
Currently all read queries are simply pushed to remote read clients.
This is fine, except for remote storage for wich it unefficient and
make query slower even if remote read is unnecessary.
So we need instead to compare the oldest timestamp in primary/local
storage with the query range lower boundary. If the oldest timestamp
is older than the mint parameter, then there is no need for remote read.
This is an optionnal behavior per remote read client.

Signed-off-by: Thibault Chataigner <t.chataigner@criteo.com>
2017-10-18 12:08:14 +01:00

324 lines
8.2 KiB
Go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tsdb
import (
"context"
"sync"
"time"
"unsafe"
"github.com/go-kit/kit/log"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/tsdb"
tsdbLabels "github.com/prometheus/tsdb/labels"
)
// ErrNotReady is returned if the underlying storage is not ready yet.
var ErrNotReady = errors.New("TSDB not ready")
// ReadyStorage implements the Storage interface while allowing to set the actual
// storage at a later point in time.
type ReadyStorage struct {
mtx sync.RWMutex
a *adapter
}
// Set the storage.
func (s *ReadyStorage) Set(db *tsdb.DB, startTimeMargin int64) {
s.mtx.Lock()
defer s.mtx.Unlock()
s.a = &adapter{db: db, startTimeMargin: startTimeMargin}
}
// Get the storage.
func (s *ReadyStorage) Get() *tsdb.DB {
if x := s.get(); x != nil {
return x.db
}
return nil
}
func (s *ReadyStorage) get() *adapter {
s.mtx.RLock()
x := s.a
s.mtx.RUnlock()
return x
}
// StartTime implements the Storage interface.
func (s *ReadyStorage) StartTime() (int64, error) {
if x := s.get(); x != nil {
return x.StartTime()
}
return int64(model.Latest), ErrNotReady
}
// Querier implements the Storage interface.
func (s *ReadyStorage) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {
if x := s.get(); x != nil {
return x.Querier(ctx, mint, maxt)
}
return nil, ErrNotReady
}
// Appender implements the Storage interface.
func (s *ReadyStorage) Appender() (storage.Appender, error) {
if x := s.get(); x != nil {
return x.Appender()
}
return nil, ErrNotReady
}
// Close implements the Storage interface.
func (s *ReadyStorage) Close() error {
if x := s.Get(); x != nil {
return x.Close()
}
return nil
}
// Adapter return an adapter as storage.Storage.
func Adapter(db *tsdb.DB, startTimeMargin int64) storage.Storage {
return &adapter{db: db, startTimeMargin: startTimeMargin}
}
// adapter implements a storage.Storage around TSDB.
type adapter struct {
db *tsdb.DB
startTimeMargin int64
}
// Options of the DB storage.
type Options struct {
// The interval at which the write ahead log is flushed to disc.
WALFlushInterval time.Duration
// The timestamp range of head blocks after which they get persisted.
// It's the minimum duration of any persisted block.
MinBlockDuration model.Duration
// The maximum timestamp range of compacted blocks.
MaxBlockDuration model.Duration
// Duration for how long to retain data.
Retention model.Duration
// Disable creation and consideration of lockfile.
NoLockfile bool
}
// Open returns a new storage backed by a TSDB database that is configured for Prometheus.
func Open(path string, l log.Logger, r prometheus.Registerer, opts *Options) (*tsdb.DB, error) {
// Start with smallest block duration and create exponential buckets until the exceed the
// configured maximum block duration.
rngs := tsdb.ExponentialBlockRanges(int64(time.Duration(opts.MinBlockDuration).Seconds()*1000), 10, 3)
for i, v := range rngs {
if v > int64(time.Duration(opts.MaxBlockDuration).Seconds()*1000) {
rngs = rngs[:i]
break
}
}
db, err := tsdb.Open(path, l, r, &tsdb.Options{
WALFlushInterval: 10 * time.Second,
RetentionDuration: uint64(time.Duration(opts.Retention).Seconds() * 1000),
BlockRanges: rngs,
NoLockfile: opts.NoLockfile,
})
if err != nil {
return nil, err
}
return db, nil
}
// StartTime implements the Storage interface.
func (a adapter) StartTime() (int64, error) {
startTime := int64(model.Latest)
var indexr tsdb.IndexReader
if len(a.db.Blocks()) > 0 {
var err error
indexr, err = a.db.Blocks()[0].Index()
if err != nil {
return startTime, err
}
} else {
var err error
indexr, err = a.db.Head().Index()
if err != nil {
return startTime, err
}
}
joblabel := "job"
tpls, err := indexr.LabelValues(joblabel)
if err != nil {
return startTime, err
}
for i := 0; i < tpls.Len(); i++ {
vals, err := tpls.At(i)
if err != nil {
continue
}
for _, v := range vals {
p, err := indexr.Postings(joblabel, v)
if err != nil {
continue
}
if p.Next() {
var lset tsdbLabels.Labels
var chks []tsdb.ChunkMeta
indexr.Series(p.At(), &lset, &chks)
if startTime > chks[0].MinTime {
startTime = chks[0].MinTime
}
}
}
}
// Add a safety margin as it may take a few minutes for everything to spin up.
return startTime + a.startTimeMargin, nil
}
func (a adapter) Querier(_ context.Context, mint, maxt int64) (storage.Querier, error) {
q, err := a.db.Querier(mint, maxt)
if err != nil {
return nil, err
}
return querier{q: q}, nil
}
// Appender returns a new appender against the storage.
func (a adapter) Appender() (storage.Appender, error) {
return appender{a: a.db.Appender()}, nil
}
// Close closes the storage and all its underlying resources.
func (a adapter) Close() error {
return a.db.Close()
}
type querier struct {
q tsdb.Querier
}
func (q querier) Select(oms ...*labels.Matcher) storage.SeriesSet {
ms := make([]tsdbLabels.Matcher, 0, len(oms))
for _, om := range oms {
ms = append(ms, convertMatcher(om))
}
return seriesSet{set: q.q.Select(ms...)}
}
func (q querier) LabelValues(name string) ([]string, error) { return q.q.LabelValues(name) }
func (q querier) Close() error { return q.q.Close() }
type seriesSet struct {
set tsdb.SeriesSet
}
func (s seriesSet) Next() bool { return s.set.Next() }
func (s seriesSet) Err() error { return s.set.Err() }
func (s seriesSet) At() storage.Series { return series{s: s.set.At()} }
type series struct {
s tsdb.Series
}
func (s series) Labels() labels.Labels { return toLabels(s.s.Labels()) }
func (s series) Iterator() storage.SeriesIterator { return storage.SeriesIterator(s.s.Iterator()) }
type appender struct {
a tsdb.Appender
}
func (a appender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
ref, err := a.a.Add(toTSDBLabels(lset), t, v)
switch errors.Cause(err) {
case tsdb.ErrNotFound:
return 0, storage.ErrNotFound
case tsdb.ErrOutOfOrderSample:
return 0, storage.ErrOutOfOrderSample
case tsdb.ErrAmendSample:
return 0, storage.ErrDuplicateSampleForTimestamp
case tsdb.ErrOutOfBounds:
return 0, storage.ErrOutOfBounds
}
return ref, err
}
func (a appender) AddFast(_ labels.Labels, ref uint64, t int64, v float64) error {
err := a.a.AddFast(ref, t, v)
switch errors.Cause(err) {
case tsdb.ErrNotFound:
return storage.ErrNotFound
case tsdb.ErrOutOfOrderSample:
return storage.ErrOutOfOrderSample
case tsdb.ErrAmendSample:
return storage.ErrDuplicateSampleForTimestamp
case tsdb.ErrOutOfBounds:
return storage.ErrOutOfBounds
}
return err
}
func (a appender) Commit() error { return a.a.Commit() }
func (a appender) Rollback() error { return a.a.Rollback() }
func convertMatcher(m *labels.Matcher) tsdbLabels.Matcher {
switch m.Type {
case labels.MatchEqual:
return tsdbLabels.NewEqualMatcher(m.Name, m.Value)
case labels.MatchNotEqual:
return tsdbLabels.Not(tsdbLabels.NewEqualMatcher(m.Name, m.Value))
case labels.MatchRegexp:
res, err := tsdbLabels.NewRegexpMatcher(m.Name, "^(?:"+m.Value+")$")
if err != nil {
panic(err)
}
return res
case labels.MatchNotRegexp:
res, err := tsdbLabels.NewRegexpMatcher(m.Name, "^(?:"+m.Value+")$")
if err != nil {
panic(err)
}
return tsdbLabels.Not(res)
}
panic("storage.convertMatcher: invalid matcher type")
}
func toTSDBLabels(l labels.Labels) tsdbLabels.Labels {
return *(*tsdbLabels.Labels)(unsafe.Pointer(&l))
}
func toLabels(l tsdbLabels.Labels) labels.Labels {
return *(*labels.Labels)(unsafe.Pointer(&l))
}