mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-13 17:14:05 -08:00
bab587b9dc
Some checks failed
buf.build / lint and publish (push) Has been cancelled
CI / Go tests (push) Has been cancelled
CI / More Go tests (push) Has been cancelled
CI / Go tests with previous Go version (push) Has been cancelled
CI / UI tests (push) Has been cancelled
CI / Go tests on Windows (push) Has been cancelled
CI / Mixins tests (push) Has been cancelled
CI / Build Prometheus for common architectures (0) (push) Has been cancelled
CI / Build Prometheus for common architectures (1) (push) Has been cancelled
CI / Build Prometheus for common architectures (2) (push) Has been cancelled
CI / Build Prometheus for all architectures (0) (push) Has been cancelled
CI / Build Prometheus for all architectures (1) (push) Has been cancelled
CI / Build Prometheus for all architectures (10) (push) Has been cancelled
CI / Build Prometheus for all architectures (11) (push) Has been cancelled
CI / Build Prometheus for all architectures (2) (push) Has been cancelled
CI / Build Prometheus for all architectures (3) (push) Has been cancelled
CI / Build Prometheus for all architectures (4) (push) Has been cancelled
CI / Build Prometheus for all architectures (5) (push) Has been cancelled
CI / Build Prometheus for all architectures (6) (push) Has been cancelled
CI / Build Prometheus for all architectures (7) (push) Has been cancelled
CI / Build Prometheus for all architectures (8) (push) Has been cancelled
CI / Build Prometheus for all architectures (9) (push) Has been cancelled
CI / Check generated parser (push) Has been cancelled
CI / golangci-lint (push) Has been cancelled
CI / fuzzing (push) Has been cancelled
CI / codeql (push) Has been cancelled
Scorecards supply-chain security / Scorecards analysis (push) Has been cancelled
CI / Report status of build Prometheus for all architectures (push) Has been cancelled
CI / Publish main branch artifacts (push) Has been cancelled
CI / Publish release artefacts (push) Has been cancelled
CI / Publish UI on npm Registry (push) Has been cancelled
* Remove unused option from HeadOptions Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * Improve docs for appendable() method in head appender Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * Ingest CT (float) samples in Agent DB Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * allow for ingestion of CT native histogram Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * adding some verification for ct ts Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * Validating CT histogram before append and add newly created series to pending series Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * checking the wal for written samples Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * Checking for samples in test Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * adding case for validations Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * fixing comparison when dedupelabels is enabled Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * unite tests, use table testing Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * Implement CT related methods in timestampTracker for write storage Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * adding error case to test Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * removing unused fields Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * Updating lastTs for series when adding CT to invalidate duplicates Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> * making sure that updating the lastTS wont cause OOO later on in Commit(); Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com> --------- Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>
356 lines
11 KiB
Go
356 lines
11 KiB
Go
// Copyright 2017 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package remote
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"math"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
"github.com/prometheus/common/promslog"
|
|
|
|
"github.com/prometheus/prometheus/config"
|
|
"github.com/prometheus/prometheus/model/exemplar"
|
|
"github.com/prometheus/prometheus/model/histogram"
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/prometheus/prometheus/model/metadata"
|
|
"github.com/prometheus/prometheus/storage"
|
|
"github.com/prometheus/prometheus/tsdb/wlog"
|
|
)
|
|
|
|
var (
|
|
samplesIn = promauto.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "samples_in_total",
|
|
Help: "Samples in to remote storage, compare to samples out for queue managers.",
|
|
})
|
|
exemplarsIn = promauto.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "exemplars_in_total",
|
|
Help: "Exemplars in to remote storage, compare to exemplars out for queue managers.",
|
|
})
|
|
histogramsIn = promauto.NewCounter(prometheus.CounterOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "histograms_in_total",
|
|
Help: "HistogramSamples in to remote storage, compare to histograms out for queue managers.",
|
|
})
|
|
)
|
|
|
|
// WriteStorage represents all the remote write storage.
|
|
type WriteStorage struct {
|
|
logger *slog.Logger
|
|
reg prometheus.Registerer
|
|
mtx sync.Mutex
|
|
|
|
watcherMetrics *wlog.WatcherMetrics
|
|
liveReaderMetrics *wlog.LiveReaderMetrics
|
|
externalLabels labels.Labels
|
|
dir string
|
|
queues map[string]*QueueManager
|
|
metadataInWAL bool
|
|
samplesIn *ewmaRate
|
|
flushDeadline time.Duration
|
|
interner *pool
|
|
scraper ReadyScrapeManager
|
|
quit chan struct{}
|
|
|
|
// For timestampTracker.
|
|
highestTimestamp *maxTimestamp
|
|
}
|
|
|
|
// NewWriteStorage creates and runs a WriteStorage.
|
|
func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage {
|
|
if logger == nil {
|
|
logger = promslog.NewNopLogger()
|
|
}
|
|
rws := &WriteStorage{
|
|
queues: make(map[string]*QueueManager),
|
|
watcherMetrics: wlog.NewWatcherMetrics(reg),
|
|
liveReaderMetrics: wlog.NewLiveReaderMetrics(reg),
|
|
logger: logger,
|
|
reg: reg,
|
|
flushDeadline: flushDeadline,
|
|
samplesIn: newEWMARate(ewmaWeight, shardUpdateDuration),
|
|
dir: dir,
|
|
interner: newPool(),
|
|
scraper: sm,
|
|
quit: make(chan struct{}),
|
|
metadataInWAL: metadataInWal,
|
|
highestTimestamp: &maxTimestamp{
|
|
Gauge: prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Namespace: namespace,
|
|
Subsystem: subsystem,
|
|
Name: "highest_timestamp_in_seconds",
|
|
Help: "Highest timestamp that has come into the remote storage via the Appender interface, in seconds since epoch. Initialized to 0 when no data has been received yet.",
|
|
}),
|
|
},
|
|
}
|
|
if reg != nil {
|
|
reg.MustRegister(rws.highestTimestamp)
|
|
}
|
|
go rws.run()
|
|
return rws
|
|
}
|
|
|
|
func (rws *WriteStorage) run() {
|
|
ticker := time.NewTicker(shardUpdateDuration)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
rws.samplesIn.tick()
|
|
case <-rws.quit:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (rws *WriteStorage) Notify() {
|
|
rws.mtx.Lock()
|
|
defer rws.mtx.Unlock()
|
|
|
|
for _, q := range rws.queues {
|
|
// These should all be non blocking
|
|
q.watcher.Notify()
|
|
}
|
|
}
|
|
|
|
// ApplyConfig updates the state as the new config requires.
|
|
// Only stop & create queues which have changes.
|
|
func (rws *WriteStorage) ApplyConfig(conf *config.Config) error {
|
|
rws.mtx.Lock()
|
|
defer rws.mtx.Unlock()
|
|
|
|
// Remote write queues only need to change if the remote write config or
|
|
// external labels change.
|
|
externalLabelUnchanged := labels.Equal(conf.GlobalConfig.ExternalLabels, rws.externalLabels)
|
|
rws.externalLabels = conf.GlobalConfig.ExternalLabels
|
|
|
|
newQueues := make(map[string]*QueueManager)
|
|
newHashes := []string{}
|
|
for _, rwConf := range conf.RemoteWriteConfigs {
|
|
if rwConf.ProtobufMessage == config.RemoteWriteProtoMsgV2 && !rws.metadataInWAL {
|
|
return errors.New("invalid remote write configuration, if you are using remote write version 2.0 the `--enable-feature=metadata-wal-records` feature flag must be enabled")
|
|
}
|
|
hash, err := toHash(rwConf)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Don't allow duplicate remote write configs.
|
|
if _, ok := newQueues[hash]; ok {
|
|
return fmt.Errorf("duplicate remote write configs are not allowed, found duplicate for URL: %s", rwConf.URL)
|
|
}
|
|
|
|
// Set the queue name to the config hash if the user has not set
|
|
// a name in their remote write config so we can still differentiate
|
|
// between queues that have the same remote write endpoint.
|
|
name := hash[:6]
|
|
if rwConf.Name != "" {
|
|
name = rwConf.Name
|
|
}
|
|
|
|
c, err := NewWriteClient(name, &ClientConfig{
|
|
URL: rwConf.URL,
|
|
WriteProtoMsg: rwConf.ProtobufMessage,
|
|
Timeout: rwConf.RemoteTimeout,
|
|
HTTPClientConfig: rwConf.HTTPClientConfig,
|
|
SigV4Config: rwConf.SigV4Config,
|
|
AzureADConfig: rwConf.AzureADConfig,
|
|
GoogleIAMConfig: rwConf.GoogleIAMConfig,
|
|
Headers: rwConf.Headers,
|
|
RetryOnRateLimit: rwConf.QueueConfig.RetryOnRateLimit,
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
queue, ok := rws.queues[hash]
|
|
if externalLabelUnchanged && ok {
|
|
// Update the client in case any secret configuration has changed.
|
|
queue.SetClient(c)
|
|
newQueues[hash] = queue
|
|
delete(rws.queues, hash)
|
|
continue
|
|
}
|
|
|
|
// Redacted to remove any passwords in the URL (that are
|
|
// technically accepted but not recommended) since this is
|
|
// only used for metric labels.
|
|
endpoint := rwConf.URL.Redacted()
|
|
newQueues[hash] = NewQueueManager(
|
|
newQueueManagerMetrics(rws.reg, name, endpoint),
|
|
rws.watcherMetrics,
|
|
rws.liveReaderMetrics,
|
|
rws.logger,
|
|
rws.dir,
|
|
rws.samplesIn,
|
|
rwConf.QueueConfig,
|
|
rwConf.MetadataConfig,
|
|
conf.GlobalConfig.ExternalLabels,
|
|
rwConf.WriteRelabelConfigs,
|
|
c,
|
|
rws.flushDeadline,
|
|
rws.interner,
|
|
rws.highestTimestamp,
|
|
rws.scraper,
|
|
rwConf.SendExemplars,
|
|
rwConf.SendNativeHistograms,
|
|
rwConf.ProtobufMessage,
|
|
)
|
|
// Keep track of which queues are new so we know which to start.
|
|
newHashes = append(newHashes, hash)
|
|
}
|
|
|
|
// Anything remaining in rws.queues is a queue who's config has
|
|
// changed or was removed from the overall remote write config.
|
|
for _, q := range rws.queues {
|
|
q.Stop()
|
|
}
|
|
|
|
for _, hash := range newHashes {
|
|
newQueues[hash].Start()
|
|
}
|
|
|
|
rws.queues = newQueues
|
|
|
|
return nil
|
|
}
|
|
|
|
// Appender implements storage.Storage.
|
|
func (rws *WriteStorage) Appender(_ context.Context) storage.Appender {
|
|
return ×tampTracker{
|
|
writeStorage: rws,
|
|
highestRecvTimestamp: rws.highestTimestamp,
|
|
}
|
|
}
|
|
|
|
// LowestSentTimestamp returns the lowest sent timestamp across all queues.
|
|
func (rws *WriteStorage) LowestSentTimestamp() int64 {
|
|
rws.mtx.Lock()
|
|
defer rws.mtx.Unlock()
|
|
|
|
var lowestTs int64 = math.MaxInt64
|
|
|
|
for _, q := range rws.queues {
|
|
ts := int64(q.metrics.highestSentTimestamp.Get() * 1000)
|
|
if ts < lowestTs {
|
|
lowestTs = ts
|
|
}
|
|
}
|
|
if len(rws.queues) == 0 {
|
|
lowestTs = 0
|
|
}
|
|
|
|
return lowestTs
|
|
}
|
|
|
|
// Close closes the WriteStorage.
|
|
func (rws *WriteStorage) Close() error {
|
|
rws.mtx.Lock()
|
|
defer rws.mtx.Unlock()
|
|
for _, q := range rws.queues {
|
|
q.Stop()
|
|
}
|
|
close(rws.quit)
|
|
return nil
|
|
}
|
|
|
|
type timestampTracker struct {
|
|
writeStorage *WriteStorage
|
|
appendOptions *storage.AppendOptions
|
|
samples int64
|
|
exemplars int64
|
|
histograms int64
|
|
highestTimestamp int64
|
|
highestRecvTimestamp *maxTimestamp
|
|
}
|
|
|
|
func (t *timestampTracker) SetOptions(opts *storage.AppendOptions) {
|
|
t.appendOptions = opts
|
|
}
|
|
|
|
// Append implements storage.Appender.
|
|
func (t *timestampTracker) Append(_ storage.SeriesRef, _ labels.Labels, ts int64, _ float64) (storage.SeriesRef, error) {
|
|
t.samples++
|
|
if ts > t.highestTimestamp {
|
|
t.highestTimestamp = ts
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (t *timestampTracker) AppendExemplar(_ storage.SeriesRef, _ labels.Labels, _ exemplar.Exemplar) (storage.SeriesRef, error) {
|
|
t.exemplars++
|
|
return 0, nil
|
|
}
|
|
|
|
func (t *timestampTracker) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, ts int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) {
|
|
t.histograms++
|
|
if ts > t.highestTimestamp {
|
|
t.highestTimestamp = ts
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64) (storage.SeriesRef, error) {
|
|
t.samples++
|
|
if ct > t.highestTimestamp {
|
|
// Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far.
|
|
// However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this.
|
|
t.highestTimestamp = ct
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) {
|
|
t.histograms++
|
|
if ct > t.highestTimestamp {
|
|
// Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far.
|
|
// However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this.
|
|
t.highestTimestamp = ct
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) {
|
|
// TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write.
|
|
// UpdateMetadata is no-op for remote write (where timestampTracker is being used) for now.
|
|
return 0, nil
|
|
}
|
|
|
|
// Commit implements storage.Appender.
|
|
func (t *timestampTracker) Commit() error {
|
|
t.writeStorage.samplesIn.incr(t.samples + t.exemplars + t.histograms)
|
|
|
|
samplesIn.Add(float64(t.samples))
|
|
exemplarsIn.Add(float64(t.exemplars))
|
|
histogramsIn.Add(float64(t.histograms))
|
|
t.highestRecvTimestamp.Set(float64(t.highestTimestamp / 1000))
|
|
return nil
|
|
}
|
|
|
|
// Rollback implements storage.Appender.
|
|
func (*timestampTracker) Rollback() error {
|
|
return nil
|
|
}
|