Add TSDB startup duration metric (#7737)

* Add TSDB WAL replay duration metric

Signed-off-by: Max Neverov <neverov.max@gmail.com>
This commit is contained in:
Max Neverov 2020-09-21 18:25:05 +02:00 committed by GitHub
parent 77c784ac93
commit 7e1c27b853
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -110,6 +110,7 @@ type headMetrics struct {
outOfOrderSamples prometheus.Counter outOfOrderSamples prometheus.Counter
walTruncateDuration prometheus.Summary walTruncateDuration prometheus.Summary
walCorruptionsTotal prometheus.Counter walCorruptionsTotal prometheus.Counter
walTotalReplayDuration prometheus.Gauge
headTruncateFail prometheus.Counter headTruncateFail prometheus.Counter
headTruncateTotal prometheus.Counter headTruncateTotal prometheus.Counter
checkpointDeleteFail prometheus.Counter checkpointDeleteFail prometheus.Counter
@ -167,6 +168,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Name: "prometheus_tsdb_wal_corruptions_total", Name: "prometheus_tsdb_wal_corruptions_total",
Help: "Total number of WAL corruptions.", Help: "Total number of WAL corruptions.",
}), }),
walTotalReplayDuration: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "prometheus_tsdb_data_replay_duration_seconds",
Help: "Time taken to replay the data on disk.",
}),
samplesAppended: prometheus.NewCounter(prometheus.CounterOpts{ samplesAppended: prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_head_samples_appended_total", Name: "prometheus_tsdb_head_samples_appended_total",
Help: "Total number of appended samples.", Help: "Total number of appended samples.",
@ -222,6 +227,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
m.gcDuration, m.gcDuration,
m.walTruncateDuration, m.walTruncateDuration,
m.walCorruptionsTotal, m.walCorruptionsTotal,
m.walTotalReplayDuration,
m.samplesAppended, m.samplesAppended,
m.outOfBoundSamples, m.outOfBoundSamples,
m.outOfOrderSamples, m.outOfOrderSamples,
@ -713,11 +719,13 @@ func (h *Head) Init(minValidTime int64) error {
level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last) level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last)
} }
walReplayDuration := time.Since(start)
h.metrics.walTotalReplayDuration.Set(walReplayDuration.Seconds())
level.Info(h.logger).Log( level.Info(h.logger).Log(
"msg", "WAL replay completed", "msg", "WAL replay completed",
"checkpoint_replay_duration", checkpointReplayDuration.String(), "checkpoint_replay_duration", checkpointReplayDuration.String(),
"wal_replay_duration", time.Since(walReplayStart).String(), "wal_replay_duration", time.Since(walReplayStart).String(),
"total_replay_duration", time.Since(start).String(), "total_replay_duration", walReplayDuration.String(),
) )
return nil return nil