From 7e1c27b853ff98c0cfdcd6b482ac0caa2403d135 Mon Sep 17 00:00:00 2001 From: Max Neverov <1296281+mneverov@users.noreply.github.com> Date: Mon, 21 Sep 2020 18:25:05 +0200 Subject: [PATCH] Add tsdb startup duration metric (#7737) * Add tsdb wal replay duration metric Signed-off-by: Max Neverov --- tsdb/head.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tsdb/head.go b/tsdb/head.go index 9366bfe149..ba5cf1a611 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -110,6 +110,7 @@ type headMetrics struct { outOfOrderSamples prometheus.Counter walTruncateDuration prometheus.Summary walCorruptionsTotal prometheus.Counter + walTotalReplayDuration prometheus.Gauge headTruncateFail prometheus.Counter headTruncateTotal prometheus.Counter checkpointDeleteFail prometheus.Counter @@ -167,6 +168,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_wal_corruptions_total", Help: "Total number of WAL corruptions.", }), + walTotalReplayDuration: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_data_replay_duration_seconds", + Help: "Time taken to replay the data on disk.", + }), samplesAppended: prometheus.NewCounter(prometheus.CounterOpts{ Name: "prometheus_tsdb_head_samples_appended_total", Help: "Total number of appended samples.", @@ -222,6 +227,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { m.gcDuration, m.walTruncateDuration, m.walCorruptionsTotal, + m.walTotalReplayDuration, m.samplesAppended, m.outOfBoundSamples, m.outOfOrderSamples, @@ -713,11 +719,13 @@ func (h *Head) Init(minValidTime int64) error { level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last) } + walReplayDuration := time.Since(start) + h.metrics.walTotalReplayDuration.Set(walReplayDuration.Seconds()) level.Info(h.logger).Log( "msg", "WAL replay completed", "checkpoint_replay_duration", checkpointReplayDuration.String(), "wal_replay_duration", time.Since(walReplayStart).String(), - "total_replay_duration", time.Since(start).String(), + "total_replay_duration", walReplayDuration.String(), ) return nil