Re-add the missing prometheus_tsdb_wal_corruptions_total metric (#473)

closes https://github.com/prometheus/tsdb/issues/471

After the new WAL was implemented, this metric was missing, so this commit adds it again.
It is also checked in a test to make sure it works as expected.

Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
This commit is contained in:
Krasi Georgiev 2018-12-18 13:24:56 +03:00 committed by GitHub
parent 79869d9a4d
commit 520ab7dc53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 1 deletions

View file

@ -89,6 +89,7 @@ type headMetrics struct {
maxTime prometheus.GaugeFunc maxTime prometheus.GaugeFunc
samplesAppended prometheus.Counter samplesAppended prometheus.Counter
walTruncateDuration prometheus.Summary walTruncateDuration prometheus.Summary
walCorruptionsTotal prometheus.Counter
headTruncateFail prometheus.Counter headTruncateFail prometheus.Counter
headTruncateTotal prometheus.Counter headTruncateTotal prometheus.Counter
checkpointDeleteFail prometheus.Counter checkpointDeleteFail prometheus.Counter
@ -152,6 +153,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Name: "prometheus_tsdb_wal_truncate_duration_seconds", Name: "prometheus_tsdb_wal_truncate_duration_seconds",
Help: "Duration of WAL truncation.", Help: "Duration of WAL truncation.",
}) })
m.walCorruptionsTotal = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_wal_corruptions_total",
Help: "Total number of WAL corruptions.",
})
m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{ m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_head_samples_appended_total", Name: "prometheus_tsdb_head_samples_appended_total",
Help: "Total number of appended samples.", Help: "Total number of appended samples.",
@ -195,6 +200,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
m.maxTime, m.maxTime,
m.gcDuration, m.gcDuration,
m.walTruncateDuration, m.walTruncateDuration,
m.walCorruptionsTotal,
m.samplesAppended, m.samplesAppended,
m.headTruncateFail, m.headTruncateFail,
m.headTruncateTotal, m.headTruncateTotal,
@ -480,10 +486,10 @@ func (h *Head) Init(minValidTime int64) error {
return nil return nil
} }
level.Warn(h.logger).Log("msg", "encountered WAL error, attempting repair", "err", err) level.Warn(h.logger).Log("msg", "encountered WAL error, attempting repair", "err", err)
h.metrics.walCorruptionsTotal.Inc()
if err := h.wal.Repair(err); err != nil { if err := h.wal.Repair(err); err != nil {
return errors.Wrap(err, "repair corrupted WAL") return errors.Wrap(err, "repair corrupted WAL")
} }
return nil return nil
} }

View file

@ -22,6 +22,7 @@ import (
"sort" "sort"
"testing" "testing"
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/tsdb/chunkenc" "github.com/prometheus/tsdb/chunkenc"
"github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/chunks"
"github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/index"
@ -927,7 +928,9 @@ func TestWalRepair(t *testing.T) {
h, err := NewHead(nil, nil, w, 1) h, err := NewHead(nil, nil, w, 1)
testutil.Ok(t, err) testutil.Ok(t, err)
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal))
testutil.Ok(t, h.Init(math.MinInt64)) testutil.Ok(t, h.Init(math.MinInt64))
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal))
sr, err := wal.NewSegmentsReader(dir) sr, err := wal.NewSegmentsReader(dir)
testutil.Ok(t, err) testutil.Ok(t, err)