mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-11 13:57:36 -08:00
re-add the missing prometheus_tsdb_wal_corruptions_total (#473)
Closes https://github.com/prometheus/tsdb/issues/471. After the new WAL was implemented, this metric was missing, so this commit adds it again. It also adds a check for the metric to a test to make sure it works as expected. Signed-off-by: Krasi Georgiev <kgeorgie@redhat.com>
This commit is contained in:
parent
79869d9a4d
commit
520ab7dc53
8
head.go
8
head.go
|
@ -89,6 +89,7 @@ type headMetrics struct {
|
||||||
maxTime prometheus.GaugeFunc
|
maxTime prometheus.GaugeFunc
|
||||||
samplesAppended prometheus.Counter
|
samplesAppended prometheus.Counter
|
||||||
walTruncateDuration prometheus.Summary
|
walTruncateDuration prometheus.Summary
|
||||||
|
walCorruptionsTotal prometheus.Counter
|
||||||
headTruncateFail prometheus.Counter
|
headTruncateFail prometheus.Counter
|
||||||
headTruncateTotal prometheus.Counter
|
headTruncateTotal prometheus.Counter
|
||||||
checkpointDeleteFail prometheus.Counter
|
checkpointDeleteFail prometheus.Counter
|
||||||
|
@ -152,6 +153,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
|
||||||
Name: "prometheus_tsdb_wal_truncate_duration_seconds",
|
Name: "prometheus_tsdb_wal_truncate_duration_seconds",
|
||||||
Help: "Duration of WAL truncation.",
|
Help: "Duration of WAL truncation.",
|
||||||
})
|
})
|
||||||
|
m.walCorruptionsTotal = prometheus.NewCounter(prometheus.CounterOpts{
|
||||||
|
Name: "prometheus_tsdb_wal_corruptions_total",
|
||||||
|
Help: "Total number of WAL corruptions.",
|
||||||
|
})
|
||||||
m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
|
m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
|
||||||
Name: "prometheus_tsdb_head_samples_appended_total",
|
Name: "prometheus_tsdb_head_samples_appended_total",
|
||||||
Help: "Total number of appended samples.",
|
Help: "Total number of appended samples.",
|
||||||
|
@ -195,6 +200,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
|
||||||
m.maxTime,
|
m.maxTime,
|
||||||
m.gcDuration,
|
m.gcDuration,
|
||||||
m.walTruncateDuration,
|
m.walTruncateDuration,
|
||||||
|
m.walCorruptionsTotal,
|
||||||
m.samplesAppended,
|
m.samplesAppended,
|
||||||
m.headTruncateFail,
|
m.headTruncateFail,
|
||||||
m.headTruncateTotal,
|
m.headTruncateTotal,
|
||||||
|
@ -480,10 +486,10 @@ func (h *Head) Init(minValidTime int64) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
level.Warn(h.logger).Log("msg", "encountered WAL error, attempting repair", "err", err)
|
level.Warn(h.logger).Log("msg", "encountered WAL error, attempting repair", "err", err)
|
||||||
|
h.metrics.walCorruptionsTotal.Inc()
|
||||||
if err := h.wal.Repair(err); err != nil {
|
if err := h.wal.Repair(err); err != nil {
|
||||||
return errors.Wrap(err, "repair corrupted WAL")
|
return errors.Wrap(err, "repair corrupted WAL")
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import (
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
|
||||||
"github.com/prometheus/tsdb/chunkenc"
|
"github.com/prometheus/tsdb/chunkenc"
|
||||||
"github.com/prometheus/tsdb/chunks"
|
"github.com/prometheus/tsdb/chunks"
|
||||||
"github.com/prometheus/tsdb/index"
|
"github.com/prometheus/tsdb/index"
|
||||||
|
@ -927,7 +928,9 @@ func TestWalRepair(t *testing.T) {
|
||||||
|
|
||||||
h, err := NewHead(nil, nil, w, 1)
|
h, err := NewHead(nil, nil, w, 1)
|
||||||
testutil.Ok(t, err)
|
testutil.Ok(t, err)
|
||||||
|
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal))
|
||||||
testutil.Ok(t, h.Init(math.MinInt64))
|
testutil.Ok(t, h.Init(math.MinInt64))
|
||||||
|
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal))
|
||||||
|
|
||||||
sr, err := wal.NewSegmentsReader(dir)
|
sr, err := wal.NewSegmentsReader(dir)
|
||||||
testutil.Ok(t, err)
|
testutil.Ok(t, err)
|
||||||
|
|
Loading…
Reference in a new issue