From 669592a2c4d59697ce3f654db2c1e7d5e3d42714 Mon Sep 17 00:00:00 2001 From: John McBride Date: Fri, 17 Jan 2020 13:12:04 -0700 Subject: [PATCH] Exports metric for WAL write errors (#6647) * Exports metric for WAL write errors Signed-off-by: John McBride * Correct name for counter Signed-off-by: John McBride * Move WAL write failure to wal.go Signed-off-by: John McBride * WAL write fail metric moved to Log for external consumers Signed-off-by: John McBride --- tsdb/wal/wal.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tsdb/wal/wal.go b/tsdb/wal/wal.go index b0b6931744..510326b881 100644 --- a/tsdb/wal/wal.go +++ b/tsdb/wal/wal.go @@ -187,6 +187,7 @@ type walMetrics struct { truncateFail prometheus.Counter truncateTotal prometheus.Counter currentSegment prometheus.Gauge + writesFailed prometheus.Counter } func newWALMetrics(w *WAL, r prometheus.Registerer) *walMetrics { @@ -217,6 +218,10 @@ func newWALMetrics(w *WAL, r prometheus.Registerer) *walMetrics { Name: "prometheus_tsdb_wal_segment_current", Help: "WAL segment index that TSDB is currently writing to.", }) + m.writesFailed = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "prometheus_tsdb_wal_writes_failed_total", + Help: "Total number of WAL writes that failed.", + }) if r != nil { r.MustRegister( @@ -226,6 +231,7 @@ func newWALMetrics(w *WAL, r prometheus.Registerer) *walMetrics { m.truncateFail, m.truncateTotal, m.currentSegment, + m.writesFailed, ) } @@ -575,6 +581,7 @@ func (w *WAL) Log(recs ...[]byte) error { // a bit of extra logic here frees them from that overhead. for i, r := range recs { if err := w.log(r, i == len(recs)-1); err != nil { + w.metrics.writesFailed.Inc() return err } }