mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-09 23:24:05 -08:00
Fix panic during tsdb Commit (#13092)
* Fix panic during tsdb Commit Fixes the following panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x19deb45] goroutine 651118930 [running]: github.com/prometheus/prometheus/tsdb.(*headAppender).Commit(0xc19100f7c0) /drone/src/vendor/github.com/prometheus/prometheus/tsdb/head_append.go:855 +0x245 github.com/prometheus/prometheus/tsdb.dbAppender.Commit({{0x35bd6f0?, 0xc19100f7c0?}, 0xc000fa4c00?}) /drone/src/vendor/github.com/prometheus/prometheus/tsdb/db.go:1159 +0x2f We theorize that the panic happened due to the series referenced by the exemplar being removed between AppendExemplar and Commit due to being idle. Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
This commit is contained in:
parent
39a35d92bc
commit
acc114fe55
|
@ -751,6 +751,12 @@ func (a *headAppender) Commit() (err error) {
|
|||
// No errors logging to WAL, so pass the exemplars along to the in memory storage.
|
||||
for _, e := range a.exemplars {
|
||||
s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref))
|
||||
if s == nil {
|
||||
// This is very unlikely to happen, but we have seen it in the wild.
|
||||
// It means that the series was truncated between AppendExemplar and Commit.
|
||||
// See TestHeadCompactionWhileAppendAndCommitExemplar.
|
||||
continue
|
||||
}
|
||||
// We don't instrument exemplar appends here, all is instrumented by storage.
|
||||
if err := a.head.exemplars.AddExemplar(s.lset, e.exemplar); err != nil {
|
||||
if err == storage.ErrOutOfOrderExemplar {
|
||||
|
|
|
@ -5514,3 +5514,31 @@ func TestWALSampleAndExemplarOrder(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeadCompactionWhileAppendAndCommitExemplar simulates a use case where
|
||||
// a series is removed from the head while an exemplar is being appended to it.
|
||||
// This can happen in theory by compacting the head at the right time due to
|
||||
// a series being idle.
|
||||
// The test cheats a little bit by not appending a sample with the exemplar.
|
||||
// If you also add a sample and run Truncate in a concurrent goroutine and run
|
||||
// the test around a million(!) times, you can get
|
||||
// `unknown HeadSeriesRef when trying to add exemplar: 1` error on push.
|
||||
// It is likely that running the test for much longer and with more time variations
|
||||
// would trigger the
|
||||
// `signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0xbb03d1`
|
||||
// panic, that we have seen in the wild once.
|
||||
func TestHeadCompactionWhileAppendAndCommitExemplar(t *testing.T) {
|
||||
h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false)
|
||||
app := h.Appender(context.Background())
|
||||
lbls := labels.FromStrings("foo", "bar")
|
||||
ref, err := app.Append(0, lbls, 1, 1)
|
||||
require.NoError(t, err)
|
||||
app.Commit()
|
||||
// Not adding a sample here to trigger the fault.
|
||||
app = h.Appender(context.Background())
|
||||
_, err = app.AppendExemplar(ref, lbls, exemplar.Exemplar{Value: 1, Ts: 20})
|
||||
require.NoError(t, err)
|
||||
h.Truncate(10)
|
||||
app.Commit()
|
||||
h.Close()
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue