mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 13:44:05 -08:00
tsdb: turn off transaction isolation for head compaction (#11317)
* tsdb: add a basic test for read/write isolation * tsdb: store the min time with isolationAppender So that we can see when appending has moved past a certain point in time. * tsdb: allow RangeHead to have isolation disabled This will be used when for head compaction. * tsdb: do head compaction with isolation disabled This saves a lot of work tracking appends done while compaction is ongoing. Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
parent
d0607435a2
commit
ff00dee262
10
tsdb/db.go
10
tsdb/db.go
|
@ -1059,7 +1059,15 @@ func (db *DB) Compact() (returnErr error) {
|
||||||
// so in order to make sure that overlaps are evaluated
|
// so in order to make sure that overlaps are evaluated
|
||||||
// consistently, we explicitly remove the last value
|
// consistently, we explicitly remove the last value
|
||||||
// from the block interval here.
|
// from the block interval here.
|
||||||
if err := db.compactHead(NewRangeHead(db.head, mint, maxt-1)); err != nil {
|
rh := NewRangeHeadWithIsolationDisabled(db.head, mint, maxt-1)
|
||||||
|
|
||||||
|
// Compaction runs with isolation disabled, because head.compactable()
|
||||||
|
// ensures that maxt is more than chunkRange/2 back from now, and
|
||||||
|
// head.appendableMinValidTime() ensures that no new appends can start within the compaction range.
|
||||||
|
// We do need to wait for any overlapping appenders that started previously to finish.
|
||||||
|
db.head.WaitForAppendersOverlapping(rh.MaxTime())
|
||||||
|
|
||||||
|
if err := db.compactHead(rh); err != nil {
|
||||||
return errors.Wrap(err, "compact head")
|
return errors.Wrap(err, "compact head")
|
||||||
}
|
}
|
||||||
// Consider only successful compactions for WAL truncation.
|
// Consider only successful compactions for WAL truncation.
|
||||||
|
|
22
tsdb/head.go
22
tsdb/head.go
|
@ -1030,6 +1030,13 @@ func (h *Head) WaitForPendingReadersInTimeRange(mint, maxt int64) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WaitForAppendersOverlapping waits for appends overlapping maxt to finish.
|
||||||
|
func (h *Head) WaitForAppendersOverlapping(maxt int64) {
|
||||||
|
for maxt >= h.iso.lowestAppendTime() {
|
||||||
|
time.Sleep(500 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// IsQuerierCollidingWithTruncation returns if the current querier needs to be closed and if a new querier
|
// IsQuerierCollidingWithTruncation returns if the current querier needs to be closed and if a new querier
|
||||||
// has to be created. In the latter case, the method also returns the new mint to be used for creating the
|
// has to be created. In the latter case, the method also returns the new mint to be used for creating the
|
||||||
// new range head and the new querier. This methods helps preventing races with the truncation of in-memory data.
|
// new range head and the new querier. This methods helps preventing races with the truncation of in-memory data.
|
||||||
|
@ -1235,6 +1242,8 @@ func (h *Head) Stats(statsByLabelName string) *Stats {
|
||||||
type RangeHead struct {
|
type RangeHead struct {
|
||||||
head *Head
|
head *Head
|
||||||
mint, maxt int64
|
mint, maxt int64
|
||||||
|
|
||||||
|
isolationOff bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewRangeHead returns a *RangeHead.
|
// NewRangeHead returns a *RangeHead.
|
||||||
|
@ -1247,12 +1256,23 @@ func NewRangeHead(head *Head, mint, maxt int64) *RangeHead {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewRangeHeadWithIsolationDisabled returns a *RangeHead that does not create an isolationState.
|
||||||
|
func NewRangeHeadWithIsolationDisabled(head *Head, mint, maxt int64) *RangeHead {
|
||||||
|
rh := NewRangeHead(head, mint, maxt)
|
||||||
|
rh.isolationOff = true
|
||||||
|
return rh
|
||||||
|
}
|
||||||
|
|
||||||
func (h *RangeHead) Index() (IndexReader, error) {
|
func (h *RangeHead) Index() (IndexReader, error) {
|
||||||
return h.head.indexRange(h.mint, h.maxt), nil
|
return h.head.indexRange(h.mint, h.maxt), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *RangeHead) Chunks() (ChunkReader, error) {
|
func (h *RangeHead) Chunks() (ChunkReader, error) {
|
||||||
return h.head.chunksRange(h.mint, h.maxt, h.head.iso.State(h.mint, h.maxt))
|
var isoState *isolationState
|
||||||
|
if !h.isolationOff {
|
||||||
|
isoState = h.head.iso.State(h.mint, h.maxt)
|
||||||
|
}
|
||||||
|
return h.head.chunksRange(h.mint, h.maxt, isoState)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *RangeHead) Tombstones() (tombstones.Reader, error) {
|
func (h *RangeHead) Tombstones() (tombstones.Reader, error) {
|
||||||
|
|
|
@ -124,7 +124,8 @@ func (h *Head) Appender(_ context.Context) storage.Appender {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *Head) appender() *headAppender {
|
func (h *Head) appender() *headAppender {
|
||||||
appendID, cleanupAppendIDsBelow := h.iso.newAppendID() // Every appender gets an ID that is cleared upon commit/rollback.
|
minValidTime := h.appendableMinValidTime()
|
||||||
|
appendID, cleanupAppendIDsBelow := h.iso.newAppendID(minValidTime) // Every appender gets an ID that is cleared upon commit/rollback.
|
||||||
|
|
||||||
// Allocate the exemplars buffer only if exemplars are enabled.
|
// Allocate the exemplars buffer only if exemplars are enabled.
|
||||||
var exemplarsBuf []exemplarWithSeriesRef
|
var exemplarsBuf []exemplarWithSeriesRef
|
||||||
|
@ -134,7 +135,7 @@ func (h *Head) appender() *headAppender {
|
||||||
|
|
||||||
return &headAppender{
|
return &headAppender{
|
||||||
head: h,
|
head: h,
|
||||||
minValidTime: h.appendableMinValidTime(),
|
minValidTime: minValidTime,
|
||||||
mint: math.MaxInt64,
|
mint: math.MaxInt64,
|
||||||
maxt: math.MinInt64,
|
maxt: math.MinInt64,
|
||||||
headMaxt: h.MaxTime(),
|
headMaxt: h.MaxTime(),
|
||||||
|
|
|
@ -262,7 +262,9 @@ type headChunkReader struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *headChunkReader) Close() error {
|
func (h *headChunkReader) Close() error {
|
||||||
h.isoState.Close()
|
if h.isoState != nil {
|
||||||
|
h.isoState.Close()
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
package tsdb
|
package tsdb
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"math"
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -45,6 +46,7 @@ func (i *isolationState) IsolationDisabled() bool {
|
||||||
|
|
||||||
type isolationAppender struct {
|
type isolationAppender struct {
|
||||||
appendID uint64
|
appendID uint64
|
||||||
|
minTime int64
|
||||||
prev *isolationAppender
|
prev *isolationAppender
|
||||||
next *isolationAppender
|
next *isolationAppender
|
||||||
}
|
}
|
||||||
|
@ -116,6 +118,21 @@ func (i *isolation) lowWatermarkLocked() uint64 {
|
||||||
return i.appendsOpenList.next.appendID
|
return i.appendsOpenList.next.appendID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// lowestAppendTime returns the lowest minTime for any open appender,
|
||||||
|
// or math.MaxInt64 if no open appenders.
|
||||||
|
func (i *isolation) lowestAppendTime() int64 {
|
||||||
|
var lowest int64 = math.MaxInt64
|
||||||
|
i.appendMtx.RLock()
|
||||||
|
defer i.appendMtx.RUnlock()
|
||||||
|
|
||||||
|
for a := i.appendsOpenList.next; a != i.appendsOpenList; a = a.next {
|
||||||
|
if lowest > a.minTime {
|
||||||
|
lowest = a.minTime
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lowest
|
||||||
|
}
|
||||||
|
|
||||||
// State returns an object used to control isolation
|
// State returns an object used to control isolation
|
||||||
// between a query and appends. Must be closed when complete.
|
// between a query and appends. Must be closed when complete.
|
||||||
func (i *isolation) State(mint, maxt int64) *isolationState {
|
func (i *isolation) State(mint, maxt int64) *isolationState {
|
||||||
|
@ -164,7 +181,7 @@ func (i *isolation) TraverseOpenReads(f func(s *isolationState) bool) {
|
||||||
// newAppendID increments the transaction counter and returns a new transaction
|
// newAppendID increments the transaction counter and returns a new transaction
|
||||||
// ID. The first ID returned is 1.
|
// ID. The first ID returned is 1.
|
||||||
// Also returns the low watermark, to keep lock/unlock operations down.
|
// Also returns the low watermark, to keep lock/unlock operations down.
|
||||||
func (i *isolation) newAppendID() (uint64, uint64) {
|
func (i *isolation) newAppendID(minTime int64) (uint64, uint64) {
|
||||||
if i.disabled {
|
if i.disabled {
|
||||||
return 0, 0
|
return 0, 0
|
||||||
}
|
}
|
||||||
|
@ -177,6 +194,7 @@ func (i *isolation) newAppendID() (uint64, uint64) {
|
||||||
|
|
||||||
app := i.appendersPool.Get().(*isolationAppender)
|
app := i.appendersPool.Get().(*isolationAppender)
|
||||||
app.appendID = i.appendsOpenList.appendID
|
app.appendID = i.appendsOpenList.appendID
|
||||||
|
app.minTime = minTime
|
||||||
app.prev = i.appendsOpenList.prev
|
app.prev = i.appendsOpenList.prev
|
||||||
app.next = i.appendsOpenList
|
app.next = i.appendsOpenList
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,67 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestIsolation(t *testing.T) {
|
||||||
|
type result struct {
|
||||||
|
id uint64
|
||||||
|
lowWatermark uint64
|
||||||
|
}
|
||||||
|
var appendA, appendB result
|
||||||
|
iso := newIsolation(false)
|
||||||
|
|
||||||
|
// Low watermark starts at 1.
|
||||||
|
require.Equal(t, uint64(0), iso.lowWatermark())
|
||||||
|
require.Equal(t, int64(math.MaxInt64), iso.lowestAppendTime())
|
||||||
|
|
||||||
|
// Pretend we are starting to append.
|
||||||
|
appendA.id, appendA.lowWatermark = iso.newAppendID(10)
|
||||||
|
require.Equal(t, result{1, 1}, appendA)
|
||||||
|
require.Equal(t, uint64(1), iso.lowWatermark())
|
||||||
|
|
||||||
|
require.Equal(t, 0, countOpenReads(iso))
|
||||||
|
require.Equal(t, int64(10), iso.lowestAppendTime())
|
||||||
|
|
||||||
|
// Now we start a read.
|
||||||
|
stateA := iso.State(10, 20)
|
||||||
|
require.Equal(t, 1, countOpenReads(iso))
|
||||||
|
|
||||||
|
// Second appender.
|
||||||
|
appendB.id, appendB.lowWatermark = iso.newAppendID(20)
|
||||||
|
require.Equal(t, result{2, 1}, appendB)
|
||||||
|
require.Equal(t, uint64(1), iso.lowWatermark())
|
||||||
|
require.Equal(t, int64(10), iso.lowestAppendTime())
|
||||||
|
|
||||||
|
iso.closeAppend(appendA.id)
|
||||||
|
// Low watermark remains at 1 because stateA is still open
|
||||||
|
require.Equal(t, uint64(1), iso.lowWatermark())
|
||||||
|
|
||||||
|
require.Equal(t, 1, countOpenReads(iso))
|
||||||
|
require.Equal(t, int64(20), iso.lowestAppendTime())
|
||||||
|
|
||||||
|
// Finish the read and low watermark should rise.
|
||||||
|
stateA.Close()
|
||||||
|
require.Equal(t, uint64(2), iso.lowWatermark())
|
||||||
|
|
||||||
|
require.Equal(t, 0, countOpenReads(iso))
|
||||||
|
|
||||||
|
iso.closeAppend(appendB.id)
|
||||||
|
require.Equal(t, uint64(2), iso.lowWatermark())
|
||||||
|
require.Equal(t, int64(math.MaxInt64), iso.lowestAppendTime())
|
||||||
|
}
|
||||||
|
|
||||||
|
func countOpenReads(iso *isolation) int {
|
||||||
|
count := 0
|
||||||
|
iso.TraverseOpenReads(func(s *isolationState) bool {
|
||||||
|
count++
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkIsolation(b *testing.B) {
|
func BenchmarkIsolation(b *testing.B) {
|
||||||
for _, goroutines := range []int{10, 100, 1000, 10000} {
|
for _, goroutines := range []int{10, 100, 1000, 10000} {
|
||||||
b.Run(strconv.Itoa(goroutines), func(b *testing.B) {
|
b.Run(strconv.Itoa(goroutines), func(b *testing.B) {
|
||||||
|
@ -36,7 +95,7 @@ func BenchmarkIsolation(b *testing.B) {
|
||||||
<-start
|
<-start
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
appendID, _ := iso.newAppendID()
|
appendID, _ := iso.newAppendID(0)
|
||||||
|
|
||||||
iso.closeAppend(appendID)
|
iso.closeAppend(appendID)
|
||||||
}
|
}
|
||||||
|
@ -66,7 +125,7 @@ func BenchmarkIsolationWithState(b *testing.B) {
|
||||||
<-start
|
<-start
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
appendID, _ := iso.newAppendID()
|
appendID, _ := iso.newAppendID(0)
|
||||||
|
|
||||||
iso.closeAppend(appendID)
|
iso.closeAppend(appendID)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue