Decode WAL in Separate Goroutine (#6230)
* Make WAL replay benchmark more representative

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>

* Move decoding records from the WAL into a goroutine

Decoding the WAL records accounts for a significant amount of time on
startup and can be done in parallel with creating series/samples, speeding
up startup. However, records must still be handled in order, so only a
single goroutine does the decoding.

benchmark                                                                old ns/op   new ns/op   delta
BenchmarkLoadWAL/batches=10,seriesPerBatch=100,samplesPerSeries=7200-8   481607033   391971490   -18.61%
BenchmarkLoadWAL/batches=10,seriesPerBatch=10000,samplesPerSeries=50-8   836394378   629067006   -24.79%
BenchmarkLoadWAL/batches=10,seriesPerBatch=1000,samplesPerSeries=480-8   348238658   234218667   -32.74%

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
Commit: c5b3f0221f
Parent: d2257cb7d8

Changed files: tsdb/head.go (127)
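The core of the change, as the commit message describes it, is a single producer goroutine that decodes WAL records and hands them to the consumer over a buffered channel, so decoding overlaps with series/sample creation while record order is preserved. Below is a minimal, self-contained sketch of that pattern; `rawRecord`, `decode`, and `apply` are illustrative stand-ins and are not part of the Prometheus code base.

```go
package main

import "fmt"

type rawRecord []byte

// decode stands in for record.Decoder; here it just interprets the bytes as text.
func decode(r rawRecord) (string, error) {
	return string(r), nil
}

// apply stands in for creating series / appending samples in order.
func apply(v string) {
	fmt.Println("applied:", v)
}

func loadWAL(records []rawRecord) error {
	decoded := make(chan string, 10) // small buffer keeps the decoder ahead of the consumer
	errCh := make(chan error, 1)

	// Single decoder goroutine: only one sender, so WAL order is preserved.
	go func() {
		defer close(decoded)
		for _, r := range records {
			v, err := decode(r)
			if err != nil {
				errCh <- err
				return
			}
			decoded <- v
		}
	}()

	// Consume strictly in the order records were decoded.
	for v := range decoded {
		apply(v)
	}

	// Non-blocking check, mirroring the select/default in the real change.
	select {
	case err := <-errCh:
		return err
	default:
		return nil
	}
}

func main() {
	_ = loadWAL([]rawRecord{rawRecord("series"), rawRecord("samples")})
}
```

The buffered channel lets the decoder run a few records ahead, while the single sender and the capacity-one error channel mirror the shape of the change shown in the diff below.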
--- a/tsdb/head.go
+++ b/tsdb/head.go
@@ -389,9 +389,6 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
 	var (
 		dec       record.Decoder
-		series    []record.RefSeries
-		samples   []record.RefSample
-		tstones   []tombstones.Stone
 		allStones = tombstones.NewMemTombstones()
 		shards    = make([][]record.RefSample, n)
 	)
@@ -400,21 +397,82 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
 			level.Warn(h.logger).Log("msg", "closing memTombstones during wal read", "err", err)
 		}
 	}()
-	for r.Next() {
-		series, samples, tstones = series[:0], samples[:0], tstones[:0]
-		rec := r.Record()
-
-		switch dec.Type(rec) {
-		case record.Series:
-			series, err = dec.Series(rec, series)
-			if err != nil {
-				return &wal.CorruptionErr{
-					Err:     errors.Wrap(err, "decode series"),
-					Segment: r.Segment(),
-					Offset:  r.Offset(),
-				}
-			}
-			for _, s := range series {
+	var (
+		decoded    = make(chan interface{}, 10)
+		errCh      = make(chan error, 1)
+		seriesPool = sync.Pool{
+			New: func() interface{} {
+				return []record.RefSeries{}
+			},
+		}
+		samplesPool = sync.Pool{
+			New: func() interface{} {
+				return []record.RefSample{}
+			},
+		}
+		tstonesPool = sync.Pool{
+			New: func() interface{} {
+				return []tombstones.Stone{}
+			},
+		}
+	)
+	go func() {
+		defer close(decoded)
+		for r.Next() {
+			rec := r.Record()
+			switch dec.Type(rec) {
+			case record.Series:
+				series := seriesPool.Get().([]record.RefSeries)[:0]
+				series, err = dec.Series(rec, series)
+				if err != nil {
+					errCh <- &wal.CorruptionErr{
+						Err:     errors.Wrap(err, "decode series"),
+						Segment: r.Segment(),
+						Offset:  r.Offset(),
+					}
+					return
+				}
+				decoded <- series
+			case record.Samples:
+				samples := samplesPool.Get().([]record.RefSample)[:0]
+				samples, err = dec.Samples(rec, samples)
+				if err != nil {
+					errCh <- &wal.CorruptionErr{
+						Err:     errors.Wrap(err, "decode samples"),
+						Segment: r.Segment(),
+						Offset:  r.Offset(),
+					}
+					return
+				}
+				decoded <- samples
+			case record.Tombstones:
+				tstones := tstonesPool.Get().([]tombstones.Stone)[:0]
+				tstones, err = dec.Tombstones(rec, tstones)
+				if err != nil {
+					errCh <- &wal.CorruptionErr{
+						Err:     errors.Wrap(err, "decode tombstones"),
+						Segment: r.Segment(),
+						Offset:  r.Offset(),
+					}
+					return
+				}
+				decoded <- tstones
+			default:
+				errCh <- &wal.CorruptionErr{
+					Err:     errors.Errorf("invalid record type %v", dec.Type(rec)),
+					Segment: r.Segment(),
+					Offset:  r.Offset(),
+				}
+				return
+			}
+		}
+	}()
+
+	for d := range decoded {
+		switch v := d.(type) {
+		case []record.RefSeries:
+			for _, s := range v {
 				series, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels)
 
 				if !created {
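The `var` block above introduces three `sync.Pool`s so decoded slices are reused across records instead of being reallocated for every record. Below is a generic sketch of that Get/truncate/Put idiom; the `samplesPool`/`[]int` names are illustrative and not the Prometheus code. Storing a slice value (rather than a pointer to one) in a pool is what staticcheck's SA6002 check warns about, which is why the diff carries `//lint:ignore SA6002` comments.

```go
package main

import (
	"fmt"
	"sync"
)

// samplesPool hands out reusable []int buffers; the element type is only for illustration.
var samplesPool = sync.Pool{
	New: func() interface{} {
		return []int{}
	},
}

func main() {
	// Get a pooled slice and truncate it so old contents are dropped but
	// the underlying capacity is reused.
	buf := samplesPool.Get().([]int)[:0]
	buf = append(buf, 1, 2, 3)
	fmt.Println(buf)

	// Putting a slice (a non-pointer value) into a Pool boxes it into an
	// interface, which is what staticcheck SA6002 flags; the commit keeps
	// the simpler form and silences the check.
	//lint:ignore SA6002 accepted for simplicity, as in the diff above.
	samplesPool.Put(buf)
}
```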
@@ -426,16 +484,10 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
 					h.lastSeriesID = s.Ref
 				}
 			}
-		case record.Samples:
-			samples, err = dec.Samples(rec, samples)
-			s := samples
-			if err != nil {
-				return &wal.CorruptionErr{
-					Err:     errors.Wrap(err, "decode samples"),
-					Segment: r.Segment(),
-					Offset:  r.Offset(),
-				}
-			}
+			//lint:ignore SA6002 relax staticcheck verification.
+			seriesPool.Put(v)
+		case []record.RefSample:
+			samples := v
 			// We split up the samples into chunks of 5000 samples or less.
 			// With O(300 * #cores) in-flight sample batches, large scrapes could otherwise
 			// cause thousands of very large in flight buffers occupying large amounts
@@ -465,17 +517,10 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
 				}
 				samples = samples[m:]
 			}
-			samples = s // Keep whole slice for reuse.
-		case record.Tombstones:
-			tstones, err = dec.Tombstones(rec, tstones)
-			if err != nil {
-				return &wal.CorruptionErr{
-					Err:     errors.Wrap(err, "decode tombstones"),
-					Segment: r.Segment(),
-					Offset:  r.Offset(),
-				}
-			}
-			for _, s := range tstones {
+			//lint:ignore SA6002 relax staticcheck verification.
+			samplesPool.Put(v)
+		case []tombstones.Stone:
+			for _, s := range v {
 				for _, itv := range s.Intervals {
 					if itv.Maxt < h.minValidTime {
 						continue
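The hunks above keep the pre-existing logic that splits decoded samples into chunks of at most 5000 before handing them to the shard workers, advancing through the slice with `samples = samples[m:]`. A standalone sketch of just that re-slicing loop, with the shard distribution omitted and made-up data:

```go
package main

import "fmt"

func main() {
	// Made-up sample count; the real code walks decoded WAL samples.
	samples := make([]int, 12345)

	for len(samples) > 0 {
		m := 5000
		if len(samples) < m {
			m = len(samples)
		}
		batch := samples[:m]
		fmt.Println("dispatching batch of", len(batch), "samples")
		// Advance past the batch we just handed off.
		samples = samples[m:]
	}
}
```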
@@ -487,15 +532,19 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) {
 					allStones.AddInterval(s.Ref, itv)
 				}
 			}
+			//lint:ignore SA6002 relax staticcheck verification.
+			tstonesPool.Put(v)
 		default:
-			return &wal.CorruptionErr{
-				Err:     errors.Errorf("invalid record type %v", dec.Type(rec)),
-				Segment: r.Segment(),
-				Offset:  r.Offset(),
-			}
+			panic(fmt.Errorf("unexpected decoded type: %T", d))
 		}
 	}
 
+	select {
+	case err := <-errCh:
+		return err
+	default:
+	}
+
 	// Signal termination to each worker and wait for it to close its output channel.
 	for i := 0; i < n; i++ {
 		close(inputs[i])
@@ -106,20 +106,20 @@ func BenchmarkLoadWAL(b *testing.B) {
 		seriesPerBatch   int
 		samplesPerSeries int
 	}{
-		{ // Less series and more samples.
+		{ // Less series and more samples. 2 hour WAL with 1 second scrape interval.
 			batches:          10,
 			seriesPerBatch:   100,
-			samplesPerSeries: 100000,
+			samplesPerSeries: 7200,
 		},
 		{ // More series and less samples.
 			batches:          10,
 			seriesPerBatch:   10000,
-			samplesPerSeries: 100,
+			samplesPerSeries: 50,
 		},
 		{ // In between.
 			batches:          10,
 			seriesPerBatch:   1000,
-			samplesPerSeries: 10000,
+			samplesPerSeries: 480,
 		},
 	}
 
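The new `samplesPerSeries: 7200` matches the comment added in this hunk: a 2 hour WAL scraped once per second is 2 × 3600 s / 1 s = 7200 samples per series, which makes the benchmark resemble a realistic head replay rather than the earlier 100000-sample case.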
@@ -167,13 +167,14 @@ func BenchmarkLoadWAL(b *testing.B) {
 				}
 			}
 
-			h, err := NewHead(nil, nil, w, 1000)
-			testutil.Ok(b, err)
-
 			b.ResetTimer()
 
 			// Load the WAL.
-			h.Init(0)
+			for i := 0; i < b.N; i++ {
+				h, err := NewHead(nil, nil, w, 1000)
+				testutil.Ok(b, err)
+				h.Init(0)
+			}
 		})
 	}
 }
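The benchmark now follows the standard Go pattern of doing setup before `b.ResetTimer()` and repeating the measured work `b.N` times; here the head creation and `h.Init(0)` sit inside the loop so every iteration replays the WAL from scratch. A generic illustration of that idiom follows, using hypothetical `buildFixture`/`loadFixture` helpers rather than Prometheus code.

```go
package example

import "testing"

// buildFixture and loadFixture are hypothetical stand-ins for writing a WAL
// and replaying it; they are not Prometheus functions.
func buildFixture() []int {
	data := make([]int, 1<<16)
	for i := range data {
		data[i] = i
	}
	return data
}

func loadFixture(data []int) int {
	sum := 0
	for _, v := range data {
		sum += v
	}
	return sum
}

func BenchmarkLoadFixture(b *testing.B) {
	data := buildFixture() // setup is excluded from the measurement
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = loadFixture(data) // the measured operation runs once per iteration
	}
}
```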