tune the "Wal segment size" with a flag (#5029)
Add WALSegmentSize as an option, and the corresponding flag "storage.tsdb.wal-segment-size", to tune the maximum size of WAL segment files. The underlying problem is reducing the disk space used by WAL segment files: on a Raspberry Pi, for instance, we often want to reduce the write load on the SD card, so the WAL directory is mounted on an in-memory (space-limited) partition. The default maximum segment file size pushed the directory to 128 MB per segment, which is too much RAM consumption on a Pi. The initial discussion is at https://github.com/prometheus/tsdb/pull/450
commit 5ddde1965b
parent 121603c417
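The new flag feeds the `WALSegmentSize` option that this commit threads through the storage layer and the vendored tsdb package (see the `Options` and db.go hunks below). As an illustration only, here is a minimal sketch of setting the equivalent option when embedding the tsdb library directly, assuming the version vendored by this commit; the `data` directory and the 16 MB value are arbitrary examples, and Prometheus itself additionally enforces the 10MB-256MB flag bounds shown in the next hunk:

```go
package main

import (
	stdlog "log"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/tsdb"
)

func main() {
	// Start from the library defaults and only override the WAL segment size.
	opts := *tsdb.DefaultOptions
	opts.WALSegmentSize = 16 * 1024 * 1024 // 16 MB segments instead of the 128 MB default

	db, err := tsdb.Open("data", log.NewNopLogger(), nil, &opts)
	if err != nil {
		stdlog.Fatal(err)
	}
	defer db.Close()
}
```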
@@ -164,6 +164,10 @@ func main() {
 		"Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period).").
 		Hidden().PlaceHolder("<duration>").SetValue(&cfg.tsdb.MaxBlockDuration)
 
+	a.Flag("storage.tsdb.wal-segment-size",
+		"Size at which to split the tsdb WAL segment files (e.g. 100MB)").
+		Hidden().PlaceHolder("<bytes>").BytesVar(&cfg.tsdb.WALSegmentSize)
+
 	a.Flag("storage.tsdb.retention", "How long to retain samples in storage.").
 		Default("15d").SetValue(&cfg.tsdb.Retention)
 
@@ -560,6 +564,11 @@ func main() {
 		g.Add(
 			func() error {
 				level.Info(logger).Log("msg", "Starting TSDB ...")
+				if cfg.tsdb.WALSegmentSize != 0 {
+					if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 {
+						return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB")
+					}
+				}
 				db, err := tsdb.Open(
 					cfg.localStoragePath,
 					log.With(logger, "component", "tsdb"),
@@ -247,3 +247,36 @@ func TestSendAlerts(t *testing.T) {
 		})
 	}
 }
+
+func TestWALSegmentSizeBounds(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode.")
+	}
+
+	for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} {
+		prom := exec.Command(promPath, "--storage.tsdb.wal-segment-size="+size, "--config.file="+promConfig)
+		err := prom.Start()
+		testutil.Ok(t, err)
+
+		if expectedExitStatus == 0 {
+			done := make(chan error, 1)
+			go func() { done <- prom.Wait() }()
+			select {
+			case err := <-done:
+				t.Errorf("prometheus should be still running: %v", err)
+			case <-time.After(5 * time.Second):
+				prom.Process.Signal(os.Interrupt)
+			}
+			continue
+		}
+
+		err = prom.Wait()
+		testutil.NotOk(t, err, "")
+		if exitError, ok := err.(*exec.ExitError); ok {
+			status := exitError.Sys().(syscall.WaitStatus)
+			testutil.Equals(t, expectedExitStatus, status.ExitStatus())
+		} else {
+			t.Errorf("unable to retrieve the exit status for prometheus: %v", err)
+		}
+	}
+}
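The expected exit statuses above cover two different failure modes: sizes that parse but fall outside the 10MB-256MB bounds make Prometheus exit with status 1 via the check added in main.go, while a value without a unit suffix ("10") never gets past flag parsing, which is why that case expects status 2 (presumably the flag parser's usage-error exit). A rough sketch of the size parsing the flag relies on, assuming the `--storage.tsdb.wal-segment-size` flag's BytesVar is backed by `units.ParseBase2Bytes` from the `github.com/alecthomas/units` dependency added to go.mod below:

```go
package main

import (
	"fmt"

	"github.com/alecthomas/units"
)

func main() {
	// "12MB" parses to a base-2 value of 12 * 1024 * 1024 bytes, which sits
	// inside the 10MB-256MB bounds, so the test expects exit status 0.
	v, err := units.ParseBase2Bytes("12MB")
	fmt.Println(int64(v), err) // 12582912 <nil>

	// "9MB" and "257MB" also parse, but fail the bounds check (exit status 1).
	// A bare "10" has no unit suffix and should be rejected by the parser
	// itself, which is why that case expects the parse-error exit (2).
	_, err = units.ParseBase2Bytes("10")
	fmt.Println(err)
}
```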
go.mod (3 changed lines)

@@ -5,6 +5,7 @@ require (
 	github.com/Azure/go-autorest v10.8.1+incompatible
 	github.com/StackExchange/wmi v0.0.0-20180725035823-b12b22c5341f // indirect
 	github.com/VividCortex/ewma v1.1.1 // indirect
+	github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
 	github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da // indirect
 	github.com/aws/aws-sdk-go v0.0.0-20180507225419-00862f899353
 	github.com/biogo/store v0.0.0-20160505134755-913427a1d5e8 // indirect
@@ -88,7 +89,7 @@ require (
 	github.com/prometheus/client_golang v0.9.1
 	github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910
 	github.com/prometheus/common v0.0.0-20181119215939-b36ad289a3ea
-	github.com/prometheus/tsdb v0.3.1
+	github.com/prometheus/tsdb v0.3.2-0.20181219094047-6d489a1004dc
 	github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a // indirect
 	github.com/rlmcpherson/s3gof3r v0.5.0 // indirect
 	github.com/rubyist/circuitbreaker v2.2.1+incompatible // indirect
go.sum (4 changed lines)

@@ -209,8 +209,8 @@ github.com/prometheus/common v0.0.0-20181119215939-b36ad289a3ea h1:4RkbEb5XX0Wvu
 github.com/prometheus/common v0.0.0-20181119215939-b36ad289a3ea/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFdaDqxJVlbOQ1DtGmZWs/Qau0hIlk+WQ=
 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
-github.com/prometheus/tsdb v0.3.1 h1:uGgfubT2MesNpx3T46c5R32RcUoKAPGyWX+4x1orJLE=
-github.com/prometheus/tsdb v0.3.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
+github.com/prometheus/tsdb v0.3.2-0.20181219094047-6d489a1004dc h1:phU3kj067sczIc4fhaq5rRcH4Lp9A45MsrcQqjC+cao=
+github.com/prometheus/tsdb v0.3.2-0.20181219094047-6d489a1004dc/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
 github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ=
 github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
 github.com/rlmcpherson/s3gof3r v0.5.0 h1:1izOJpTiohSibfOHuNyEA/yQnAirh05enzEdmhez43k=
@@ -19,6 +19,7 @@ import (
 	"time"
 	"unsafe"
 
+	"github.com/alecthomas/units"
 	"github.com/go-kit/kit/log"
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
@@ -117,6 +118,9 @@ type Options struct {
 	// The maximum timestamp range of compacted blocks.
 	MaxBlockDuration model.Duration
 
+	// The maximum size of each WAL segment file.
+	WALSegmentSize units.Base2Bytes
+
 	// Duration for how long to retain data.
 	Retention model.Duration
 
@@ -182,6 +186,7 @@ func Open(path string, l log.Logger, r prometheus.Registerer, opts *Options) (*t
 
 	db, err := tsdb.Open(path, l, r, &tsdb.Options{
 		WALFlushInterval:  10 * time.Second,
+		WALSegmentSize:    int(opts.WALSegmentSize),
 		RetentionDuration: uint64(time.Duration(opts.Retention).Seconds() * 1000),
 		BlockRanges:       rngs,
 		NoLockfile:        opts.NoLockfile,
vendor/github.com/prometheus/tsdb/CHANGELOG.md (3 changed lines, generated, vendored)

@@ -1,11 +1,10 @@
 ## master / unreleased
+- [CHANGE] New `WALSegmentSize` option to override the `DefaultOptions.WALSegmentSize`. Added to allow using smaller wal files. For example using tmpfs on a RPI to minimise the SD card wear out from the constant WAL writes. As part of this change the `DefaultOptions.WALSegmentSize` constant was also exposed.
 
 ## 0.3.1
 - [BUGFIX] Fixed most windows test and some actual bugs for unclosed file readers.
 
 ## 0.3.0
 
 - [CHANGE] `LastCheckpoint()` used to return just the segment name and now it returns the full relative path.
 - [CHANGE] `NewSegmentsRangeReader()` can now read over miltiple wal ranges by using the new `SegmentRange{}` struct.
 - [CHANGE] `CorruptionErr{}` now also exposes the Segment `Dir` which is added when displaying any errors.
vendor/github.com/prometheus/tsdb/db.go (10 changed lines, generated, vendored)

@@ -45,6 +45,7 @@ import (
 // millisecond precision timestamps.
 var DefaultOptions = &Options{
 	WALFlushInterval:  5 * time.Second,
+	WALSegmentSize:    wal.DefaultSegmentSize,
 	RetentionDuration: 15 * 24 * 60 * 60 * 1000, // 15 days in milliseconds
 	BlockRanges:       ExponentialBlockRanges(int64(2*time.Hour)/1e6, 3, 5),
 	NoLockfile:        false,
@@ -55,6 +56,9 @@ type Options struct {
 	// The interval at which the write ahead log is flushed to disk.
 	WALFlushInterval time.Duration
 
+	// Segments (wal files) max size
+	WALSegmentSize int
+
 	// Duration of persisted data to keep.
 	RetentionDuration uint64
 
@@ -263,7 +267,11 @@ func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options) (db
 		return nil, errors.Wrap(err, "create leveled compactor")
 	}
 
-	wlog, err := wal.New(l, r, filepath.Join(dir, "wal"))
+	segmentSize := wal.DefaultSegmentSize
+	if opts.WALSegmentSize > 0 {
+		segmentSize = opts.WALSegmentSize
+	}
+	wlog, err := wal.NewSize(l, r, filepath.Join(dir, "wal"), segmentSize)
 	if err != nil {
 		return nil, err
 	}
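For code that talks to the wal package directly, the same knob is available through `wal.NewSize`, which `Open` now calls instead of `wal.New`. A minimal sketch, assuming the vendored wal package from this change; the `data` directory and the 32 MB size are arbitrary examples:

```go
package main

import (
	stdlog "log"
	"path/filepath"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/tsdb/wal"
)

func main() {
	dir := "data" // hypothetical storage directory
	// Ask for 32 MB segments instead of wal.DefaultSegmentSize (128 MB).
	// 32 MB is a clean multiple of the 32KB pageSize shown in wal.go below.
	w, err := wal.NewSize(log.NewNopLogger(), nil, filepath.Join(dir, "wal"), 32*1024*1024)
	if err != nil {
		stdlog.Fatal(err)
	}
	defer w.Close()
}
```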
vendor/github.com/prometheus/tsdb/head.go (12 changed lines, generated, vendored)

@@ -89,6 +89,7 @@ type headMetrics struct {
 	maxTime              prometheus.GaugeFunc
 	samplesAppended      prometheus.Counter
 	walTruncateDuration  prometheus.Summary
+	walCorruptionsTotal  prometheus.Counter
 	headTruncateFail     prometheus.Counter
 	headTruncateTotal    prometheus.Counter
 	checkpointDeleteFail prometheus.Counter
@@ -152,6 +153,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
 		Name: "prometheus_tsdb_wal_truncate_duration_seconds",
 		Help: "Duration of WAL truncation.",
 	})
+	m.walCorruptionsTotal = prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "prometheus_tsdb_wal_corruptions_total",
+		Help: "Total number of WAL corruptions.",
+	})
 	m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
 		Name: "prometheus_tsdb_head_samples_appended_total",
 		Help: "Total number of appended samples.",
@@ -195,6 +200,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
 		m.maxTime,
 		m.gcDuration,
 		m.walTruncateDuration,
+		m.walCorruptionsTotal,
 		m.samplesAppended,
 		m.headTruncateFail,
 		m.headTruncateTotal,
@@ -480,10 +486,10 @@ func (h *Head) Init(minValidTime int64) error {
 		return nil
 	}
 	level.Warn(h.logger).Log("msg", "encountered WAL error, attempting repair", "err", err)
+	h.metrics.walCorruptionsTotal.Inc()
 	if err := h.wal.Repair(err); err != nil {
 		return errors.Wrap(err, "repair corrupted WAL")
 	}
 
 	return nil
 }
@@ -500,7 +506,7 @@ func (h *Head) Truncate(mint int64) (err error) {
 		return nil
 	}
 	atomic.StoreInt64(&h.minTime, mint)
-	h.minValidTime = mint
+	atomic.StoreInt64(&h.minValidTime, mint)
 
 	// Ensure that max time is at least as high as min time.
 	for h.MaxTime() < mint {
@@ -656,7 +662,7 @@ func (h *Head) appender() *headAppender {
 		head: h,
 		// Set the minimum valid time to whichever is greater the head min valid time or the compaciton window.
 		// This ensures that no samples will be added within the compaction window to avoid races.
-		minValidTime: max(h.minValidTime, h.MaxTime()-h.chunkRange/2),
+		minValidTime: max(atomic.LoadInt64(&h.minValidTime), h.MaxTime()-h.chunkRange/2),
 		mint:         math.MaxInt64,
 		maxt:         math.MinInt64,
 		samples:      h.getAppendBuffer(),
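The two hunks touching `minValidTime` above are a concurrency fix that rides along with this vendor update rather than part of the segment-size feature: `Truncate` writes the field while `appender` reads it from another goroutine, so both sides now go through `sync/atomic`. A small illustrative sketch of that pattern, using a hypothetical trimmed-down struct rather than tsdb's real `Head` type:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// head is a hypothetical stand-in for tsdb's Head, keeping only the field
// relevant to the race fix above.
type head struct {
	minValidTime int64
}

// truncate mirrors the writer side: Truncate now stores the value atomically.
func (h *head) truncate(mint int64) {
	atomic.StoreInt64(&h.minValidTime, mint)
}

// appenderMinValidTime mirrors the reader side: the appender loads the value
// atomically before comparing it against the compaction window.
func (h *head) appenderMinValidTime(maxTime, chunkRange int64) int64 {
	mvt := atomic.LoadInt64(&h.minValidTime)
	if w := maxTime - chunkRange/2; w > mvt {
		return w
	}
	return mvt
}

func main() {
	h := &head{}
	h.truncate(1000)
	fmt.Println(h.appenderMinValidTime(3000, 2000)) // 2000
}
```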
vendor/github.com/prometheus/tsdb/wal/wal.go (4 changed lines, generated, vendored)

@@ -35,7 +35,7 @@ import (
 )
 
 const (
-	defaultSegmentSize = 128 * 1024 * 1024 // 128 MB
+	DefaultSegmentSize = 128 * 1024 * 1024 // 128 MB
 	pageSize           = 32 * 1024         // 32KB
 	recordHeaderSize   = 7
 )
@@ -174,7 +174,7 @@ type WAL struct {
 
 // New returns a new WAL over the given directory.
 func New(logger log.Logger, reg prometheus.Registerer, dir string) (*WAL, error) {
-	return NewSize(logger, reg, dir, defaultSegmentSize)
+	return NewSize(logger, reg, dir, DefaultSegmentSize)
 }
 
 // NewSize returns a new WAL over the given directory.
vendor/modules.txt (2 changed lines, vendored)

@@ -187,7 +187,7 @@ github.com/prometheus/procfs
 github.com/prometheus/procfs/nfs
 github.com/prometheus/procfs/xfs
 github.com/prometheus/procfs/internal/util
-# github.com/prometheus/tsdb v0.3.1
+# github.com/prometheus/tsdb v0.3.2-0.20181219094047-6d489a1004dc
 github.com/prometheus/tsdb
 github.com/prometheus/tsdb/labels
 github.com/prometheus/tsdb/chunkenc