From bf98acf33b064a7e23df58456fe34b62da3f4fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20LOYET?= <822436+fatpat@users.noreply.github.com> Date: Wed, 19 Apr 2023 15:15:02 +0200 Subject: [PATCH] Add --storage.tsdb.retention.percentage (#10287) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jérôme LOYET <822436+fatpat@users.noreply.github.com> --- cmd/prometheus/main.go | 26 ++++++++++-- docs/command-line/prometheus.md | 1 + tsdb/db.go | 33 ++++++++++++++-- util/runtime/statfs.go | 12 ++++-- util/runtime/statfs_default.go | 15 ++++++- util/runtime/statfs_linux_386.go | 15 ++++++- util/runtime/statfs_uint32.go | 15 ++++++- util/runtime/statfs_unix_test.go | 59 ++++++++++++++++++++++++++++ util/runtime/statfs_windows.go | 61 +++++++++++++++++++++++++++++ util/runtime/statfs_windows_test.go | 50 +++++++++++++++++++++++ web/web.go | 7 ++++ 11 files changed, 278 insertions(+), 16 deletions(-) create mode 100644 util/runtime/statfs_unix_test.go create mode 100644 util/runtime/statfs_windows.go create mode 100644 util/runtime/statfs_windows_test.go diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 045389770e..6e5327d9fd 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -415,6 +415,9 @@ func main() { serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B."). BytesVar(&cfg.tsdb.MaxBytes) + serverOnlyFlag(a, "storage.tsdb.retention.percentage", "Maximum percentage of the disk space that can be used to store blocks (prevails over storage.tsdb.retention.size)."). + UintVar(&cfg.tsdb.MaxPercentage) + serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory."). 
Default("false").BoolVar(&cfg.tsdb.NoLockfile) @@ -635,9 +638,9 @@ func main() { cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") if !agentMode { - if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { + if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 { cfg.tsdb.RetentionDuration = defaultRetentionDuration - logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) + logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration) } // Check for overflows. This limits our max retention to 100y. @@ -650,6 +653,17 @@ func main() { logger.Warn("Time retention value is too high. Limiting to: " + y.String()) } + if cfg.tsdb.MaxPercentage > 100 { + cfg.tsdb.MaxPercentage = 100 + logger.Warn("Percentage retention value is too high. Limiting to: 100%") + } + if cfg.tsdb.MaxPercentage > 0 { + if prom_runtime.FsSize(localStoragePath) == 0 { + fmt.Fprintln(os.Stderr, fmt.Errorf("unable to detect size of partition %s, please disable retention percentage (%d%%)", localStoragePath, cfg.tsdb.MaxPercentage)) + os.Exit(2) + } + } + // Max block size settings. 
if cfg.tsdb.MaxBlockDuration == 0 { maxBlockDuration, err := model.ParseDuration("31d") @@ -824,6 +838,7 @@ func main() { cfg.web.Context = ctxWeb cfg.web.TSDBRetentionDuration = cfg.tsdb.RetentionDuration cfg.web.TSDBMaxBytes = cfg.tsdb.MaxBytes + cfg.web.TSDBMaxPercentage = cfg.tsdb.MaxPercentage cfg.web.TSDBDir = localStoragePath cfg.web.LocalStorage = localStorage cfg.web.Storage = fanoutStorage @@ -1246,7 +1261,7 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } - switch fsType := prom_runtime.Statfs(localStoragePath); fsType { + switch fsType := prom_runtime.FsType(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: @@ -1258,6 +1273,7 @@ func main() { "MinBlockDuration", cfg.tsdb.MinBlockDuration, "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, "MaxBytes", cfg.tsdb.MaxBytes, + "MaxPercentage", cfg.tsdb.MaxPercentage, "NoLockfile", cfg.tsdb.NoLockfile, "RetentionDuration", cfg.tsdb.RetentionDuration, "WALSegmentSize", cfg.tsdb.WALSegmentSize, @@ -1302,7 +1318,7 @@ func main() { return fmt.Errorf("opening storage failed: %w", err) } - switch fsType := prom_runtime.Statfs(localStoragePath); fsType { + switch fsType := prom_runtime.FsType(localStoragePath); fsType { case "NFS_SUPER_MAGIC": logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. 
Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") default: @@ -1782,6 +1798,7 @@ type tsdbOptions struct { MaxBlockChunkSegmentSize units.Base2Bytes RetentionDuration model.Duration MaxBytes units.Base2Bytes + MaxPercentage uint NoLockfile bool WALCompression bool WALCompressionType string @@ -1806,6 +1823,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { MaxBlockChunkSegmentSize: int64(opts.MaxBlockChunkSegmentSize), RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond), MaxBytes: int64(opts.MaxBytes), + MaxPercentage: opts.MaxPercentage, NoLockfile: opts.NoLockfile, WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType), HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize, diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index a179a2f9f1..8755fd3944 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -37,6 +37,7 @@ The Prometheus monitoring server | --storage.tsdb.path | Base path for metrics storage. Use with server mode only. | `data/` | | --storage.tsdb.retention.time | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | | | --storage.tsdb.retention.size | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | | +| --storage.tsdb.retention.percentage | Maximum percentage of the disk space that can be used to store blocks (prevails over storage.tsdb.retention.size). Use with server mode only. | | | --storage.tsdb.no-lockfile | Do not create lockfile in data directory. Use with server mode only. 
| `false` | | --storage.tsdb.head-chunks-write-queue-size | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` | | --storage.agent.path | Base path for metrics storage. Use with agent mode only. | `data-agent/` | diff --git a/tsdb/db.go b/tsdb/db.go index 997bad36cb..bcf315d71a 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -46,6 +46,7 @@ import ( _ "github.com/prometheus/prometheus/tsdb/goversion" // Load the package into main to make sure minimum Go version is met. "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/tsdb/wlog" + prom_runtime "github.com/prometheus/prometheus/util/runtime" ) const ( @@ -115,6 +116,11 @@ type Options struct { // the current size of the database. MaxBytes int64 + // Maximum % of disk space to use for blocks to be retained + // 0 or less means disabled + // If both MaxBytes and MaxPercentage are set, percentage prevails + MaxPercentage uint + // NoLockfile disables creation and consideration of a lock file. NoLockfile bool @@ -1753,11 +1759,32 @@ func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc // BeyondSizeRetention returns those blocks which are beyond the size retention // set in the db options. func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struct{}) { - // Size retention is disabled or no blocks to work with. 
- if len(blocks) == 0 || db.opts.MaxBytes <= 0 { + // No blocks to work with + if len(blocks) == 0 { return } + maxBytes := db.opts.MaxBytes + + // percentage prevails + if db.opts.MaxPercentage > 0 { + // retrieve FS size + diskSize := prom_runtime.FsSize(db.dir) + if diskSize == 0 { + db.logger.Warn("Unable to retrieve filesystem size of database directory, skip percentage limitation and default to fixed size limitation", "dir", db.dir) + } else { + // apply percentage + maxBytes = int64(uint64(db.opts.MaxPercentage) * diskSize / 100) + } + } + + // Size retention is disabled + if maxBytes <= 0 { + return + } + // update MaxBytes gauge + db.metrics.maxBytes.Set(float64(maxBytes)) + deletable = make(map[ulid.ULID]struct{}) // Initializing size counter with WAL size and Head chunks @@ -1765,7 +1792,7 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc blocksSize := db.Head().Size() for i, block := range blocks { blocksSize += block.Size() - if blocksSize > db.opts.MaxBytes { + if blocksSize > maxBytes { // Add this and all following blocks for deletion. for _, b := range blocks[i:] { deletable[b.meta.ULID] = struct{}{} diff --git a/util/runtime/statfs.go b/util/runtime/statfs.go index 66bedb5ea1..338a02881a 100644 --- a/util/runtime/statfs.go +++ b/util/runtime/statfs.go @@ -11,12 +11,18 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build openbsd || windows || netbsd || solaris +//go:build openbsd || netbsd || solaris package runtime -// Statfs returns the file system type (Unix only) +// FsType returns the file system type (Unix only) // syscall.Statfs_t isn't available on openbsd -func Statfs(path string) string { +func FsType(path string) string { return "unknown" } + +// FsSize returns the file system size (Unix only) +// syscall.Statfs_t isn't available on openbsd +func FsSize(path string) uint64 { + return 0 +} diff --git a/util/runtime/statfs_default.go b/util/runtime/statfs_default.go index 78cfb1fe41..3284907c33 100644 --- a/util/runtime/statfs_default.go +++ b/util/runtime/statfs_default.go @@ -20,8 +20,8 @@ import ( "syscall" ) -// Statfs returns the file system type (Unix only). -func Statfs(path string) string { +// FsType returns the file system type (Unix only). +func FsType(path string) string { // Types of file systems that may be returned by `statfs` fsTypes := map[int64]string{ 0xadf5: "ADFS_SUPER_MAGIC", @@ -67,6 +67,7 @@ func Statfs(path string) string { 0x012FF7B4: "XENIX_SUPER_MAGIC", 0x58465342: "XFS_SUPER_MAGIC", 0x012FD16D: "_XIAFS_SUPER_MAGIC", + 0x794c7630: "OVERLAYFS_SUPER_MAGIC", } var fs syscall.Statfs_t @@ -82,3 +83,13 @@ func Statfs(path string) string { } return strconv.FormatInt(localType, 16) } + +// FsSize returns the file system size (Unix only). 
+func FsSize(path string) uint64 { + var fs syscall.Statfs_t + err := syscall.Statfs(path, &fs) + if err != nil { + return 0 + } + return uint64(fs.Bsize) * fs.Blocks +} diff --git a/util/runtime/statfs_linux_386.go b/util/runtime/statfs_linux_386.go index a003b2effe..73207a8541 100644 --- a/util/runtime/statfs_linux_386.go +++ b/util/runtime/statfs_linux_386.go @@ -20,8 +20,8 @@ import ( "syscall" ) -// Statfs returns the file system type (Unix only) -func Statfs(path string) string { +// FsType returns the file system type (Unix only) +func FsType(path string) string { // Types of file systems that may be returned by `statfs` fsTypes := map[int32]string{ 0xadf5: "ADFS_SUPER_MAGIC", @@ -63,6 +63,7 @@ func Statfs(path string) string { 0x012FF7B4: "XENIX_SUPER_MAGIC", 0x58465342: "XFS_SUPER_MAGIC", 0x012FD16D: "_XIAFS_SUPER_MAGIC", + 0x794c7630: "OVERLAYFS_SUPER_MAGIC", } var fs syscall.Statfs_t @@ -75,3 +76,13 @@ func Statfs(path string) string { } return strconv.Itoa(int(fs.Type)) } + +// FsSize returns the file system size (Unix only) +func FsSize(path string) uint64 { + var fs syscall.Statfs_t + err := syscall.Statfs(path, &fs) + if err != nil { + return 0 + } + return uint64(fs.Bsize) * fs.Blocks +} diff --git a/util/runtime/statfs_uint32.go b/util/runtime/statfs_uint32.go index fbf994ea63..6d7bea6aa3 100644 --- a/util/runtime/statfs_uint32.go +++ b/util/runtime/statfs_uint32.go @@ -20,8 +20,8 @@ import ( "syscall" ) -// Statfs returns the file system type (Unix only) -func Statfs(path string) string { +// FsType returns the file system type (Unix only) +func FsType(path string) string { // Types of file systems that may be returned by `statfs` fsTypes := map[uint32]string{ 0xadf5: "ADFS_SUPER_MAGIC", @@ -63,6 +63,7 @@ func Statfs(path string) string { 0x012FF7B4: "XENIX_SUPER_MAGIC", 0x58465342: "XFS_SUPER_MAGIC", 0x012FD16D: "_XIAFS_SUPER_MAGIC", + 0x794c7630: "OVERLAYFS_SUPER_MAGIC", } var fs syscall.Statfs_t @@ -75,3 +76,13 @@ func Statfs(path string) 
string { } return strconv.Itoa(int(fs.Type)) } + +// FsSize returns the file system size (Unix only) +func FsSize(path string) uint64 { + var fs syscall.Statfs_t + err := syscall.Statfs(path, &fs) + if err != nil { + return 0 + } + return uint64(fs.Bsize) * fs.Blocks +} diff --git a/util/runtime/statfs_unix_test.go b/util/runtime/statfs_unix_test.go new file mode 100644 index 0000000000..fd8c3f88bf --- /dev/null +++ b/util/runtime/statfs_unix_test.go @@ -0,0 +1,59 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !windows && !openbsd && !netbsd && !solaris +// +build !windows,!openbsd,!netbsd,!solaris + +package runtime + +import ( + "os" + "testing" + + "github.com/grafana/regexp" + "github.com/stretchr/testify/require" +) + +var regexpFsType = regexp.MustCompile("^[A-Z][A-Z0-9_]*_MAGIC$") + +func TestFsType(t *testing.T) { + var fsType string + + path, err := os.Getwd() + require.NoError(t, err) + + fsType = FsType(path) + require.Regexp(t, regexpFsType, fsType) + + fsType = FsType("/no/where/to/be/found") + require.Equal(t, "0", fsType) + + fsType = FsType(" %% not event a real path\n\n") + require.Equal(t, "0", fsType) +} + +func TestFsSize(t *testing.T) { + var size uint64 + + path, err := os.Getwd() + require.NoError(t, err) + + size = FsSize(path) + require.Positive(t, size) + + size = FsSize("/no/where/to/be/found") + require.Equal(t, uint64(0), size) + + size = FsSize(" %% not event a real path\n\n") + require.Equal(t, uint64(0), size) +} diff --git a/util/runtime/statfs_windows.go b/util/runtime/statfs_windows.go new file mode 100644 index 0000000000..f97432f89b --- /dev/null +++ b/util/runtime/statfs_windows.go @@ -0,0 +1,61 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build windows +// +build windows + +package runtime + +import ( + "os" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var ( + dll = windows.MustLoadDLL("kernel32.dll") + getDiskFreeSpaceExW = dll.MustFindProc("GetDiskFreeSpaceExW") +) + +// FsType returns the file system type. It is not implemented on Windows +// and always returns "unknown". +func FsType(path string) string { + return "unknown" +} + +// FsSize returns the total size in bytes that GetDiskFreeSpaceExW reports for path, +// or 0 when path cannot be stat'ed, the call fails, or free exceeds total. +func FsSize(path string) uint64 { + // ensure the path exists + if _, err := os.Stat(path); err != nil { + return 0 + } + + var avail int64 + var total int64 + var free int64 + // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getdiskfreespaceexw + ret, _, _ := getDiskFreeSpaceExW.Call( + uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))), + uintptr(unsafe.Pointer(&avail)), + uintptr(unsafe.Pointer(&total)), + uintptr(unsafe.Pointer(&free))) + + if ret == 0 || uint64(free) > uint64(total) { + return 0 + } + + return uint64(total) +} diff --git a/util/runtime/statfs_windows_test.go b/util/runtime/statfs_windows_test.go new file mode 100644 index 0000000000..8f533a7064 --- /dev/null +++ b/util/runtime/statfs_windows_test.go @@ -0,0 +1,50 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build windows +// +build windows + +package runtime + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFsType(t *testing.T) { + var fsType string + + path, err := os.Getwd() + require.NoError(t, err) + + fsType = FsType(path) + require.Equal(t, "unknown", fsType) + + fsType = FsType("A:\\no\\where\\to\\be\\found") + require.Equal(t, "unknown", fsType) +} + +func TestFsSize(t *testing.T) { + var size uint64 + + size = FsSize("C:\\") + require.Positive(t, size) + + size = FsSize("c:\\no\\where\\to\\be\\found") + require.Equal(t, uint64(0), size) + + size = FsSize(" %% not event a real path\n\n") + require.Equal(t, uint64(0), size) +} diff --git a/web/web.go b/web/web.go index 21c41c55eb..8e6282f6bb 100644 --- a/web/web.go +++ b/web/web.go @@ -257,6 +257,7 @@ type Options struct { TSDBRetentionDuration model.Duration TSDBDir string TSDBMaxBytes units.Base2Bytes + TSDBMaxPercentage uint LocalStorage LocalStorage Storage storage.Storage ExemplarStorage storage.ExemplarQueryable @@ -813,6 +814,12 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) { } status.StorageRetention += h.options.TSDBMaxBytes.String() } + if h.options.TSDBMaxPercentage != 0 { + if status.StorageRetention != "" { + status.StorageRetention += " or " + } + status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%" + } metrics, err := h.gatherer.Gather() if err != nil {