From bf98acf33b064a7e23df58456fe34b62da3f4fd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20LOYET?=
<822436+fatpat@users.noreply.github.com>
Date: Wed, 19 Apr 2023 15:15:02 +0200
Subject: [PATCH] Add --storage.tsdb.retention.percentage (#10287)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Jérôme LOYET <822436+fatpat@users.noreply.github.com>
---
cmd/prometheus/main.go | 26 ++++++++++--
docs/command-line/prometheus.md | 1 +
tsdb/db.go | 33 ++++++++++++++--
util/runtime/statfs.go | 12 ++++--
util/runtime/statfs_default.go | 15 ++++++-
util/runtime/statfs_linux_386.go | 15 ++++++-
util/runtime/statfs_uint32.go | 15 ++++++-
util/runtime/statfs_unix_test.go | 59 ++++++++++++++++++++++++++++
util/runtime/statfs_windows.go | 61 +++++++++++++++++++++++++++++
util/runtime/statfs_windows_test.go | 50 +++++++++++++++++++++++
web/web.go | 7 ++++
11 files changed, 278 insertions(+), 16 deletions(-)
create mode 100644 util/runtime/statfs_unix_test.go
create mode 100644 util/runtime/statfs_windows.go
create mode 100644 util/runtime/statfs_windows_test.go
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 045389770e..6e5327d9fd 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -415,6 +415,9 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B.").
BytesVar(&cfg.tsdb.MaxBytes)
+ serverOnlyFlag(a, "storage.tsdb.retention.percentage", "Maximum percentage of the disk space that can be used to store blocks (prevails over storage.tsdb.retention.size).").
+ UintVar(&cfg.tsdb.MaxPercentage)
+
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
Default("false").BoolVar(&cfg.tsdb.NoLockfile)
@@ -635,9 +638,9 @@ func main() {
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
if !agentMode {
- if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
+ if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
- logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
+ logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration)
}
// Check for overflows. This limits our max retention to 100y.
@@ -650,6 +653,17 @@ func main() {
logger.Warn("Time retention value is too high. Limiting to: " + y.String())
}
+ if cfg.tsdb.MaxPercentage > 100 {
+ cfg.tsdb.MaxPercentage = 100
+ logger.Warn("Percentage retention value is too high. Limiting to: 100%")
+ }
+ if cfg.tsdb.MaxPercentage > 0 {
+ if prom_runtime.FsSize(localStoragePath) == 0 {
+ fmt.Fprintln(os.Stderr, fmt.Errorf("unable to detect size of partition %s, please disable retention percentage (%d%%)", localStoragePath, cfg.tsdb.MaxPercentage))
+ os.Exit(2)
+ }
+ }
+
// Max block size settings.
if cfg.tsdb.MaxBlockDuration == 0 {
maxBlockDuration, err := model.ParseDuration("31d")
@@ -824,6 +838,7 @@ func main() {
cfg.web.Context = ctxWeb
cfg.web.TSDBRetentionDuration = cfg.tsdb.RetentionDuration
cfg.web.TSDBMaxBytes = cfg.tsdb.MaxBytes
+ cfg.web.TSDBMaxPercentage = cfg.tsdb.MaxPercentage
cfg.web.TSDBDir = localStoragePath
cfg.web.LocalStorage = localStorage
cfg.web.Storage = fanoutStorage
@@ -1246,7 +1261,7 @@ func main() {
return fmt.Errorf("opening storage failed: %w", err)
}
- switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
+ switch fsType := prom_runtime.FsType(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
default:
@@ -1258,6 +1273,7 @@ func main() {
"MinBlockDuration", cfg.tsdb.MinBlockDuration,
"MaxBlockDuration", cfg.tsdb.MaxBlockDuration,
"MaxBytes", cfg.tsdb.MaxBytes,
+ "MaxPercentage", cfg.tsdb.MaxPercentage,
"NoLockfile", cfg.tsdb.NoLockfile,
"RetentionDuration", cfg.tsdb.RetentionDuration,
"WALSegmentSize", cfg.tsdb.WALSegmentSize,
@@ -1302,7 +1318,7 @@ func main() {
return fmt.Errorf("opening storage failed: %w", err)
}
- switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
+ switch fsType := prom_runtime.FsType(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
default:
@@ -1782,6 +1798,7 @@ type tsdbOptions struct {
MaxBlockChunkSegmentSize units.Base2Bytes
RetentionDuration model.Duration
MaxBytes units.Base2Bytes
+ MaxPercentage uint
NoLockfile bool
WALCompression bool
WALCompressionType string
@@ -1806,6 +1823,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
MaxBlockChunkSegmentSize: int64(opts.MaxBlockChunkSegmentSize),
RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond),
MaxBytes: int64(opts.MaxBytes),
+ MaxPercentage: opts.MaxPercentage,
NoLockfile: opts.NoLockfile,
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index a179a2f9f1..8755fd3944 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -37,6 +37,7 @@ The Prometheus monitoring server
| --storage.tsdb.path
| Base path for metrics storage. Use with server mode only. | `data/` |
| --storage.tsdb.retention.time
| How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
| --storage.tsdb.retention.size
| Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | |
+| --storage.tsdb.retention.percentage
| Maximum percentage of the disk space that can be used to store blocks (prevails over storage.tsdb.retention.size). Use with server mode only. | |
| --storage.tsdb.no-lockfile
| Do not create lockfile in data directory. Use with server mode only. | `false` |
| --storage.tsdb.head-chunks-write-queue-size
| Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` |
| --storage.agent.path
| Base path for metrics storage. Use with agent mode only. | `data-agent/` |
diff --git a/tsdb/db.go b/tsdb/db.go
index 997bad36cb..bcf315d71a 100644
--- a/tsdb/db.go
+++ b/tsdb/db.go
@@ -46,6 +46,7 @@ import (
_ "github.com/prometheus/prometheus/tsdb/goversion" // Load the package into main to make sure minimum Go version is met.
"github.com/prometheus/prometheus/tsdb/tsdbutil"
"github.com/prometheus/prometheus/tsdb/wlog"
+ prom_runtime "github.com/prometheus/prometheus/util/runtime"
)
const (
@@ -115,6 +116,11 @@ type Options struct {
// the current size of the database.
MaxBytes int64
+ // Maximum % of disk space to use for blocks to be retained.
+ // 0 means disabled.
+ // If both MaxBytes and MaxPercentage are set, percentage prevails.
+ MaxPercentage uint
+
// NoLockfile disables creation and consideration of a lock file.
NoLockfile bool
@@ -1753,11 +1759,32 @@ func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
// BeyondSizeRetention returns those blocks which are beyond the size retention
// set in the db options.
func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struct{}) {
- // Size retention is disabled or no blocks to work with.
- if len(blocks) == 0 || db.opts.MaxBytes <= 0 {
+ // No blocks to work with
+ if len(blocks) == 0 {
return
}
+ maxBytes := db.opts.MaxBytes
+
+	// A percentage limit, when set, takes precedence over MaxBytes.
+	if pct := uint64(db.opts.MaxPercentage); pct > 0 {
+		// FsSize returns 0 when the filesystem size cannot be determined.
+		if diskSize := prom_runtime.FsSize(db.dir); diskSize == 0 {
+			db.logger.Warn("Unable to retrieve filesystem size of database directory, skip percentage limitation and default to fixed size limitation", "dir", db.dir)
+		} else {
+			// Same value as diskSize*pct/100, but split so the product cannot
+			// overflow uint64 on very large filesystems (for pct up to 100).
+			maxBytes = int64(diskSize/100*pct + diskSize%100*pct/100)
+		}
+	}
+
+ // Size retention is disabled
+ if maxBytes <= 0 {
+ return
+ }
+ // update MaxBytes gauge
+ db.metrics.maxBytes.Set(float64(maxBytes))
+
deletable = make(map[ulid.ULID]struct{})
// Initializing size counter with WAL size and Head chunks
@@ -1765,7 +1792,7 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
blocksSize := db.Head().Size()
for i, block := range blocks {
blocksSize += block.Size()
- if blocksSize > db.opts.MaxBytes {
+ if blocksSize > maxBytes {
// Add this and all following blocks for deletion.
for _, b := range blocks[i:] {
deletable[b.meta.ULID] = struct{}{}
diff --git a/util/runtime/statfs.go b/util/runtime/statfs.go
index 66bedb5ea1..338a02881a 100644
--- a/util/runtime/statfs.go
+++ b/util/runtime/statfs.go
@@ -11,12 +11,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-//go:build openbsd || windows || netbsd || solaris
+//go:build openbsd || netbsd || solaris
package runtime
-// Statfs returns the file system type (Unix only)
+// FsType returns the file system type (Unix only)
// syscall.Statfs_t isn't available on openbsd
-func Statfs(path string) string {
+func FsType(path string) string {
return "unknown"
}
+
+// FsSize is a stub that ignores path and always returns 0, because
+// syscall.Statfs_t isn't available on openbsd.
+func FsSize(path string) uint64 {
+ return 0
+}
diff --git a/util/runtime/statfs_default.go b/util/runtime/statfs_default.go
index 78cfb1fe41..3284907c33 100644
--- a/util/runtime/statfs_default.go
+++ b/util/runtime/statfs_default.go
@@ -20,8 +20,8 @@ import (
"syscall"
)
-// Statfs returns the file system type (Unix only).
-func Statfs(path string) string {
+// FsType returns the file system type (Unix only).
+func FsType(path string) string {
// Types of file systems that may be returned by `statfs`
fsTypes := map[int64]string{
0xadf5: "ADFS_SUPER_MAGIC",
@@ -67,6 +67,7 @@ func Statfs(path string) string {
0x012FF7B4: "XENIX_SUPER_MAGIC",
0x58465342: "XFS_SUPER_MAGIC",
0x012FD16D: "_XIAFS_SUPER_MAGIC",
+ 0x794c7630: "OVERLAYFS_SUPER_MAGIC",
}
var fs syscall.Statfs_t
@@ -82,3 +83,13 @@ func Statfs(path string) string {
}
return strconv.FormatInt(localType, 16)
}
+
+// FsSize reports the size of the file system containing path, computed as
+// block size times block count; it yields 0 if statfs fails (Unix only).
+func FsSize(path string) uint64 {
+	var stat syscall.Statfs_t
+	if statErr := syscall.Statfs(path, &stat); statErr != nil {
+		return 0
+	}
+	return uint64(stat.Bsize) * stat.Blocks
+}
diff --git a/util/runtime/statfs_linux_386.go b/util/runtime/statfs_linux_386.go
index a003b2effe..73207a8541 100644
--- a/util/runtime/statfs_linux_386.go
+++ b/util/runtime/statfs_linux_386.go
@@ -20,8 +20,8 @@ import (
"syscall"
)
-// Statfs returns the file system type (Unix only)
-func Statfs(path string) string {
+// FsType returns the file system type (Unix only)
+func FsType(path string) string {
// Types of file systems that may be returned by `statfs`
fsTypes := map[int32]string{
0xadf5: "ADFS_SUPER_MAGIC",
@@ -63,6 +63,7 @@ func Statfs(path string) string {
0x012FF7B4: "XENIX_SUPER_MAGIC",
0x58465342: "XFS_SUPER_MAGIC",
0x012FD16D: "_XIAFS_SUPER_MAGIC",
+ 0x794c7630: "OVERLAYFS_SUPER_MAGIC",
}
var fs syscall.Statfs_t
@@ -75,3 +76,13 @@ func Statfs(path string) string {
}
return strconv.Itoa(int(fs.Type))
}
+
+// FsSize reports the size of the file system containing path, computed as
+// block size times block count; it yields 0 if statfs fails (Unix only).
+func FsSize(path string) uint64 {
+	var stat syscall.Statfs_t
+	if statErr := syscall.Statfs(path, &stat); statErr != nil {
+		return 0
+	}
+	return uint64(stat.Bsize) * stat.Blocks
+}
diff --git a/util/runtime/statfs_uint32.go b/util/runtime/statfs_uint32.go
index fbf994ea63..6d7bea6aa3 100644
--- a/util/runtime/statfs_uint32.go
+++ b/util/runtime/statfs_uint32.go
@@ -20,8 +20,8 @@ import (
"syscall"
)
-// Statfs returns the file system type (Unix only)
-func Statfs(path string) string {
+// FsType returns the file system type (Unix only)
+func FsType(path string) string {
// Types of file systems that may be returned by `statfs`
fsTypes := map[uint32]string{
0xadf5: "ADFS_SUPER_MAGIC",
@@ -63,6 +63,7 @@ func Statfs(path string) string {
0x012FF7B4: "XENIX_SUPER_MAGIC",
0x58465342: "XFS_SUPER_MAGIC",
0x012FD16D: "_XIAFS_SUPER_MAGIC",
+ 0x794c7630: "OVERLAYFS_SUPER_MAGIC",
}
var fs syscall.Statfs_t
@@ -75,3 +76,13 @@ func Statfs(path string) string {
}
return strconv.Itoa(int(fs.Type))
}
+
+// FsSize reports the size of the file system containing path, computed as
+// block size times block count; it yields 0 if statfs fails (Unix only).
+func FsSize(path string) uint64 {
+	var stat syscall.Statfs_t
+	if statErr := syscall.Statfs(path, &stat); statErr != nil {
+		return 0
+	}
+	return uint64(stat.Bsize) * stat.Blocks
+}
diff --git a/util/runtime/statfs_unix_test.go b/util/runtime/statfs_unix_test.go
new file mode 100644
index 0000000000..fd8c3f88bf
--- /dev/null
+++ b/util/runtime/statfs_unix_test.go
@@ -0,0 +1,59 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !windows && !openbsd && !netbsd && !solaris
+// +build !windows,!openbsd,!netbsd,!solaris
+
+package runtime
+
+import (
+ "os"
+ "testing"
+
+ "github.com/grafana/regexp"
+ "github.com/stretchr/testify/require"
+)
+
+var regexpFsType = regexp.MustCompile("^[A-Z][A-Z0-9_]*_MAGIC$")
+
+func TestFsType(t *testing.T) {
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+
+	// The working directory must resolve to a named *_MAGIC file system type.
+	require.Regexp(t, regexpFsType, FsType(cwd))
+
+	// Paths that cannot be resolved are reported as "0".
+	for _, missing := range []string{
+		"/no/where/to/be/found",
+		" %% not event a real path\n\n",
+	} {
+		require.Equal(t, "0", FsType(missing))
+	}
+}
+
+func TestFsSize(t *testing.T) {
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+
+	// The working directory is expected to report a non-zero size.
+	require.Positive(t, FsSize(cwd))
+
+	// Paths that cannot be resolved yield a size of 0.
+	for _, missing := range []string{
+		"/no/where/to/be/found",
+		" %% not event a real path\n\n",
+	} {
+		require.Equal(t, uint64(0), FsSize(missing))
+	}
+}
diff --git a/util/runtime/statfs_windows.go b/util/runtime/statfs_windows.go
new file mode 100644
index 0000000000..f97432f89b
--- /dev/null
+++ b/util/runtime/statfs_windows.go
@@ -0,0 +1,61 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build windows
+// +build windows
+
+package runtime
+
+import (
+ "os"
+ "syscall"
+ "unsafe"
+
+ "golang.org/x/sys/windows"
+)
+
+var (
+ dll = windows.MustLoadDLL("kernel32.dll")
+ getDiskFreeSpaceExW = dll.MustFindProc("GetDiskFreeSpaceExW")
+)
+
+// FsType is a stub on Windows: it ignores path and always returns
+// "unknown".
+func FsType(path string) string {
+ return "unknown"
+}
+
+// FsSize returns the total size in bytes of the volume containing path, as
+// reported by GetDiskFreeSpaceExW, or 0 if it cannot be determined.
+func FsSize(path string) uint64 {
+	// ensure the path exists
+	if _, err := os.Stat(path); err != nil {
+		return 0
+	}
+	// UTF16PtrFromString replaces the deprecated StringToUTF16Ptr.
+	pathPtr, err := syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return 0
+	}
+	var avail, total, free uint64
+	// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getdiskfreespaceexw
+	ret, _, _ := getDiskFreeSpaceExW.Call(
+		uintptr(unsafe.Pointer(pathPtr)),
+		uintptr(unsafe.Pointer(&avail)),
+		uintptr(unsafe.Pointer(&total)),
+		uintptr(unsafe.Pointer(&free)))
+	if ret == 0 || free > total {
+		return 0
+	}
+	return total
+}
diff --git a/util/runtime/statfs_windows_test.go b/util/runtime/statfs_windows_test.go
new file mode 100644
index 0000000000..8f533a7064
--- /dev/null
+++ b/util/runtime/statfs_windows_test.go
@@ -0,0 +1,50 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build windows
+// +build windows
+
+package runtime
+
+import (
+ "os"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestFsType(t *testing.T) {
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+
+	// The type is reported as "unknown" whether or not the path exists.
+	for _, dir := range []string{
+		cwd,
+		"A:\\no\\where\\to\\be\\found",
+	} {
+		require.Equal(t, "unknown", FsType(dir))
+	}
+}
+
+func TestFsSize(t *testing.T) {
+	// C:\ is assumed to exist on the test machine and report a non-zero size.
+	require.Positive(t, FsSize("C:\\"))
+
+	// Paths that do not exist yield a size of 0.
+	for _, missing := range []string{
+		"c:\\no\\where\\to\\be\\found",
+		" %% not event a real path\n\n",
+	} {
+		require.Equal(t, uint64(0), FsSize(missing))
+	}
+}
diff --git a/web/web.go b/web/web.go
index 21c41c55eb..8e6282f6bb 100644
--- a/web/web.go
+++ b/web/web.go
@@ -257,6 +257,7 @@ type Options struct {
TSDBRetentionDuration model.Duration
TSDBDir string
TSDBMaxBytes units.Base2Bytes
+ TSDBMaxPercentage uint
LocalStorage LocalStorage
Storage storage.Storage
ExemplarStorage storage.ExemplarQueryable
@@ -813,6 +814,12 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) {
}
status.StorageRetention += h.options.TSDBMaxBytes.String()
}
+ if h.options.TSDBMaxPercentage != 0 {
+ if status.StorageRetention != "" {
+ status.StorageRetention += " or "
+ }
+ status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%"
+ }
metrics, err := h.gatherer.Gather()
if err != nil {