Add --storage.tsdb.retention.percentage (#10287)

Signed-off-by: Jérôme LOYET <822436+fatpat@users.noreply.github.com>
This commit is contained in:
Jérôme LOYET 2023-04-19 15:15:02 +02:00
parent 559722dc68
commit bf98acf33b
11 changed files with 278 additions and 16 deletions

View file

@ -415,6 +415,9 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B.").
BytesVar(&cfg.tsdb.MaxBytes)
serverOnlyFlag(a, "storage.tsdb.retention.percentage", "Maximum percentage of the disk space that can be used to store blocks (prevails over storage.tsdb.retention.size).").
UintVar(&cfg.tsdb.MaxPercentage)
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
Default("false").BoolVar(&cfg.tsdb.NoLockfile)
@ -635,9 +638,9 @@ func main() {
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
if !agentMode {
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration)
}
// Check for overflows. This limits our max retention to 100y.
@ -650,6 +653,17 @@ func main() {
logger.Warn("Time retention value is too high. Limiting to: " + y.String())
}
if cfg.tsdb.MaxPercentage > 100 {
cfg.tsdb.MaxPercentage = 100
logger.Warn("Percentage retention value is too high. Limiting to: 100%")
}
if cfg.tsdb.MaxPercentage > 0 {
if prom_runtime.FsSize(localStoragePath) == 0 {
fmt.Fprintln(os.Stderr, fmt.Errorf("unable to detect size of partition %s, please disable retention percentage (%d%%)", localStoragePath, cfg.tsdb.MaxPercentage))
os.Exit(2)
}
}
// Max block size settings.
if cfg.tsdb.MaxBlockDuration == 0 {
maxBlockDuration, err := model.ParseDuration("31d")
@ -824,6 +838,7 @@ func main() {
cfg.web.Context = ctxWeb
cfg.web.TSDBRetentionDuration = cfg.tsdb.RetentionDuration
cfg.web.TSDBMaxBytes = cfg.tsdb.MaxBytes
cfg.web.TSDBMaxPercentage = cfg.tsdb.MaxPercentage
cfg.web.TSDBDir = localStoragePath
cfg.web.LocalStorage = localStorage
cfg.web.Storage = fanoutStorage
@ -1246,7 +1261,7 @@ func main() {
return fmt.Errorf("opening storage failed: %w", err)
}
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
switch fsType := prom_runtime.FsType(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
default:
@ -1258,6 +1273,7 @@ func main() {
"MinBlockDuration", cfg.tsdb.MinBlockDuration,
"MaxBlockDuration", cfg.tsdb.MaxBlockDuration,
"MaxBytes", cfg.tsdb.MaxBytes,
"MaxPercentage", cfg.tsdb.MaxPercentage,
"NoLockfile", cfg.tsdb.NoLockfile,
"RetentionDuration", cfg.tsdb.RetentionDuration,
"WALSegmentSize", cfg.tsdb.WALSegmentSize,
@ -1302,7 +1318,7 @@ func main() {
return fmt.Errorf("opening storage failed: %w", err)
}
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
switch fsType := prom_runtime.FsType(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
default:
@ -1782,6 +1798,7 @@ type tsdbOptions struct {
MaxBlockChunkSegmentSize units.Base2Bytes
RetentionDuration model.Duration
MaxBytes units.Base2Bytes
MaxPercentage uint
NoLockfile bool
WALCompression bool
WALCompressionType string
@ -1806,6 +1823,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
MaxBlockChunkSegmentSize: int64(opts.MaxBlockChunkSegmentSize),
RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond),
MaxBytes: int64(opts.MaxBytes),
MaxPercentage: opts.MaxPercentage,
NoLockfile: opts.NoLockfile,
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,

View file

@ -37,6 +37,7 @@ The Prometheus monitoring server
| <code class="text-nowrap">--storage.tsdb.path</code> | Base path for metrics storage. Use with server mode only. | `data/` |
| <code class="text-nowrap">--storage.tsdb.retention.time</code> | How long to retain samples in storage. If none of this flag, "storage.tsdb.retention.size" and "storage.tsdb.retention.percentage" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.retention.size</code> | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.retention.percentage</code> | Maximum percentage of the disk space that can be used to store blocks (prevails over storage.tsdb.retention.size). Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.no-lockfile</code> | Do not create lockfile in data directory. Use with server mode only. | `false` |
| <code class="text-nowrap">--storage.tsdb.head-chunks-write-queue-size</code> | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` |
| <code class="text-nowrap">--storage.agent.path</code> | Base path for metrics storage. Use with agent mode only. | `data-agent/` |

View file

@ -46,6 +46,7 @@ import (
_ "github.com/prometheus/prometheus/tsdb/goversion" // Load the package into main to make sure minimum Go version is met.
"github.com/prometheus/prometheus/tsdb/tsdbutil"
"github.com/prometheus/prometheus/tsdb/wlog"
prom_runtime "github.com/prometheus/prometheus/util/runtime"
)
const (
@ -115,6 +116,11 @@ type Options struct {
// the current size of the database.
MaxBytes int64
// MaxPercentage is the maximum percentage of disk space to use for retained blocks.
// 0 means disabled.
// If both MaxBytes and MaxPercentage are set, the percentage prevails.
MaxPercentage uint
// NoLockfile disables creation and consideration of a lock file.
NoLockfile bool
@ -1753,11 +1759,32 @@ func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
// BeyondSizeRetention returns those blocks which are beyond the size retention
// set in the db options.
func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struct{}) {
// Size retention is disabled or no blocks to work with.
if len(blocks) == 0 || db.opts.MaxBytes <= 0 {
// No blocks to work with
if len(blocks) == 0 {
return
}
maxBytes := db.opts.MaxBytes
// percentage prevails
if db.opts.MaxPercentage > 0 {
// retrieve FS size
diskSize := prom_runtime.FsSize(db.dir)
if diskSize <= 0 {
db.logger.Warn("msg", "Unable to retrieve filesystem size of database directory (%s), skip percentage limitation and default to fixed size limitation", db.dir)
} else {
// apply percentage
maxBytes = int64(uint64(db.opts.MaxPercentage) * diskSize / 100)
}
}
// Size retention is disabled
if maxBytes <= 0 {
return
}
// update MaxBytes gauge
db.metrics.maxBytes.Set(float64(maxBytes))
deletable = make(map[ulid.ULID]struct{})
// Initializing size counter with WAL size and Head chunks
@ -1765,7 +1792,7 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
blocksSize := db.Head().Size()
for i, block := range blocks {
blocksSize += block.Size()
if blocksSize > db.opts.MaxBytes {
if blocksSize > maxBytes {
// Add this and all following blocks for deletion.
for _, b := range blocks[i:] {
deletable[b.meta.ULID] = struct{}{}

View file

@ -11,12 +11,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build openbsd || windows || netbsd || solaris
//go:build openbsd || netbsd || solaris
package runtime
// Statfs returns the file system type (Unix only)
// FsType returns the file system type (Unix only)
// syscall.Statfs_t isn't available on openbsd
func Statfs(path string) string {
func FsType(path string) string {
return "unknown"
}
// FsSize is the fallback used on platforms where the file system size is not
// detected (syscall.Statfs_t isn't available on openbsd). It ignores path and
// always reports a size of 0.
func FsSize(path string) uint64 {
	return 0
}

View file

@ -20,8 +20,8 @@ import (
"syscall"
)
// Statfs returns the file system type (Unix only).
func Statfs(path string) string {
// FsType returns the file system type (Unix only).
func FsType(path string) string {
// Types of file systems that may be returned by `statfs`
fsTypes := map[int64]string{
0xadf5: "ADFS_SUPER_MAGIC",
@ -67,6 +67,7 @@ func Statfs(path string) string {
0x012FF7B4: "XENIX_SUPER_MAGIC",
0x58465342: "XFS_SUPER_MAGIC",
0x012FD16D: "_XIAFS_SUPER_MAGIC",
0x794c7630: "OVERLAYFS_SUPER_MAGIC",
}
var fs syscall.Statfs_t
@ -82,3 +83,13 @@ func Statfs(path string) string {
}
return strconv.FormatInt(localType, 16)
}
// FsSize returns the size in bytes of the file system containing path
// (Unix only), computed as block size times block count as reported by
// statfs. It returns 0 if the statfs call fails.
func FsSize(path string) uint64 {
	var st syscall.Statfs_t
	if err := syscall.Statfs(path, &st); err != nil {
		return 0
	}
	blockSize := uint64(st.Bsize)
	return blockSize * st.Blocks
}

View file

@ -20,8 +20,8 @@ import (
"syscall"
)
// Statfs returns the file system type (Unix only)
func Statfs(path string) string {
// FsType returns the file system type (Unix only)
func FsType(path string) string {
// Types of file systems that may be returned by `statfs`
fsTypes := map[int32]string{
0xadf5: "ADFS_SUPER_MAGIC",
@ -63,6 +63,7 @@ func Statfs(path string) string {
0x012FF7B4: "XENIX_SUPER_MAGIC",
0x58465342: "XFS_SUPER_MAGIC",
0x012FD16D: "_XIAFS_SUPER_MAGIC",
0x794c7630: "OVERLAYFS_SUPER_MAGIC",
}
var fs syscall.Statfs_t
@ -75,3 +76,13 @@ func Statfs(path string) string {
}
return strconv.Itoa(int(fs.Type))
}
// FsSize returns the size in bytes of the file system containing path
// (Unix only), computed as block size times block count as reported by
// statfs. It returns 0 if the statfs call fails.
func FsSize(path string) uint64 {
	var st syscall.Statfs_t
	if err := syscall.Statfs(path, &st); err != nil {
		return 0
	}
	blockSize := uint64(st.Bsize)
	return blockSize * st.Blocks
}

View file

@ -20,8 +20,8 @@ import (
"syscall"
)
// Statfs returns the file system type (Unix only)
func Statfs(path string) string {
// FsType returns the file system type (Unix only)
func FsType(path string) string {
// Types of file systems that may be returned by `statfs`
fsTypes := map[uint32]string{
0xadf5: "ADFS_SUPER_MAGIC",
@ -63,6 +63,7 @@ func Statfs(path string) string {
0x012FF7B4: "XENIX_SUPER_MAGIC",
0x58465342: "XFS_SUPER_MAGIC",
0x012FD16D: "_XIAFS_SUPER_MAGIC",
0x794c7630: "OVERLAYFS_SUPER_MAGIC",
}
var fs syscall.Statfs_t
@ -75,3 +76,13 @@ func Statfs(path string) string {
}
return strconv.Itoa(int(fs.Type))
}
// FsSize returns the size in bytes of the file system containing path
// (Unix only), computed as block size times block count as reported by
// statfs. It returns 0 if the statfs call fails.
func FsSize(path string) uint64 {
	var st syscall.Statfs_t
	if err := syscall.Statfs(path, &st); err != nil {
		return 0
	}
	blockSize := uint64(st.Bsize)
	return blockSize * st.Blocks
}

View file

@ -0,0 +1,59 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !windows && !openbsd && !netbsd && !solaris
// +build !windows,!openbsd,!netbsd,!solaris
package runtime
import (
"os"
"testing"
"github.com/grafana/regexp"
"github.com/stretchr/testify/require"
)
// regexpFsType matches the symbolic, upper-case file system names ending in
// "_MAGIC" (for example "XFS_SUPER_MAGIC") that TestFsType expects FsType to
// return for the working directory.
var regexpFsType = regexp.MustCompile("^[A-Z][A-Z0-9_]*_MAGIC$")
// TestFsType checks that FsType maps the current working directory to a
// symbolic *_MAGIC name and returns "0" for paths that cannot be resolved.
func TestFsType(t *testing.T) {
	cwd, err := os.Getwd()
	require.NoError(t, err)
	require.Regexp(t, regexpFsType, FsType(cwd))

	// Paths that do not exist must all yield the same "0" result.
	for _, bogus := range []string{
		"/no/where/to/be/found",
		" %% not event a real path\n\n",
	} {
		require.Equal(t, "0", FsType(bogus))
	}
}
// TestFsSize checks that FsSize reports a positive size for the current
// working directory and 0 for paths that cannot be resolved.
func TestFsSize(t *testing.T) {
	cwd, err := os.Getwd()
	require.NoError(t, err)
	require.Positive(t, FsSize(cwd))

	// Paths that do not exist must all yield a size of 0.
	for _, bogus := range []string{
		"/no/where/to/be/found",
		" %% not event a real path\n\n",
	} {
		require.Equal(t, uint64(0), FsSize(bogus))
	}
}

View file

@ -0,0 +1,61 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
// +build windows
package runtime
import (
"os"
"syscall"
"unsafe"
"golang.org/x/sys/windows"
)
var (
	// dll refers to kernel32.dll, resolved from the Windows system directory
	// only. Loading a DLL by bare name through LoadDLL/MustLoadDLL is subject
	// to DLL preloading attacks, so the lazy system loader is used instead.
	dll = windows.NewLazySystemDLL("kernel32.dll")
	// getDiskFreeSpaceExW is the GetDiskFreeSpaceExW procedure of kernel32.dll.
	// It is looked up on first use rather than at package initialization.
	getDiskFreeSpaceExW = dll.NewProc("GetDiskFreeSpaceExW")
)
// FsType returns the file system type of path. Detection is not implemented
// in this Windows build, so path is ignored and the result is always
// "unknown".
func FsType(path string) string {
	return "unknown"
}
// FsSize returns the file system size (Unix only)
// syscall.Statfs_t isn't available on openbsd
func FsSize(path string) uint64 {
// ensure the path exists
if _, err := os.Stat(path); err != nil {
return 0
}
var avail int64
var total int64
var free int64
// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getdiskfreespaceexa
ret, _, _ := getDiskFreeSpaceExW.Call(
uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))),
uintptr(unsafe.Pointer(&avail)),
uintptr(unsafe.Pointer(&total)),
uintptr(unsafe.Pointer(&free)))
if ret == 0 || uint64(free) > uint64(total) {
return 0
}
return uint64(total)
}

View file

@ -0,0 +1,50 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
// +build windows
package runtime
import (
"os"
"testing"
"github.com/stretchr/testify/require"
)
// TestFsType checks that FsType reports "unknown" on Windows, both for the
// current working directory and for a path that does not exist.
func TestFsType(t *testing.T) {
	cwd, err := os.Getwd()
	require.NoError(t, err)

	for _, p := range []string{cwd, "A:\\no\\where\\to\\be\\found"} {
		require.Equal(t, "unknown", FsType(p))
	}
}
func TestFsSize(t *testing.T) {
var size uint64
size = FsSize("C:\\")
require.Positive(t, size)
size = FsSize("c:\\no\\where\\to\\be\\found")
require.Equal(t, uint64(0), size)
size = FsSize(" %% not event a real path\n\n")
require.Equal(t, uint64(0), size)
}

View file

@ -257,6 +257,7 @@ type Options struct {
TSDBRetentionDuration model.Duration
TSDBDir string
TSDBMaxBytes units.Base2Bytes
TSDBMaxPercentage uint
LocalStorage LocalStorage
Storage storage.Storage
ExemplarStorage storage.ExemplarQueryable
@ -813,6 +814,12 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) {
}
status.StorageRetention += h.options.TSDBMaxBytes.String()
}
if h.options.TSDBMaxPercentage != 0 {
if status.StorageRetention != "" {
status.StorageRetention += " or "
}
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%"
}
metrics, err := h.gatherer.Gather()
if err != nil {