Add configuration option for GOGC

Add the ability to adjust the `GOGC` variable from the Prometheus
configuration file.
* Create a new top-level `runtime` section in the config.
* Adjust from the Go default of 100 to 50 to reduce wasted memory.
* Use the `GOGC` env value if no configuraiton is used.

Signed-off-by: SuperQ <superq@gmail.com>
This commit is contained in:
SuperQ 2024-04-01 17:34:35 +02:00
parent efbd6e41c5
commit 68ba6c1ae5
No known key found for this signature in database
GPG key ID: C646B23C9E3245F1
8 changed files with 75 additions and 0 deletions

View file

@ -2,7 +2,10 @@
## unreleased ## unreleased
This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 50.
* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980
* [CHANGE] Runtime: Change GOGC threshold from 100 to 50 #14176
* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 * [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061
* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974
* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991

View file

@ -28,6 +28,8 @@ import (
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"runtime" "runtime"
"runtime/debug"
"strconv"
"strings" "strings"
"sync" "sync"
"syscall" "syscall"
@ -1384,6 +1386,17 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
} }
oldGoGC := debug.SetGCPercent(conf.Runtime.GoGC)
if oldGoGC != conf.Runtime.GoGC {
level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC)
}
// Write the new setting out to the ENV var for runtime API output.
if conf.Runtime.GoGC >= 0 {
os.Setenv("GOGC", strconv.Itoa(conf.Runtime.GoGC))
} else {
os.Setenv("GOGC", "off")
}
noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval) noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval)
l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)} l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)}
level.Info(logger).Log(append(l, timings...)...) level.Info(logger).Log(append(l, timings...)...)

View file

@ -20,6 +20,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"sort" "sort"
"strconv"
"strings" "strings"
"time" "time"
@ -151,6 +152,11 @@ var (
ScrapeProtocols: DefaultScrapeProtocols, ScrapeProtocols: DefaultScrapeProtocols,
} }
DefaultRuntimeConfig = RuntimeConfig{
// Go runtime tuning.
GoGC: 50,
}
// DefaultScrapeConfig is the default scrape configuration. // DefaultScrapeConfig is the default scrape configuration.
DefaultScrapeConfig = ScrapeConfig{ DefaultScrapeConfig = ScrapeConfig{
// ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals. // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals.
@ -225,6 +231,7 @@ var (
// Config is the top-level configuration for Prometheus's config files. // Config is the top-level configuration for Prometheus's config files.
type Config struct { type Config struct {
GlobalConfig GlobalConfig `yaml:"global"` GlobalConfig GlobalConfig `yaml:"global"`
Runtime RuntimeConfig `yaml:"runtime,omitempty"`
AlertingConfig AlertingConfig `yaml:"alerting,omitempty"` AlertingConfig AlertingConfig `yaml:"alerting,omitempty"`
RuleFiles []string `yaml:"rule_files,omitempty"` RuleFiles []string `yaml:"rule_files,omitempty"`
ScrapeConfigFiles []string `yaml:"scrape_config_files,omitempty"` ScrapeConfigFiles []string `yaml:"scrape_config_files,omitempty"`
@ -335,6 +342,14 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
c.GlobalConfig = DefaultGlobalConfig c.GlobalConfig = DefaultGlobalConfig
} }
// If a runtime block was open but empty the default runtime config is overwritten.
// We have to restore it here.
if c.Runtime.isZero() {
c.Runtime = DefaultRuntimeConfig
// Use the GOGC env var value if the runtime section is empty.
c.Runtime.GoGC = getGoGCEnv()
}
for _, rf := range c.RuleFiles { for _, rf := range c.RuleFiles {
if !patRulePath.MatchString(rf) { if !patRulePath.MatchString(rf) {
return fmt.Errorf("invalid rule file path %q", rf) return fmt.Errorf("invalid rule file path %q", rf)
@ -564,6 +579,17 @@ func (c *GlobalConfig) isZero() bool {
c.ScrapeProtocols == nil c.ScrapeProtocols == nil
} }
// RuntimeConfig configures the values for the process behavior.
type RuntimeConfig struct {
// The Go garbage collection target percentage.
GoGC int `yaml:"gogc,omitempty"`
}
// isZero returns true iff the global config is the zero value.
func (c *RuntimeConfig) isZero() bool {
return c.GoGC == 0
}
type ScrapeConfigs struct { type ScrapeConfigs struct {
ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"` ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
} }
@ -1211,3 +1237,19 @@ func filePath(filename string) string {
func fileErr(filename string, err error) error { func fileErr(filename string, err error) error {
return fmt.Errorf("%q: %w", filePath(filename), err) return fmt.Errorf("%q: %w", filePath(filename), err)
} }
func getGoGCEnv() int {
goGCEnv := os.Getenv("GOGC")
// If the GOGC env var is set, use the same logic as upstream Go.
if goGCEnv != "" {
// Special case for GOGC=off.
if strings.ToLower(goGCEnv) == "off" {
return -1
}
i, err := strconv.Atoi(goGCEnv)
if err == nil {
return i
}
}
return DefaultRuntimeConfig.GoGC
}

View file

@ -19,6 +19,7 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
var ruleFilesExpectedConf = &Config{ var ruleFilesExpectedConf = &Config{
GlobalConfig: DefaultGlobalConfig, GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{ RuleFiles: []string{
"testdata/first.rules", "testdata/first.rules",
"testdata/rules/second.rules", "testdata/rules/second.rules",

View file

@ -76,6 +76,7 @@ const (
globLabelLimit = 30 globLabelLimit = 30
globLabelNameLengthLimit = 200 globLabelNameLengthLimit = 200
globLabelValueLengthLimit = 200 globLabelValueLengthLimit = 200
globalGoGC = 42
) )
var expectedConf = &Config{ var expectedConf = &Config{
@ -96,6 +97,10 @@ var expectedConf = &Config{
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
}, },
Runtime: RuntimeConfig{
GoGC: globalGoGC,
},
RuleFiles: []string{ RuleFiles: []string{
filepath.FromSlash("testdata/first.rules"), filepath.FromSlash("testdata/first.rules"),
filepath.FromSlash("testdata/my/*.rules"), filepath.FromSlash("testdata/my/*.rules"),
@ -2081,6 +2086,7 @@ func TestEmptyGlobalBlock(t *testing.T) {
c, err := Load("global:\n", false, log.NewNopLogger()) c, err := Load("global:\n", false, log.NewNopLogger())
require.NoError(t, err) require.NoError(t, err)
exp := DefaultConfig exp := DefaultConfig
exp.Runtime = DefaultRuntimeConfig
require.Equal(t, exp, *c) require.Equal(t, exp, *c)
} }

View file

@ -17,6 +17,7 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
var ruleFilesExpectedConf = &Config{ var ruleFilesExpectedConf = &Config{
GlobalConfig: DefaultGlobalConfig, GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{ RuleFiles: []string{
"testdata\\first.rules", "testdata\\first.rules",
"testdata\\rules\\second.rules", "testdata\\rules\\second.rules",

View file

@ -14,6 +14,9 @@ global:
monitor: codelab monitor: codelab
foo: bar foo: bar
runtime:
gogc: 42
rule_files: rule_files:
- "first.rules" - "first.rules"
- "my/*.rules" - "my/*.rules"

View file

@ -121,6 +121,12 @@ global:
# that will be kept in memory. 0 means no limit. # that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ] [ keep_dropped_targets: <int> | default = 0 ]
runtime:
# Configure the Go garbage collector GOGC parameter
# See: https://tip.golang.org/doc/gc-guide#GOGC
# Lowering this number increases CPU usage.
[ gogc: <int> | default = 50 ]
# Rule files specifies a list of globs. Rules and alerts are read from # Rule files specifies a list of globs. Rules and alerts are read from
# all matching files. # all matching files.
rule_files: rule_files: