From 68ba6c1ae5497ac47b227e14bb330a2f89d63814 Mon Sep 17 00:00:00 2001 From: SuperQ Date: Mon, 1 Apr 2024 17:34:35 +0200 Subject: [PATCH] Add configuration option for GOGC Add the ability to adjust the `GOGC` variable from the Prometheus configuration file. * Create a new top-level `runtime` section in the config. * Adjust from the Go default of 100 to 50 to reduce wasted memory. * Use the `GOGC` env value if no configuraiton is used. Signed-off-by: SuperQ --- CHANGELOG.md | 3 +++ cmd/prometheus/main.go | 13 +++++++++ config/config.go | 42 +++++++++++++++++++++++++++++ config/config_default_test.go | 1 + config/config_test.go | 6 +++++ config/config_windows_test.go | 1 + config/testdata/conf.good.yml | 3 +++ docs/configuration/configuration.md | 6 +++++ 8 files changed, 75 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71a2195d3..6c2c9ae31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,10 @@ ## unreleased +This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 50. + * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 +* [CHANGE] Runtime: Change GOGC threshold from 100 to 50 #14176 * [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 0532bc380..8db2f2c5e 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -28,6 +28,8 @@ import ( "os/signal" "path/filepath" "runtime" + "runtime/debug" + "strconv" "strings" "sync" "syscall" @@ -1384,6 +1386,17 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) } + oldGoGC := debug.SetGCPercent(conf.Runtime.GoGC) + if oldGoGC != conf.Runtime.GoGC { + level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) + } + // Write the new setting out to the ENV var for runtime API output. + if conf.Runtime.GoGC >= 0 { + os.Setenv("GOGC", strconv.Itoa(conf.Runtime.GoGC)) + } else { + os.Setenv("GOGC", "off") + } + noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval) l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)} level.Info(logger).Log(append(l, timings...)...) diff --git a/config/config.go b/config/config.go index 463dbc357..90efaed83 100644 --- a/config/config.go +++ b/config/config.go @@ -20,6 +20,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "time" @@ -151,6 +152,11 @@ var ( ScrapeProtocols: DefaultScrapeProtocols, } + DefaultRuntimeConfig = RuntimeConfig{ + // Go runtime tuning. + GoGC: 50, + } + // DefaultScrapeConfig is the default scrape configuration. DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals. @@ -225,6 +231,7 @@ var ( // Config is the top-level configuration for Prometheus's config files. type Config struct { GlobalConfig GlobalConfig `yaml:"global"` + Runtime RuntimeConfig `yaml:"runtime,omitempty"` AlertingConfig AlertingConfig `yaml:"alerting,omitempty"` RuleFiles []string `yaml:"rule_files,omitempty"` ScrapeConfigFiles []string `yaml:"scrape_config_files,omitempty"` @@ -335,6 +342,14 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { c.GlobalConfig = DefaultGlobalConfig } + // If a runtime block was open but empty the default runtime config is overwritten. + // We have to restore it here. + if c.Runtime.isZero() { + c.Runtime = DefaultRuntimeConfig + // Use the GOGC env var value if the runtime section is empty. + c.Runtime.GoGC = getGoGCEnv() + } + for _, rf := range c.RuleFiles { if !patRulePath.MatchString(rf) { return fmt.Errorf("invalid rule file path %q", rf) @@ -564,6 +579,17 @@ func (c *GlobalConfig) isZero() bool { c.ScrapeProtocols == nil } +// RuntimeConfig configures the values for the process behavior. +type RuntimeConfig struct { + // The Go garbage collection target percentage. + GoGC int `yaml:"gogc,omitempty"` +} + +// isZero returns true iff the global config is the zero value. +func (c *RuntimeConfig) isZero() bool { + return c.GoGC == 0 +} + type ScrapeConfigs struct { ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"` } @@ -1211,3 +1237,19 @@ func filePath(filename string) string { func fileErr(filename string, err error) error { return fmt.Errorf("%q: %w", filePath(filename), err) } + +func getGoGCEnv() int { + goGCEnv := os.Getenv("GOGC") + // If the GOGC env var is set, use the same logic as upstream Go. + if goGCEnv != "" { + // Special case for GOGC=off. + if strings.ToLower(goGCEnv) == "off" { + return -1 + } + i, err := strconv.Atoi(goGCEnv) + if err == nil { + return i + } + } + return DefaultRuntimeConfig.GoGC +} diff --git a/config/config_default_test.go b/config/config_default_test.go index 26623590d..31133f1e0 100644 --- a/config/config_default_test.go +++ b/config/config_default_test.go @@ -19,6 +19,7 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml" var ruleFilesExpectedConf = &Config{ GlobalConfig: DefaultGlobalConfig, + Runtime: DefaultRuntimeConfig, RuleFiles: []string{ "testdata/first.rules", "testdata/rules/second.rules", diff --git a/config/config_test.go b/config/config_test.go index 14981d25f..ff056a267 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -76,6 +76,7 @@ const ( globLabelLimit = 30 globLabelNameLengthLimit = 200 globLabelValueLengthLimit = 200 + globalGoGC = 42 ) var expectedConf = &Config{ @@ -96,6 +97,10 @@ var expectedConf = &Config{ ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, }, + Runtime: RuntimeConfig{ + GoGC: globalGoGC, + }, + RuleFiles: []string{ filepath.FromSlash("testdata/first.rules"), filepath.FromSlash("testdata/my/*.rules"), @@ -2081,6 +2086,7 @@ func TestEmptyGlobalBlock(t *testing.T) { c, err := Load("global:\n", false, log.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig + exp.Runtime = DefaultRuntimeConfig require.Equal(t, exp, *c) } diff --git a/config/config_windows_test.go b/config/config_windows_test.go index 7fd1d46f6..db4d46ef1 100644 --- a/config/config_windows_test.go +++ b/config/config_windows_test.go @@ -17,6 +17,7 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml" var ruleFilesExpectedConf = &Config{ GlobalConfig: DefaultGlobalConfig, + Runtime: DefaultRuntimeConfig, RuleFiles: []string{ "testdata\\first.rules", "testdata\\rules\\second.rules", diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index b58430164..184e6363c 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -14,6 +14,9 @@ global: monitor: codelab foo: bar +runtime: + gogc: 42 + rule_files: - "first.rules" - "my/*.rules" diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index a8fc9c626..26c088e13 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -121,6 +121,12 @@ global: # that will be kept in memory. 0 means no limit. [ keep_dropped_targets: | default = 0 ] +runtime: + # Configure the Go garbage collector GOGC parameter + # See: https://tip.golang.org/doc/gc-guide#GOGC + # Lowering this number increases CPU usage. + [ gogc: | default = 50 ] + # Rule files specifies a list of globs. Rules and alerts are read from # all matching files. rule_files: