diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 5528358337..a3ad1745c1 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -154,6 +154,9 @@ type flagConfig struct {
RemoteFlushDeadline model.Duration
nameEscapingScheme string
+ enableAutoReload bool
+ autoReloadInterval model.Duration
+
featureList []string
memlimitRatio float64
// These options are extracted from featureList
@@ -212,6 +215,12 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "auto-gomaxprocs":
c.enableAutoGOMAXPROCS = true
level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota")
+ case "auto-reload-config":
+ c.enableAutoReload = true
+ if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 {
+ c.autoReloadInterval = model.Duration(time.Second)
+ }
+ level.Info(logger).Log("msg", fmt.Sprintf("Enabled automatic configuration file reloading. Checking for configuration changes every %s.", c.autoReloadInterval))
case "auto-gomemlimit":
c.enableAutoGOMEMLIMIT = true
level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit")
@@ -302,6 +311,9 @@ func main() {
a.Flag("config.file", "Prometheus configuration file path.").
Default("prometheus.yml").StringVar(&cfg.configFile)
+ a.Flag("config.auto-reload-interval", "Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes.").
+ Default("30s").SetValue(&cfg.autoReloadInterval)
+
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated.").
Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses)
@@ -492,7 +504,7 @@ func main() {
a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme)
- a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
+ a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, auto-reload-config, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
promlogflag.AddFlags(a, &cfg.promlogConfig)
@@ -1129,6 +1141,15 @@ func main() {
hup := make(chan os.Signal, 1)
signal.Notify(hup, syscall.SIGHUP)
cancel := make(chan struct{})
+
+ var checksum string
+ if cfg.enableAutoReload {
+ checksum, err = config.GenerateChecksum(cfg.configFile)
+ if err != nil {
+ level.Error(logger).Log("msg", "Failed to generate initial checksum for configuration file", "err", err)
+ }
+ }
+
g.Add(
func() error {
<-reloadReady.C
@@ -1138,6 +1159,12 @@ func main() {
case <-hup:
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
level.Error(logger).Log("msg", "Error reloading config", "err", err)
+ } else if cfg.enableAutoReload {
+ if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil {
+ checksum = currentChecksum
+ } else {
+ level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
+ }
}
case rc := <-webHandler.Reload():
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
@@ -1145,6 +1172,32 @@ func main() {
rc <- err
} else {
rc <- nil
+ if cfg.enableAutoReload {
+ if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil {
+ checksum = currentChecksum
+ } else {
+ level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
+ }
+ }
+ }
+ case <-time.Tick(time.Duration(cfg.autoReloadInterval)):
+ if !cfg.enableAutoReload {
+ continue
+ }
+ currentChecksum, err := config.GenerateChecksum(cfg.configFile)
+ if err != nil {
+ level.Error(logger).Log("msg", "Failed to generate checksum of configuration file while checking for changes", "err", err)
+ continue
+ }
+ if currentChecksum == checksum {
+ continue
+ }
+ level.Info(logger).Log("msg", "Configuration file change detected, reloading the configuration.")
+
+ if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
+ level.Error(logger).Log("msg", "Error reloading config", "err", err)
+ } else {
+ checksum = currentChecksum
}
case <-cancel:
return nil
diff --git a/config/reload.go b/config/reload.go
new file mode 100644
index 0000000000..8be1b28d8a
--- /dev/null
+++ b/config/reload.go
@@ -0,0 +1,92 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "gopkg.in/yaml.v2"
+)
+
+type ExternalFilesConfig struct {
+ RuleFiles []string `yaml:"rule_files"`
+ ScrapeConfigFiles []string `yaml:"scrape_config_files"`
+}
+
+// GenerateChecksum generates a checksum of the YAML file and the files it references.
+func GenerateChecksum(yamlFilePath string) (string, error) {
+ hash := sha256.New()
+
+ yamlContent, err := os.ReadFile(yamlFilePath)
+ if err != nil {
+ return "", fmt.Errorf("error reading YAML file: %w", err)
+ }
+ _, err = hash.Write(yamlContent)
+ if err != nil {
+ return "", fmt.Errorf("error writing YAML file to hash: %w", err)
+ }
+
+ var config ExternalFilesConfig
+ if err := yaml.Unmarshal(yamlContent, &config); err != nil {
+ return "", fmt.Errorf("error unmarshalling YAML: %w", err)
+ }
+
+ dir := filepath.Dir(yamlFilePath)
+
+ for i, file := range config.RuleFiles {
+ if !filepath.IsAbs(file) { config.RuleFiles[i] = filepath.Join(dir, file) }
+ }
+ for i, file := range config.ScrapeConfigFiles {
+ if !filepath.IsAbs(file) { config.ScrapeConfigFiles[i] = filepath.Join(dir, file) }
+ }
+
+ files := map[string][]string{
+ "r": config.RuleFiles, // "r" for rule files
+ "s": config.ScrapeConfigFiles, // "s" for scrape config files
+ }
+
+ for _, prefix := range []string{"r", "s"} {
+ for _, pattern := range files[prefix] {
+ matchingFiles, err := filepath.Glob(pattern)
+ if err != nil {
+ return "", fmt.Errorf("error finding files with pattern %q: %w", pattern, err)
+ }
+
+ for _, file := range matchingFiles {
+ // Write prefix to the hash ("r" or "s") followed by \0, then
+ // the file path.
+ _, err = hash.Write([]byte(prefix + "\x00" + file + "\x00"))
+ if err != nil {
+ return "", fmt.Errorf("error writing %q path to hash: %w", file, err)
+ }
+
+ // Read and hash the content of the file.
+ content, err := os.ReadFile(file)
+ if err != nil {
+ return "", fmt.Errorf("error reading file %s: %w", file, err)
+ }
+ _, err = hash.Write(append(content, []byte("\x00")...))
+ if err != nil {
+ return "", fmt.Errorf("error writing %q content to hash: %w", file, err)
+ }
+ }
+ }
+ }
+
+ return hex.EncodeToString(hash.Sum(nil)), nil
+}
diff --git a/config/reload_test.go b/config/reload_test.go
new file mode 100644
index 0000000000..f0f44f3588
--- /dev/null
+++ b/config/reload_test.go
@@ -0,0 +1,222 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestGenerateChecksum(t *testing.T) {
+ tmpDir := t.TempDir()
+
+ // Define paths for the temporary files.
+ yamlFilePath := filepath.Join(tmpDir, "test.yml")
+ ruleFilePath := filepath.Join(tmpDir, "rule_file.yml")
+ scrapeConfigFilePath := filepath.Join(tmpDir, "scrape_config.yml")
+
+ // Define initial and modified content for the files.
+ originalRuleContent := "groups:\n- name: example\n rules:\n - alert: ExampleAlert"
+ modifiedRuleContent := "groups:\n- name: example\n rules:\n - alert: ModifiedAlert"
+
+ originalScrapeConfigContent := "scrape_configs:\n- job_name: example"
+ modifiedScrapeConfigContent := "scrape_configs:\n- job_name: modified_example"
+
+ // Define YAML content referencing the rule and scrape config files.
+ yamlContent := `
+rule_files:
+ - rule_file.yml
+scrape_config_files:
+ - scrape_config.yml
+`
+
+ // Write initial content to files.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Generate the original checksum.
+ originalChecksum := calculateChecksum(t, yamlFilePath)
+
+ t.Run("Rule File Change", func(t *testing.T) {
+ // Modify the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(modifiedRuleContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Scrape Config Change", func(t *testing.T) {
+ // Modify the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(modifiedScrapeConfigContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Rule File Deletion", func(t *testing.T) {
+ // Delete the rule file.
+ require.NoError(t, os.Remove(ruleFilePath))
+
+ // Checksum should change.
+ deletedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, deletedChecksum)
+
+ // Restore the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Scrape Config Deletion", func(t *testing.T) {
+ // Delete the scrape config file.
+ require.NoError(t, os.Remove(scrapeConfigFilePath))
+
+ // Checksum should change.
+ deletedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, deletedChecksum)
+
+ // Restore the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Main File Change", func(t *testing.T) {
+ // Modify the main YAML file.
+ modifiedYamlContent := `
+global:
+ scrape_interval: 3s
+rule_files:
+ - rule_file.yml
+scrape_config_files:
+ - scrape_config.yml
+`
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the main YAML file.
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Rule File Removed from YAML Config", func(t *testing.T) {
+ // Modify the YAML content to remove the rule file.
+ modifiedYamlContent := `
+scrape_config_files:
+ - scrape_config.yml
+`
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the YAML content.
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Scrape Config Removed from YAML Config", func(t *testing.T) {
+ // Modify the YAML content to remove the scrape config file.
+ modifiedYamlContent := `
+rule_files:
+ - rule_file.yml
+`
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
+
+ // Checksum should change.
+ modifiedChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, modifiedChecksum)
+
+ // Revert the YAML content.
+ require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Empty Rule File", func(t *testing.T) {
+ // Write an empty rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(""), 0o644))
+
+ // Checksum should change.
+ emptyChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, emptyChecksum)
+
+ // Restore the rule file.
+ require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+
+ t.Run("Empty Scrape Config File", func(t *testing.T) {
+ // Write an empty scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(""), 0o644))
+
+ // Checksum should change.
+ emptyChecksum := calculateChecksum(t, yamlFilePath)
+ require.NotEqual(t, originalChecksum, emptyChecksum)
+
+ // Restore the scrape config file.
+ require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
+
+ // Checksum should return to the original.
+ revertedChecksum := calculateChecksum(t, yamlFilePath)
+ require.Equal(t, originalChecksum, revertedChecksum)
+ })
+}
+
+// calculateChecksum generates a checksum for the given YAML file path.
+func calculateChecksum(t *testing.T, yamlFilePath string) string {
+ checksum, err := GenerateChecksum(yamlFilePath)
+ require.NoError(t, err)
+ require.NotEmpty(t, checksum)
+ return checksum
+}
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index 7d9e5a3c80..8fefa8ecc9 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -15,6 +15,7 @@ The Prometheus monitoring server
| -h
, --help
| Show context-sensitive help (also try --help-long and --help-man). | |
| --version
| Show application version. | |
| --config.file
| Prometheus configuration file path. | `prometheus.yml` |
+| --config.auto-reload-interval
| Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` |
| --web.listen-address
... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` |
| --auto-gomemlimit.ratio
| The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` |
| --web.config.file
| [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | |
@@ -57,7 +58,7 @@ The Prometheus monitoring server
| --query.max-concurrency
| Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| --query.max-samples
| Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
| --scrape.name-escaping-scheme
| Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` |
-| --enable-feature
... | Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
+| --enable-feature
... | Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, auto-reload-config, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| --log.level
| Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| --log.format
| Output format of log messages. One of: [logfmt, json] | `logfmt` |
diff --git a/docs/feature_flags.md b/docs/feature_flags.md
index 7b07a04d0e..1ef9efb9b1 100644
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@@ -265,3 +265,15 @@ This allows optionally preserving the `__name__` label via the `label_replace` a
When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set.
By itself, this flag does not enable the request of UTF-8 names via content negotiation.
Users will also have to set `metric_name_validation_scheme` in scrape configs to enable the feature either on the global config or on a per-scrape config basis.
+
+## Auto Reload Config
+
+`--enable-feature=auto-reload-config`
+
+When enabled, Prometheus will automatically reload its configuration file at a
+specified interval. The interval is defined by the
+`--config.auto-reload-interval` flag, which defaults to `30s`.
+
+Configuration reloads are triggered by detecting changes in the checksum of the
+main configuration file or any referenced files, such as rule and scrape
+configurations.