mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-09 23:24:05 -08:00
Add option to auto detect configuration changes.
Fixes https://github.com/prometheus/prometheus/issues/9783#issuecomment-1669341273 Signed-off-by: bwplotka <bwplotka@gmail.com>
This commit is contained in:
parent
8d47b3d497
commit
9254279be1
|
@ -47,6 +47,7 @@ import (
|
|||
promlogflag "github.com/prometheus/common/promlog/flag"
|
||||
"github.com/prometheus/common/version"
|
||||
toolkit_web "github.com/prometheus/exporter-toolkit/web"
|
||||
"github.com/prometheus/prometheus/util/fswatch"
|
||||
"go.uber.org/atomic"
|
||||
"go.uber.org/automaxprocs/maxprocs"
|
||||
"k8s.io/klog"
|
||||
|
@ -128,7 +129,9 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla
|
|||
}
|
||||
|
||||
type flagConfig struct {
|
||||
configFile string
|
||||
configFile string
|
||||
configWatchInterval model.Duration
|
||||
configWatchDelay model.Duration
|
||||
|
||||
agentStoragePath string
|
||||
serverStoragePath string
|
||||
|
@ -253,6 +256,19 @@ func main() {
|
|||
a.Flag("config.file", "Prometheus configuration file path.").
|
||||
Default("prometheus.yml").StringVar(&cfg.configFile)
|
||||
|
||||
a.Flag("config.watch-interval", "For non-zero duration, Prometheus will watch for configuration file changes, "+
|
||||
"as well as, previously specified rule changes and scrape config file changes. Once changes are noticed,"+
|
||||
"and after delay specified in --config.watch-delay, Prometheus will self-reload. "+
|
||||
"Change detection is done through filesystem inotify with the regular interval specified in the flag, as well as, checksum validation."+
|
||||
"With this flag, there is no need to reload Prometheus on configuration changes from the outside.").
|
||||
Default("0").SetValue(&cfg.configWatchInterval)
|
||||
|
||||
a.Flag("config.watch-delay", "The duration between noticing the configuration changes "+
|
||||
"and Prometheus self-reloading. Needed for throttling reloads which can be expensive."+
|
||||
"For automation that updates configuration it's common to update file one by one within seconds."+
|
||||
"Delay allows waiting some time to perform one reload for multiple small changes within short period").
|
||||
Default("30s").SetValue(&cfg.configWatchDelay)
|
||||
|
||||
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry.").
|
||||
Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress)
|
||||
|
||||
|
@ -723,6 +739,18 @@ func main() {
|
|||
// This is passed to ruleManager.Update().
|
||||
externalURL := cfg.web.ExternalURL.String()
|
||||
|
||||
var configWatcher *fswatch.Watch
|
||||
if cfg.configWatchInterval > 0 {
|
||||
configWatcher = fswatch.New(
|
||||
prometheus.DefaultRegisterer,
|
||||
"config",
|
||||
logger,
|
||||
time.Duration(cfg.configWatchInterval),
|
||||
time.Duration(cfg.configWatchDelay),
|
||||
)
|
||||
configWatcher.AddFiles(context.TODO(), cfg.configFile)
|
||||
}
|
||||
|
||||
reloaders := []reloader{
|
||||
{
|
||||
name: "db_storage",
|
||||
|
@ -815,6 +843,44 @@ func main() {
|
|||
},
|
||||
}
|
||||
|
||||
if configWatcher != nil {
|
||||
reloaders = append(reloaders, reloader{
|
||||
name: configWatcher.Name(),
|
||||
reloader: func(c *config.Config) error {
|
||||
if agentMode {
|
||||
// No-op in Agent mode
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx := context.TODO()
|
||||
if err := configWatcher.Reset(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := configWatcher.AddFiles(ctx, cfg.configFile); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get all rule files matching the configuration paths.
|
||||
var files []string
|
||||
for _, pat := range c.RuleFiles {
|
||||
fs, err := filepath.Glob(pat)
|
||||
if err != nil {
|
||||
// The only error can be a bad pattern.
|
||||
return fmt.Errorf("error retrieving rule files for %s: %w", pat, err)
|
||||
}
|
||||
files = append(files, fs...)
|
||||
}
|
||||
|
||||
if err := configWatcher.AddFiles(ctx, files...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: Add scrape config files
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
prometheus.MustRegister(configSuccess)
|
||||
prometheus.MustRegister(configSuccessTime)
|
||||
|
||||
|
@ -941,6 +1007,20 @@ func main() {
|
|||
},
|
||||
)
|
||||
}
|
||||
// Optional configuration file watcher.
|
||||
if configWatcher != nil {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
g.Add(
|
||||
func() error {
|
||||
<-reloadReady.C
|
||||
configWatcher.Run(ctx)
|
||||
return nil
|
||||
},
|
||||
func(err error) {
|
||||
cancel()
|
||||
},
|
||||
)
|
||||
}
|
||||
{
|
||||
// Tracing manager.
|
||||
g.Add(
|
||||
|
@ -971,6 +1051,7 @@ func main() {
|
|||
case <-hup:
|
||||
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
||||
level.Error(logger).Log("msg", "Error reloading config", "err", err)
|
||||
// TODO: metric?
|
||||
}
|
||||
case rc := <-webHandler.Reload():
|
||||
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
||||
|
@ -979,6 +1060,11 @@ func main() {
|
|||
} else {
|
||||
rc <- nil
|
||||
}
|
||||
case <-configWatcher.FilesChanged():
|
||||
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
||||
level.Error(logger).Log("msg", "Error reloading config after watcher notification", "err", err)
|
||||
// TODO: metric?
|
||||
}
|
||||
case <-cancel:
|
||||
return nil
|
||||
}
|
||||
|
|
58
util/fswatch/fswatch.go
Normal file
58
util/fswatch/fswatch.go
Normal file
|
@ -0,0 +1,58 @@
|
|||
package fswatch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type Watch struct {
|
||||
name string
|
||||
logger log.Logger
|
||||
interval time.Duration
|
||||
delay time.Duration
|
||||
|
||||
// TODO: Close it, handle it.
|
||||
notifyCh chan struct{}
|
||||
}
|
||||
|
||||
func New(
|
||||
reg prometheus.Registerer,
|
||||
name string,
|
||||
logger log.Logger,
|
||||
interval time.Duration,
|
||||
delay time.Duration,
|
||||
) *Watch {
|
||||
// TODO: Add metrics
|
||||
|
||||
return &Watch{name: name, logger: logger, interval: interval, delay: delay, notifyCh: make(chan struct{}, 1)}
|
||||
}
|
||||
|
||||
// TODO(bwplotka): In future we could consider string slice channel to mention which files changed. For now
|
||||
// reload does not care (it reloads all).
|
||||
// Only one caller at the time can use this.
|
||||
func (w *Watch) FilesChanged() <-chan struct{} {
|
||||
if w == nil {
|
||||
return nil // A receive from a nil channel blocks forever.
|
||||
}
|
||||
return w.notifyCh
|
||||
}
|
||||
|
||||
func (w *Watch) Name() string { return w.name }
|
||||
|
||||
func (w *Watch) AddFiles(ctx context.Context, file ...string) error {
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (w *Watch) Reset(ctx context.Context) error {
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
// Run errors means run stopped working, it should be safe to restart by calling Run again.
|
||||
func (w *Watch) Run(ctx context.Context) error {
|
||||
// TODO: Copy the code from https://github.com/thanos-io/thanos/blob/main/pkg/reloader/reloader.go and adjust based on requirements
|
||||
return errors.New("not implemented")
|
||||
}
|
Loading…
Reference in a new issue