mirror of
https://github.com/prometheus/prometheus.git
synced 2024-11-09 23:24:05 -08:00
Add option to auto detect configuration changes.
Fixes https://github.com/prometheus/prometheus/issues/9783#issuecomment-1669341273 Signed-off-by: bwplotka <bwplotka@gmail.com>
This commit is contained in:
parent
8d47b3d497
commit
9254279be1
|
@ -47,6 +47,7 @@ import (
|
||||||
promlogflag "github.com/prometheus/common/promlog/flag"
|
promlogflag "github.com/prometheus/common/promlog/flag"
|
||||||
"github.com/prometheus/common/version"
|
"github.com/prometheus/common/version"
|
||||||
toolkit_web "github.com/prometheus/exporter-toolkit/web"
|
toolkit_web "github.com/prometheus/exporter-toolkit/web"
|
||||||
|
"github.com/prometheus/prometheus/util/fswatch"
|
||||||
"go.uber.org/atomic"
|
"go.uber.org/atomic"
|
||||||
"go.uber.org/automaxprocs/maxprocs"
|
"go.uber.org/automaxprocs/maxprocs"
|
||||||
"k8s.io/klog"
|
"k8s.io/klog"
|
||||||
|
@ -129,6 +130,8 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla
|
||||||
|
|
||||||
type flagConfig struct {
|
type flagConfig struct {
|
||||||
configFile string
|
configFile string
|
||||||
|
configWatchInterval model.Duration
|
||||||
|
configWatchDelay model.Duration
|
||||||
|
|
||||||
agentStoragePath string
|
agentStoragePath string
|
||||||
serverStoragePath string
|
serverStoragePath string
|
||||||
|
@ -253,6 +256,19 @@ func main() {
|
||||||
a.Flag("config.file", "Prometheus configuration file path.").
|
a.Flag("config.file", "Prometheus configuration file path.").
|
||||||
Default("prometheus.yml").StringVar(&cfg.configFile)
|
Default("prometheus.yml").StringVar(&cfg.configFile)
|
||||||
|
|
||||||
|
a.Flag("config.watch-interval", "For non-zero duration, Prometheus will watch for configuration file changes, "+
|
||||||
|
"as well as, previously specified rule changes and scrape config file changes. Once changes are noticed,"+
|
||||||
|
"and after delay specified in --config.watch-delay, Prometheus will self-reload. "+
|
||||||
|
"Change detection is done through filesystem inotify with the regular interval specified in the flag, as well as, checksum validation."+
|
||||||
|
"With this flag, there is no need to reload Prometheus on configuration changes from the outside.").
|
||||||
|
Default("0").SetValue(&cfg.configWatchInterval)
|
||||||
|
|
||||||
|
a.Flag("config.watch-delay", "The duration between noticing the configuration changes "+
|
||||||
|
"and Prometheus self-reloading. Needed for throttling reloads which can be expensive."+
|
||||||
|
"For automation that updates configuration it's common to update file one by one within seconds."+
|
||||||
|
"Delay allows waiting some time to perform one reload for multiple small changes within short period").
|
||||||
|
Default("30s").SetValue(&cfg.configWatchDelay)
|
||||||
|
|
||||||
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry.").
|
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry.").
|
||||||
Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress)
|
Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress)
|
||||||
|
|
||||||
|
@ -723,6 +739,18 @@ func main() {
|
||||||
// This is passed to ruleManager.Update().
|
// This is passed to ruleManager.Update().
|
||||||
externalURL := cfg.web.ExternalURL.String()
|
externalURL := cfg.web.ExternalURL.String()
|
||||||
|
|
||||||
|
var configWatcher *fswatch.Watch
|
||||||
|
if cfg.configWatchInterval > 0 {
|
||||||
|
configWatcher = fswatch.New(
|
||||||
|
prometheus.DefaultRegisterer,
|
||||||
|
"config",
|
||||||
|
logger,
|
||||||
|
time.Duration(cfg.configWatchInterval),
|
||||||
|
time.Duration(cfg.configWatchDelay),
|
||||||
|
)
|
||||||
|
configWatcher.AddFiles(context.TODO(), cfg.configFile)
|
||||||
|
}
|
||||||
|
|
||||||
reloaders := []reloader{
|
reloaders := []reloader{
|
||||||
{
|
{
|
||||||
name: "db_storage",
|
name: "db_storage",
|
||||||
|
@ -815,6 +843,44 @@ func main() {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if configWatcher != nil {
|
||||||
|
reloaders = append(reloaders, reloader{
|
||||||
|
name: configWatcher.Name(),
|
||||||
|
reloader: func(c *config.Config) error {
|
||||||
|
if agentMode {
|
||||||
|
// No-op in Agent mode
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.TODO()
|
||||||
|
if err := configWatcher.Reset(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := configWatcher.AddFiles(ctx, cfg.configFile); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all rule files matching the configuration paths.
|
||||||
|
var files []string
|
||||||
|
for _, pat := range c.RuleFiles {
|
||||||
|
fs, err := filepath.Glob(pat)
|
||||||
|
if err != nil {
|
||||||
|
// The only error can be a bad pattern.
|
||||||
|
return fmt.Errorf("error retrieving rule files for %s: %w", pat, err)
|
||||||
|
}
|
||||||
|
files = append(files, fs...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := configWatcher.AddFiles(ctx, files...); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Add scrape config files
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
prometheus.MustRegister(configSuccess)
|
prometheus.MustRegister(configSuccess)
|
||||||
prometheus.MustRegister(configSuccessTime)
|
prometheus.MustRegister(configSuccessTime)
|
||||||
|
|
||||||
|
@ -941,6 +1007,20 @@ func main() {
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
// Optional configuration file watcher.
|
||||||
|
if configWatcher != nil {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
g.Add(
|
||||||
|
func() error {
|
||||||
|
<-reloadReady.C
|
||||||
|
configWatcher.Run(ctx)
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
func(err error) {
|
||||||
|
cancel()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
{
|
{
|
||||||
// Tracing manager.
|
// Tracing manager.
|
||||||
g.Add(
|
g.Add(
|
||||||
|
@ -971,6 +1051,7 @@ func main() {
|
||||||
case <-hup:
|
case <-hup:
|
||||||
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
||||||
level.Error(logger).Log("msg", "Error reloading config", "err", err)
|
level.Error(logger).Log("msg", "Error reloading config", "err", err)
|
||||||
|
// TODO: metric?
|
||||||
}
|
}
|
||||||
case rc := <-webHandler.Reload():
|
case rc := <-webHandler.Reload():
|
||||||
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
||||||
|
@ -979,6 +1060,11 @@ func main() {
|
||||||
} else {
|
} else {
|
||||||
rc <- nil
|
rc <- nil
|
||||||
}
|
}
|
||||||
|
case <-configWatcher.FilesChanged():
|
||||||
|
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
|
||||||
|
level.Error(logger).Log("msg", "Error reloading config after watcher notification", "err", err)
|
||||||
|
// TODO: metric?
|
||||||
|
}
|
||||||
case <-cancel:
|
case <-cancel:
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
58
util/fswatch/fswatch.go
Normal file
58
util/fswatch/fswatch.go
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
package fswatch
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-kit/log"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Watch struct {
|
||||||
|
name string
|
||||||
|
logger log.Logger
|
||||||
|
interval time.Duration
|
||||||
|
delay time.Duration
|
||||||
|
|
||||||
|
// TODO: Close it, handle it.
|
||||||
|
notifyCh chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func New(
|
||||||
|
reg prometheus.Registerer,
|
||||||
|
name string,
|
||||||
|
logger log.Logger,
|
||||||
|
interval time.Duration,
|
||||||
|
delay time.Duration,
|
||||||
|
) *Watch {
|
||||||
|
// TODO: Add metrics
|
||||||
|
|
||||||
|
return &Watch{name: name, logger: logger, interval: interval, delay: delay, notifyCh: make(chan struct{}, 1)}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(bwplotka): In future we could consider string slice channel to mention which files changed. For now
|
||||||
|
// reload does not care (it reloads all).
|
||||||
|
// Only one caller at the time can use this.
|
||||||
|
func (w *Watch) FilesChanged() <-chan struct{} {
|
||||||
|
if w == nil {
|
||||||
|
return nil // A receive from a nil channel blocks forever.
|
||||||
|
}
|
||||||
|
return w.notifyCh
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *Watch) Name() string { return w.name }
|
||||||
|
|
||||||
|
func (w *Watch) AddFiles(ctx context.Context, file ...string) error {
|
||||||
|
return errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *Watch) Reset(ctx context.Context) error {
|
||||||
|
return errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run errors means run stopped working, it should be safe to restart by calling Run again.
|
||||||
|
func (w *Watch) Run(ctx context.Context) error {
|
||||||
|
// TODO: Copy the code from https://github.com/thanos-io/thanos/blob/main/pkg/reloader/reloader.go and adjust based on requirements
|
||||||
|
return errors.New("not implemented")
|
||||||
|
}
|
Loading…
Reference in a new issue