diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 066b319e3..6df37900c 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -468,6 +468,14 @@ func main() {
         level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err)
         os.Exit(2)
     }
+    if _, err := cfgFile.GetScrapeConfigs(); err != nil {
+        absPath, pathErr := filepath.Abs(cfg.configFile)
+        if pathErr != nil {
+            absPath = cfg.configFile
+        }
+        level.Error(logger).Log("msg", fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err)
+        os.Exit(2)
+    }
     if cfg.tsdb.EnableExemplarStorage {
         if cfgFile.StorageConfig.ExemplarsConfig == nil {
             cfgFile.StorageConfig.ExemplarsConfig = &config.DefaultExemplarsConfig
@@ -730,7 +738,11 @@ func main() {
             name: "scrape_sd",
             reloader: func(cfg *config.Config) error {
                 c := make(map[string]discovery.Configs)
-                for _, v := range cfg.ScrapeConfigs {
+                scfgs, err := cfg.GetScrapeConfigs()
+                if err != nil {
+                    return err
+                }
+                for _, v := range scfgs {
                     c[v.JobName] = v.ServiceDiscoveryConfigs
                 }
                 return discoveryManagerScrape.ApplyConfig(c)
diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go
index 81c90b4ec..2799cfb8a 100644
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@@ -463,7 +463,18 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
         }
     }
 
-    for _, scfg := range cfg.ScrapeConfigs {
+    var scfgs []*config.ScrapeConfig
+    if checkSyntaxOnly {
+        scfgs = cfg.ScrapeConfigs
+    } else {
+        var err error
+        scfgs, err = cfg.GetScrapeConfigs()
+        if err != nil {
+            return nil, fmt.Errorf("error loading scrape configs: %w", err)
+        }
+    }
+
+    for _, scfg := range scfgs {
         if !checkSyntaxOnly && scfg.HTTPClientConfig.Authorization != nil {
             if err := checkFileExists(scfg.HTTPClientConfig.Authorization.CredentialsFile); err != nil {
                 return nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go
index 981a67af1..e0fa5f191 100644
--- a/cmd/promtool/sd.go
+++ b/cmd/promtool/sd.go
@@ -47,9 +47,15 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefault
     }
 
     var scrapeConfig *config.ScrapeConfig
+    scfgs, err := cfg.GetScrapeConfigs()
+    if err != nil {
+        fmt.Fprintln(os.Stderr, "Cannot load scrape configs", err)
+        return failureExitCode
+    }
+
     jobs := []string{}
     jobMatched := false
-    for _, v := range cfg.ScrapeConfigs {
+    for _, v := range scfgs {
         jobs = append(jobs, v.JobName)
         if v.JobName == sdJobName {
             jobMatched = true
diff --git a/config/config.go b/config/config.go
index 8bc4bf34a..a29c98eed 100644
--- a/config/config.go
+++ b/config/config.go
@@ -216,12 +216,13 @@ var (
 
 // Config is the top-level configuration for Prometheus's config files.
 type Config struct {
-    GlobalConfig   GlobalConfig    `yaml:"global"`
-    AlertingConfig AlertingConfig  `yaml:"alerting,omitempty"`
-    RuleFiles      []string        `yaml:"rule_files,omitempty"`
-    ScrapeConfigs  []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
-    StorageConfig  StorageConfig   `yaml:"storage,omitempty"`
-    TracingConfig  TracingConfig   `yaml:"tracing,omitempty"`
+    GlobalConfig      GlobalConfig    `yaml:"global"`
+    AlertingConfig    AlertingConfig  `yaml:"alerting,omitempty"`
+    RuleFiles         []string        `yaml:"rule_files,omitempty"`
+    ScrapeConfigFiles []string        `yaml:"scrape_config_files,omitempty"`
+    ScrapeConfigs     []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
+    StorageConfig     StorageConfig   `yaml:"storage,omitempty"`
+    TracingConfig     TracingConfig   `yaml:"tracing,omitempty"`
 
     RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"`
     RemoteReadConfigs  []*RemoteReadConfig  `yaml:"remote_read,omitempty"`
@@ -235,6 +236,9 @@ func (c *Config) SetDirectory(dir string) {
     for i, file := range c.RuleFiles {
         c.RuleFiles[i] = config.JoinDir(dir, file)
     }
+    for i, file := range c.ScrapeConfigFiles {
+        c.ScrapeConfigFiles[i] = config.JoinDir(dir, file)
+    }
     for _, c := range c.ScrapeConfigs {
         c.SetDirectory(dir)
     }
@@ -254,6 +258,58 @@ func (c Config) String() string {
     return string(b)
 }
 
+// GetScrapeConfigs returns the scrape configurations.
+func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
+    scfgs := make([]*ScrapeConfig, len(c.ScrapeConfigs))
+
+    jobNames := map[string]string{}
+    for i, scfg := range c.ScrapeConfigs {
+        // We do these checks for library users that would not call Validate in
+        // Unmarshal.
+        if err := scfg.Validate(c.GlobalConfig.ScrapeInterval, c.GlobalConfig.ScrapeTimeout); err != nil {
+            return nil, err
+        }
+
+        if _, ok := jobNames[scfg.JobName]; ok {
+            return nil, fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
+        }
+        jobNames[scfg.JobName] = "main config file"
+        scfgs[i] = scfg
+    }
+    for _, pat := range c.ScrapeConfigFiles {
+        fs, err := filepath.Glob(pat)
+        if err != nil {
+            // The only error can be a bad pattern.
+            return nil, fmt.Errorf("error retrieving scrape config files for %q: %w", pat, err)
+        }
+        for _, filename := range fs {
+            cfg := ScrapeConfigs{}
+            content, err := os.ReadFile(filename)
+            if err != nil {
+                return nil, fileErr(filename, err)
+            }
+            err = yaml.UnmarshalStrict(content, &cfg)
+            if err != nil {
+                return nil, fileErr(filename, err)
+            }
+            for _, scfg := range cfg.ScrapeConfigs {
+                if err := scfg.Validate(c.GlobalConfig.ScrapeInterval, c.GlobalConfig.ScrapeTimeout); err != nil {
+                    return nil, fileErr(filename, err)
+                }
+
+                if f, ok := jobNames[scfg.JobName]; ok {
+                    return nil, fileErr(filename, fmt.Errorf("found multiple scrape configs with job name %q, first found in %s", scfg.JobName, f))
+                }
+                jobNames[scfg.JobName] = fmt.Sprintf("%q", filePath(filename))
+
+                scfg.SetDirectory(filepath.Dir(filename))
+                scfgs = append(scfgs, scfg)
+            }
+        }
+    }
+    return scfgs, nil
+}
+
 // UnmarshalYAML implements the yaml.Unmarshaler interface.
 func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
     *c = DefaultConfig
@@ -276,26 +332,18 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
             return fmt.Errorf("invalid rule file path %q", rf)
         }
     }
+
+    for _, sf := range c.ScrapeConfigFiles {
+        if !patRulePath.MatchString(sf) {
+            return fmt.Errorf("invalid scrape config file path %q", sf)
+        }
+    }
+
     // Do global overrides and validate unique names.
     jobNames := map[string]struct{}{}
     for _, scfg := range c.ScrapeConfigs {
-        if scfg == nil {
-            return errors.New("empty or null scrape config section")
-        }
-        // First set the correct scrape interval, then check that the timeout
-        // (inferred or explicit) is not greater than that.
-        if scfg.ScrapeInterval == 0 {
-            scfg.ScrapeInterval = c.GlobalConfig.ScrapeInterval
-        }
-        if scfg.ScrapeTimeout > scfg.ScrapeInterval {
-            return fmt.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName)
-        }
-        if scfg.ScrapeTimeout == 0 {
-            if c.GlobalConfig.ScrapeTimeout > scfg.ScrapeInterval {
-                scfg.ScrapeTimeout = scfg.ScrapeInterval
-            } else {
-                scfg.ScrapeTimeout = c.GlobalConfig.ScrapeTimeout
-            }
+        if err := scfg.Validate(c.GlobalConfig.ScrapeInterval, c.GlobalConfig.ScrapeTimeout); err != nil {
+            return err
         }
 
         if _, ok := jobNames[scfg.JobName]; ok {
@@ -401,6 +449,10 @@ func (c *GlobalConfig) isZero() bool {
         c.QueryLogFile == ""
 }
 
+type ScrapeConfigs struct {
+    ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
+}
+
 // ScrapeConfig configures a scraping unit for Prometheus.
 type ScrapeConfig struct {
     // The job name to which the job label is set by default.
@@ -494,6 +546,28 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
     return nil
 }
 
+func (c *ScrapeConfig) Validate(defaultInterval, defaultTimeout model.Duration) error {
+    if c == nil {
+        return errors.New("empty or null scrape config section")
+    }
+    // First set the correct scrape interval, then check that the timeout
+    // (inferred or explicit) is not greater than that.
+    if c.ScrapeInterval == 0 {
+        c.ScrapeInterval = defaultInterval
+    }
+    if c.ScrapeTimeout > c.ScrapeInterval {
+        return fmt.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", c.JobName)
+    }
+    if c.ScrapeTimeout == 0 {
+        if defaultTimeout > c.ScrapeInterval {
+            c.ScrapeTimeout = c.ScrapeInterval
+        } else {
+            c.ScrapeTimeout = defaultTimeout
+        }
+    }
+    return nil
+}
+
 // MarshalYAML implements the yaml.Marshaler interface.
 func (c *ScrapeConfig) MarshalYAML() (interface{}, error) {
     return discovery.MarshalYAMLWithInlineConfigs(c)
@@ -936,3 +1010,15 @@ func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro
     // Thus we just do its validation here.
     return c.HTTPClientConfig.Validate()
 }
+
+func filePath(filename string) string {
+    absPath, err := filepath.Abs(filename)
+    if err != nil {
+        return filename
+    }
+    return absPath
+}
+
+func fileErr(filename string, err error) error {
+    return fmt.Errorf("%q: %w", filePath(filename), err)
+}
diff --git a/config/config_test.go b/config/config_test.go
index 26f257ea2..3ee327c5f 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -1693,6 +1693,10 @@ var expectedErrors = []struct {
         filename: "ovhcloud_bad_service.bad.yml",
         errMsg:   "unknown service: fakeservice",
     },
+    {
+        filename: "scrape_config_files_glob.bad.yml",
+        errMsg:   `parsing YAML file testdata/scrape_config_files_glob.bad.yml: invalid scrape config file path "scrape_configs/*/*"`,
+    },
 }
 
 func TestBadConfigs(t *testing.T) {
@@ -1779,6 +1783,156 @@ func TestEmptyGlobalBlock(t *testing.T) {
     require.Equal(t, exp, *c)
 }
 
+func TestGetScrapeConfigs(t *testing.T) {
+    sc := func(jobName string, scrapeInterval, scrapeTimeout model.Duration) *ScrapeConfig {
+        return &ScrapeConfig{
+            JobName:          jobName,
+            HonorTimestamps:  true,
+            ScrapeInterval:   scrapeInterval,
+            ScrapeTimeout:    scrapeTimeout,
+            MetricsPath:      "/metrics",
+            Scheme:           "http",
+            HTTPClientConfig: config.DefaultHTTPClientConfig,
+            ServiceDiscoveryConfigs: discovery.Configs{
+                discovery.StaticConfig{
+                    {
+                        Targets: []model.LabelSet{
+                            {
+                                model.AddressLabel: "localhost:8080",
+                            },
+                        },
+                        Source: "0",
+                    },
+                },
+            },
+        }
+    }
+
+    testCases := []struct {
+        name           string
+        configFile     string
+        expectedResult []*ScrapeConfig
+        expectedError  string
+    }{
+        {
+            name:           "An included config file should be a valid global config.",
+            configFile:     "testdata/scrape_config_files.good.yml",
+            expectedResult: []*ScrapeConfig{sc("prometheus", model.Duration(60*time.Second), model.Duration(10*time.Second))},
+        },
+        {
+            name:           "A global config that only includes a scrape config file.",
+            configFile:     "testdata/scrape_config_files_only.good.yml",
+            expectedResult: []*ScrapeConfig{sc("prometheus", model.Duration(60*time.Second), model.Duration(10*time.Second))},
+        },
+        {
+            name:       "A global config that combines scrape config files and scrape configs.",
+            configFile: "testdata/scrape_config_files_combined.good.yml",
+            expectedResult: []*ScrapeConfig{
+                sc("node", model.Duration(60*time.Second), model.Duration(10*time.Second)),
+                sc("prometheus", model.Duration(60*time.Second), model.Duration(10*time.Second)),
+                sc("alertmanager", model.Duration(60*time.Second), model.Duration(10*time.Second)),
+            },
+        },
+        {
+            name:       "A global config that includes a scrape config file with globs.",
+            configFile: "testdata/scrape_config_files_glob.good.yml",
+            expectedResult: []*ScrapeConfig{
+                {
+                    JobName: "prometheus",
+
+                    HonorTimestamps: true,
+                    ScrapeInterval:  model.Duration(60 * time.Second),
+                    ScrapeTimeout:   DefaultGlobalConfig.ScrapeTimeout,
+
+                    MetricsPath: DefaultScrapeConfig.MetricsPath,
+                    Scheme:      DefaultScrapeConfig.Scheme,
+
+                    HTTPClientConfig: config.HTTPClientConfig{
+                        TLSConfig: config.TLSConfig{
+                            CertFile: filepath.FromSlash("testdata/scrape_configs/valid_cert_file"),
+                            KeyFile:  filepath.FromSlash("testdata/scrape_configs/valid_key_file"),
+                        },
+                        FollowRedirects: true,
+                        EnableHTTP2:     true,
+                    },
+
+                    ServiceDiscoveryConfigs: discovery.Configs{
+                        discovery.StaticConfig{
+                            {
+                                Targets: []model.LabelSet{
+                                    {model.AddressLabel: "localhost:8080"},
+                                },
+                                Source: "0",
+                            },
+                        },
+                    },
+                },
+                {
+                    JobName: "node",
+
+                    HonorTimestamps: true,
+                    ScrapeInterval:  model.Duration(15 * time.Second),
+                    ScrapeTimeout:   DefaultGlobalConfig.ScrapeTimeout,
+                    HTTPClientConfig: config.HTTPClientConfig{
+                        TLSConfig: config.TLSConfig{
+                            CertFile: filepath.FromSlash("testdata/valid_cert_file"),
+                            KeyFile:  filepath.FromSlash("testdata/valid_key_file"),
+                        },
+                        FollowRedirects: true,
+                        EnableHTTP2:     true,
+                    },
+
+                    MetricsPath: DefaultScrapeConfig.MetricsPath,
+                    Scheme:      DefaultScrapeConfig.Scheme,
+
+                    ServiceDiscoveryConfigs: discovery.Configs{
+                        &vultr.SDConfig{
+                            HTTPClientConfig: config.HTTPClientConfig{
+                                Authorization: &config.Authorization{
+                                    Type:        "Bearer",
+                                    Credentials: "abcdef",
+                                },
+                                FollowRedirects: true,
+                                EnableHTTP2:     true,
+                            },
+                            Port:            80,
+                            RefreshInterval: model.Duration(60 * time.Second),
+                        },
+                    },
+                },
+            },
+        },
+        {
+            name:          "A global config that includes the same scrape config file twice.",
+            configFile:    "testdata/scrape_config_files_double_import.bad.yml",
+            expectedError: `found multiple scrape configs with job name "prometheus"`,
+        },
+        {
+            name:          "A global config that includes a scrape config identical to a scrape config in the main file.",
+            configFile:    "testdata/scrape_config_files_duplicate.bad.yml",
+            expectedError: `found multiple scrape configs with job name "prometheus"`,
+        },
+        {
+            name:          "A global config that includes a scrape config file with errors.",
+            configFile:    "testdata/scrape_config_files_global.bad.yml",
+            expectedError: `scrape timeout greater than scrape interval for scrape config with job name "prometheus"`,
+        },
+    }
+
+    for _, tc := range testCases {
+        t.Run(tc.name, func(t *testing.T) {
+            c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger())
+            require.NoError(t, err)
+
+            scfgs, err := c.GetScrapeConfigs()
+            if len(tc.expectedError) > 0 {
+                require.ErrorContains(t, err, tc.expectedError)
+            }
+            require.Equal(t, tc.expectedResult, scfgs)
+        })
+    }
+}
+
 func kubernetesSDHostURL() config.URL {
     tURL, _ := url.Parse("https://localhost:1234")
     return config.URL{URL: tURL}
diff --git a/config/testdata/scrape_config_files.bad.yml b/config/testdata/scrape_config_files.bad.yml
new file mode 100644
index 000000000..1b7690899
--- /dev/null
+++ b/config/testdata/scrape_config_files.bad.yml
@@ -0,0 +1,6 @@
+scrape_configs:
+  - job_name: prometheus
+    scrape_interval: 10s
+    scrape_timeout: 20s
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files.good.yml b/config/testdata/scrape_config_files.good.yml
new file mode 100644
index 000000000..5a0c2df9d
--- /dev/null
+++ b/config/testdata/scrape_config_files.good.yml
@@ -0,0 +1,4 @@
+scrape_configs:
+  - job_name: prometheus
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files2.good.yml b/config/testdata/scrape_config_files2.good.yml
new file mode 100644
index 000000000..f10bdf06e
--- /dev/null
+++ b/config/testdata/scrape_config_files2.good.yml
@@ -0,0 +1,4 @@
+scrape_configs:
+  - job_name: alertmanager
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files_combined.good.yml b/config/testdata/scrape_config_files_combined.good.yml
new file mode 100644
index 000000000..ee3b0909c
--- /dev/null
+++ b/config/testdata/scrape_config_files_combined.good.yml
@@ -0,0 +1,7 @@
+scrape_config_files:
+  - scrape_config_files.good.yml
+  - scrape_config_files2.good.yml
+scrape_configs:
+  - job_name: node
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files_double_import.bad.yml b/config/testdata/scrape_config_files_double_import.bad.yml
new file mode 100644
index 000000000..b153d7f4b
--- /dev/null
+++ b/config/testdata/scrape_config_files_double_import.bad.yml
@@ -0,0 +1,3 @@
+scrape_config_files:
+  - scrape_config_files.good.yml
+  - scrape_config_files.good.yml
diff --git a/config/testdata/scrape_config_files_duplicate.bad.yml b/config/testdata/scrape_config_files_duplicate.bad.yml
new file mode 100644
index 000000000..ceac36eaa
--- /dev/null
+++ b/config/testdata/scrape_config_files_duplicate.bad.yml
@@ -0,0 +1,6 @@
+scrape_config_files:
+  - scrape_config_files.good.yml
+scrape_configs:
+  - job_name: prometheus
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files_glob.bad.yml b/config/testdata/scrape_config_files_glob.bad.yml
new file mode 100644
index 000000000..f3cd63676
--- /dev/null
+++ b/config/testdata/scrape_config_files_glob.bad.yml
@@ -0,0 +1,6 @@
+scrape_config_files:
+  - scrape_configs/*/*
+scrape_configs:
+  - job_name: node
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files_glob.good.yml b/config/testdata/scrape_config_files_glob.good.yml
new file mode 100644
index 000000000..d72404503
--- /dev/null
+++ b/config/testdata/scrape_config_files_glob.good.yml
@@ -0,0 +1,2 @@
+scrape_config_files:
+  - scrape_configs/*.yml
diff --git a/config/testdata/scrape_config_files_global.bad.yml b/config/testdata/scrape_config_files_global.bad.yml
new file mode 100644
index 000000000..b825fa62c
--- /dev/null
+++ b/config/testdata/scrape_config_files_global.bad.yml
@@ -0,0 +1,2 @@
+scrape_config_files:
+  - scrape_config_files.bad.yml
diff --git a/config/testdata/scrape_config_files_global_duplicate.bad.yml b/config/testdata/scrape_config_files_global_duplicate.bad.yml
new file mode 100644
index 000000000..b6ec1a7fd
--- /dev/null
+++ b/config/testdata/scrape_config_files_global_duplicate.bad.yml
@@ -0,0 +1,11 @@
+global:
+  scrape_interval: 15s
+
+scrape_config_files:
+  - scrape_config_files.good.yml
+  - scrape_config_files.good.yml
+
+scrape_configs:
+  - job_name: prometheus
+    static_configs:
+      - targets: ['localhost:8080']
diff --git a/config/testdata/scrape_config_files_only.good.yml b/config/testdata/scrape_config_files_only.good.yml
new file mode 100644
index 000000000..9f2711d51
--- /dev/null
+++ b/config/testdata/scrape_config_files_only.good.yml
@@ -0,0 +1,2 @@
+scrape_config_files:
+  - scrape_config_files.good.yml
diff --git a/config/testdata/scrape_configs/scrape_config_files1.good.yml b/config/testdata/scrape_configs/scrape_config_files1.good.yml
new file mode 100644
index 000000000..b0a05c532
--- /dev/null
+++ b/config/testdata/scrape_configs/scrape_config_files1.good.yml
@@ -0,0 +1,7 @@
+scrape_configs:
+  - job_name: prometheus
+    static_configs:
+      - targets: ['localhost:8080']
+    tls_config:
+      cert_file: valid_cert_file
+      key_file: valid_key_file
diff --git a/config/testdata/scrape_configs/scrape_config_files2.good.yml b/config/testdata/scrape_configs/scrape_config_files2.good.yml
new file mode 100644
index 000000000..468d02296
--- /dev/null
+++ b/config/testdata/scrape_configs/scrape_config_files2.good.yml
@@ -0,0 +1,9 @@
+scrape_configs:
+  - job_name: node
+    scrape_interval: 15s
+    tls_config:
+      cert_file: ../valid_cert_file
+      key_file: ../valid_key_file
+    vultr_sd_configs:
+      - authorization:
+          credentials: abcdef
diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index be810541d..974babbf1 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -78,6 +78,11 @@ global:
 rule_files:
   [ - <filepath_glob> ... ]
 
+# Scrape config files specify a list of globs. Scrape configs are read from
+# all matching files and appended to the list of scrape configs.
+scrape_config_files:
+  [ - <filepath_glob> ... ]
+
 # A list of scrape configurations.
 scrape_configs:
   [ - <scrape_config> ... ]
diff --git a/scrape/manager.go b/scrape/manager.go
index e0a710285..69a0eaa1f 100644
--- a/scrape/manager.go
+++ b/scrape/manager.go
@@ -270,8 +270,13 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error {
     m.mtxScrape.Lock()
     defer m.mtxScrape.Unlock()
 
+    scfgs, err := cfg.GetScrapeConfigs()
+    if err != nil {
+        return err
+    }
+
     c := make(map[string]*config.ScrapeConfig)
-    for _, scfg := range cfg.ScrapeConfigs {
+    for _, scfg := range scfgs {
         c[scfg.JobName] = scfg
     }
     m.scrapeConfigs = c
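
As a quick illustration of the new option: the file names below are hypothetical and not part of this change. Per the code above, `SetDirectory` joins each `scrape_config_files` entry with the main config's directory, `GetScrapeConfigs` expands the globs with `filepath.Glob`, and relative paths inside an included file resolve against that file's own directory.

```yaml
# prometheus.yml (hypothetical main config)
global:
  scrape_interval: 15s

# Each entry is a glob, expanded by GetScrapeConfigs via filepath.Glob.
scrape_config_files:
  - conf.d/*.yml

scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
```

```yaml
# conf.d/node.yml (hypothetical included file). Job names must be unique
# across the main file and all included files, and relative paths here
# (e.g. TLS files) resolve against this file's directory.
scrape_configs:
  - job_name: node
    static_configs:
      - targets: ['localhost:9100']
```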
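For library users of package `config`, a minimal sketch of the new accessor, mirroring the call pattern used in `TestGetScrapeConfigs` and `scrape/manager.go` above (the config path is hypothetical):

```go
package main

import (
	"fmt"
	"os"

	"github.com/go-kit/log"

	"github.com/prometheus/prometheus/config"
)

func main() {
	// LoadFile parses and validates the main config file. Included
	// scrape config files are not read at this point.
	cfg, err := config.LoadFile("prometheus.yml", false, false, log.NewNopLogger())
	if err != nil {
		fmt.Fprintln(os.Stderr, "error loading config:", err)
		os.Exit(1)
	}

	// GetScrapeConfigs expands the scrape_config_files globs, validates
	// every scrape config (filling in global interval/timeout defaults),
	// and rejects duplicate job names across the main and included files.
	scfgs, err := cfg.GetScrapeConfigs()
	if err != nil {
		fmt.Fprintln(os.Stderr, "error resolving scrape configs:", err)
		os.Exit(1)
	}
	for _, scfg := range scfgs {
		fmt.Println(scfg.JobName, scfg.ScrapeInterval)
	}
}
```

Note the design choice visible in the promtool hunk: `Config.ScrapeConfigs` remains a plain field so that syntax-only paths (such as `promtool check config --syntax-only`) can inspect the main file without touching the filesystem, while everything that actually scrapes goes through `GetScrapeConfigs`.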