promtool: Support linting of scrape interval (#15719)

* PromTool: Support Scrape Interval Lint Checking

---------

Signed-off-by: zhaowang <zhaowang@apac.freewheel.com>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
Co-authored-by: zhaowang <zhaowang@apac.freewheel.com>
This commit is contained in:
Arve Knudsen 2025-01-15 08:45:05 +01:00 committed by GitHub
parent c12f963997
commit 5df6ea3042
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 167 additions and 58 deletions

View file

@ -2,6 +2,8 @@
## unreleased
* [ENHANCEMENT] promtool: Support linting of scrape interval, through lint option `too-long-scrape-interval`. #15719
## 3.1.0 / 2025-01-02
* [SECURITY] upgrade golang.org/x/crypto to address reported CVE-2024-45337. #15691

View file

@ -73,14 +73,19 @@ const (
// Exit code 3 is used for "one or more lint issues detected".
lintErrExitCode = 3
lintOptionAll = "all"
lintOptionDuplicateRules = "duplicate-rules"
lintOptionNone = "none"
checkHealth = "/-/healthy"
checkReadiness = "/-/ready"
lintOptionAll = "all"
lintOptionDuplicateRules = "duplicate-rules"
lintOptionTooLongScrapeInterval = "too-long-scrape-interval"
lintOptionNone = "none"
checkHealth = "/-/healthy"
checkReadiness = "/-/ready"
)
var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
var (
lintRulesOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
// Same as lintRulesOptions, but including scrape config linting options as well.
lintConfigOptions = append(append([]string{}, lintRulesOptions...), lintOptionTooLongScrapeInterval)
)
func main() {
var (
@ -97,6 +102,10 @@ func main() {
app.HelpFlag.Short('h')
checkCmd := app.Command("check", "Check the resources for validity.")
checkLookbackDelta := checkCmd.Flag(
"query.lookback-delta",
"The server's maximum query lookback duration.",
).Default("5m").Duration()
experimental := app.Flag("experimental", "Enable experimental commands.").Bool()
@ -113,7 +122,7 @@ func main() {
checkConfigSyntaxOnly := checkConfigCmd.Flag("syntax-only", "Only check the config file syntax, ignoring file and content validation referenced in the config").Bool()
checkConfigLint := checkConfigCmd.Flag(
"lint",
"Linting checks to apply to the rules specified in the config. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
"Linting checks to apply to the rules/scrape configs specified in the config. Available options are: "+strings.Join(lintConfigOptions, ", ")+". Use --lint=none to disable linting",
).Default(lintOptionDuplicateRules).String()
checkConfigLintFatal := checkConfigCmd.Flag(
"lint-fatal",
@ -140,7 +149,7 @@ func main() {
).ExistingFiles()
checkRulesLint := checkRulesCmd.Flag(
"lint",
"Linting checks to apply. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
"Linting checks to apply. Available options are: "+strings.Join(lintRulesOptions, ", ")+". Use --lint=none to disable linting",
).Default(lintOptionDuplicateRules).String()
checkRulesLintFatal := checkRulesCmd.Flag(
"lint-fatal",
@ -339,7 +348,7 @@ func main() {
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, model.Duration(*checkLookbackDelta)), *configFiles...))
case checkServerHealthCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper)))
@ -351,7 +360,7 @@ func main() {
os.Exit(CheckWebConfig(*webConfigFiles...))
case checkRulesCmd.FullCommand():
os.Exit(CheckRules(newLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...))
os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...))
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics(*checkMetricsExtended))
@ -445,15 +454,15 @@ func checkExperimental(f bool) {
var errLint = errors.New("lint error")
type lintConfig struct {
type rulesLintConfig struct {
all bool
duplicateRules bool
fatal bool
}
func newLintConfig(stringVal string, fatal bool) lintConfig {
func newRulesLintConfig(stringVal string, fatal bool) rulesLintConfig {
items := strings.Split(stringVal, ",")
ls := lintConfig{
ls := rulesLintConfig{
fatal: fatal,
}
for _, setting := range items {
@ -464,16 +473,57 @@ func newLintConfig(stringVal string, fatal bool) lintConfig {
ls.duplicateRules = true
case lintOptionNone:
default:
fmt.Printf("WARNING: unknown lint option %s\n", setting)
fmt.Printf("WARNING: unknown lint option: %q\n", setting)
}
}
return ls
}
func (ls lintConfig) lintDuplicateRules() bool {
func (ls rulesLintConfig) lintDuplicateRules() bool {
return ls.all || ls.duplicateRules
}
type configLintConfig struct {
rulesLintConfig
lookbackDelta model.Duration
}
func newConfigLintConfig(optionsStr string, fatal bool, lookbackDelta model.Duration) configLintConfig {
c := configLintConfig{
rulesLintConfig: rulesLintConfig{
fatal: fatal,
},
}
lintNone := false
var rulesOptions []string
for _, option := range strings.Split(optionsStr, ",") {
switch option {
case lintOptionAll, lintOptionTooLongScrapeInterval:
c.lookbackDelta = lookbackDelta
if option == lintOptionAll {
rulesOptions = append(rulesOptions, lintOptionAll)
}
case lintOptionNone:
lintNone = true
default:
rulesOptions = append(rulesOptions, option)
}
}
if lintNone {
c.lookbackDelta = 0
rulesOptions = nil
}
if len(rulesOptions) > 0 {
c.rulesLintConfig = newRulesLintConfig(strings.Join(rulesOptions, ","), fatal)
}
return c
}
// CheckServerStatus - healthy & ready.
func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
if serverURL.Scheme == "" {
@ -512,12 +562,12 @@ func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper ht
}
// CheckConfig validates configuration files.
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int {
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig, files ...string) int {
failed := false
hasErrors := false
for _, f := range files {
ruleFiles, err := checkConfig(agentMode, f, checkSyntaxOnly)
ruleFiles, scrapeConfigs, err := checkConfig(agentMode, f, checkSyntaxOnly)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
hasErrors = true
@ -530,12 +580,12 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files
}
fmt.Println()
rulesFailed, rulesHasErrors := checkRules(ruleFiles, lintSettings)
if rulesFailed {
failed = rulesFailed
}
if rulesHasErrors {
hasErrors = rulesHasErrors
if !checkSyntaxOnly {
scrapeConfigsFailed := lintScrapeConfigs(scrapeConfigs, lintSettings)
failed = failed || scrapeConfigsFailed
rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.rulesLintConfig)
failed = failed || rulesFailed
hasErrors = hasErrors || rulesHaveErrors
}
}
if failed && hasErrors {
@ -574,12 +624,12 @@ func checkFileExists(fn string) error {
return err
}
func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) {
func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, []*config.ScrapeConfig, error) {
fmt.Println("Checking", filename)
cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger())
if err != nil {
return nil, err
return nil, nil, err
}
var ruleFiles []string
@ -587,15 +637,15 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, rf := range cfg.RuleFiles {
rfs, err := filepath.Glob(rf)
if err != nil {
return nil, err
return nil, nil, err
}
// If an explicit file was given, error if it is not accessible.
if !strings.Contains(rf, "*") {
if len(rfs) == 0 {
return nil, fmt.Errorf("%q does not point to an existing file", rf)
return nil, nil, fmt.Errorf("%q does not point to an existing file", rf)
}
if err := checkFileExists(rfs[0]); err != nil {
return nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
return nil, nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
}
}
ruleFiles = append(ruleFiles, rfs...)
@ -609,26 +659,26 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
var err error
scfgs, err = cfg.GetScrapeConfigs()
if err != nil {
return nil, fmt.Errorf("error loading scrape configs: %w", err)
return nil, nil, fmt.Errorf("error loading scrape configs: %w", err)
}
}
for _, scfg := range scfgs {
if !checkSyntaxOnly && scfg.HTTPClientConfig.Authorization != nil {
if err := checkFileExists(scfg.HTTPClientConfig.Authorization.CredentialsFile); err != nil {
return nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
return nil, nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
}
}
if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
return nil, err
return nil, nil, err
}
for _, c := range scfg.ServiceDiscoveryConfigs {
switch c := c.(type) {
case *kubernetes.SDConfig:
if err := checkTLSConfig(c.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
return nil, err
return nil, nil, err
}
case *file.SDConfig:
if checkSyntaxOnly {
@ -637,17 +687,17 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, file := range c.Files {
files, err := filepath.Glob(file)
if err != nil {
return nil, err
return nil, nil, err
}
if len(files) != 0 {
for _, f := range files {
var targetGroups []*targetgroup.Group
targetGroups, err = checkSDFile(f)
if err != nil {
return nil, fmt.Errorf("checking SD file %q: %w", file, err)
return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
}
if err := checkTargetGroupsForScrapeConfig(targetGroups, scfg); err != nil {
return nil, err
return nil, nil, err
}
}
continue
@ -656,7 +706,7 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
}
case discovery.StaticConfig:
if err := checkTargetGroupsForScrapeConfig(c, scfg); err != nil {
return nil, err
return nil, nil, err
}
}
}
@ -673,18 +723,18 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, file := range c.Files {
files, err := filepath.Glob(file)
if err != nil {
return nil, err
return nil, nil, err
}
if len(files) != 0 {
for _, f := range files {
var targetGroups []*targetgroup.Group
targetGroups, err = checkSDFile(f)
if err != nil {
return nil, fmt.Errorf("checking SD file %q: %w", file, err)
return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
}
if err := checkTargetGroupsForAlertmanager(targetGroups, amcfg); err != nil {
return nil, err
return nil, nil, err
}
}
continue
@ -693,12 +743,12 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
}
case discovery.StaticConfig:
if err := checkTargetGroupsForAlertmanager(c, amcfg); err != nil {
return nil, err
return nil, nil, err
}
}
}
}
return ruleFiles, nil
return ruleFiles, scfgs, nil
}
func checkTLSConfig(tlsConfig promconfig.TLSConfig, checkSyntaxOnly bool) error {
@ -760,7 +810,7 @@ func checkSDFile(filename string) ([]*targetgroup.Group, error) {
}
// CheckRules validates rule files.
func CheckRules(ls lintConfig, files ...string) int {
func CheckRules(ls rulesLintConfig, files ...string) int {
failed := false
hasErrors := false
if len(files) == 0 {
@ -780,7 +830,7 @@ func CheckRules(ls lintConfig, files ...string) int {
}
// checkRulesFromStdin validates rule from stdin.
func checkRulesFromStdin(ls lintConfig) (bool, bool) {
func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) {
failed := false
hasErrors := false
fmt.Println("Checking standard input")
@ -818,7 +868,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
}
// checkRules validates rule files.
func checkRules(files []string, ls lintConfig) (bool, bool) {
func checkRules(files []string, ls rulesLintConfig) (bool, bool) {
failed := false
hasErrors := false
for _, f := range files {
@ -852,7 +902,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
return failed, hasErrors
}
func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []error) {
func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings rulesLintConfig) (int, []error) {
numRules := 0
for _, rg := range rgs.Groups {
numRules += len(rg.Rules)
@ -876,6 +926,16 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []e
return numRules, nil
}
func lintScrapeConfigs(scrapeConfigs []*config.ScrapeConfig, lintSettings configLintConfig) bool {
for _, scfg := range scrapeConfigs {
if lintSettings.lookbackDelta > 0 && scfg.ScrapeInterval >= lintSettings.lookbackDelta {
fmt.Fprintf(os.Stderr, " FAILED: too long scrape interval found, data point will be marked as stale - job: %s, interval: %s\n", scfg.JobName, scfg.ScrapeInterval)
return true
}
}
return false
}
type compareRuleType struct {
metric string
label labels.Labels

View file

@ -234,7 +234,7 @@ func TestCheckTargetConfig(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
_, err := checkConfig(false, "testdata/"+test.file, false)
_, _, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
@ -319,7 +319,7 @@ func TestCheckConfigSyntax(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
_, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly)
_, _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly)
expectedErrMsg := test.err
if strings.Contains(runtime.GOOS, "windows") {
expectedErrMsg = test.errWindows
@ -355,7 +355,7 @@ func TestAuthorizationConfig(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
_, err := checkConfig(false, "testdata/"+test.file, false)
_, _, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error())
return
@ -508,7 +508,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false))
require.Equal(t, successExitCode, exitCode, "")
})
@ -530,7 +530,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false))
require.Equal(t, failureExitCode, exitCode, "")
})
@ -552,7 +552,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true))
require.Equal(t, lintErrExitCode, exitCode, "")
})
}
@ -560,23 +560,66 @@ func TestCheckRules(t *testing.T) {
func TestCheckRulesWithRuleFiles(t *testing.T) {
t.Run("rules-good", func(t *testing.T) {
t.Parallel()
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml")
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml")
require.Equal(t, successExitCode, exitCode, "")
})
t.Run("rules-bad", func(t *testing.T) {
t.Parallel()
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml")
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml")
require.Equal(t, failureExitCode, exitCode, "")
})
t.Run("rules-lint-fatal", func(t *testing.T) {
t.Parallel()
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml")
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml")
require.Equal(t, lintErrExitCode, exitCode, "")
})
}
func TestCheckScrapeConfigs(t *testing.T) {
for _, tc := range []struct {
name string
lookbackDelta model.Duration
expectError bool
}{
{
name: "scrape interval less than lookback delta",
lookbackDelta: model.Duration(11 * time.Minute),
expectError: false,
},
{
name: "scrape interval greater than lookback delta",
lookbackDelta: model.Duration(5 * time.Minute),
expectError: true,
},
{
name: "scrape interval same as lookback delta",
lookbackDelta: model.Duration(10 * time.Minute),
expectError: true,
},
} {
t.Run(tc.name, func(t *testing.T) {
// Non-fatal linting.
code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
require.Equal(t, successExitCode, code, "Non-fatal linting should return success")
// Fatal linting.
code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
if tc.expectError {
require.Equal(t, lintErrExitCode, code, "Fatal linting should return error")
} else {
require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems")
}
// Check syntax only, no linting.
code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only")
// Lint option "none" should disable linting.
code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`)
})
}
}
func TestTSDBDumpCommand(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")

View file

@ -0,0 +1,3 @@
scrape_configs:
- job_name: too_long_scrape_interval_test
scrape_interval: 10m

View file

@ -59,9 +59,10 @@ Check the resources for validity.
#### Flags
| Flag | Description |
| --- | --- |
| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. |
| Flag | Description | Default |
| --- | --- | --- |
| <code class="text-nowrap">--query.lookback-delta</code> | The server's maximum query lookback duration. | `5m` |
| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. | |
@ -102,7 +103,7 @@ Check if the config files are valid or not.
| Flag | Description | Default |
| --- | --- | --- |
| <code class="text-nowrap">--syntax-only</code> | Only check the config file syntax, ignoring file and content validation referenced in the config | |
| <code class="text-nowrap">--lint</code> | Linting checks to apply to the rules specified in the config. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` |
| <code class="text-nowrap">--lint</code> | Linting checks to apply to the rules/scrape configs specified in the config. Available options are: all, duplicate-rules, none, too-long-scrape-interval. Use --lint=none to disable linting | `duplicate-rules` |
| <code class="text-nowrap">--lint-fatal</code> | Make lint errors exit with exit code 3. | `false` |
| <code class="text-nowrap">--agent</code> | Check config file for Prometheus in Agent mode. | |