Add the look back duration flag for linting check

Signed-off-by: zhaowang <zhaowang@apac.freewheel.com>
This commit is contained in:
zhaowang 2024-09-10 09:50:58 +08:00
parent 485bf30ea9
commit 8e855a31e6
4 changed files with 46 additions and 23 deletions

View file

@ -74,9 +74,6 @@ const (
lintOptionNone = "none" lintOptionNone = "none"
checkHealth = "/-/healthy" checkHealth = "/-/healthy"
checkReadiness = "/-/ready" checkReadiness = "/-/ready"
// Remove when the lookback delta can be set as a command line flag on PromTool.
defaultLookbackDelta = 5 * time.Minute
) )
var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone} var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
@ -144,6 +141,10 @@ func main() {
checkRulesLintFatal := checkRulesCmd.Flag( checkRulesLintFatal := checkRulesCmd.Flag(
"lint-fatal", "lint-fatal",
"Make lint errors exit with exit code 3.").Default("false").Bool() "Make lint errors exit with exit code 3.").Default("false").Bool()
checkRulesLintLookbackDelta := checkRulesCmd.Flag(
"lint.lookback-delta",
"The maximum lookback duration. This config is only used for rules linting checks.",
).Default("5m").Duration()
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage) checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool() checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
@ -341,7 +342,7 @@ func main() {
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer)) os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand(): case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal, *checkRulesLintLookbackDelta), *configFiles...))
case checkServerHealthCmd.FullCommand(): case checkServerHealthCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper))) os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper)))
@ -353,7 +354,7 @@ func main() {
os.Exit(CheckWebConfig(*webConfigFiles...)) os.Exit(CheckWebConfig(*webConfigFiles...))
case checkRulesCmd.FullCommand(): case checkRulesCmd.FullCommand():
os.Exit(CheckRules(newLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...)) os.Exit(CheckRules(newLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesLintLookbackDelta), *ruleFiles...))
case checkMetricsCmd.FullCommand(): case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics(*checkMetricsExtended)) os.Exit(CheckMetrics(*checkMetricsExtended))
@ -451,12 +452,14 @@ type lintConfig struct {
duplicateRules bool duplicateRules bool
longScrapeInterval bool longScrapeInterval bool
fatal bool fatal bool
lookbackDelta model.Duration
} }
func newLintConfig(stringVal string, fatal bool) lintConfig { func newLintConfig(stringVal string, fatal bool, lookbackDelta time.Duration) lintConfig {
items := strings.Split(stringVal, ",") items := strings.Split(stringVal, ",")
ls := lintConfig{ ls := lintConfig{
fatal: fatal, fatal: fatal,
lookbackDelta: model.Duration(lookbackDelta),
} }
for _, setting := range items { for _, setting := range items {
switch setting { switch setting {
@ -623,7 +626,7 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool, lintSett
for _, scfg := range scfgs { for _, scfg := range scfgs {
if lintSettings.lintLongScrapeInterval() { if lintSettings.lintLongScrapeInterval() {
if scfg.ScrapeInterval >= model.Duration(defaultLookbackDelta) { if scfg.ScrapeInterval >= lintSettings.lookbackDelta {
errMessage := fmt.Sprintf("Long Scrape Interval found. Data point will be marked as stale. Job: %s. Interval: %s", scfg.JobName, scfg.ScrapeInterval.String()) errMessage := fmt.Sprintf("Long Scrape Interval found. Data point will be marked as stale. Job: %s. Interval: %s", scfg.JobName, scfg.ScrapeInterval.String())
return nil, fmt.Errorf("%w %s", errLint, errMessage) return nil, fmt.Errorf("%w %s", errLint, errMessage)
} }

View file

@ -219,7 +219,7 @@ func TestCheckTargetConfig(t *testing.T) {
} }
for _, test := range cases { for _, test := range cases {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, false, newLintConfig(lintOptionNone, false)) _, err := checkConfig(false, "testdata/"+test.file, false, newLintConfig(lintOptionNone, false, 5*time.Minute))
if test.err != "" { if test.err != "" {
require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
return return
@ -302,7 +302,7 @@ func TestCheckConfigSyntax(t *testing.T) {
} }
for _, test := range cases { for _, test := range cases {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly, newLintConfig(lintOptionNone, false)) _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly, newLintConfig(lintOptionNone, false, 5*time.Minute))
expectedErrMsg := test.err expectedErrMsg := test.err
if strings.Contains(runtime.GOOS, "windows") { if strings.Contains(runtime.GOOS, "windows") {
expectedErrMsg = test.errWindows expectedErrMsg = test.errWindows
@ -336,7 +336,7 @@ func TestAuthorizationConfig(t *testing.T) {
for _, test := range cases { for _, test := range cases {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, false, newLintConfig(lintOptionNone, false)) _, err := checkConfig(false, "testdata/"+test.file, false, newLintConfig(lintOptionNone, false, 5*time.Minute))
if test.err != "" { if test.err != "" {
require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error()) require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error())
return return
@ -484,7 +484,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin) defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false)) exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false, 5*time.Minute))
require.Equal(t, successExitCode, exitCode, "") require.Equal(t, successExitCode, exitCode, "")
}) })
@ -506,7 +506,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin) defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false)) exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false, 5*time.Minute))
require.Equal(t, failureExitCode, exitCode, "") require.Equal(t, failureExitCode, exitCode, "")
}) })
@ -528,30 +528,49 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin) defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true)) exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true, 5*time.Minute))
require.Equal(t, lintErrExitCode, exitCode, "") require.Equal(t, lintErrExitCode, exitCode, "")
}) })
for _, c := range []struct {
lookbackDelta time.Duration
expectedErrMsg string
}{
{
lookbackDelta: 5 * time.Minute,
expectedErrMsg: "lint error Long Scrape Interval found. Data point will be marked as stale. Job: long_scrape_interval_test. Interval: 10m",
},
{
lookbackDelta: 20 * time.Minute,
expectedErrMsg: "",
},
} {
t.Run("config-lint-fatal", func(t *testing.T) { t.Run("config-lint-fatal", func(t *testing.T) {
_, err := checkConfig(false, "./testdata/prometheus-config.lint.yml", false, newLintConfig(lintOptionLongScrapeInterval, false)) _, err := checkConfig(false, "./testdata/prometheus-config.lint.yml", false, newLintConfig(lintOptionLongScrapeInterval, false, c.lookbackDelta))
expectedErrMsg := "lint error Long Scrape Interval found. Data point will be marked as stale. Job: long_scrape_interval_test. Interval: 1h" var errMsg string
require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", expectedErrMsg, err.Error()) if err != nil {
errMsg = err.Error()
} else {
errMsg = ""
}
require.Equalf(t, c.expectedErrMsg, errMsg, "Expected error %q, got %q", c.expectedErrMsg, errMsg)
}) })
}
} }
func TestCheckRulesWithRuleFiles(t *testing.T) { func TestCheckRulesWithRuleFiles(t *testing.T) {
t.Run("rules-good", func(t *testing.T) { t.Run("rules-good", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml") exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false, 5*time.Minute), "./testdata/rules.yml")
require.Equal(t, successExitCode, exitCode, "") require.Equal(t, successExitCode, exitCode, "")
}) })
t.Run("rules-bad", func(t *testing.T) { t.Run("rules-bad", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml") exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false, 5*time.Minute), "./testdata/rules-bad.yml")
require.Equal(t, failureExitCode, exitCode, "") require.Equal(t, failureExitCode, exitCode, "")
}) })
t.Run("rules-lint-fatal", func(t *testing.T) { t.Run("rules-lint-fatal", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml") exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true, 5*time.Minute), "./testdata/prometheus-rules.lint.yml")
require.Equal(t, lintErrExitCode, exitCode, "") require.Equal(t, lintErrExitCode, exitCode, "")
}) })
} }

View file

@ -1,3 +1,3 @@
scrape_configs: scrape_configs:
- job_name: long_scrape_interval_test - job_name: long_scrape_interval_test
scrape_interval: 1h scrape_interval: 10m

View file

@ -177,6 +177,7 @@ Check if the rule files are valid or not.
| --- | --- | --- | | --- | --- | --- |
| <code class="text-nowrap">--lint</code> | Linting checks to apply. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` | | <code class="text-nowrap">--lint</code> | Linting checks to apply. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` |
| <code class="text-nowrap">--lint-fatal</code> | Make lint errors exit with exit code 3. | `false` | | <code class="text-nowrap">--lint-fatal</code> | Make lint errors exit with exit code 3. | `false` |
| <code class="text-nowrap">--lint.lookback-delta</code> | The maximum lookback duration. This config is only used for rules linting checks. | `5m` |