From 594b882e8b3aef70b6cc3ff44fc4ccdb8cf94c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 14 Jan 2025 18:44:00 +0100 Subject: [PATCH 1/3] fix(remotewrite2): do not overwrite help text with unit in enqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While testing POC for https://github.com/grafana/mimir/issues/9072 I saw no unit or help metadata. Our test env: https://github.com/grafana/mimir/tree/main/development/mimir-monolithic-mode doesn't have units, so that was empty and cleared the help due to this bug. Signed-off-by: György Krajcsovits --- storage/remote/queue_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index 4b966059f6..f0ea33fb52 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -1923,7 +1923,7 @@ func populateV2TimeSeries(symbolTable *writev2.SymbolsTable, batch []timeSeries, if d.metadata != nil { pendingData[nPending].Metadata.Type = writev2.FromMetadataType(d.metadata.Type) pendingData[nPending].Metadata.HelpRef = symbolTable.Symbolize(d.metadata.Help) - pendingData[nPending].Metadata.HelpRef = symbolTable.Symbolize(d.metadata.Unit) + pendingData[nPending].Metadata.UnitRef = symbolTable.Symbolize(d.metadata.Unit) nPendingMetadata++ } From 5df6ea30420d94eec1f8f247367767053aabcb3c Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 15 Jan 2025 08:45:05 +0100 Subject: [PATCH 2/3] promtool: Support linting of scrape interval (#15719) * PromTool: Support Scrape Interval Lint Checking --------- Signed-off-by: zhaowang Signed-off-by: Arve Knudsen Co-authored-by: zhaowang --- CHANGELOG.md | 2 + cmd/promtool/main.go | 150 ++++++++++++------ cmd/promtool/main_test.go | 61 +++++-- ...s-config.lint.too_long_scrape_interval.yml | 3 + docs/command-line/promtool.md | 9 +- 5 files changed, 167 insertions(+), 58 deletions(-) create mode 100644 cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index d0a7ef6611..40be59f724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## unreleased +* [ENHANCEMENT] promtool: Support linting of scrape interval, through lint option `too-long-scrape-interval`. #15719 + ## 3.1.0 / 2025-01-02 * [SECURITY] upgrade golang.org/x/crypto to address reported CVE-2024-45337. #15691 diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 62a1d4f906..54292bbfe5 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -73,14 +73,19 @@ const ( // Exit code 3 is used for "one or more lint issues detected". lintErrExitCode = 3 - lintOptionAll = "all" - lintOptionDuplicateRules = "duplicate-rules" - lintOptionNone = "none" - checkHealth = "/-/healthy" - checkReadiness = "/-/ready" + lintOptionAll = "all" + lintOptionDuplicateRules = "duplicate-rules" + lintOptionTooLongScrapeInterval = "too-long-scrape-interval" + lintOptionNone = "none" + checkHealth = "/-/healthy" + checkReadiness = "/-/ready" ) -var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone} +var ( + lintRulesOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone} + // Same as lintRulesOptions, but including scrape config linting options as well. + lintConfigOptions = append(append([]string{}, lintRulesOptions...), lintOptionTooLongScrapeInterval) +) func main() { var ( @@ -97,6 +102,10 @@ func main() { app.HelpFlag.Short('h') checkCmd := app.Command("check", "Check the resources for validity.") + checkLookbackDelta := checkCmd.Flag( + "query.lookback-delta", + "The server's maximum query lookback duration.", + ).Default("5m").Duration() experimental := app.Flag("experimental", "Enable experimental commands.").Bool() @@ -113,7 +122,7 @@ func main() { checkConfigSyntaxOnly := checkConfigCmd.Flag("syntax-only", "Only check the config file syntax, ignoring file and content validation referenced in the config").Bool() checkConfigLint := checkConfigCmd.Flag( "lint", - "Linting checks to apply to the rules specified in the config. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting", + "Linting checks to apply to the rules/scrape configs specified in the config. Available options are: "+strings.Join(lintConfigOptions, ", ")+". Use --lint=none to disable linting", ).Default(lintOptionDuplicateRules).String() checkConfigLintFatal := checkConfigCmd.Flag( "lint-fatal", @@ -140,7 +149,7 @@ func main() { ).ExistingFiles() checkRulesLint := checkRulesCmd.Flag( "lint", - "Linting checks to apply. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting", + "Linting checks to apply. Available options are: "+strings.Join(lintRulesOptions, ", ")+". Use --lint=none to disable linting", ).Default(lintOptionDuplicateRules).String() checkRulesLintFatal := checkRulesCmd.Flag( "lint-fatal", @@ -339,7 +348,7 @@ func main() { os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): - os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) + os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, model.Duration(*checkLookbackDelta)), *configFiles...)) case checkServerHealthCmd.FullCommand(): os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper))) @@ -351,7 +360,7 @@ func main() { os.Exit(CheckWebConfig(*webConfigFiles...)) case checkRulesCmd.FullCommand(): - os.Exit(CheckRules(newLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...)) + os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...)) case checkMetricsCmd.FullCommand(): os.Exit(CheckMetrics(*checkMetricsExtended)) @@ -445,15 +454,15 @@ func checkExperimental(f bool) { var errLint = errors.New("lint error") -type lintConfig struct { +type rulesLintConfig struct { all bool duplicateRules bool fatal bool } -func newLintConfig(stringVal string, fatal bool) lintConfig { +func newRulesLintConfig(stringVal string, fatal bool) rulesLintConfig { items := strings.Split(stringVal, ",") - ls := lintConfig{ + ls := rulesLintConfig{ fatal: fatal, } for _, setting := range items { @@ -464,16 +473,57 @@ func newLintConfig(stringVal string, fatal bool) lintConfig { ls.duplicateRules = true case lintOptionNone: default: - fmt.Printf("WARNING: unknown lint option %s\n", setting) + fmt.Printf("WARNING: unknown lint option: %q\n", setting) } } return ls } -func (ls lintConfig) lintDuplicateRules() bool { +func (ls rulesLintConfig) lintDuplicateRules() bool { return ls.all || ls.duplicateRules } +type configLintConfig struct { + rulesLintConfig + + lookbackDelta model.Duration +} + +func newConfigLintConfig(optionsStr string, fatal bool, lookbackDelta model.Duration) configLintConfig { + c := configLintConfig{ + rulesLintConfig: rulesLintConfig{ + fatal: fatal, + }, + } + + lintNone := false + var rulesOptions []string + for _, option := range strings.Split(optionsStr, ",") { + switch option { + case lintOptionAll, lintOptionTooLongScrapeInterval: + c.lookbackDelta = lookbackDelta + if option == lintOptionAll { + rulesOptions = append(rulesOptions, lintOptionAll) + } + case lintOptionNone: + lintNone = true + default: + rulesOptions = append(rulesOptions, option) + } + } + + if lintNone { + c.lookbackDelta = 0 + rulesOptions = nil + } + + if len(rulesOptions) > 0 { + c.rulesLintConfig = newRulesLintConfig(strings.Join(rulesOptions, ","), fatal) + } + + return c +} + // CheckServerStatus - healthy & ready. func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error { if serverURL.Scheme == "" { @@ -512,12 +562,12 @@ func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper ht } // CheckConfig validates configuration files. -func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int { +func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig, files ...string) int { failed := false hasErrors := false for _, f := range files { - ruleFiles, err := checkConfig(agentMode, f, checkSyntaxOnly) + ruleFiles, scrapeConfigs, err := checkConfig(agentMode, f, checkSyntaxOnly) if err != nil { fmt.Fprintln(os.Stderr, " FAILED:", err) hasErrors = true @@ -530,12 +580,12 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files } fmt.Println() - rulesFailed, rulesHasErrors := checkRules(ruleFiles, lintSettings) - if rulesFailed { - failed = rulesFailed - } - if rulesHasErrors { - hasErrors = rulesHasErrors + if !checkSyntaxOnly { + scrapeConfigsFailed := lintScrapeConfigs(scrapeConfigs, lintSettings) + failed = failed || scrapeConfigsFailed + rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.rulesLintConfig) + failed = failed || rulesFailed + hasErrors = hasErrors || rulesHaveErrors } } if failed && hasErrors { @@ -574,12 +624,12 @@ func checkFileExists(fn string) error { return err } -func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) { +func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, []*config.ScrapeConfig, error) { fmt.Println("Checking", filename) cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger()) if err != nil { - return nil, err + return nil, nil, err } var ruleFiles []string @@ -587,15 +637,15 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin for _, rf := range cfg.RuleFiles { rfs, err := filepath.Glob(rf) if err != nil { - return nil, err + return nil, nil, err } // If an explicit file was given, error if it is not accessible. if !strings.Contains(rf, "*") { if len(rfs) == 0 { - return nil, fmt.Errorf("%q does not point to an existing file", rf) + return nil, nil, fmt.Errorf("%q does not point to an existing file", rf) } if err := checkFileExists(rfs[0]); err != nil { - return nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err) + return nil, nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err) } } ruleFiles = append(ruleFiles, rfs...) @@ -609,26 +659,26 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin var err error scfgs, err = cfg.GetScrapeConfigs() if err != nil { - return nil, fmt.Errorf("error loading scrape configs: %w", err) + return nil, nil, fmt.Errorf("error loading scrape configs: %w", err) } } for _, scfg := range scfgs { if !checkSyntaxOnly && scfg.HTTPClientConfig.Authorization != nil { if err := checkFileExists(scfg.HTTPClientConfig.Authorization.CredentialsFile); err != nil { - return nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err) + return nil, nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err) } } if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil { - return nil, err + return nil, nil, err } for _, c := range scfg.ServiceDiscoveryConfigs { switch c := c.(type) { case *kubernetes.SDConfig: if err := checkTLSConfig(c.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil { - return nil, err + return nil, nil, err } case *file.SDConfig: if checkSyntaxOnly { @@ -637,17 +687,17 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin for _, file := range c.Files { files, err := filepath.Glob(file) if err != nil { - return nil, err + return nil, nil, err } if len(files) != 0 { for _, f := range files { var targetGroups []*targetgroup.Group targetGroups, err = checkSDFile(f) if err != nil { - return nil, fmt.Errorf("checking SD file %q: %w", file, err) + return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err) } if err := checkTargetGroupsForScrapeConfig(targetGroups, scfg); err != nil { - return nil, err + return nil, nil, err } } continue @@ -656,7 +706,7 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin } case discovery.StaticConfig: if err := checkTargetGroupsForScrapeConfig(c, scfg); err != nil { - return nil, err + return nil, nil, err } } } @@ -673,18 +723,18 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin for _, file := range c.Files { files, err := filepath.Glob(file) if err != nil { - return nil, err + return nil, nil, err } if len(files) != 0 { for _, f := range files { var targetGroups []*targetgroup.Group targetGroups, err = checkSDFile(f) if err != nil { - return nil, fmt.Errorf("checking SD file %q: %w", file, err) + return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err) } if err := checkTargetGroupsForAlertmanager(targetGroups, amcfg); err != nil { - return nil, err + return nil, nil, err } } continue @@ -693,12 +743,12 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin } case discovery.StaticConfig: if err := checkTargetGroupsForAlertmanager(c, amcfg); err != nil { - return nil, err + return nil, nil, err } } } } - return ruleFiles, nil + return ruleFiles, scfgs, nil } func checkTLSConfig(tlsConfig promconfig.TLSConfig, checkSyntaxOnly bool) error { @@ -760,7 +810,7 @@ func checkSDFile(filename string) ([]*targetgroup.Group, error) { } // CheckRules validates rule files. -func CheckRules(ls lintConfig, files ...string) int { +func CheckRules(ls rulesLintConfig, files ...string) int { failed := false hasErrors := false if len(files) == 0 { @@ -780,7 +830,7 @@ func CheckRules(ls lintConfig, files ...string) int { } // checkRulesFromStdin validates rule from stdin. -func checkRulesFromStdin(ls lintConfig) (bool, bool) { +func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) { failed := false hasErrors := false fmt.Println("Checking standard input") @@ -818,7 +868,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) { } // checkRules validates rule files. -func checkRules(files []string, ls lintConfig) (bool, bool) { +func checkRules(files []string, ls rulesLintConfig) (bool, bool) { failed := false hasErrors := false for _, f := range files { @@ -852,7 +902,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) { return failed, hasErrors } -func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []error) { +func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings rulesLintConfig) (int, []error) { numRules := 0 for _, rg := range rgs.Groups { numRules += len(rg.Rules) @@ -876,6 +926,16 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []e return numRules, nil } +func lintScrapeConfigs(scrapeConfigs []*config.ScrapeConfig, lintSettings configLintConfig) bool { + for _, scfg := range scrapeConfigs { + if lintSettings.lookbackDelta > 0 && scfg.ScrapeInterval >= lintSettings.lookbackDelta { + fmt.Fprintf(os.Stderr, " FAILED: too long scrape interval found, data point will be marked as stale - job: %s, interval: %s\n", scfg.JobName, scfg.ScrapeInterval) + return true + } + } + return false +} + type compareRuleType struct { metric string label labels.Labels diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 9a07269188..92e5ff9e67 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -234,7 +234,7 @@ func TestCheckTargetConfig(t *testing.T) { for _, test := range cases { t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := checkConfig(false, "testdata/"+test.file, false) + _, _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return @@ -319,7 +319,7 @@ func TestCheckConfigSyntax(t *testing.T) { for _, test := range cases { t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly) + _, _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly) expectedErrMsg := test.err if strings.Contains(runtime.GOOS, "windows") { expectedErrMsg = test.errWindows @@ -355,7 +355,7 @@ func TestAuthorizationConfig(t *testing.T) { for _, test := range cases { t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := checkConfig(false, "testdata/"+test.file, false) + _, _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error()) return @@ -508,7 +508,7 @@ func TestCheckRules(t *testing.T) { defer func(v *os.File) { os.Stdin = v }(os.Stdin) os.Stdin = r - exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false)) + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false)) require.Equal(t, successExitCode, exitCode, "") }) @@ -530,7 +530,7 @@ func TestCheckRules(t *testing.T) { defer func(v *os.File) { os.Stdin = v }(os.Stdin) os.Stdin = r - exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false)) + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false)) require.Equal(t, failureExitCode, exitCode, "") }) @@ -552,7 +552,7 @@ func TestCheckRules(t *testing.T) { defer func(v *os.File) { os.Stdin = v }(os.Stdin) os.Stdin = r - exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true)) + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true)) require.Equal(t, lintErrExitCode, exitCode, "") }) } @@ -560,23 +560,66 @@ func TestCheckRules(t *testing.T) { func TestCheckRulesWithRuleFiles(t *testing.T) { t.Run("rules-good", func(t *testing.T) { t.Parallel() - exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml") + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml") require.Equal(t, successExitCode, exitCode, "") }) t.Run("rules-bad", func(t *testing.T) { t.Parallel() - exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml") + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml") require.Equal(t, failureExitCode, exitCode, "") }) t.Run("rules-lint-fatal", func(t *testing.T) { t.Parallel() - exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml") + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml") require.Equal(t, lintErrExitCode, exitCode, "") }) } +func TestCheckScrapeConfigs(t *testing.T) { + for _, tc := range []struct { + name string + lookbackDelta model.Duration + expectError bool + }{ + { + name: "scrape interval less than lookback delta", + lookbackDelta: model.Duration(11 * time.Minute), + expectError: false, + }, + { + name: "scrape interval greater than lookback delta", + lookbackDelta: model.Duration(5 * time.Minute), + expectError: true, + }, + { + name: "scrape interval same as lookback delta", + lookbackDelta: model.Duration(10 * time.Minute), + expectError: true, + }, + } { + t.Run(tc.name, func(t *testing.T) { + // Non-fatal linting. + code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + require.Equal(t, successExitCode, code, "Non-fatal linting should return success") + // Fatal linting. + code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + if tc.expectError { + require.Equal(t, lintErrExitCode, code, "Fatal linting should return error") + } else { + require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems") + } + // Check syntax only, no linting. + code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only") + // Lint option "none" should disable linting. + code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`) + }) + } +} + func TestTSDBDumpCommand(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") diff --git a/cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml b/cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml new file mode 100644 index 0000000000..0c85d13f31 --- /dev/null +++ b/cmd/promtool/testdata/prometheus-config.lint.too_long_scrape_interval.yml @@ -0,0 +1,3 @@ +scrape_configs: + - job_name: too_long_scrape_interval_test + scrape_interval: 10m diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 5e2a8f6bb1..09800af748 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -59,9 +59,10 @@ Check the resources for validity. #### Flags -| Flag | Description | -| --- | --- | -| --extended | Print extended information related to the cardinality of the metrics. | +| Flag | Description | Default | +| --- | --- | --- | +| --query.lookback-delta | The server's maximum query lookback duration. | `5m` | +| --extended | Print extended information related to the cardinality of the metrics. | | @@ -102,7 +103,7 @@ Check if the config files are valid or not. | Flag | Description | Default | | --- | --- | --- | | --syntax-only | Only check the config file syntax, ignoring file and content validation referenced in the config | | -| --lint | Linting checks to apply to the rules specified in the config. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` | +| --lint | Linting checks to apply to the rules/scrape configs specified in the config. Available options are: all, duplicate-rules, none, too-long-scrape-interval. Use --lint=none to disable linting | `duplicate-rules` | | --lint-fatal | Make lint errors exit with exit code 3. | `false` | | --agent | Check config file for Prometheus in Agent mode. | | From 92218ecb9b5c6e3c54abc909215d1da4cc8c9b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 20 Dec 2024 14:56:51 +0200 Subject: [PATCH 3/3] promtool: add --ignore-unknown-fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add --ignore-unknown-fields that ignores unknown fields in rule group files. There are lots of tools in the ecosystem that "like" to extend the rule group file structure but they are currently unreadable by promtool if there's anything extra. The purpose of this flag is so that we could use the "vanilla" promtool instead of rolling our own. Some examples of tools/code: https://github.com/grafana/mimir/blob/main/pkg/mimirtool/rules/rwrulefmt/rulefmt.go https://github.com/grafana/cortex-tools/blob/8898eb3cc5355b917634c9da476045df7b0426af/pkg/rules/rules.go#L18-L25 Signed-off-by: Giedrius Statkevičius --- cmd/promtool/main.go | 28 +++++++++------- cmd/promtool/main_test.go | 24 +++++++------- cmd/promtool/rules.go | 2 +- cmd/promtool/testdata/rules_extrafields.yml | 33 +++++++++++++++++++ .../testdata/rules_run_extrafields.yml | 21 ++++++++++++ cmd/promtool/unittest.go | 16 ++++----- cmd/promtool/unittest_test.go | 23 +++++++++---- docs/command-line/promtool.md | 3 ++ model/rulefmt/rulefmt.go | 10 +++--- model/rulefmt/rulefmt_test.go | 8 ++--- rules/manager.go | 12 +++---- rules/manager_test.go | 26 +++++++-------- 12 files changed, 140 insertions(+), 66 deletions(-) create mode 100644 cmd/promtool/testdata/rules_extrafields.yml create mode 100644 cmd/promtool/testdata/rules_run_extrafields.yml diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 54292bbfe5..81ba93d2de 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -127,6 +127,7 @@ func main() { checkConfigLintFatal := checkConfigCmd.Flag( "lint-fatal", "Make lint errors exit with exit code 3.").Default("false").Bool() + checkConfigIgnoreUnknownFields := checkConfigCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule groups read by the config files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool() checkWebConfigCmd := checkCmd.Command("web-config", "Check if the web config files are valid or not.") webConfigFiles := checkWebConfigCmd.Arg( @@ -154,6 +155,7 @@ func main() { checkRulesLintFatal := checkRulesCmd.Flag( "lint-fatal", "Make lint errors exit with exit code 3.").Default("false").Bool() + checkRulesIgnoreUnknownFields := checkRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool() checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage) checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool() @@ -227,6 +229,7 @@ func main() { ).Required().ExistingFiles() testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool() testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() + testRulesIgnoreUnknownFields := testRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the test files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool() defaultDBPath := "data/" tsdbCmd := app.Command("tsdb", "Run tsdb commands.") @@ -348,7 +351,7 @@ func main() { os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): - os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, model.Duration(*checkLookbackDelta)), *configFiles...)) + os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, *checkConfigIgnoreUnknownFields, model.Duration(*checkLookbackDelta)), *configFiles...)) case checkServerHealthCmd.FullCommand(): os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper))) @@ -360,7 +363,7 @@ func main() { os.Exit(CheckWebConfig(*webConfigFiles...)) case checkRulesCmd.FullCommand(): - os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...)) + os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesIgnoreUnknownFields), *ruleFiles...)) case checkMetricsCmd.FullCommand(): os.Exit(CheckMetrics(*checkMetricsExtended)) @@ -402,6 +405,7 @@ func main() { *testRulesRun, *testRulesDiff, *testRulesDebug, + *testRulesIgnoreUnknownFields, *testRulesFiles...), ) @@ -455,15 +459,17 @@ func checkExperimental(f bool) { var errLint = errors.New("lint error") type rulesLintConfig struct { - all bool - duplicateRules bool - fatal bool + all bool + duplicateRules bool + fatal bool + ignoreUnknownFields bool } -func newRulesLintConfig(stringVal string, fatal bool) rulesLintConfig { +func newRulesLintConfig(stringVal string, fatal, ignoreUnknownFields bool) rulesLintConfig { items := strings.Split(stringVal, ",") ls := rulesLintConfig{ - fatal: fatal, + fatal: fatal, + ignoreUnknownFields: ignoreUnknownFields, } for _, setting := range items { switch setting { @@ -489,7 +495,7 @@ type configLintConfig struct { lookbackDelta model.Duration } -func newConfigLintConfig(optionsStr string, fatal bool, lookbackDelta model.Duration) configLintConfig { +func newConfigLintConfig(optionsStr string, fatal, ignoreUnknownFields bool, lookbackDelta model.Duration) configLintConfig { c := configLintConfig{ rulesLintConfig: rulesLintConfig{ fatal: fatal, @@ -518,7 +524,7 @@ func newConfigLintConfig(optionsStr string, fatal bool, lookbackDelta model.Dura } if len(rulesOptions) > 0 { - c.rulesLintConfig = newRulesLintConfig(strings.Join(rulesOptions, ","), fatal) + c.rulesLintConfig = newRulesLintConfig(strings.Join(rulesOptions, ","), fatal, ignoreUnknownFields) } return c @@ -839,7 +845,7 @@ func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) { fmt.Fprintln(os.Stderr, " FAILED:", err) return true, true } - rgs, errs := rulefmt.Parse(data) + rgs, errs := rulefmt.Parse(data, ls.ignoreUnknownFields) if errs != nil { failed = true fmt.Fprintln(os.Stderr, " FAILED:") @@ -873,7 +879,7 @@ func checkRules(files []string, ls rulesLintConfig) (bool, bool) { hasErrors := false for _, f := range files { fmt.Println("Checking", f) - rgs, errs := rulefmt.ParseFile(f) + rgs, errs := rulefmt.ParseFile(f, ls.ignoreUnknownFields) if errs != nil { failed = true fmt.Fprintln(os.Stderr, " FAILED:") diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 92e5ff9e67..48bed9a2df 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -185,7 +185,7 @@ func TestCheckDuplicates(t *testing.T) { c := test t.Run(c.name, func(t *testing.T) { t.Parallel() - rgs, err := rulefmt.ParseFile(c.ruleFile) + rgs, err := rulefmt.ParseFile(c.ruleFile, false) require.Empty(t, err) dups := checkDuplicates(rgs.Groups) require.Equal(t, c.expectedDups, dups) @@ -194,7 +194,7 @@ func TestCheckDuplicates(t *testing.T) { } func BenchmarkCheckDuplicates(b *testing.B) { - rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml") + rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml", false) require.Empty(b, err) b.ResetTimer() @@ -508,7 +508,7 @@ func TestCheckRules(t *testing.T) { defer func(v *os.File) { os.Stdin = v }(os.Stdin) os.Stdin = r - exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false)) + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false)) require.Equal(t, successExitCode, exitCode, "") }) @@ -530,7 +530,7 @@ func TestCheckRules(t *testing.T) { defer func(v *os.File) { os.Stdin = v }(os.Stdin) os.Stdin = r - exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false)) + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false)) require.Equal(t, failureExitCode, exitCode, "") }) @@ -552,7 +552,7 @@ func TestCheckRules(t *testing.T) { defer func(v *os.File) { os.Stdin = v }(os.Stdin) os.Stdin = r - exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true)) + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false)) require.Equal(t, lintErrExitCode, exitCode, "") }) } @@ -560,19 +560,19 @@ func TestCheckRules(t *testing.T) { func TestCheckRulesWithRuleFiles(t *testing.T) { t.Run("rules-good", func(t *testing.T) { t.Parallel() - exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml") + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false), "./testdata/rules.yml") require.Equal(t, successExitCode, exitCode, "") }) t.Run("rules-bad", func(t *testing.T) { t.Parallel() - exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml") + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false), "./testdata/rules-bad.yml") require.Equal(t, failureExitCode, exitCode, "") }) t.Run("rules-lint-fatal", func(t *testing.T) { t.Parallel() - exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml") + exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false), "./testdata/prometheus-rules.lint.yml") require.Equal(t, lintErrExitCode, exitCode, "") }) } @@ -601,20 +601,20 @@ func TestCheckScrapeConfigs(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { // Non-fatal linting. - code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + code := CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, false, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") require.Equal(t, successExitCode, code, "Non-fatal linting should return success") // Fatal linting. - code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + code = CheckConfig(false, false, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") if tc.expectError { require.Equal(t, lintErrExitCode, code, "Fatal linting should return error") } else { require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems") } // Check syntax only, no linting. - code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + code = CheckConfig(false, true, newConfigLintConfig(lintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only") // Lint option "none" should disable linting. - code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") + code = CheckConfig(false, false, newConfigLintConfig(lintOptionNone+","+lintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml") require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`) }) } diff --git a/cmd/promtool/rules.go b/cmd/promtool/rules.go index adb214b812..b2eb18ca8e 100644 --- a/cmd/promtool/rules.go +++ b/cmd/promtool/rules.go @@ -69,7 +69,7 @@ func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient q // loadGroups parses groups from a list of recording rule files. func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string) (errs []error) { - groups, errs := importer.ruleManager.LoadGroups(importer.config.evalInterval, labels.Labels{}, "", nil, filenames...) + groups, errs := importer.ruleManager.LoadGroups(importer.config.evalInterval, labels.Labels{}, "", nil, false, filenames...) if errs != nil { return errs } diff --git a/cmd/promtool/testdata/rules_extrafields.yml b/cmd/promtool/testdata/rules_extrafields.yml new file mode 100644 index 0000000000..85ef079bb8 --- /dev/null +++ b/cmd/promtool/testdata/rules_extrafields.yml @@ -0,0 +1,33 @@ +# This is the rules file. It has an extra "ownership" +# field in the second group. promtool should ignore this field +# and not return an error with --ignore-unknown-fields. + +groups: + - name: alerts + namespace: "foobar" + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} down" + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes." + - alert: AlwaysFiring + expr: 1 + + - name: rules + ownership: + service: "test" + rules: + - record: job:test:count_over_time1m + expr: sum without(instance) (count_over_time(test[1m])) + + # A recording rule that doesn't depend on input series. + - record: fixed_data + expr: 1 + + # Subquery with default resolution test. + - record: suquery_interval_test + expr: count_over_time(up[5m:]) diff --git a/cmd/promtool/testdata/rules_run_extrafields.yml b/cmd/promtool/testdata/rules_run_extrafields.yml new file mode 100644 index 0000000000..86879fc396 --- /dev/null +++ b/cmd/promtool/testdata/rules_run_extrafields.yml @@ -0,0 +1,21 @@ +# Minimal test case to see that --ignore-unknown-fields +# is working as expected. It should not return an error +# when any extra fields are present in the rules file. +rule_files: + - rules_extrafields.yml + +evaluation_interval: 1m + + +tests: + - name: extra ownership field test + input_series: + - series: test + values: 1 + + promql_expr_test: + - expr: test + eval_time: 0 + exp_samples: + - value: 1 + labels: test diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 78dacdc569..7f494e27aa 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -46,11 +46,11 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { - return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...) +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int { + return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, ignoreUnknownFields, files...) } -func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { +func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int { failed := false junit := &junitxml.JUnitXML{} @@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, } for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, ignoreUnknownFields, junit.Suite(f)); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, return successExitCode } -func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug, ignoreUnknownFields bool, ts *junitxml.TestSuite) []error { b, err := os.ReadFile(filename) if err != nil { ts.Abort(err) @@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...) + ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, ignoreUnknownFields, unitTestInp.RuleFiles...) if ers != nil { for _, e := range ers { tc.Fail(e.Error()) @@ -198,7 +198,7 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug, ignoreUnknownFields bool, ruleFiles ...string) (outErr []error) { if debug { testStart := time.Now() fmt.Printf("DEBUG: Starting test %s\n", testname) @@ -228,7 +228,7 @@ func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrde Logger: promslog.NewNopLogger(), } m := rules.NewManager(opts) - groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...) + groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ignoreUnknownFields, ruleFiles...) if ers != nil { return ers } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index ec34ad3185..7466b222ca 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -143,7 +143,7 @@ func TestRulesUnitTest(t *testing.T) { } t.Run(tt.name, func(t *testing.T) { t.Parallel() - if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, nil, false, false, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) @@ -151,7 +151,7 @@ func TestRulesUnitTest(t *testing.T) { t.Run("Junit xml output ", func(t *testing.T) { t.Parallel() var buf bytes.Buffer - if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 { + if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, false, reuseFiles...); got != 1 { t.Errorf("RulesUnitTestResults() = %v, want 1", got) } var test junitxml.JUnitXML @@ -194,10 +194,11 @@ func TestRulesUnitTestRun(t *testing.T) { files []string } tests := []struct { - name string - args args - queryOpts promqltest.LazyLoaderOpts - want int + name string + args args + queryOpts promqltest.LazyLoaderOpts + want int + ignoreUnknownFields bool }{ { name: "Test all without run arg", @@ -231,11 +232,19 @@ func TestRulesUnitTestRun(t *testing.T) { }, want: 1, }, + { + name: "Test all with extra fields", + args: args{ + files: []string{"./testdata/rules_run_extrafields.yml"}, + }, + ignoreUnknownFields: true, + want: 0, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...) + got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.ignoreUnknownFields, tt.args.files...) require.Equal(t, tt.want, got) }) } diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 09800af748..ab675e6345 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -105,6 +105,7 @@ Check if the config files are valid or not. | --syntax-only | Only check the config file syntax, ignoring file and content validation referenced in the config | | | --lint | Linting checks to apply to the rules/scrape configs specified in the config. Available options are: all, duplicate-rules, none, too-long-scrape-interval. Use --lint=none to disable linting | `duplicate-rules` | | --lint-fatal | Make lint errors exit with exit code 3. | `false` | +| --ignore-unknown-fields | Ignore unknown fields in the rule groups read by the config files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` | | --agent | Check config file for Prometheus in Agent mode. | | @@ -178,6 +179,7 @@ Check if the rule files are valid or not. | --- | --- | --- | | --lint | Linting checks to apply. Available options are: all, duplicate-rules, none. Use --lint=none to disable linting | `duplicate-rules` | | --lint-fatal | Make lint errors exit with exit code 3. | `false` | +| --ignore-unknown-fields | Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` | @@ -465,6 +467,7 @@ Unit tests for rules. | --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | | --debug | Enable unit test debugging. | `false` | | --diff | [Experimental] Print colored differential output between expected & received output. | `false` | +| --ignore-unknown-fields | Ignore unknown fields in the test files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default. | `false` | diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index bb36a21208..76b12f0e5c 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -314,7 +314,7 @@ func testTemplateParsing(rl *RuleNode) (errs []error) { } // Parse parses and validates a set of rules. -func Parse(content []byte) (*RuleGroups, []error) { +func Parse(content []byte, ignoreUnknownFields bool) (*RuleGroups, []error) { var ( groups RuleGroups node ruleGroups @@ -322,7 +322,9 @@ func Parse(content []byte) (*RuleGroups, []error) { ) decoder := yaml.NewDecoder(bytes.NewReader(content)) - decoder.KnownFields(true) + if !ignoreUnknownFields { + decoder.KnownFields(true) + } err := decoder.Decode(&groups) // Ignore io.EOF which happens with empty input. if err != nil && !errors.Is(err, io.EOF) { @@ -341,12 +343,12 @@ func Parse(content []byte) (*RuleGroups, []error) { } // ParseFile reads and parses rules from a file. -func ParseFile(file string) (*RuleGroups, []error) { +func ParseFile(file string, ignoreUnknownFields bool) (*RuleGroups, []error) { b, err := os.ReadFile(file) if err != nil { return nil, []error{fmt.Errorf("%s: %w", file, err)} } - rgs, errs := Parse(b) + rgs, errs := Parse(b, ignoreUnknownFields) for i := range errs { errs[i] = fmt.Errorf("%s: %w", file, errs[i]) } diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index 73ea174594..286e760a41 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -24,7 +24,7 @@ import ( ) func TestParseFileSuccess(t *testing.T) { - _, errs := ParseFile("testdata/test.yaml") + _, errs := ParseFile("testdata/test.yaml", false) require.Empty(t, errs, "unexpected errors parsing file") } @@ -84,7 +84,7 @@ func TestParseFileFailure(t *testing.T) { } for _, c := range table { - _, errs := ParseFile(filepath.Join("testdata", c.filename)) + _, errs := ParseFile(filepath.Join("testdata", c.filename), false) require.NotEmpty(t, errs, "Expected error parsing %s but got none", c.filename) require.ErrorContainsf(t, errs[0], c.errMsg, "Expected error for %s.", c.filename) } @@ -179,7 +179,7 @@ groups: } for _, tst := range tests { - rgs, errs := Parse([]byte(tst.ruleString)) + rgs, errs := Parse([]byte(tst.ruleString), false) require.NotNil(t, rgs, "Rule parsing, rule=\n"+tst.ruleString) passed := (tst.shouldPass && len(errs) == 0) || (!tst.shouldPass && len(errs) > 0) require.True(t, passed, "Rule validation failed, rule=\n"+tst.ruleString) @@ -206,7 +206,7 @@ groups: annotations: summary: "Instance {{ $labels.instance }} up" ` - _, errs := Parse([]byte(group)) + _, errs := Parse([]byte(group), false) require.Len(t, errs, 2, "Expected two errors") var err00 *Error require.ErrorAs(t, errs[0], &err00) diff --git a/rules/manager.go b/rules/manager.go index a3ae716e2b..2b06dee8b5 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -207,7 +207,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels default: } - groups, errs := m.LoadGroups(interval, externalLabels, externalURL, groupEvalIterationFunc, files...) + groups, errs := m.LoadGroups(interval, externalLabels, externalURL, groupEvalIterationFunc, false, files...) if errs != nil { for _, e := range errs { @@ -276,7 +276,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels // GroupLoader is responsible for loading rule groups from arbitrary sources and parsing them. type GroupLoader interface { - Load(identifier string) (*rulefmt.RuleGroups, []error) + Load(identifier string, ignoreUnknownFields bool) (*rulefmt.RuleGroups, []error) Parse(query string) (parser.Expr, error) } @@ -284,22 +284,22 @@ type GroupLoader interface { // and parser.ParseExpr. type FileLoader struct{} -func (FileLoader) Load(identifier string) (*rulefmt.RuleGroups, []error) { - return rulefmt.ParseFile(identifier) +func (FileLoader) Load(identifier string, ignoreUnknownFields bool) (*rulefmt.RuleGroups, []error) { + return rulefmt.ParseFile(identifier, ignoreUnknownFields) } func (FileLoader) Parse(query string) (parser.Expr, error) { return parser.ParseExpr(query) } // LoadGroups reads groups from a list of files. func (m *Manager) LoadGroups( - interval time.Duration, externalLabels labels.Labels, externalURL string, groupEvalIterationFunc GroupEvalIterationFunc, filenames ...string, + interval time.Duration, externalLabels labels.Labels, externalURL string, groupEvalIterationFunc GroupEvalIterationFunc, ignoreUnknownFields bool, filenames ...string, ) (map[string]*Group, []error) { groups := make(map[string]*Group) shouldRestore := !m.restored for _, fn := range filenames { - rgs, errs := m.opts.GroupLoader.Load(fn) + rgs, errs := m.opts.GroupLoader.Load(fn, ignoreUnknownFields) if errs != nil { return nil, errs } diff --git a/rules/manager_test.go b/rules/manager_test.go index 45da7a44b0..d7c767a3cd 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -808,7 +808,7 @@ func TestUpdate(t *testing.T) { } // Groups will be recreated if updated. - rgs, errs := rulefmt.ParseFile("fixtures/rules.yaml") + rgs, errs := rulefmt.ParseFile("fixtures/rules.yaml", false) require.Empty(t, errs, "file parsing failures") tmpFile, err := os.CreateTemp("", "rules.test.*.yaml") @@ -1532,7 +1532,7 @@ func TestManager_LoadGroups_ShouldCheckWhetherEachRuleHasDependentsAndDependenci }) t.Run("load a mix of dependent and independent rules", func(t *testing.T) { - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -1567,7 +1567,7 @@ func TestManager_LoadGroups_ShouldCheckWhetherEachRuleHasDependentsAndDependenci }) t.Run("load only independent rules", func(t *testing.T) { - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple_independent.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple_independent.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -1975,7 +1975,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { t.Cleanup(cancel) ruleManager := NewManager(optsFactory(storage, &maxInflight, &inflightQueries, 0)) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -2021,7 +2021,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { opts.RuleConcurrencyController = nil ruleManager := NewManager(opts) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -2059,7 +2059,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { opts.RuleConcurrencyController = nil ruleManager := NewManager(opts) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple_independent.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple_independent.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -2103,7 +2103,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { opts.RuleConcurrencyController = nil ruleManager := NewManager(opts) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple_independent.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple_independent.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -2150,7 +2150,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { opts.RuleConcurrencyController = nil ruleManager := NewManager(opts) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_indeterminates.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_indeterminates.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) @@ -2189,7 +2189,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { opts.RuleConcurrencyController = nil ruleManager := NewManager(opts) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_multiple_dependents_on_base.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_multiple_dependents_on_base.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) var group *Group @@ -2235,7 +2235,7 @@ func TestAsyncRuleEvaluation(t *testing.T) { opts.RuleConcurrencyController = nil ruleManager := NewManager(opts) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, []string{"fixtures/rules_chain.yaml"}...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, []string{"fixtures/rules_chain.yaml"}...) require.Empty(t, errs) require.Len(t, groups, 1) var group *Group @@ -2279,7 +2279,7 @@ func TestBoundedRuleEvalConcurrency(t *testing.T) { ruleManager := NewManager(optsFactory(storage, &maxInflight, &inflightQueries, maxConcurrency)) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, files...) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, files...) require.Empty(t, errs) require.Len(t, groups, groupCount) @@ -2521,7 +2521,7 @@ func TestRuleDependencyController_AnalyseRules(t *testing.T) { QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { return nil, nil }, }) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, tc.ruleFile) + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, tc.ruleFile) require.Empty(t, errs) require.Len(t, groups, 1) @@ -2550,7 +2550,7 @@ func BenchmarkRuleDependencyController_AnalyseRules(b *testing.B) { QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { return nil, nil }, }) - groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, "fixtures/rules_multiple.yaml") + groups, errs := ruleManager.LoadGroups(time.Second, labels.EmptyLabels(), "", nil, false, "fixtures/rules_multiple.yaml") require.Empty(b, errs) require.Len(b, groups, 1)