From d2802c6facf16dee253135f839aff071836b1665 Mon Sep 17 00:00:00 2001 From: Yijie Qin Date: Mon, 21 Oct 2024 19:04:40 -0400 Subject: [PATCH] api: Add rule group pagination to list rules api (#14017) * Add paginated feature to list rules api Signed-off-by: Yijie Qin * Refactor to simplify code: * Reduce number of variables * Reduce type convesion Signed-off-by: Raphael Silva * Simplify paginated implementation * Remove maxAlerts parameter. * Reuse existing API responses by using omitempty in some fields Signed-off-by: Raphael Silva * Simplify pagination implementation * Eliminate the need to sort the rule groups. Signed-off-by: Raphael Silva * Fix linting error Signed-off-by: Raphael Silva * Add more unit tests Signed-off-by: Raphael Silva * Update pagination parameters to be consistent with existing parameters Signed-off-by: Raphael Silva * Rename max_rule_groups to max_groups Signed-off-by: Raphael Silva * Refactor to simplify code Signed-off-by: Raphael Silva * Refactor to simplify the calculation of next token Signed-off-by: Raphael Silva * Handle corner case in pagination request Signed-off-by: Raphael Silva * Handle corner cases for pagination of list rules Signed-off-by: Raphael Silva * Update documentation for list rules parameters Signed-off-by: Raphael Silva * Refactor comments Signed-off-by: Raphael Silva * Simplify pagination implementation * Eliminate need for extra structs to store pagination parameters Signed-off-by: Raphael Silva * Update docs/querying/api.md Co-authored-by: Julius Volz Signed-off-by: Raphael Philipe Mendes da Silva * Update web/api/v1/api.go Co-authored-by: Bartlomiej Plotka Signed-off-by: Raphael Philipe Mendes da Silva * Update comment describing the need for next token Signed-off-by: Raphael Silva --------- Signed-off-by: Yijie Qin Signed-off-by: Raphael Silva Signed-off-by: Raphael Philipe Mendes da Silva Co-authored-by: Raphael Silva Co-authored-by: Julius Volz Co-authored-by: Bartlomiej Plotka --- docs/querying/api.md | 2 + web/api/v1/api.go | 71 ++++++++++++- web/api/v1/api_test.go | 220 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 291 insertions(+), 2 deletions(-) diff --git a/docs/querying/api.md b/docs/querying/api.md index 1095171b2f..6b7ae0524b 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -764,6 +764,8 @@ URL query parameters: - `file[]=`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done. - `exclude_alerts=`: only return rules, do not return active alerts. - `match[]=`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional. +- `group_limit=`: The `group_limit` parameter allows you to specify a limit for the number of rule groups that is returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process. +- `group_next_token`: the pagination token that was returned in previous request when the `group_limit` property is set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also need to be present. If a rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned. ```json $ curl http://localhost:9090/api/v1/rules diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 9fb01f5767..b37605f5d5 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,6 +15,8 @@ package v1 import ( "context" + "crypto/sha1" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -1371,7 +1373,8 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { // RuleDiscovery has info for all rules. type RuleDiscovery struct { - RuleGroups []*RuleGroup `json:"groups"` + RuleGroups []*RuleGroup `json:"groups"` + GroupNextToken string `json:"groupNextToken:omitempty"` } // RuleGroup has info for rules which are part of a group. @@ -1458,8 +1461,23 @@ func (api *API) rules(r *http.Request) apiFuncResult { return invalidParamError(err, "exclude_alerts") } + maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r) + if parseErr != nil { + return *parseErr + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) + + foundToken := false + for _, grp := range ruleGroups { + if maxGroups > 0 && nextToken != "" && !foundToken { + if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) { + continue + } + foundToken = true + } + if len(rgSet) > 0 { if _, ok := rgSet[grp.Name()]; !ok { continue @@ -1504,6 +1522,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !excludeAlerts { activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) } + enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1519,6 +1538,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { LastEvaluation: rule.GetEvaluationTimestamp(), Type: "alerting", } + case *rules.RecordingRule: if !returnRecording { break @@ -1545,9 +1565,20 @@ func (api *API) rules(r *http.Request) apiFuncResult { // If the rule group response has no rules, skip it - this means we filtered all the rules of this group. if len(apiRuleGroup.Rules) > 0 { + if maxGroups > 0 && len(rgs) == int(maxGroups) { + // We've reached the capacity of our page plus one. That means that for sure there will be at least one + // rule group in a subsequent request. Therefore a next token is required. + res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name()) + break + } rgs = append(rgs, apiRuleGroup) } } + + if maxGroups > 0 && nextToken != "" && !foundToken { + return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. were rule groups changed?", nextToken), "group_next_token") + } + res.RuleGroups = rgs return apiFuncResult{res, nil, nil, nil} } @@ -1566,6 +1597,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) { return excludeAlerts, nil } +func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) { + var ( + parsedMaxGroups int64 = -1 + err error + ) + maxGroups := r.URL.Query().Get("group_limit") + nextToken := r.URL.Query().Get("group_next_token") + + if nextToken != "" && maxGroups == "" { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be present in order to paginate over the groups"), "group_next_token") + return -1, "", &errResult + } + + if maxGroups != "" { + parsedMaxGroups, err = strconv.ParseInt(maxGroups, 10, 32) + if err != nil { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit") + return -1, "", &errResult + } + if parsedMaxGroups <= 0 { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be greater than 0"), "group_limit") + return -1, "", &errResult + } + } + + if parsedMaxGroups > 0 { + return parsedMaxGroups, nextToken, nil + } + + return -1, "", nil +} + +func getRuleGroupNextToken(file, group string) string { + h := sha1.New() + h.Write([]byte(file + ";" + group)) + return hex.EncodeToString(h.Sum(nil)) +} + type prometheusConfig struct { YAML string `json:"yaml"` } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 7ac2fe5693..35ad4a9ad3 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -338,7 +338,15 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { ShouldRestore: false, Opts: opts, }) - m.ruleGroups = []*rules.Group{group} + group2 := rules.NewGroup(rules.GroupOptions{ + Name: "grp2", + File: "/path/to/file", + Interval: time.Second, + Rules: []rules.Rule{r[0]}, + ShouldRestore: false, + Opts: opts, + }) + m.ruleGroups = []*rules.Group{group, group2} } func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { @@ -2241,6 +2249,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2329,6 +2356,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: nil, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2410,6 +2456,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2681,6 +2746,159 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, zeroFunc: rulesZeroFunc, }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + }, + response: &RuleDiscovery{ + GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"), + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric6", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("testlabel", "rule"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric7", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + RecordingRule{ + Name: "recording-rule-2", + Query: "vector(1)", + Labels: labels.FromStrings("testlabel", "rule"), + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { // invalid pagination request + endpoint: api.rules, + query: url.Values{ + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // invalid group_limit + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"0"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // Pagination token is invalid due to changes in the rule groups + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, { endpoint: api.queryExemplars, query: url.Values{