api: Add rule group pagination to list rules api (#14017)

* Add paginated feature to list rules api

Signed-off-by: Yijie Qin <qinyijie@amazon.com>

* Refactor to simplify code:

* Reduce number of variables
* Reduce type conversion

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Simplify paginated implementation

* Remove maxAlerts parameter.
* Reuse existing API responses by using omitempty in some fields

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Simplify pagination implementation

* Eliminate the need to sort the rule groups.

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Fix linting error

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Add more unit tests

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Update pagination parameters to be consistent with existing parameters

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Rename max_rule_groups to max_groups

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Refactor to simplify code

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Refactor to simplify the calculation of next token

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Handle corner case in pagination request

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Handle corner cases for pagination of list rules

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Update documentation for list rules parameters

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Refactor comments

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Simplify pagination implementation

* Eliminate need for extra structs to store pagination parameters

Signed-off-by: Raphael Silva <rapphil@gmail.com>

* Update docs/querying/api.md

Co-authored-by: Julius Volz <julius.volz@gmail.com>
Signed-off-by: Raphael Philipe Mendes da Silva <rapphil@gmail.com>

* Update web/api/v1/api.go

Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
Signed-off-by: Raphael Philipe Mendes da Silva <rapphil@gmail.com>

* Update comment describing the need for next token

Signed-off-by: Raphael Silva <rapphil@gmail.com>

---------

Signed-off-by: Yijie Qin <qinyijie@amazon.com>
Signed-off-by: Raphael Silva <rapphil@gmail.com>
Signed-off-by: Raphael Philipe Mendes da Silva <rapphil@gmail.com>
Co-authored-by: Raphael Silva <rapphil@gmail.com>
Co-authored-by: Julius Volz <julius.volz@gmail.com>
Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
This commit is contained in:
Yijie Qin 2024-10-21 19:04:40 -04:00 committed by GitHub
parent 70e2d23027
commit d2802c6fac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 291 additions and 2 deletions

View file

@ -764,6 +764,8 @@ URL query parameters:
- `file[]=<string>`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done.
- `exclude_alerts=<bool>`: only return rules, do not return active alerts.
- `match[]=<label_selector>`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional.
- `group_limit=<number>`: The `group_limit` parameter allows you to specify a limit for the number of rule groups that is returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process.
- `group_next_token=<string>`: the pagination token that was returned in the previous request when the `group_limit` parameter was set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also needs to be present. If the rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned.
```json
$ curl http://localhost:9090/api/v1/rules

View file

@ -15,6 +15,8 @@ package v1
import (
"context"
"crypto/sha1"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
@ -1372,6 +1374,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult {
// RuleDiscovery has info for all rules.
type RuleDiscovery struct {
	// RuleGroups is the (possibly paginated) list of rule groups.
	RuleGroups []*RuleGroup `json:"groups"`
	// GroupNextToken is the pagination token for the next page of rule groups.
	// It is left empty, and therefore omitted from the JSON output, when no
	// further page is available or pagination was not requested.
	GroupNextToken string `json:"groupNextToken,omitempty"`
}
// RuleGroup has info for rules which are part of a group.
@ -1458,8 +1461,23 @@ func (api *API) rules(r *http.Request) apiFuncResult {
return invalidParamError(err, "exclude_alerts")
}
maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r)
if parseErr != nil {
return *parseErr
}
rgs := make([]*RuleGroup, 0, len(ruleGroups))
foundToken := false
for _, grp := range ruleGroups {
if maxGroups > 0 && nextToken != "" && !foundToken {
if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) {
continue
}
foundToken = true
}
if len(rgSet) > 0 {
if _, ok := rgSet[grp.Name()]; !ok {
continue
@ -1504,6 +1522,7 @@ func (api *API) rules(r *http.Request) apiFuncResult {
if !excludeAlerts {
activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts())
}
enrichedRule = AlertingRule{
State: rule.State().String(),
Name: rule.Name(),
@ -1519,6 +1538,7 @@ func (api *API) rules(r *http.Request) apiFuncResult {
LastEvaluation: rule.GetEvaluationTimestamp(),
Type: "alerting",
}
case *rules.RecordingRule:
if !returnRecording {
break
@ -1545,9 +1565,20 @@ func (api *API) rules(r *http.Request) apiFuncResult {
// If the rule group response has no rules, skip it - this means we filtered all the rules of this group.
if len(apiRuleGroup.Rules) > 0 {
if maxGroups > 0 && len(rgs) == int(maxGroups) {
// We've reached the capacity of our page plus one. That means that for sure there will be at least one
// rule group in a subsequent request. Therefore a next token is required.
res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name())
break
}
rgs = append(rgs, apiRuleGroup)
}
}
if maxGroups > 0 && nextToken != "" && !foundToken {
return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. were rule groups changed?", nextToken), "group_next_token")
}
res.RuleGroups = rgs
return apiFuncResult{res, nil, nil, nil}
}
@ -1566,6 +1597,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) {
return excludeAlerts, nil
}
// parseListRulesPaginationRequest extracts the pagination parameters
// "group_limit" and "group_next_token" from the request's URL query.
//
// It returns the parsed group limit, the next token, and a nil error result on
// success. When "group_limit" is absent, pagination is disabled and the
// function returns (-1, "", nil). A non-nil *apiFuncResult is returned when:
//   - "group_next_token" is given without "group_limit",
//   - "group_limit" is not a valid 32-bit base-10 integer, or
//   - "group_limit" is not greater than 0.
func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) {
	query := r.URL.Query()
	limitParam := query.Get("group_limit")
	tokenParam := query.Get("group_next_token")

	// Without a limit there is no pagination; a token on its own is an error.
	if limitParam == "" {
		if tokenParam != "" {
			res := invalidParamError(fmt.Errorf("group_limit needs to be present in order to paginate over the groups"), "group_next_token")
			return -1, "", &res
		}
		return -1, "", nil
	}

	limit, err := strconv.ParseInt(limitParam, 10, 32)
	if err != nil {
		res := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit")
		return -1, "", &res
	}
	if limit <= 0 {
		res := invalidParamError(fmt.Errorf("group_limit needs to be greater than 0"), "group_limit")
		return -1, "", &res
	}

	return limit, tokenParam, nil
}
// getRuleGroupNextToken derives the pagination token that identifies a rule
// group: the hex-encoded SHA-1 digest of the group's file path and name joined
// by a semicolon.
func getRuleGroupNextToken(file, group string) string {
	digest := sha1.Sum([]byte(file + ";" + group))
	return hex.EncodeToString(digest[:])
}
// prometheusConfig wraps a single string field that is serialized under the
// "yaml" JSON key.
// NOTE(review): presumably this carries the loaded configuration as YAML text
// for an API response — confirm against the handler that builds it.
type prometheusConfig struct {
YAML string `json:"yaml"`
}

View file

@ -338,7 +338,15 @@ func (m *rulesRetrieverMock) CreateRuleGroups() {
ShouldRestore: false,
Opts: opts,
})
m.ruleGroups = []*rules.Group{group}
group2 := rules.NewGroup(rules.GroupOptions{
Name: "grp2",
File: "/path/to/file",
Interval: time.Second,
Rules: []rules.Rule{r[0]},
ShouldRestore: false,
Opts: opts,
})
m.ruleGroups = []*rules.Group{group, group2}
}
func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule {
@ -2241,6 +2249,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
},
},
{
Name: "grp2",
File: "/path/to/file",
Interval: 1,
Limit: 0,
Rules: []Rule{
AlertingRule{
State: "inactive",
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
},
},
},
},
zeroFunc: rulesZeroFunc,
@ -2329,6 +2356,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
},
},
{
Name: "grp2",
File: "/path/to/file",
Interval: 1,
Limit: 0,
Rules: []Rule{
AlertingRule{
State: "inactive",
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: nil,
Health: "ok",
Type: "alerting",
},
},
},
},
},
zeroFunc: rulesZeroFunc,
@ -2410,6 +2456,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
},
},
{
Name: "grp2",
File: "/path/to/file",
Interval: 1,
Limit: 0,
Rules: []Rule{
AlertingRule{
State: "inactive",
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
},
},
},
},
zeroFunc: rulesZeroFunc,
@ -2681,6 +2746,159 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
},
zeroFunc: rulesZeroFunc,
},
{
endpoint: api.rules,
query: url.Values{
"group_limit": []string{"1"},
},
response: &RuleDiscovery{
GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"),
RuleGroups: []*RuleGroup{
{
Name: "grp",
File: "/path/to/file",
Interval: 1,
Limit: 0,
Rules: []Rule{
AlertingRule{
State: "inactive",
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
AlertingRule{
State: "inactive",
Name: "test_metric4",
Query: "up == 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
AlertingRule{
State: "pending",
Name: "test_metric5",
Query: "vector(1)",
Duration: 1,
Labels: labels.FromStrings("name", "tm5"),
Annotations: labels.Labels{},
Alerts: []*Alert{
{
Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"),
Annotations: labels.Labels{},
State: "pending",
Value: "1e+00",
},
},
Health: "ok",
Type: "alerting",
},
AlertingRule{
State: "inactive",
Name: "test_metric6",
Query: "up == 1",
Duration: 1,
Labels: labels.FromStrings("testlabel", "rule"),
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
AlertingRule{
State: "inactive",
Name: "test_metric7",
Query: "up == 1",
Duration: 1,
Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"),
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
RecordingRule{
Name: "recording-rule-1",
Query: "vector(1)",
Labels: labels.Labels{},
Health: "ok",
Type: "recording",
},
RecordingRule{
Name: "recording-rule-2",
Query: "vector(1)",
Labels: labels.FromStrings("testlabel", "rule"),
Health: "ok",
Type: "recording",
},
},
},
},
},
zeroFunc: rulesZeroFunc,
},
{
endpoint: api.rules,
query: url.Values{
"group_limit": []string{"1"},
"group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")},
},
response: &RuleDiscovery{
RuleGroups: []*RuleGroup{
{
Name: "grp2",
File: "/path/to/file",
Interval: 1,
Limit: 0,
Rules: []Rule{
AlertingRule{
State: "inactive",
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Health: "ok",
Type: "alerting",
},
},
},
},
},
zeroFunc: rulesZeroFunc,
},
{ // invalid pagination request
endpoint: api.rules,
query: url.Values{
"group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")},
},
errType: errorBadData,
zeroFunc: rulesZeroFunc,
},
{ // invalid group_limit
endpoint: api.rules,
query: url.Values{
"group_limit": []string{"0"},
"group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")},
},
errType: errorBadData,
zeroFunc: rulesZeroFunc,
},
{ // Pagination token is invalid due to changes in the rule groups
endpoint: api.rules,
query: url.Values{
"group_limit": []string{"1"},
"group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")},
},
errType: errorBadData,
zeroFunc: rulesZeroFunc,
},
{
endpoint: api.queryExemplars,
query: url.Values{