Updates to /targets and /rules (scrape duration, last evaluation time) (#4722)

* Add evaluationTimestamp (Last Evaluation) column to display on /rules
Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Add lastScrapeDuration ("Scrape Duration") to display on /targets
Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Updates based on Julius' feedback

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Update to set timestamp to when eval started (after eval completes)

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Update /rules to display time since last evaluation

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Re-order Last Eval/Eval Time to be consistent with targets page

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>
This commit is contained in:
Will Hegedus 2018-10-12 12:26:59 -04:00 committed by Julius Volz
parent 3e6b9d43c3
commit 193ebe7e34
7 changed files with 149 additions and 78 deletions

View file

@ -118,13 +118,15 @@ type AlertingRule struct {
labels labels.Labels labels labels.Labels
// Non-identifying key/value pairs. // Non-identifying key/value pairs.
annotations labels.Labels annotations labels.Labels
// Time in seconds taken to evaluate rule.
evaluationDuration time.Duration
// true if old state has been restored. We start persisting samples for ALERT_FOR_STATE // true if old state has been restored. We start persisting samples for ALERT_FOR_STATE
// only after the restoration. // only after the restoration.
restored bool restored bool
// Protects the below. // Protects the below.
mtx sync.Mutex mtx sync.Mutex
// Time taken to evaluate rule.
evaluationDuration time.Duration
// Timestamp of last evaluation of rule.
evaluationTimestamp time.Time
// The health of the alerting rule. // The health of the alerting rule.
health RuleHealth health RuleHealth
// The last error seen by the alerting rule. // The last error seen by the alerting rule.
@ -258,6 +260,20 @@ func (r *AlertingRule) GetEvaluationDuration() time.Duration {
return r.evaluationDuration return r.evaluationDuration
} }
// SetEvaluationTimestamp records ts as the time at which the alerting rule
// was most recently evaluated. The field is written while holding r.mtx.
func (r *AlertingRule) SetEvaluationTimestamp(ts time.Time) {
	r.mtx.Lock()
	r.evaluationTimestamp = ts
	r.mtx.Unlock()
}
// GetEvaluationTimestamp reports the timestamp stored for the alerting rule's
// most recent evaluation. The field is read while holding r.mtx.
func (r *AlertingRule) GetEvaluationTimestamp() time.Time {
	r.mtx.Lock()
	lastEval := r.evaluationTimestamp
	r.mtx.Unlock()
	return lastEval
}
// SetRestored updates the restoration state of the alerting rule. // SetRestored updates the restoration state of the alerting rule.
func (r *AlertingRule) SetRestored(restored bool) { func (r *AlertingRule) SetRestored(restored bool) {
r.restored = restored r.restored = restored

View file

@ -160,6 +160,8 @@ type Rule interface {
Health() RuleHealth Health() RuleHealth
SetEvaluationDuration(time.Duration) SetEvaluationDuration(time.Duration)
GetEvaluationDuration() time.Duration GetEvaluationDuration() time.Duration
SetEvaluationTimestamp(time.Time)
GetEvaluationTimestamp() time.Time
// HTMLSnippet returns a human-readable string representation of the rule, // HTMLSnippet returns a human-readable string representation of the rule,
// decorated with HTML elements for use in the web frontend. // decorated with HTML elements for use in the web frontend.
HTMLSnippet(pathPrefix string) html_template.HTML HTMLSnippet(pathPrefix string) html_template.HTML
@ -173,8 +175,9 @@ type Group struct {
rules []Rule rules []Rule
seriesInPreviousEval []map[string]labels.Labels // One per Rule. seriesInPreviousEval []map[string]labels.Labels // One per Rule.
opts *ManagerOptions opts *ManagerOptions
evaluationDuration time.Duration
mtx sync.Mutex mtx sync.Mutex
evaluationDuration time.Duration
evaluationTimestamp time.Time
shouldRestore bool shouldRestore bool
@ -232,6 +235,7 @@ func (g *Group) run(ctx context.Context) {
iterationDuration.Observe(timeSinceStart.Seconds()) iterationDuration.Observe(timeSinceStart.Seconds())
g.SetEvaluationDuration(timeSinceStart) g.SetEvaluationDuration(timeSinceStart)
g.SetEvaluationTimestamp(start)
} }
// The assumption here is that since the ticker was started after having // The assumption here is that since the ticker was started after having
@ -311,6 +315,20 @@ func (g *Group) SetEvaluationDuration(dur time.Duration) {
g.evaluationDuration = dur g.evaluationDuration = dur
} }
// SetEvaluationTimestamp records ts as the time at which the rule group was
// most recently evaluated. The field is written while holding g.mtx.
func (g *Group) SetEvaluationTimestamp(ts time.Time) {
	g.mtx.Lock()
	g.evaluationTimestamp = ts
	g.mtx.Unlock()
}
// GetEvaluationTimestamp reports the timestamp stored for the rule group's
// most recent evaluation. The field is read while holding g.mtx.
func (g *Group) GetEvaluationTimestamp() time.Time {
	g.mtx.Lock()
	lastEval := g.evaluationTimestamp
	g.mtx.Unlock()
	return lastEval
}
// evalTimestamp returns the immediately preceding consistently slotted evaluation time. // evalTimestamp returns the immediately preceding consistently slotted evaluation time.
func (g *Group) evalTimestamp() time.Time { func (g *Group) evalTimestamp() time.Time {
var ( var (
@ -377,6 +395,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
sp.Finish() sp.Finish()
evalDuration.Observe(time.Since(t).Seconds()) evalDuration.Observe(time.Since(t).Seconds())
rule.SetEvaluationDuration(time.Since(t)) rule.SetEvaluationDuration(time.Since(t))
rule.SetEvaluationTimestamp(t)
}(time.Now()) }(time.Now())
evalTotal.Inc() evalTotal.Inc()

View file

@ -38,8 +38,11 @@ type RecordingRule struct {
mtx sync.Mutex mtx sync.Mutex
// The health of the recording rule. // The health of the recording rule.
health RuleHealth health RuleHealth
// Timestamp of last evaluation of the recording rule.
evaluationTimestamp time.Time
// The last error seen by the recording rule. // The last error seen by the recording rule.
lastError error lastError error
// Duration of how long it took to evaluate the recording rule.
evaluationDuration time.Duration evaluationDuration time.Duration
} }
@ -156,6 +159,20 @@ func (rule *RecordingRule) GetEvaluationDuration() time.Duration {
return rule.evaluationDuration return rule.evaluationDuration
} }
// SetEvaluationTimestamp records ts as the time at which the recording rule
// was most recently evaluated. The field is written while holding rule.mtx.
func (rule *RecordingRule) SetEvaluationTimestamp(ts time.Time) {
	rule.mtx.Lock()
	rule.evaluationTimestamp = ts
	rule.mtx.Unlock()
}
// GetEvaluationTimestamp reports the timestamp stored for the recording
// rule's most recent evaluation. The field is read while holding rule.mtx.
func (rule *RecordingRule) GetEvaluationTimestamp() time.Time {
	rule.mtx.Lock()
	lastEval := rule.evaluationTimestamp
	rule.mtx.Unlock()
	return lastEval
}
// HTMLSnippet returns an HTML snippet representing this rule. // HTMLSnippet returns an HTML snippet representing this rule.
func (rule *RecordingRule) HTMLSnippet(pathPrefix string) template.HTML { func (rule *RecordingRule) HTMLSnippet(pathPrefix string) template.HTML {
ruleExpr := rule.vector.String() ruleExpr := rule.vector.String()

View file

@ -53,11 +53,12 @@ type Target struct {
// Additional URL parameters that are part of the target URL. // Additional URL parameters that are part of the target URL.
params url.Values params url.Values
mtx sync.RWMutex mtx sync.RWMutex
lastError error lastError error
lastScrape time.Time lastScrape time.Time
health TargetHealth lastScrapeDuration time.Duration
metadata metricMetadataStore health TargetHealth
metadata metricMetadataStore
} }
// NewTarget creates a reasonably configured target for querying. // NewTarget creates a reasonably configured target for querying.
@ -206,6 +207,7 @@ func (t *Target) report(start time.Time, dur time.Duration, err error) {
t.lastError = err t.lastError = err
t.lastScrape = start t.lastScrape = start
t.lastScrapeDuration = dur
} }
// LastError returns the error encountered during the last scrape. // LastError returns the error encountered during the last scrape.
@ -224,6 +226,14 @@ func (t *Target) LastScrape() time.Time {
return t.lastScrape return t.lastScrape
} }
// LastScrapeDuration reports the duration stored for the target's most
// recent scrape. The field is read while holding the read lock on t.mtx.
func (t *Target) LastScrapeDuration() time.Duration {
	t.mtx.RLock()
	took := t.lastScrapeDuration
	t.mtx.RUnlock()
	return took
}
// Health returns the last known health state of the target. // Health returns the last known health state of the target.
func (t *Target) Health() TargetHealth { func (t *Target) Health() TargetHealth {
t.mtx.RLock() t.mtx.RLock()

File diff suppressed because one or more lines are too long

View file

@ -10,6 +10,7 @@
<thead> <thead>
<tr> <tr>
<td colspan="3"><h2><a href="#{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}" name="{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}">{{.Name}}</h2></td> <td colspan="3"><h2><a href="#{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}" name="{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}">{{.Name}}</h2></td>
<td><h2>{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}</h2></td>
<td><h2>{{humanizeDuration .GetEvaluationDuration.Seconds}}</h2></td> <td><h2>{{humanizeDuration .GetEvaluationDuration.Seconds}}</h2></td>
</tr> </tr>
</thead> </thead>
@ -18,6 +19,7 @@
<td style="font-weight:bold">Rule</td> <td style="font-weight:bold">Rule</td>
<td style="font-weight:bold">State</td> <td style="font-weight:bold">State</td>
<td style="font-weight:bold">Error</td> <td style="font-weight:bold">Error</td>
<td style="font-weight:bold">Last Evaluation</td>
<td style="font-weight:bold">Evaluation Time</td> <td style="font-weight:bold">Evaluation Time</td>
</tr> </tr>
{{range .Rules}} {{range .Rules}}
@ -33,6 +35,9 @@
<span class="alert alert-danger state_indicator">{{.LastError}}</span> <span class="alert alert-danger state_indicator">{{.LastError}}</span>
{{end}} {{end}}
</td> </td>
<td>
{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}
</td>
<td>{{humanizeDuration .GetEvaluationDuration.Seconds}}</td> <td>{{humanizeDuration .GetEvaluationDuration.Seconds}}</td>
</tr> </tr>
{{end}} {{end}}

View file

@ -32,6 +32,7 @@
<th>State</th> <th>State</th>
<th>Labels</th> <th>Labels</th>
<th>Last Scrape</th> <th>Last Scrape</th>
<th>Scrape Duration</th>
<th>Error</th> <th>Error</th>
</tr> </tr>
</thead> </thead>
@ -64,6 +65,9 @@
<td class="last-scrape"> <td class="last-scrape">
{{if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}} {{if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}}
</td> </td>
<td class="scrape-duration">
{{humanizeDuration .LastScrapeDuration.Seconds}}
</td>
<td class="errors"> <td class="errors">
{{if .LastError}} {{if .LastError}}
<span class="alert alert-danger state_indicator">{{.LastError}}</span> <span class="alert alert-danger state_indicator">{{.LastError}}</span>