Updates to /targets and /rules (scrape duration, last evaluation time) (#4722)

* Add evaluationTimestamp (Last Evaluation) column to display on /rules
Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Add lastScrapeDuration ("Scrape Duration") to display on /targets
Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Updates based on Julius' feedback

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Update to set timestamp to when eval started (after eval completes)

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Update /rules to display time since last evaluation

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Re-order Last Eval/Eval Time to be consistent with targets page

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>
This commit is contained in:
Will Hegedus 2018-10-12 12:26:59 -04:00 committed by Julius Volz
parent 3e6b9d43c3
commit 193ebe7e34
7 changed files with 149 additions and 78 deletions

View file

@ -118,13 +118,15 @@ type AlertingRule struct {
labels labels.Labels
// Non-identifying key/value pairs.
annotations labels.Labels
// Time taken to evaluate the rule.
evaluationDuration time.Duration
// true if old state has been restored. We start persisting samples for ALERT_FOR_STATE
// only after the restoration.
restored bool
// Protects the below.
mtx sync.Mutex
// Time taken to evaluate the rule.
evaluationDuration time.Duration
// Timestamp of last evaluation of rule.
evaluationTimestamp time.Time
// The health of the alerting rule.
health RuleHealth
// The last error seen by the alerting rule.
@ -258,6 +260,20 @@ func (r *AlertingRule) GetEvaluationDuration() time.Duration {
return r.evaluationDuration
}
// SetEvaluationTimestamp stores ts as the time at which the alerting rule was
// last evaluated. The field is written while holding r.mtx.
func (r *AlertingRule) SetEvaluationTimestamp(ts time.Time) {
	r.mtx.Lock()
	r.evaluationTimestamp = ts
	r.mtx.Unlock()
}
// GetEvaluationTimestamp returns the currently stored evaluation timestamp of
// the alerting rule. The field is read while holding r.mtx.
func (r *AlertingRule) GetEvaluationTimestamp() time.Time {
	r.mtx.Lock()
	last := r.evaluationTimestamp
	r.mtx.Unlock()
	return last
}
// SetRestored updates the restoration state of the alerting rule.
func (r *AlertingRule) SetRestored(restored bool) {
r.restored = restored

View file

@ -160,6 +160,8 @@ type Rule interface {
Health() RuleHealth
SetEvaluationDuration(time.Duration)
GetEvaluationDuration() time.Duration
SetEvaluationTimestamp(time.Time)
GetEvaluationTimestamp() time.Time
// HTMLSnippet returns a human-readable string representation of the rule,
// decorated with HTML elements for use in the web frontend.
HTMLSnippet(pathPrefix string) html_template.HTML
@ -173,8 +175,9 @@ type Group struct {
rules []Rule
seriesInPreviousEval []map[string]labels.Labels // One per Rule.
opts *ManagerOptions
evaluationDuration time.Duration
mtx sync.Mutex
evaluationDuration time.Duration
evaluationTimestamp time.Time
shouldRestore bool
@ -232,6 +235,7 @@ func (g *Group) run(ctx context.Context) {
iterationDuration.Observe(timeSinceStart.Seconds())
g.SetEvaluationDuration(timeSinceStart)
g.SetEvaluationTimestamp(start)
}
// The assumption here is that since the ticker was started after having
@ -311,6 +315,20 @@ func (g *Group) SetEvaluationDuration(dur time.Duration) {
g.evaluationDuration = dur
}
// SetEvaluationTimestamp stores ts as the time at which the rule group was
// last evaluated. The field is written while holding g.mtx.
func (g *Group) SetEvaluationTimestamp(ts time.Time) {
	g.mtx.Lock()
	g.evaluationTimestamp = ts
	g.mtx.Unlock()
}
// GetEvaluationTimestamp returns the currently stored evaluation timestamp of
// the rule group. The field is read while holding g.mtx.
func (g *Group) GetEvaluationTimestamp() time.Time {
	g.mtx.Lock()
	last := g.evaluationTimestamp
	g.mtx.Unlock()
	return last
}
// evalTimestamp returns the immediately preceding consistently slotted evaluation time.
func (g *Group) evalTimestamp() time.Time {
var (
@ -377,6 +395,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
sp.Finish()
evalDuration.Observe(time.Since(t).Seconds())
rule.SetEvaluationDuration(time.Since(t))
rule.SetEvaluationTimestamp(t)
}(time.Now())
evalTotal.Inc()

View file

@ -38,8 +38,11 @@ type RecordingRule struct {
mtx sync.Mutex
// The health of the recording rule.
health RuleHealth
// Timestamp of last evaluation of the recording rule.
evaluationTimestamp time.Time
// The last error seen by the recording rule.
lastError error
lastError error
// Duration of how long it took to evaluate the recording rule.
evaluationDuration time.Duration
}
@ -156,6 +159,20 @@ func (rule *RecordingRule) GetEvaluationDuration() time.Duration {
return rule.evaluationDuration
}
// SetEvaluationTimestamp stores ts as the time at which the recording rule was
// last evaluated. The field is written while holding rule.mtx.
func (rule *RecordingRule) SetEvaluationTimestamp(ts time.Time) {
	rule.mtx.Lock()
	rule.evaluationTimestamp = ts
	rule.mtx.Unlock()
}
// GetEvaluationTimestamp returns the currently stored evaluation timestamp of
// the recording rule. The field is read while holding rule.mtx.
func (rule *RecordingRule) GetEvaluationTimestamp() time.Time {
	rule.mtx.Lock()
	last := rule.evaluationTimestamp
	rule.mtx.Unlock()
	return last
}
// HTMLSnippet returns an HTML snippet representing this rule.
func (rule *RecordingRule) HTMLSnippet(pathPrefix string) template.HTML {
ruleExpr := rule.vector.String()

View file

@ -53,11 +53,12 @@ type Target struct {
// Additional URL parameters that are part of the target URL.
params url.Values
mtx sync.RWMutex
lastError error
lastScrape time.Time
health TargetHealth
metadata metricMetadataStore
mtx sync.RWMutex
lastError error
lastScrape time.Time
lastScrapeDuration time.Duration
health TargetHealth
metadata metricMetadataStore
}
// NewTarget creates a reasonably configured target for querying.
@ -206,6 +207,7 @@ func (t *Target) report(start time.Time, dur time.Duration, err error) {
t.lastError = err
t.lastScrape = start
t.lastScrapeDuration = dur
}
// LastError returns the error encountered during the last scrape.
@ -224,6 +226,14 @@ func (t *Target) LastScrape() time.Time {
return t.lastScrape
}
// LastScrapeDuration returns the currently stored duration of the target's
// last scrape. The field is read while holding the read lock on t.mtx.
func (t *Target) LastScrapeDuration() time.Duration {
	t.mtx.RLock()
	dur := t.lastScrapeDuration
	t.mtx.RUnlock()
	return dur
}
// Health returns the last known health state of the target.
func (t *Target) Health() TargetHealth {
t.mtx.RLock()

File diff suppressed because one or more lines are too long

View file

@ -10,6 +10,7 @@
<thead>
<tr>
<td colspan="3"><h2><a href="#{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}" name="{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}">{{.Name}}</h2></td>
<td><h2>{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}</h2></td>
<td><h2>{{humanizeDuration .GetEvaluationDuration.Seconds}}</h2></td>
</tr>
</thead>
@ -18,6 +19,7 @@
<td style="font-weight:bold">Rule</td>
<td style="font-weight:bold">State</td>
<td style="font-weight:bold">Error</td>
<td style="font-weight:bold">Last Evaluation</td>
<td style="font-weight:bold">Evaluation Time</td>
</tr>
{{range .Rules}}
@ -33,6 +35,9 @@
<span class="alert alert-danger state_indicator">{{.LastError}}</span>
{{end}}
</td>
<td>
{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}
</td>
<td>{{humanizeDuration .GetEvaluationDuration.Seconds}}</td>
</tr>
{{end}}

View file

@ -32,6 +32,7 @@
<th>State</th>
<th>Labels</th>
<th>Last Scrape</th>
<th>Scrape Duration</th>
<th>Error</th>
</tr>
</thead>
@ -64,6 +65,9 @@
<td class="last-scrape">
{{if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}}
</td>
<td class="scrape-duration">
{{humanizeDuration .LastScrapeDuration.Seconds}}
</td>
<td class="errors">
{{if .LastError}}
<span class="alert alert-danger state_indicator">{{.LastError}}</span>