Updates to /targets and /rules (scrape duration, last evaluation time) (#4722)

* Add evaluationTimestamp (Last Evaluation) column to display on /rules
Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Add lastScrapeDuration ("Scrape Duration") to display on /targets
Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Updates based on Julius' feedback

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Update to set timestamp to when eval started (after eval completes)

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Update /rules to display time since last evaluation

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>

* Re-order Last Eval/Eval Time to be consistent with targets page

Signed-off-by: Will Hegedus <wbhegedus@liberty.edu>
This commit is contained in:
Will Hegedus 2018-10-12 12:26:59 -04:00 committed by Julius Volz
parent 3e6b9d43c3
commit 193ebe7e34
7 changed files with 149 additions and 78 deletions

View file

@ -118,13 +118,15 @@ type AlertingRule struct {
labels labels.Labels labels labels.Labels
// Non-identifying key/value pairs. // Non-identifying key/value pairs.
annotations labels.Labels annotations labels.Labels
// Time in seconds taken to evaluate rule.
evaluationDuration time.Duration
// true if old state has been restored. We start persisting samples for ALERT_FOR_STATE // true if old state has been restored. We start persisting samples for ALERT_FOR_STATE
// only after the restoration. // only after the restoration.
restored bool restored bool
// Protects the below. // Protects the below.
mtx sync.Mutex mtx sync.Mutex
// Time taken to evaluate the rule, stored as a time.Duration.
evaluationDuration time.Duration
// Timestamp of last evaluation of rule.
evaluationTimestamp time.Time
// The health of the alerting rule. // The health of the alerting rule.
health RuleHealth health RuleHealth
// The last error seen by the alerting rule. // The last error seen by the alerting rule.
@ -258,6 +260,20 @@ func (r *AlertingRule) GetEvaluationDuration() time.Duration {
return r.evaluationDuration return r.evaluationDuration
} }
// SetEvaluationTimestamp records ts as the alerting rule's last evaluation
// timestamp. The write happens while holding the rule's mutex.
func (r *AlertingRule) SetEvaluationTimestamp(ts time.Time) {
	r.mtx.Lock()
	r.evaluationTimestamp = ts
	r.mtx.Unlock()
}
// GetEvaluationTimestamp reports the evaluation timestamp currently stored on
// the alerting rule. The read happens while holding the rule's mutex.
func (r *AlertingRule) GetEvaluationTimestamp() time.Time {
	r.mtx.Lock()
	lastEval := r.evaluationTimestamp
	r.mtx.Unlock()
	return lastEval
}
// SetRestored updates the restoration state of the alerting rule. // SetRestored updates the restoration state of the alerting rule.
func (r *AlertingRule) SetRestored(restored bool) { func (r *AlertingRule) SetRestored(restored bool) {
r.restored = restored r.restored = restored

View file

@ -160,6 +160,8 @@ type Rule interface {
Health() RuleHealth Health() RuleHealth
SetEvaluationDuration(time.Duration) SetEvaluationDuration(time.Duration)
GetEvaluationDuration() time.Duration GetEvaluationDuration() time.Duration
SetEvaluationTimestamp(time.Time)
GetEvaluationTimestamp() time.Time
// HTMLSnippet returns a human-readable string representation of the rule, // HTMLSnippet returns a human-readable string representation of the rule,
// decorated with HTML elements for use the web frontend. // decorated with HTML elements for use the web frontend.
HTMLSnippet(pathPrefix string) html_template.HTML HTMLSnippet(pathPrefix string) html_template.HTML
@ -173,8 +175,9 @@ type Group struct {
rules []Rule rules []Rule
seriesInPreviousEval []map[string]labels.Labels // One per Rule. seriesInPreviousEval []map[string]labels.Labels // One per Rule.
opts *ManagerOptions opts *ManagerOptions
evaluationDuration time.Duration
mtx sync.Mutex mtx sync.Mutex
evaluationDuration time.Duration
evaluationTimestamp time.Time
shouldRestore bool shouldRestore bool
@ -232,6 +235,7 @@ func (g *Group) run(ctx context.Context) {
iterationDuration.Observe(timeSinceStart.Seconds()) iterationDuration.Observe(timeSinceStart.Seconds())
g.SetEvaluationDuration(timeSinceStart) g.SetEvaluationDuration(timeSinceStart)
g.SetEvaluationTimestamp(start)
} }
// The assumption here is that since the ticker was started after having // The assumption here is that since the ticker was started after having
@ -311,6 +315,20 @@ func (g *Group) SetEvaluationDuration(dur time.Duration) {
g.evaluationDuration = dur g.evaluationDuration = dur
} }
// SetEvaluationTimestamp records ts as the rule group's last evaluation
// timestamp. The write happens while holding the group's mutex.
func (g *Group) SetEvaluationTimestamp(ts time.Time) {
	g.mtx.Lock()
	g.evaluationTimestamp = ts
	g.mtx.Unlock()
}
// GetEvaluationTimestamp reports the evaluation timestamp currently stored on
// the rule group. The read happens while holding the group's mutex.
func (g *Group) GetEvaluationTimestamp() time.Time {
	g.mtx.Lock()
	lastEval := g.evaluationTimestamp
	g.mtx.Unlock()
	return lastEval
}
// evalTimestamp returns the immediately preceding consistently slotted evaluation time. // evalTimestamp returns the immediately preceding consistently slotted evaluation time.
func (g *Group) evalTimestamp() time.Time { func (g *Group) evalTimestamp() time.Time {
var ( var (
@ -377,6 +395,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
sp.Finish() sp.Finish()
evalDuration.Observe(time.Since(t).Seconds()) evalDuration.Observe(time.Since(t).Seconds())
rule.SetEvaluationDuration(time.Since(t)) rule.SetEvaluationDuration(time.Since(t))
rule.SetEvaluationTimestamp(t)
}(time.Now()) }(time.Now())
evalTotal.Inc() evalTotal.Inc()

View file

@ -38,8 +38,11 @@ type RecordingRule struct {
mtx sync.Mutex mtx sync.Mutex
// The health of the recording rule. // The health of the recording rule.
health RuleHealth health RuleHealth
// Timestamp of last evaluation of the recording rule.
evaluationTimestamp time.Time
// The last error seen by the recording rule. // The last error seen by the recording rule.
lastError error lastError error
// Duration of how long it took to evaluate the recording rule.
evaluationDuration time.Duration evaluationDuration time.Duration
} }
@ -156,6 +159,20 @@ func (rule *RecordingRule) GetEvaluationDuration() time.Duration {
return rule.evaluationDuration return rule.evaluationDuration
} }
// SetEvaluationTimestamp records ts as the recording rule's last evaluation
// timestamp. The write happens while holding the rule's mutex.
func (rule *RecordingRule) SetEvaluationTimestamp(ts time.Time) {
	rule.mtx.Lock()
	rule.evaluationTimestamp = ts
	rule.mtx.Unlock()
}
// GetEvaluationTimestamp reports the evaluation timestamp currently stored on
// the recording rule. The read happens while holding the rule's mutex.
func (rule *RecordingRule) GetEvaluationTimestamp() time.Time {
	rule.mtx.Lock()
	lastEval := rule.evaluationTimestamp
	rule.mtx.Unlock()
	return lastEval
}
// HTMLSnippet returns an HTML snippet representing this rule. // HTMLSnippet returns an HTML snippet representing this rule.
func (rule *RecordingRule) HTMLSnippet(pathPrefix string) template.HTML { func (rule *RecordingRule) HTMLSnippet(pathPrefix string) template.HTML {
ruleExpr := rule.vector.String() ruleExpr := rule.vector.String()

View file

@ -56,6 +56,7 @@ type Target struct {
mtx sync.RWMutex mtx sync.RWMutex
lastError error lastError error
lastScrape time.Time lastScrape time.Time
lastScrapeDuration time.Duration
health TargetHealth health TargetHealth
metadata metricMetadataStore metadata metricMetadataStore
} }
@ -206,6 +207,7 @@ func (t *Target) report(start time.Time, dur time.Duration, err error) {
t.lastError = err t.lastError = err
t.lastScrape = start t.lastScrape = start
t.lastScrapeDuration = dur
} }
// LastError returns the error encountered during the last scrape. // LastError returns the error encountered during the last scrape.
@ -224,6 +226,14 @@ func (t *Target) LastScrape() time.Time {
return t.lastScrape return t.lastScrape
} }
// LastScrapeDuration reports the scrape duration currently stored on the
// target. The read happens while holding the target's read lock.
func (t *Target) LastScrapeDuration() time.Duration {
	t.mtx.RLock()
	dur := t.lastScrapeDuration
	t.mtx.RUnlock()
	return dur
}
// Health returns the last known health state of the target. // Health returns the last known health state of the target.
func (t *Target) Health() TargetHealth { func (t *Target) Health() TargetHealth {
t.mtx.RLock() t.mtx.RLock()

File diff suppressed because one or more lines are too long

View file

@ -10,6 +10,7 @@
<thead> <thead>
<tr> <tr>
<td colspan="3"><h2><a href="#{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}" name="{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}">{{.Name}}</h2></td> <td colspan="3"><h2><a href="#{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}" name="{{reReplaceAll "([^a-zA-Z0-9])" "$1" .Name}}">{{.Name}}</h2></td>
<td><h2>{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}</h2></td>
<td><h2>{{humanizeDuration .GetEvaluationDuration.Seconds}}</h2></td> <td><h2>{{humanizeDuration .GetEvaluationDuration.Seconds}}</h2></td>
</tr> </tr>
</thead> </thead>
@ -18,6 +19,7 @@
<td style="font-weight:bold">Rule</td> <td style="font-weight:bold">Rule</td>
<td style="font-weight:bold">State</td> <td style="font-weight:bold">State</td>
<td style="font-weight:bold">Error</td> <td style="font-weight:bold">Error</td>
<td style="font-weight:bold">Last Evaluation</td>
<td style="font-weight:bold">Evaluation Time</td> <td style="font-weight:bold">Evaluation Time</td>
</tr> </tr>
{{range .Rules}} {{range .Rules}}
@ -33,6 +35,9 @@
<span class="alert alert-danger state_indicator">{{.LastError}}</span> <span class="alert alert-danger state_indicator">{{.LastError}}</span>
{{end}} {{end}}
</td> </td>
<td>
{{if .GetEvaluationTimestamp.IsZero}}Never{{else}}{{since .GetEvaluationTimestamp}} ago{{end}}
</td>
<td>{{humanizeDuration .GetEvaluationDuration.Seconds}}</td> <td>{{humanizeDuration .GetEvaluationDuration.Seconds}}</td>
</tr> </tr>
{{end}} {{end}}

View file

@ -32,6 +32,7 @@
<th>State</th> <th>State</th>
<th>Labels</th> <th>Labels</th>
<th>Last Scrape</th> <th>Last Scrape</th>
<th>Scrape Duration</th>
<th>Error</th> <th>Error</th>
</tr> </tr>
</thead> </thead>
@ -64,6 +65,9 @@
<td class="last-scrape"> <td class="last-scrape">
{{if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}} {{if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}}
</td> </td>
<td class="scrape-duration">
{{humanizeDuration .LastScrapeDuration.Seconds}}
</td>
<td class="errors"> <td class="errors">
{{if .LastError}} {{if .LastError}}
<span class="alert alert-danger state_indicator">{{.LastError}}</span> <span class="alert alert-danger state_indicator">{{.LastError}}</span>