mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 05:34:05 -08:00
Head Cardinality Status Page (#6125)
* Adding TSDB Head Stats like cardinality to Status Page Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Moving mutx to Head Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Renaming variabls Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Renaming variabls and html Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Removing unwanted whitespaces Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Adding Tests, Banchmarks and Max Heap for Postings Stats Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Adding more tests for postingstats and web handler Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Adding more tests for postingstats and web handler Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Remove generated asset file that is no longer used Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com> * Changing comment and variable name for more readability Signed-off-by: Sharad Gaur <sgaur@splunk.com> * Using time.Duration in postings status function and removing refresh button from web page Signed-off-by: Sharad Gaur <sgaur@splunk.com>
This commit is contained in:
parent
1a38075f83
commit
e94503ff5c
24
tsdb/head.go
24
tsdb/head.go
|
@ -87,6 +87,10 @@ type Head struct {
|
|||
deleted map[uint64]int // Deleted series, and what WAL segment they must be kept until.
|
||||
|
||||
postings *index.MemPostings // postings lists for terms
|
||||
|
||||
cardinalityMutex sync.Mutex
|
||||
cardinalityCache *index.PostingsStats // posting stats cache which will expire after 30sec
|
||||
lastPostingsStatsCall time.Duration // last posting stats call (PostingsCardinalityStats()) time for caching
|
||||
}
|
||||
|
||||
type headMetrics struct {
|
||||
|
@ -231,6 +235,26 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
|
|||
return m
|
||||
}
|
||||
|
||||
const cardinalityCacheExpirationTime = time.Duration(30) * time.Second
|
||||
|
||||
// PostingsCardinalityStats returns top 10 highest cardinality stats By label and value names.
|
||||
func (h *Head) PostingsCardinalityStats(statsByLabelName string) *index.PostingsStats {
|
||||
h.cardinalityMutex.Lock()
|
||||
defer h.cardinalityMutex.Unlock()
|
||||
currentTime := time.Duration(time.Now().Unix()) * time.Second
|
||||
seconds := currentTime - h.lastPostingsStatsCall
|
||||
if seconds > cardinalityCacheExpirationTime {
|
||||
h.cardinalityCache = nil
|
||||
}
|
||||
if h.cardinalityCache != nil {
|
||||
return h.cardinalityCache
|
||||
}
|
||||
h.cardinalityCache = h.postings.Stats(statsByLabelName)
|
||||
h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second
|
||||
|
||||
return h.cardinalityCache
|
||||
}
|
||||
|
||||
// NewHead opens the head block in dir.
|
||||
func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int64) (*Head, error) {
|
||||
if l == nil {
|
||||
|
|
|
@ -79,6 +79,57 @@ func (p *MemPostings) SortedKeys() []labels.Label {
|
|||
return keys
|
||||
}
|
||||
|
||||
// PostingsStats contains cardinality based statistics for postings.
|
||||
type PostingsStats struct {
|
||||
CardinalityMetricsStats []Stat
|
||||
CardinalityLabelStats []Stat
|
||||
LabelValueStats []Stat
|
||||
LabelValuePairsStats []Stat
|
||||
}
|
||||
|
||||
// Stats calculates the cardinality statistics from postings.
|
||||
func (p *MemPostings) Stats(label string) *PostingsStats {
|
||||
const maxNumOfRecords = 10
|
||||
var size uint64
|
||||
|
||||
p.mtx.RLock()
|
||||
|
||||
metrics := &maxHeap{}
|
||||
labels := &maxHeap{}
|
||||
labelValueLenght := &maxHeap{}
|
||||
labelValuePairs := &maxHeap{}
|
||||
|
||||
metrics.init(maxNumOfRecords)
|
||||
labels.init(maxNumOfRecords)
|
||||
labelValueLenght.init(maxNumOfRecords)
|
||||
labelValuePairs.init(maxNumOfRecords)
|
||||
|
||||
for n, e := range p.m {
|
||||
if n == "" {
|
||||
continue
|
||||
}
|
||||
labels.push(Stat{Name: n, Count: uint64(len(e))})
|
||||
size = 0
|
||||
for name, values := range e {
|
||||
if n == label {
|
||||
metrics.push(Stat{Name: name, Count: uint64(len(values))})
|
||||
}
|
||||
labelValuePairs.push(Stat{Name: n + "=" + name, Count: uint64(len(values))})
|
||||
size += uint64(len(name))
|
||||
}
|
||||
labelValueLenght.push(Stat{Name: n, Count: size})
|
||||
}
|
||||
|
||||
p.mtx.RUnlock()
|
||||
|
||||
return &PostingsStats{
|
||||
CardinalityMetricsStats: metrics.get(),
|
||||
CardinalityLabelStats: labels.get(),
|
||||
LabelValueStats: labelValueLenght.get(),
|
||||
LabelValuePairsStats: labelValuePairs.get(),
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns a postings list for the given label pair.
|
||||
func (p *MemPostings) Get(name, value string) Postings {
|
||||
var lp []uint64
|
||||
|
|
|
@ -814,6 +814,36 @@ func TestWithoutPostings(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func BenchmarkPostings_Stats(b *testing.B) {
|
||||
p := NewMemPostings()
|
||||
|
||||
createPostingsLabelValues := func(name, valuePrefix string, count int) {
|
||||
for n := 1; n < count; n++ {
|
||||
value := fmt.Sprintf("%s-%d", valuePrefix, n)
|
||||
p.Add(uint64(n), labels.FromStrings(name, value))
|
||||
}
|
||||
|
||||
}
|
||||
createPostingsLabelValues("__name__", "metrics_name_can_be_very_big_and_bad", 1e3)
|
||||
for i := 0; i < 20; i++ {
|
||||
createPostingsLabelValues(fmt.Sprintf("host-%d", i), "metrics_name_can_be_very_big_and_bad", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("instance-%d", i), "10.0.IP.", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("job-%d", i), "Small_Job_name", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("err-%d", i), "avg_namespace-", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("team-%d", i), "team-", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("container_name-%d", i), "pod-", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("cluster-%d", i), "newcluster-", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("uid-%d", i), "123412312312312311-", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("area-%d", i), "new_area_of_work-", 1e3)
|
||||
createPostingsLabelValues(fmt.Sprintf("request_id-%d", i), "owner_name_work-", 1e3)
|
||||
}
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
p.Stats("__name__")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestMemPostings_Delete(t *testing.T) {
|
||||
p := NewMemPostings()
|
||||
p.Add(1, labels.FromStrings("lbl1", "a"))
|
||||
|
|
69
tsdb/index/postingsstats.go
Normal file
69
tsdb/index/postingsstats.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2019 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
package index
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Stat holds values for a single cardinality statistic.
type Stat struct {
	// Name identifies the entry being counted.
	Name string
	// Count is the value the entry is ranked by.
	Count uint64
}

// maxHeap keeps the maxLength items with the largest Count pushed so far.
//
// Despite its name it is not a binary heap: Items is a plain slice, and the
// position and value of the smallest retained item are tracked so that most
// pushes are rejected with a single comparison. Replacing the smallest item
// costs a linear scan over at most maxLength items.
//
// A zero-value maxHeap (or one initialized with a non-positive length)
// retains nothing; push is then a no-op.
type maxHeap struct {
	maxLength int    // maximum number of items retained
	minValue  uint64 // smallest Count currently held in Items
	minIndex  int    // index in Items of an item whose Count is minValue
	Items     []Stat // retained items; unordered until get is called
}

// init (re)initializes m to retain at most capacity items, discarding any
// previously held items. A negative capacity is treated as zero.
func (m *maxHeap) init(capacity int) {
	if capacity < 0 {
		capacity = 0
	}
	m.maxLength = capacity
	m.minValue = math.MaxUint64
	// Reset the tracked position so a stale index from a previous, larger
	// use of m can never point outside the new Items slice.
	m.minIndex = 0
	m.Items = make([]Stat, 0, capacity)
}

// push offers item to the collection. While fewer than maxLength items are
// held the item is always kept; afterwards it replaces the current smallest
// item unless its Count is strictly smaller than that minimum.
func (m *maxHeap) push(item Stat) {
	// Nothing can be retained; this also protects the index below on an
	// uninitialized or zero-capacity heap.
	if m.maxLength <= 0 {
		return
	}

	if len(m.Items) < m.maxLength {
		if item.Count < m.minValue {
			m.minValue = item.Count
			m.minIndex = len(m.Items)
		}
		m.Items = append(m.Items, item)
		return
	}

	if item.Count < m.minValue {
		return
	}

	// Overwrite the smallest item, then rescan to find the new minimum.
	m.Items[m.minIndex] = item
	m.minValue = item.Count

	for i, stat := range m.Items {
		if stat.Count < m.minValue {
			m.minValue = stat.Count
			m.minIndex = i
		}
	}
}

// get sorts the retained items by descending Count and returns them. The
// returned slice is m.Items itself, not a copy.
func (m *maxHeap) get() []Stat {
	sort.Slice(m.Items, func(i, j int) bool {
		return m.Items[i].Count > m.Items[j].Count
	})

	// Sorting moved the items around; after a descending sort the smallest
	// one is last. Re-point the tracked minimum so later pushes keep
	// evicting the right element.
	if n := len(m.Items); n > 0 {
		m.minIndex = n - 1
		m.minValue = m.Items[n-1].Count
	}
	return m.Items
}
|
77
tsdb/index/postingsstats_test.go
Normal file
77
tsdb/index/postingsstats_test.go
Normal file
|
@ -0,0 +1,77 @@
|
|||
// Copyright 2019 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
package index
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
)
|
||||
|
||||
func TestPostingsStats(t *testing.T) {
|
||||
stats := &maxHeap{}
|
||||
max := 3000000
|
||||
heapLength := 10
|
||||
stats.init(heapLength)
|
||||
for i := 0; i < max; i++ {
|
||||
item := Stat{
|
||||
Name: "Label-da",
|
||||
Count: uint64(i),
|
||||
}
|
||||
stats.push(item)
|
||||
}
|
||||
stats.push(Stat{Name: "Stuff", Count: 3000000})
|
||||
|
||||
data := stats.get()
|
||||
testutil.Equals(t, 10, len(data))
|
||||
for i := 0; i < heapLength; i++ {
|
||||
fmt.Printf("%d", data[i].Count)
|
||||
testutil.Equals(t, uint64(max-i), data[i].Count)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestPostingsStats2(t *testing.T) {
|
||||
stats := &maxHeap{}
|
||||
heapLength := 10
|
||||
|
||||
stats.init(heapLength)
|
||||
stats.push(Stat{Name: "Stuff", Count: 10})
|
||||
stats.push(Stat{Name: "Stuff", Count: 11})
|
||||
stats.push(Stat{Name: "Stuff", Count: 1})
|
||||
stats.push(Stat{Name: "Stuff", Count: 6})
|
||||
|
||||
data := stats.get()
|
||||
|
||||
testutil.Equals(t, 4, len(data))
|
||||
testutil.Equals(t, uint64(11), data[0].Count)
|
||||
}
|
||||
func BenchmarkPostingStatsMaxHep(b *testing.B) {
|
||||
stats := &maxHeap{}
|
||||
max := 9000000
|
||||
heapLength := 10
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
stats.init(heapLength)
|
||||
for i := 0; i < max; i++ {
|
||||
item := Stat{
|
||||
Name: "Label-da",
|
||||
Count: uint64(i),
|
||||
}
|
||||
stats.push(item)
|
||||
}
|
||||
stats.get()
|
||||
}
|
||||
|
||||
}
|
|
@ -21,14 +21,6 @@
|
|||
<th>Last successful configuration reload</th>
|
||||
<td>{{.LastConfigTime.UTC}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Head chunks</th>
|
||||
<td>{{.ChunkCount}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Head time series</th>
|
||||
<td>{{.TimeSeriesCount}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>WAL corruptions</th>
|
||||
<td>{{.CorruptionCount}}</td>
|
||||
|
@ -101,5 +93,83 @@
|
|||
</tbody>
|
||||
</table>
|
||||
|
||||
<h2 id="headstatus">Head Stats</h2>
|
||||
<table class="table table-sm table-bordered table-striped table-hover">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Number Of Series </th>
|
||||
<th>Number of Chunks</th>
|
||||
<th>Current Max Time</th>
|
||||
<th>Current Min Time</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td scope="row">{{ .NumSeries}}</td>
|
||||
<td>{{.ChunkCount}}</td>
|
||||
<td>{{ .MaxTime}}</td>
|
||||
<td>{{ .MinTime}}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div>Total Query Time: {{ .Duration }} Seconds</div>
|
||||
<h3 id="headstatus">Highest Cardinality Labels </h3>
|
||||
<table class="table table-sm table-bordered table-striped table-hover">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Count</th>
|
||||
</tr>
|
||||
{{ range .Stats.CardinalityLabelStats }}
|
||||
<tr>
|
||||
<td scope="row">{{.Name}}</td>
|
||||
<td>{{.Count}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
<h3 id="headstatus">Highest Cardinality Metric Names</h3>
|
||||
<table class="table table-sm table-bordered table-striped table-hover">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Count</th>
|
||||
</tr>
|
||||
{{ range .Stats.CardinalityMetricsStats }}
|
||||
<tr>
|
||||
<td scope="row">{{.Name}}</td>
|
||||
<td>{{.Count}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
<h3 id="headstatus">Label Names With Highest Cumulative Label Value Length</h3>
|
||||
<table class="table table-sm table-bordered table-striped table-hover">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Length</th>
|
||||
</tr>
|
||||
{{ range .Stats.LabelValueStats }}
|
||||
<tr>
|
||||
<td scope="row">{{.Name}}</td>
|
||||
<td>{{.Count}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
<h3 id="headstatus">Most Common Label Pairs</h3>
|
||||
<table class="table table-sm table-bordered table-striped table-hover">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Count</th>
|
||||
</tr>
|
||||
{{ range .Stats.LabelValuePairsStats }}
|
||||
<tr>
|
||||
<td scope="row">{{.Name}}</td>
|
||||
<td>{{.Count}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{{end}}
|
||||
|
|
14
web/web.go
14
web/web.go
|
@ -51,6 +51,7 @@ import (
|
|||
"github.com/prometheus/common/route"
|
||||
"github.com/prometheus/common/server"
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
"github.com/prometheus/prometheus/tsdb/index"
|
||||
"github.com/soheilhy/cmux"
|
||||
"golang.org/x/net/netutil"
|
||||
"google.golang.org/grpc"
|
||||
|
@ -715,6 +716,11 @@ func (h *Handler) status(w http.ResponseWriter, r *http.Request) {
|
|||
LastConfigTime time.Time
|
||||
ReloadConfigSuccess bool
|
||||
StorageRetention string
|
||||
NumSeries uint64
|
||||
MaxTime int64
|
||||
MinTime int64
|
||||
Stats *index.PostingsStats
|
||||
Duration string
|
||||
}{
|
||||
Birth: h.birth,
|
||||
CWD: h.cwd,
|
||||
|
@ -755,6 +761,14 @@ func (h *Handler) status(w http.ResponseWriter, r *http.Request) {
|
|||
status.LastConfigTime = time.Unix(int64(toFloat64(mF)), 0)
|
||||
}
|
||||
}
|
||||
db := h.tsdb()
|
||||
startTime := time.Now().UnixNano()
|
||||
status.Stats = db.Head().PostingsCardinalityStats("__name__")
|
||||
status.Duration = fmt.Sprintf("%.3f", float64(time.Now().UnixNano()-startTime)/float64(1e9))
|
||||
status.NumSeries = db.Head().NumSeries()
|
||||
status.MaxTime = db.Head().MaxTime()
|
||||
status.MinTime = db.Head().MaxTime()
|
||||
|
||||
h.executeTemplate(w, "status.html", status)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue