Head Cardinality Status Page (#6125)

* Adding TSDB Head Stats like cardinality to Status Page

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Moving mutex to Head

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Renaming variables

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Renaming variables and html

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Removing unwanted whitespaces

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Adding Tests, Benchmarks and Max Heap for Postings Stats

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Adding more tests for postingstats and web handler

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Adding more tests for postingstats and web handler

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Remove generated asset file that is no longer used

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>

* Changing comment and variable name for more readability

Signed-off-by: Sharad Gaur <sgaur@splunk.com>

* Using time.Duration in postings status function and removing refresh button from web page

Signed-off-by: Sharad Gaur <sgaur@splunk.com>
This commit is contained in:
Sharad Gaur 2019-11-04 19:06:13 -07:00 committed by Chris Marchbanks
parent 1a38075f83
commit e94503ff5c
7 changed files with 343 additions and 8 deletions

View file

@ -87,6 +87,10 @@ type Head struct {
deleted map[uint64]int // Deleted series, and what WAL segment they must be kept until.
postings *index.MemPostings // postings lists for terms
cardinalityMutex sync.Mutex
cardinalityCache *index.PostingsStats // posting stats cache which will expire after 30sec
lastPostingsStatsCall time.Duration // last posting stats call (PostgingsCardinalityStats()) time for caching
}
type headMetrics struct {
@ -231,6 +235,26 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
return m
}
// cardinalityCacheExpirationTime is the maximum age of the cached postings
// statistics; PostingsCardinalityStats recomputes them once they are older.
const cardinalityCacheExpirationTime = 30 * time.Second
// PostingsCardinalityStats returns the cardinality statistics of the head's
// postings, computed by MemPostings.Stats for the label statsByLabelName.
//
// The result is cached under cardinalityMutex. A call made while the cached
// value is no older than cardinalityCacheExpirationTime gets that cached value
// back, regardless of which statsByLabelName it was originally computed for.
// Timestamps have one-second resolution because they come from Unix seconds.
func (h *Head) PostingsCardinalityStats(statsByLabelName string) *index.PostingsStats {
	h.cardinalityMutex.Lock()
	defer h.cardinalityMutex.Unlock()

	now := time.Duration(time.Now().Unix()) * time.Second
	if age := now - h.lastPostingsStatsCall; age > cardinalityCacheExpirationTime {
		// The cached statistics are stale; drop them so they get rebuilt below.
		h.cardinalityCache = nil
	}

	if h.cardinalityCache == nil {
		h.cardinalityCache = h.postings.Stats(statsByLabelName)
		// Stamp the cache after the computation finished, not before it started.
		h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second
	}
	return h.cardinalityCache
}
// NewHead opens the head block in dir.
func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int64) (*Head, error) {
if l == nil {

View file

@ -79,6 +79,57 @@ func (p *MemPostings) SortedKeys() []labels.Label {
return keys
}
// PostingsStats contains cardinality based statistics for postings.
// Every field is a list of Stat entries. As filled in by MemPostings.Stats,
// each list holds at most 10 entries sorted by descending Count.
type PostingsStats struct {
// Values of the label name passed to Stats; Count is the length of the
// postings list stored for that value.
CardinalityMetricsStats []Stat
// Label names; Count is the number of distinct values stored for the name.
CardinalityLabelStats []Stat
// Label names; Count is the summed byte length of all values of the name.
LabelValueStats []Stat
// "name=value" pairs; Count is the length of the postings list of the pair.
LabelValuePairsStats []Stat
}
// Stats calculates the cardinality statistics from postings.
//
// It walks every label name and value under the read lock and keeps, for each
// of the four categories in PostingsStats, the maxNumOfRecords entries with
// the highest count. The empty label name is skipped. label selects the label
// name whose values are reported in CardinalityMetricsStats.
func (p *MemPostings) Stats(label string) *PostingsStats {
	const maxNumOfRecords = 10

	p.mtx.RLock()

	// newTopN returns an initialized collector bounded to maxNumOfRecords.
	newTopN := func() *maxHeap {
		top := &maxHeap{}
		top.init(maxNumOfRecords)
		return top
	}
	var (
		topMetrics  = newTopN()
		topLabels   = newTopN()
		topValueLen = newTopN()
		topPairs    = newTopN()
	)

	for labelName, byValue := range p.m {
		if labelName == "" {
			continue
		}
		topLabels.push(Stat{Name: labelName, Count: uint64(len(byValue))})

		// Cumulative byte length of all values of this label name.
		var valueBytes uint64
		selected := labelName == label
		for value, refs := range byValue {
			numRefs := uint64(len(refs))
			if selected {
				topMetrics.push(Stat{Name: value, Count: numRefs})
			}
			topPairs.push(Stat{Name: labelName + "=" + value, Count: numRefs})
			valueBytes += uint64(len(value))
		}
		topValueLen.push(Stat{Name: labelName, Count: valueBytes})
	}

	p.mtx.RUnlock()

	return &PostingsStats{
		CardinalityMetricsStats: topMetrics.get(),
		CardinalityLabelStats:   topLabels.get(),
		LabelValueStats:         topValueLen.get(),
		LabelValuePairsStats:    topPairs.get(),
	}
}
// Get returns a postings list for the given label pair.
func (p *MemPostings) Get(name, value string) Postings {
var lp []uint64

View file

@ -814,6 +814,36 @@ func TestWithoutPostings(t *testing.T) {
}
}
// BenchmarkPostings_Stats measures MemPostings.Stats on postings populated
// with the __name__ label plus twenty numbered variants of ten other label
// names. Setup time is excluded from the measurement.
func BenchmarkPostings_Stats(b *testing.B) {
	p := NewMemPostings()

	// addValues registers series 1..count-1 under the label name, giving each
	// series its own value of the form "<valuePrefix>-<id>".
	addValues := func(name, valuePrefix string, count int) {
		for id := 1; id < count; id++ {
			p.Add(uint64(id), labels.FromStrings(name, fmt.Sprintf("%s-%d", valuePrefix, id)))
		}
	}

	addValues("__name__", "metrics_name_can_be_very_big_and_bad", 1e3)

	families := []struct{ nameFormat, valuePrefix string }{
		{"host-%d", "metrics_name_can_be_very_big_and_bad"},
		{"instance-%d", "10.0.IP."},
		{"job-%d", "Small_Job_name"},
		{"err-%d", "avg_namespace-"},
		{"team-%d", "team-"},
		{"container_name-%d", "pod-"},
		{"cluster-%d", "newcluster-"},
		{"uid-%d", "123412312312312311-"},
		{"area-%d", "new_area_of_work-"},
		{"request_id-%d", "owner_name_work-"},
	}
	for i := 0; i < 20; i++ {
		for _, family := range families {
			addValues(fmt.Sprintf(family.nameFormat, i), family.valuePrefix, 1e3)
		}
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		p.Stats("__name__")
	}
}
func TestMemPostings_Delete(t *testing.T) {
p := NewMemPostings()
p.Add(1, labels.FromStrings("lbl1", "a"))

View file

@ -0,0 +1,69 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package index
import (
"math"
"sort"
)
// Stat holds values for a single cardinality statistic.
type Stat struct {
	Name  string
	Count uint64
}

// maxHeap keeps the maxLength items with the largest Count that were pushed
// into it. Despite its name it is not a binary heap: it is a bounded slice
// that remembers where its smallest entry lives, so that a full collection
// can evict that entry when a larger item arrives.
//
// A maxHeap must be set up with init before use; pushing into a zero-value
// maxHeap is a no-op because its capacity is zero.
type maxHeap struct {
	maxLength int    // Maximum number of items retained.
	minValue  uint64 // Smallest Count among Items.
	minIndex  int    // Position in Items of an entry whose Count is minValue.
	Items     []Stat // Retained entries; unordered until get sorts them.
}

// init resets the collection so that it retains at most length items.
// It may be called again to reuse the same maxHeap. A negative length is
// treated as zero.
func (m *maxHeap) init(length int) {
	if length < 0 {
		length = 0
	}
	m.maxLength = length
	m.minValue = math.MaxUint64
	// Reset the position as well so a reused maxHeap never points past Items.
	m.minIndex = 0
	m.Items = make([]Stat, 0, length)
}

// push offers item to the collection. While there is spare capacity the item
// is always stored. Once the collection is full, the item replaces the
// current minimum unless its Count is strictly smaller than that minimum.
func (m *maxHeap) push(item Stat) {
	if m.maxLength <= 0 {
		// Nothing can be retained; without this guard the replacement below
		// would index into an empty slice.
		return
	}

	if len(m.Items) < m.maxLength {
		if len(m.Items) == 0 || item.Count < m.minValue {
			m.minValue = item.Count
			m.minIndex = len(m.Items)
		}
		m.Items = append(m.Items, item)
		return
	}

	if item.Count < m.minValue {
		return
	}

	// Evict the current minimum, then rescan to find the new one. The scan is
	// linear in maxLength, which is expected to be small.
	m.Items[m.minIndex] = item
	m.minValue = item.Count
	for i := range m.Items {
		if c := m.Items[i].Count; c < m.minValue {
			m.minValue = c
			m.minIndex = i
		}
	}
}

// get sorts the retained items by descending Count and returns them. The
// returned slice is the collection's own storage, not a copy.
func (m *maxHeap) get() []Stat {
	sort.Slice(m.Items, func(i, j int) bool {
		return m.Items[i].Count > m.Items[j].Count
	})
	// Sorting moved the entries around: the minimum now sits at the end. Keep
	// the bookkeeping in sync so that push stays correct after get.
	if n := len(m.Items); n > 0 {
		m.minIndex = n - 1
		m.minValue = m.Items[n-1].Count
	}
	return m.Items
}

View file

@ -0,0 +1,77 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package index
import (
"fmt"
"testing"
"github.com/prometheus/prometheus/util/testutil"
)
// TestPostingsStats pushes the counts 0 through 3000000 into a ten-slot
// maxHeap and checks that get returns the ten largest in descending order.
func TestPostingsStats(t *testing.T) {
	const (
		numItems   = 3000000
		heapLength = 10
	)

	stats := new(maxHeap)
	stats.init(heapLength)
	for count := uint64(0); count < numItems; count++ {
		stats.push(Stat{Name: "Label-da", Count: count})
	}
	stats.push(Stat{Name: "Stuff", Count: 3000000})

	data := stats.get()
	testutil.Equals(t, 10, len(data))

	for i := 0; i < heapLength; i++ {
		got := data[i].Count
		fmt.Printf("%d", got)
		testutil.Equals(t, uint64(numItems-i), got)
	}
}
// TestPostingsStats2 checks a maxHeap that is filled below its capacity:
// every pushed item is retained and the largest count comes first.
func TestPostingsStats2(t *testing.T) {
	const heapLength = 10

	stats := new(maxHeap)
	stats.init(heapLength)
	for _, count := range []uint64{10, 11, 1, 6} {
		stats.push(Stat{Name: "Stuff", Count: count})
	}

	data := stats.get()
	testutil.Equals(t, 4, len(data))
	testutil.Equals(t, uint64(11), data[0].Count)
}
// BenchmarkPostingStatsMaxHep measures re-initializing a ten-slot maxHeap,
// pushing nine million ascending counts into it and reading the result.
func BenchmarkPostingStatsMaxHep(b *testing.B) {
	const (
		numItems   = 9000000
		heapLength = 10
	)

	stats := new(maxHeap)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		stats.init(heapLength)
		for count := uint64(0); count < numItems; count++ {
			stats.push(Stat{Name: "Label-da", Count: count})
		}
		stats.get()
	}
}

View file

@ -21,14 +21,6 @@
<th>Last successful configuration reload</th>
<td>{{.LastConfigTime.UTC}}</td>
</tr>
<tr>
<th>Head chunks</th>
<td>{{.ChunkCount}}</td>
</tr>
<tr>
<th>Head time series</th>
<td>{{.TimeSeriesCount}}</td>
</tr>
<tr>
<th>WAL corruptions</th>
<td>{{.CorruptionCount}}</td>
@ -101,5 +93,83 @@
</tbody>
</table>
<h2 id="headstatus">Head Stats</h2>
<table class="table table-sm table-bordered table-striped table-hover">
<tbody>
<tr>
<th>Number Of Series </th>
<th>Number of Chunks</th>
<th>Current Max Time</th>
<th>Current Min Time</th>
</tr>
<tr>
<td scope="row">{{ .NumSeries}}</td>
<td>{{.ChunkCount}}</td>
<td>{{ .MaxTime}}</td>
<td>{{ .MinTime}}</td>
</tr>
</tbody>
</table>
<div>Total Query Time: {{ .Duration }} Seconds</div>
<h3 id="headstatus">Highest Cardinality Labels </h3>
<table class="table table-sm table-bordered table-striped table-hover">
<tbody>
<tr>
<th>Name</th>
<th>Count</th>
</tr>
{{ range .Stats.CardinalityLabelStats }}
<tr>
<td scope="row">{{.Name}}</td>
<td>{{.Count}}</td>
</tr>
{{end}}
</tbody>
</table>
<h3 id="headstatus">Highest Cardinality Metric Names</h3>
<table class="table table-sm table-bordered table-striped table-hover">
<tbody>
<tr>
<th>Name</th>
<th>Count</th>
</tr>
{{ range .Stats.CardinalityMetricsStats }}
<tr>
<td scope="row">{{.Name}}</td>
<td>{{.Count}}</td>
</tr>
{{end}}
</tbody>
</table>
<h3 id="headstatus">Label Names With Highest Cumulative Label Value Length</h3>
<table class="table table-sm table-bordered table-striped table-hover">
<tbody>
<tr>
<th>Name</th>
<th>Length</th>
</tr>
{{ range .Stats.LabelValueStats }}
<tr>
<td scope="row">{{.Name}}</td>
<td>{{.Count}}</td>
</tr>
{{end}}
</tbody>
</table>
<h3 id="headstatus">Most Common Label Pairs</h3>
<table class="table table-sm table-bordered table-striped table-hover">
<tbody>
<tr>
<th>Name</th>
<th>Count</th>
</tr>
{{ range .Stats.LabelValuePairsStats }}
<tr>
<td scope="row">{{.Name}}</td>
<td>{{.Count}}</td>
</tr>
{{end}}
</tbody>
</table>
</div>
{{end}}

View file

@ -51,6 +51,7 @@ import (
"github.com/prometheus/common/route"
"github.com/prometheus/common/server"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/index"
"github.com/soheilhy/cmux"
"golang.org/x/net/netutil"
"google.golang.org/grpc"
@ -715,6 +716,11 @@ func (h *Handler) status(w http.ResponseWriter, r *http.Request) {
LastConfigTime time.Time
ReloadConfigSuccess bool
StorageRetention string
NumSeries uint64
MaxTime int64
MinTime int64
Stats *index.PostingsStats
Duration string
}{
Birth: h.birth,
CWD: h.cwd,
@ -755,6 +761,14 @@ func (h *Handler) status(w http.ResponseWriter, r *http.Request) {
status.LastConfigTime = time.Unix(int64(toFloat64(mF)), 0)
}
}
// Gather the head block statistics shown in the "Head Stats" section.
db := h.tsdb()
// Time only the cardinality lookup; time.Since uses the monotonic clock, so
// the reported duration is not affected by wall-clock adjustments.
startTime := time.Now()
status.Stats = db.Head().PostingsCardinalityStats("__name__")
status.Duration = fmt.Sprintf("%.3f", time.Since(startTime).Seconds())
status.NumSeries = db.Head().NumSeries()
status.MaxTime = db.Head().MaxTime()
// MinTime has to come from Head.MinTime; reading MaxTime here would make the
// page show the same value in both time columns.
status.MinTime = db.Head().MinTime()
h.executeTemplate(w, "status.html", status)
}