Support specifying series matchers to analyze tsdb (#12842)

* support specifying series matchers to analyze tsdb

Signed-off-by: Ben Ye <benye@amazon.com>

* fix cli docs

Signed-off-by: Ben Ye <benye@amazon.com>

---------

Signed-off-by: Ben Ye <benye@amazon.com>
This commit is contained in:
Ben Ye 2023-09-20 03:37:32 -07:00 committed by GitHub
parent c173cd57c9
commit c78124427e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 56 additions and 14 deletions

View file

@ -218,6 +218,7 @@ func main() {
analyzeBlockID := tsdbAnalyzeCmd.Arg("block id", "Block to analyze (default is the last block).").String()
analyzeLimit := tsdbAnalyzeCmd.Flag("limit", "How many items to show in each list.").Default("20").Int()
analyzeRunExtended := tsdbAnalyzeCmd.Flag("extended", "Run extended analysis.").Bool()
analyzeMatchers := tsdbAnalyzeCmd.Flag("match", "Series selector to analyze. Only 1 set of matchers is supported now.").String()
tsdbListCmd := tsdbCmd.Command("list", "List tsdb blocks.")
listHumanReadable := tsdbListCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool()
@ -372,7 +373,7 @@ func main() {
os.Exit(checkErr(benchmarkWrite(*benchWriteOutPath, *benchSamplesFile, *benchWriteNumMetrics, *benchWriteNumScrapes)))
case tsdbAnalyzeCmd.FullCommand():
os.Exit(checkErr(analyzeBlock(ctx, *analyzePath, *analyzeBlockID, *analyzeLimit, *analyzeRunExtended)))
os.Exit(checkErr(analyzeBlock(ctx, *analyzePath, *analyzeBlockID, *analyzeLimit, *analyzeRunExtended, *analyzeMatchers)))
case tsdbListCmd.FullCommand():
os.Exit(checkErr(listBlocks(*listPath, *listHumanReadable)))

View file

@ -413,7 +413,17 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error)
return db, b, nil
}
func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExtended bool) error {
func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExtended bool, matchers string) error {
var (
selectors []*labels.Matcher
err error
)
if len(matchers) > 0 {
selectors, err = parser.ParseMetricSelector(matchers)
if err != nil {
return err
}
}
db, block, err := openBlock(path, blockID)
if err != nil {
return err
@ -426,14 +436,17 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
fmt.Printf("Block ID: %s\n", meta.ULID)
// Presume 1ms resolution that Prometheus uses.
fmt.Printf("Duration: %s\n", (time.Duration(meta.MaxTime-meta.MinTime) * 1e6).String())
fmt.Printf("Series: %d\n", meta.Stats.NumSeries)
fmt.Printf("Total Series: %d\n", meta.Stats.NumSeries)
if len(matchers) > 0 {
fmt.Printf("Matcher: %s\n", matchers)
}
ir, err := block.Index()
if err != nil {
return err
}
defer ir.Close()
allLabelNames, err := ir.LabelNames(ctx)
allLabelNames, err := ir.LabelNames(ctx, selectors...)
if err != nil {
return err
}
@ -460,10 +473,30 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
labelpairsUncovered := map[string]uint64{}
labelpairsCount := map[string]uint64{}
entries := 0
p, err := ir.Postings(ctx, "", "") // The special all key.
if err != nil {
return err
var (
p index.Postings
refs []storage.SeriesRef
)
if len(matchers) > 0 {
p, err = tsdb.PostingsForMatchers(ir, selectors...)
if err != nil {
return err
}
// Expand refs first and cache in memory.
// So later we don't have to expand again.
refs, err = index.ExpandPostings(p)
if err != nil {
return err
}
fmt.Printf("Matched series: %d\n", len(refs))
p = index.NewListPostings(refs)
} else {
p, err = ir.Postings(ctx, "", "") // The special all key.
if err != nil {
return err
}
}
chks := []chunks.Meta{}
builder := labels.ScratchBuilder{}
for p.Next() {
@ -512,7 +545,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
postingInfos = postingInfos[:0]
for _, n := range allLabelNames {
values, err := ir.SortedLabelValues(ctx, n)
values, err := ir.SortedLabelValues(ctx, n, selectors...)
if err != nil {
return err
}
@ -528,7 +561,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
postingInfos = postingInfos[:0]
for _, n := range allLabelNames {
lv, err := ir.SortedLabelValues(ctx, n)
lv, err := ir.SortedLabelValues(ctx, n, selectors...)
if err != nil {
return err
}
@ -538,7 +571,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
printInfo(postingInfos)
postingInfos = postingInfos[:0]
lv, err := ir.SortedLabelValues(ctx, "__name__")
lv, err := ir.SortedLabelValues(ctx, "__name__", selectors...)
if err != nil {
return err
}
@ -547,6 +580,7 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
if err != nil {
return err
}
postings = index.Intersect(postings, index.NewListPostings(refs))
count := 0
for postings.Next() {
count++
@ -560,18 +594,24 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
printInfo(postingInfos)
if runExtended {
return analyzeCompaction(ctx, block, ir)
return analyzeCompaction(ctx, block, ir, selectors)
}
return nil
}
func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.IndexReader) (err error) {
n, v := index.AllPostingsKey()
postingsr, err := indexr.Postings(ctx, n, v)
func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.IndexReader, matchers []*labels.Matcher) (err error) {
var postingsr index.Postings
if len(matchers) > 0 {
postingsr, err = tsdb.PostingsForMatchers(indexr, matchers...)
} else {
n, v := index.AllPostingsKey()
postingsr, err = indexr.Postings(ctx, n, v)
}
if err != nil {
return err
}
chunkr, err := block.Chunks()
if err != nil {
return err

View file

@ -488,6 +488,7 @@ Analyze churn, label pair cardinality and compaction efficiency.
| --- | --- | --- |
| <code class="text-nowrap">--limit</code> | How many items to show in each list. | `20` |
| <code class="text-nowrap">--extended</code> | Run extended analysis. | |
| <code class="text-nowrap">--match</code> | Series selector to analyze. Only 1 set of matchers is supported now. | |