Add '--extended' flag to 'promtool check metrics' command (#10045)

This commit is contained in:
chenlujjj 2022-01-08 05:58:28 +08:00 committed by GitHub
parent 931acc3ee8
commit 2ce94ac196
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 144 additions and 3 deletions

View file

@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"math"
"net/http"
@ -27,6 +28,7 @@ import (
"sort"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/go-kit/log"
@ -43,6 +45,9 @@ import (
"gopkg.in/alecthomas/kingpin.v2"
yaml "gopkg.in/yaml.v2"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/file"
@ -95,6 +100,7 @@ func main() {
).Required().ExistingFiles()
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()
queryCmd := app.Command("query", "Run query against a Prometheus server.")
@ -228,7 +234,7 @@ func main() {
os.Exit(CheckRules(*ruleFiles...))
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics())
os.Exit(CheckMetrics(*checkMetricsExtended))
case queryInstantCmd.FullCommand():
os.Exit(QueryInstant(*queryInstantServer, *queryInstantExpr, *queryInstantTime, p))
@ -629,8 +635,10 @@ $ curl -s http://localhost:9090/metrics | promtool check metrics
`)
// CheckMetrics performs a linting pass on input metrics.
func CheckMetrics() int {
l := promlint.New(os.Stdin)
func CheckMetrics(extended bool) int {
var buf bytes.Buffer
tee := io.TeeReader(os.Stdin, &buf)
l := promlint.New(tee)
problems, err := l.Lint()
if err != nil {
fmt.Fprintln(os.Stderr, "error while linting:", err)
@ -645,9 +653,70 @@ func CheckMetrics() int {
return lintErrExitCode
}
if extended {
stats, total, err := checkMetricsExtended(&buf)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
w := tabwriter.NewWriter(os.Stdout, 4, 4, 4, ' ', tabwriter.TabIndent)
fmt.Fprintf(w, "Metric\tCardinality\tPercentage\t\n")
for _, stat := range stats {
fmt.Fprintf(w, "%s\t%d\t%.2f%%\t\n", stat.name, stat.cardinality, stat.percentage*100)
}
fmt.Fprintf(w, "Total\t%d\t%.f%%\t\n", total, 100.)
w.Flush()
}
return successExitCode
}
// metricStat holds the cardinality summary computed for one metric family.
type metricStat struct {
// name is the metric family name.
name string
// cardinality is the number of series counted for the family.
cardinality int
// percentage is the family's share of the total cardinality, stored as a
// fraction (cardinality / total); it is multiplied by 100 when printed.
percentage float64
}
// checkMetricsExtended parses text-format metrics from r and reports how many
// series each metric family contributes.
//
// It returns one metricStat per parsed metric family together with the total
// number of series across all families. The stats are sorted by descending
// cardinality; families with equal cardinality are ordered by name so that the
// result does not depend on map iteration order. A non-nil error is returned
// only when the input cannot be parsed, in which case the other results are
// zero values.
//
// Cardinality is counted as follows:
//   - histogram: for every series, its buckets plus the sum and count series;
//   - summary: for every series, its quantiles plus the sum and count series;
//   - any other type: one per series.
func checkMetricsExtended(r io.Reader) ([]metricStat, int, error) {
	p := expfmt.TextParser{}
	metricFamilies, err := p.TextToMetricFamilies(r)
	if err != nil {
		return nil, 0, fmt.Errorf("error while parsing text to metric families: %w", err)
	}

	var total int
	stats := make([]metricStat, 0, len(metricFamilies))
	for _, mf := range metricFamilies {
		var cardinality int
		switch mf.GetType() {
		case dto.MetricType_HISTOGRAM:
			// Count per series instead of extrapolating from the first one:
			// series of the same family may expose different bucket sets.
			// The getters are nil-safe, so a missing histogram counts as
			// zero buckets rather than panicking.
			for _, m := range mf.GetMetric() {
				cardinality += 2 + len(m.GetHistogram().GetBucket())
			}
		case dto.MetricType_SUMMARY:
			// Same reasoning as for histograms, with quantiles.
			for _, m := range mf.GetMetric() {
				cardinality += 2 + len(m.GetSummary().GetQuantile())
			}
		default:
			// Counters, gauges, untyped and anything else: one series each.
			cardinality = len(mf.GetMetric())
		}
		stats = append(stats, metricStat{name: mf.GetName(), cardinality: cardinality})
		total += cardinality
	}

	// Avoid NaN percentages should the total ever be zero.
	if total > 0 {
		for i := range stats {
			stats[i].percentage = float64(stats[i].cardinality) / float64(total)
		}
	}

	sort.SliceStable(stats, func(i, j int) bool {
		if stats[i].cardinality != stats[j].cardinality {
			return stats[i].cardinality > stats[j].cardinality
		}
		// metricFamilies is a map, so without a tie-break the relative order
		// of equally sized families would change between runs.
		return stats[i].name < stats[j].name
	})

	return stats, total, nil
}
// QueryInstant performs an instant query against a Prometheus server.
func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
if url.Scheme == "" {

View file

@ -18,6 +18,7 @@ import (
"net/http"
"net/http/httptest"
"net/url"
"os"
"runtime"
"strings"
"testing"
@ -322,3 +323,39 @@ func TestAuthorizationConfig(t *testing.T) {
})
}
}
func TestCheckMetricsExtended(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("Skipping on windows")
}
f, err := os.Open("testdata/metrics-test.prom")
require.NoError(t, err)
defer f.Close()
stats, total, err := checkMetricsExtended(f)
require.NoError(t, err)
require.Equal(t, 27, total)
require.Equal(t, []metricStat{
{
name: "prometheus_tsdb_compaction_chunk_size_bytes",
cardinality: 15,
percentage: float64(15) / float64(27),
},
{
name: "go_gc_duration_seconds",
cardinality: 7,
percentage: float64(7) / float64(27),
},
{
name: "net_conntrack_dialer_conn_attempted_total",
cardinality: 4,
percentage: float64(4) / float64(27),
},
{
name: "go_info",
cardinality: 1,
percentage: float64(1) / float64(27),
},
}, stats)
}

35
cmd/promtool/testdata/metrics-test.prom vendored Normal file
View file

@ -0,0 +1,35 @@
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 2.391e-05
go_gc_duration_seconds{quantile="0.25"} 9.4402e-05
go_gc_duration_seconds{quantile="0.5"} 0.000118953
go_gc_duration_seconds{quantile="0.75"} 0.000145884
go_gc_duration_seconds{quantile="1"} 0.005201208
go_gc_duration_seconds_sum 0.036134048
go_gc_duration_seconds_count 232
# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction
# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 662
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 1460
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 2266
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 3958
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4861
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 5721
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 10493
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 12464
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 13254
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 13699
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 13806
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 13852
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 13867
prometheus_tsdb_compaction_chunk_size_bytes_sum 3.886707e+06
prometheus_tsdb_compaction_chunk_size_bytes_count 13867
# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the given dialer a given name.
# TYPE net_conntrack_dialer_conn_attempted_total counter
net_conntrack_dialer_conn_attempted_total{dialer_name="blackbox"} 5210
net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0
net_conntrack_dialer_conn_attempted_total{dialer_name="node"} 21
net_conntrack_dialer_conn_attempted_total{dialer_name="prometheus"} 21
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.17"} 1