mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-11 13:57:36 -08:00
Add '--extended' flag to 'promtool check metrics' command (#10045)
This commit is contained in:
parent
931acc3ee8
commit
2ce94ac196
|
@ -18,6 +18,7 @@ import (
|
|||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"net/http"
|
||||
|
@ -27,6 +28,7 @@ import (
|
|||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
|
@ -43,6 +45,9 @@ import (
|
|||
"gopkg.in/alecthomas/kingpin.v2"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/discovery"
|
||||
"github.com/prometheus/prometheus/discovery/file"
|
||||
|
@ -95,6 +100,7 @@ func main() {
|
|||
).Required().ExistingFiles()
|
||||
|
||||
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
|
||||
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
|
||||
agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()
|
||||
|
||||
queryCmd := app.Command("query", "Run query against a Prometheus server.")
|
||||
|
@ -228,7 +234,7 @@ func main() {
|
|||
os.Exit(CheckRules(*ruleFiles...))
|
||||
|
||||
case checkMetricsCmd.FullCommand():
|
||||
os.Exit(CheckMetrics())
|
||||
os.Exit(CheckMetrics(*checkMetricsExtended))
|
||||
|
||||
case queryInstantCmd.FullCommand():
|
||||
os.Exit(QueryInstant(*queryInstantServer, *queryInstantExpr, *queryInstantTime, p))
|
||||
|
@ -629,8 +635,10 @@ $ curl -s http://localhost:9090/metrics | promtool check metrics
|
|||
`)
|
||||
|
||||
// CheckMetrics performs a linting pass on input metrics.
|
||||
func CheckMetrics() int {
|
||||
l := promlint.New(os.Stdin)
|
||||
func CheckMetrics(extended bool) int {
|
||||
var buf bytes.Buffer
|
||||
tee := io.TeeReader(os.Stdin, &buf)
|
||||
l := promlint.New(tee)
|
||||
problems, err := l.Lint()
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, "error while linting:", err)
|
||||
|
@ -645,9 +653,70 @@ func CheckMetrics() int {
|
|||
return lintErrExitCode
|
||||
}
|
||||
|
||||
if extended {
|
||||
stats, total, err := checkMetricsExtended(&buf)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return failureExitCode
|
||||
}
|
||||
w := tabwriter.NewWriter(os.Stdout, 4, 4, 4, ' ', tabwriter.TabIndent)
|
||||
fmt.Fprintf(w, "Metric\tCardinality\tPercentage\t\n")
|
||||
for _, stat := range stats {
|
||||
fmt.Fprintf(w, "%s\t%d\t%.2f%%\t\n", stat.name, stat.cardinality, stat.percentage*100)
|
||||
}
|
||||
fmt.Fprintf(w, "Total\t%d\t%.f%%\t\n", total, 100.)
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
return successExitCode
|
||||
}
|
||||
|
||||
// metricStat holds the cardinality figures reported for one metric family by
// checkMetricsExtended.
type metricStat struct {
	// name is the metric family name.
	name string
	// cardinality is the number of series counted for the family.
	cardinality int
	// percentage is the family's share of the total cardinality, stored as a
	// fraction (it is multiplied by 100 when printed).
	percentage float64
}
|
||||
|
||||
func checkMetricsExtended(r io.Reader) ([]metricStat, int, error) {
|
||||
p := expfmt.TextParser{}
|
||||
metricFamilies, err := p.TextToMetricFamilies(r)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("error while parsing text to metric families: %w", err)
|
||||
}
|
||||
|
||||
var total int
|
||||
stats := make([]metricStat, 0, len(metricFamilies))
|
||||
for _, mf := range metricFamilies {
|
||||
var cardinality int
|
||||
switch mf.GetType() {
|
||||
case dto.MetricType_COUNTER, dto.MetricType_GAUGE, dto.MetricType_UNTYPED:
|
||||
cardinality = len(mf.Metric)
|
||||
case dto.MetricType_HISTOGRAM:
|
||||
// Histogram metrics includes sum, count, buckets.
|
||||
buckets := len(mf.Metric[0].Histogram.Bucket)
|
||||
cardinality = len(mf.Metric) * (2 + buckets)
|
||||
case dto.MetricType_SUMMARY:
|
||||
// Summary metrics includes sum, count, quantiles.
|
||||
quantiles := len(mf.Metric[0].Summary.Quantile)
|
||||
cardinality = len(mf.Metric) * (2 + quantiles)
|
||||
default:
|
||||
cardinality = len(mf.Metric)
|
||||
}
|
||||
stats = append(stats, metricStat{name: mf.GetName(), cardinality: cardinality})
|
||||
total += cardinality
|
||||
}
|
||||
|
||||
for i := range stats {
|
||||
stats[i].percentage = float64(stats[i].cardinality) / float64(total)
|
||||
}
|
||||
|
||||
sort.SliceStable(stats, func(i, j int) bool {
|
||||
return stats[i].cardinality > stats[j].cardinality
|
||||
})
|
||||
|
||||
return stats, total, nil
|
||||
}
|
||||
|
||||
// QueryInstant performs an instant query against a Prometheus server.
|
||||
func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
|
||||
if url.Scheme == "" {
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -322,3 +323,39 @@ func TestAuthorizationConfig(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckMetricsExtended(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("Skipping on windows")
|
||||
}
|
||||
|
||||
f, err := os.Open("testdata/metrics-test.prom")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
|
||||
stats, total, err := checkMetricsExtended(f)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 27, total)
|
||||
require.Equal(t, []metricStat{
|
||||
{
|
||||
name: "prometheus_tsdb_compaction_chunk_size_bytes",
|
||||
cardinality: 15,
|
||||
percentage: float64(15) / float64(27),
|
||||
},
|
||||
{
|
||||
name: "go_gc_duration_seconds",
|
||||
cardinality: 7,
|
||||
percentage: float64(7) / float64(27),
|
||||
},
|
||||
{
|
||||
name: "net_conntrack_dialer_conn_attempted_total",
|
||||
cardinality: 4,
|
||||
percentage: float64(4) / float64(27),
|
||||
},
|
||||
{
|
||||
name: "go_info",
|
||||
cardinality: 1,
|
||||
percentage: float64(1) / float64(27),
|
||||
},
|
||||
}, stats)
|
||||
}
|
||||
|
|
35
cmd/promtool/testdata/metrics-test.prom
vendored
Normal file
35
cmd/promtool/testdata/metrics-test.prom
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
|
||||
# TYPE go_gc_duration_seconds summary
|
||||
go_gc_duration_seconds{quantile="0"} 2.391e-05
|
||||
go_gc_duration_seconds{quantile="0.25"} 9.4402e-05
|
||||
go_gc_duration_seconds{quantile="0.5"} 0.000118953
|
||||
go_gc_duration_seconds{quantile="0.75"} 0.000145884
|
||||
go_gc_duration_seconds{quantile="1"} 0.005201208
|
||||
go_gc_duration_seconds_sum 0.036134048
|
||||
go_gc_duration_seconds_count 232
|
||||
# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction
|
||||
# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 662
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 1460
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 2266
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 3958
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4861
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 5721
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 10493
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 12464
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 13254
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 13699
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 13806
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 13852
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 13867
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_sum 3.886707e+06
|
||||
prometheus_tsdb_compaction_chunk_size_bytes_count 13867
|
||||
# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the given dialer a given name.
|
||||
# TYPE net_conntrack_dialer_conn_attempted_total counter
|
||||
net_conntrack_dialer_conn_attempted_total{dialer_name="blackbox"} 5210
|
||||
net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0
|
||||
net_conntrack_dialer_conn_attempted_total{dialer_name="node"} 21
|
||||
net_conntrack_dialer_conn_attempted_total{dialer_name="prometheus"} 21
|
||||
# HELP go_info Information about the Go environment.
|
||||
# TYPE go_info gauge
|
||||
go_info{version="go1.17"} 1
|
Loading…
Reference in a new issue