mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-24 05:04:05 -08:00
Add '--weight' flag to 'promtool check metrics' command (#10045)
This commit is contained in:
parent
931acc3ee8
commit
2ce94ac196
|
@ -18,6 +18,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"math"
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
@ -27,6 +28,7 @@ import (
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"text/tabwriter"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-kit/log"
|
"github.com/go-kit/log"
|
||||||
|
@ -43,6 +45,9 @@ import (
|
||||||
"gopkg.in/alecthomas/kingpin.v2"
|
"gopkg.in/alecthomas/kingpin.v2"
|
||||||
yaml "gopkg.in/yaml.v2"
|
yaml "gopkg.in/yaml.v2"
|
||||||
|
|
||||||
|
dto "github.com/prometheus/client_model/go"
|
||||||
|
"github.com/prometheus/common/expfmt"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/config"
|
"github.com/prometheus/prometheus/config"
|
||||||
"github.com/prometheus/prometheus/discovery"
|
"github.com/prometheus/prometheus/discovery"
|
||||||
"github.com/prometheus/prometheus/discovery/file"
|
"github.com/prometheus/prometheus/discovery/file"
|
||||||
|
@ -95,6 +100,7 @@ func main() {
|
||||||
).Required().ExistingFiles()
|
).Required().ExistingFiles()
|
||||||
|
|
||||||
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
|
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
|
||||||
|
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
|
||||||
agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()
|
agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()
|
||||||
|
|
||||||
queryCmd := app.Command("query", "Run query against a Prometheus server.")
|
queryCmd := app.Command("query", "Run query against a Prometheus server.")
|
||||||
|
@ -228,7 +234,7 @@ func main() {
|
||||||
os.Exit(CheckRules(*ruleFiles...))
|
os.Exit(CheckRules(*ruleFiles...))
|
||||||
|
|
||||||
case checkMetricsCmd.FullCommand():
|
case checkMetricsCmd.FullCommand():
|
||||||
os.Exit(CheckMetrics())
|
os.Exit(CheckMetrics(*checkMetricsExtended))
|
||||||
|
|
||||||
case queryInstantCmd.FullCommand():
|
case queryInstantCmd.FullCommand():
|
||||||
os.Exit(QueryInstant(*queryInstantServer, *queryInstantExpr, *queryInstantTime, p))
|
os.Exit(QueryInstant(*queryInstantServer, *queryInstantExpr, *queryInstantTime, p))
|
||||||
|
@ -629,8 +635,10 @@ $ curl -s http://localhost:9090/metrics | promtool check metrics
|
||||||
`)
|
`)
|
||||||
|
|
||||||
// CheckMetrics performs a linting pass on input metrics.
|
// CheckMetrics performs a linting pass on input metrics.
|
||||||
func CheckMetrics() int {
|
func CheckMetrics(extended bool) int {
|
||||||
l := promlint.New(os.Stdin)
|
var buf bytes.Buffer
|
||||||
|
tee := io.TeeReader(os.Stdin, &buf)
|
||||||
|
l := promlint.New(tee)
|
||||||
problems, err := l.Lint()
|
problems, err := l.Lint()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintln(os.Stderr, "error while linting:", err)
|
fmt.Fprintln(os.Stderr, "error while linting:", err)
|
||||||
|
@ -645,9 +653,70 @@ func CheckMetrics() int {
|
||||||
return lintErrExitCode
|
return lintErrExitCode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if extended {
|
||||||
|
stats, total, err := checkMetricsExtended(&buf)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, err)
|
||||||
|
return failureExitCode
|
||||||
|
}
|
||||||
|
w := tabwriter.NewWriter(os.Stdout, 4, 4, 4, ' ', tabwriter.TabIndent)
|
||||||
|
fmt.Fprintf(w, "Metric\tCardinality\tPercentage\t\n")
|
||||||
|
for _, stat := range stats {
|
||||||
|
fmt.Fprintf(w, "%s\t%d\t%.2f%%\t\n", stat.name, stat.cardinality, stat.percentage*100)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(w, "Total\t%d\t%.f%%\t\n", total, 100.)
|
||||||
|
w.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
return successExitCode
|
return successExitCode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// metricStat is the cardinality summary of a single metric family, as
// produced by checkMetricsExtended.
type metricStat struct {
	// name is the metric family name.
	name string
	// cardinality is the number of time series counted for the family.
	cardinality int
	// percentage is the family's share of the total cardinality, stored as a
	// fraction (cardinality / total); callers scale it by 100 for display.
	percentage float64
}
|
||||||
|
|
||||||
|
func checkMetricsExtended(r io.Reader) ([]metricStat, int, error) {
|
||||||
|
p := expfmt.TextParser{}
|
||||||
|
metricFamilies, err := p.TextToMetricFamilies(r)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("error while parsing text to metric families: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var total int
|
||||||
|
stats := make([]metricStat, 0, len(metricFamilies))
|
||||||
|
for _, mf := range metricFamilies {
|
||||||
|
var cardinality int
|
||||||
|
switch mf.GetType() {
|
||||||
|
case dto.MetricType_COUNTER, dto.MetricType_GAUGE, dto.MetricType_UNTYPED:
|
||||||
|
cardinality = len(mf.Metric)
|
||||||
|
case dto.MetricType_HISTOGRAM:
|
||||||
|
// Histogram metrics includes sum, count, buckets.
|
||||||
|
buckets := len(mf.Metric[0].Histogram.Bucket)
|
||||||
|
cardinality = len(mf.Metric) * (2 + buckets)
|
||||||
|
case dto.MetricType_SUMMARY:
|
||||||
|
// Summary metrics includes sum, count, quantiles.
|
||||||
|
quantiles := len(mf.Metric[0].Summary.Quantile)
|
||||||
|
cardinality = len(mf.Metric) * (2 + quantiles)
|
||||||
|
default:
|
||||||
|
cardinality = len(mf.Metric)
|
||||||
|
}
|
||||||
|
stats = append(stats, metricStat{name: mf.GetName(), cardinality: cardinality})
|
||||||
|
total += cardinality
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range stats {
|
||||||
|
stats[i].percentage = float64(stats[i].cardinality) / float64(total)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.SliceStable(stats, func(i, j int) bool {
|
||||||
|
return stats[i].cardinality > stats[j].cardinality
|
||||||
|
})
|
||||||
|
|
||||||
|
return stats, total, nil
|
||||||
|
}
|
||||||
|
|
||||||
// QueryInstant performs an instant query against a Prometheus server.
|
// QueryInstant performs an instant query against a Prometheus server.
|
||||||
func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
|
func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
|
||||||
if url.Scheme == "" {
|
if url.Scheme == "" {
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
@ -322,3 +323,39 @@ func TestAuthorizationConfig(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCheckMetricsExtended(t *testing.T) {
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
t.Skip("Skipping on windows")
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Open("testdata/metrics-test.prom")
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
stats, total, err := checkMetricsExtended(f)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, 27, total)
|
||||||
|
require.Equal(t, []metricStat{
|
||||||
|
{
|
||||||
|
name: "prometheus_tsdb_compaction_chunk_size_bytes",
|
||||||
|
cardinality: 15,
|
||||||
|
percentage: float64(15) / float64(27),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "go_gc_duration_seconds",
|
||||||
|
cardinality: 7,
|
||||||
|
percentage: float64(7) / float64(27),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "net_conntrack_dialer_conn_attempted_total",
|
||||||
|
cardinality: 4,
|
||||||
|
percentage: float64(4) / float64(27),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "go_info",
|
||||||
|
cardinality: 1,
|
||||||
|
percentage: float64(1) / float64(27),
|
||||||
|
},
|
||||||
|
}, stats)
|
||||||
|
}
|
||||||
|
|
35
cmd/promtool/testdata/metrics-test.prom
vendored
Normal file
35
cmd/promtool/testdata/metrics-test.prom
vendored
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
|
||||||
|
# TYPE go_gc_duration_seconds summary
|
||||||
|
go_gc_duration_seconds{quantile="0"} 2.391e-05
|
||||||
|
go_gc_duration_seconds{quantile="0.25"} 9.4402e-05
|
||||||
|
go_gc_duration_seconds{quantile="0.5"} 0.000118953
|
||||||
|
go_gc_duration_seconds{quantile="0.75"} 0.000145884
|
||||||
|
go_gc_duration_seconds{quantile="1"} 0.005201208
|
||||||
|
go_gc_duration_seconds_sum 0.036134048
|
||||||
|
go_gc_duration_seconds_count 232
|
||||||
|
# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction
|
||||||
|
# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 662
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 1460
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 2266
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 3958
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4861
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 5721
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 10493
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 12464
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 13254
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 13699
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 13806
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 13852
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 13867
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_sum 3.886707e+06
|
||||||
|
prometheus_tsdb_compaction_chunk_size_bytes_count 13867
|
||||||
|
# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the given dialer a given name.
|
||||||
|
# TYPE net_conntrack_dialer_conn_attempted_total counter
|
||||||
|
net_conntrack_dialer_conn_attempted_total{dialer_name="blackbox"} 5210
|
||||||
|
net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0
|
||||||
|
net_conntrack_dialer_conn_attempted_total{dialer_name="node"} 21
|
||||||
|
net_conntrack_dialer_conn_attempted_total{dialer_name="prometheus"} 21
|
||||||
|
# HELP go_info Information about the Go environment.
|
||||||
|
# TYPE go_info gauge
|
||||||
|
go_info{version="go1.17"} 1
|
Loading…
Reference in a new issue