Merge branch 'main' of github.com:prometheus/prometheus into owilliams/utf8

Owen Williams, 2024-01-18 11:36:04 -05:00
commit ad78467f33
114 changed files with 4292 additions and 19735 deletions

@@ -197,7 +197,7 @@ jobs:
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- name: Install nodejs
-uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d # v3.8.1
+uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4.0.1
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"

@@ -30,12 +30,12 @@ jobs:
go-version: 1.21.x
- name: Initialize CodeQL
-uses: github/codeql-action/init@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
+uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12
with:
languages: ${{ matrix.language }}
- name: Autobuild
-uses: github/codeql-action/autobuild@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
+uses: github/codeql-action/autobuild@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12
- name: Perform CodeQL Analysis
-uses: github/codeql-action/analyze@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
+uses: github/codeql-action/analyze@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12

@@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
-uses: github/codeql-action/upload-sarif@407ffafae6a767df3e0230c3df91b6443ae8df75 # tag=v2.22.8
+uses: github/codeql-action/upload-sarif@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # tag=v3.22.12
with:
sarif_file: results.sarif

@@ -5,6 +5,45 @@
* [ENHANCEMENT] TSDB: Make the wlog watcher read segments synchronously when not tailing. #13224
* [BUGFIX] Agent: Participate in notify calls. #13223

## 2.49.0 / 2024-01-15

* [FEATURE] Promtool: Add `--run` flag to the promtool test rules command. #12206
* [FEATURE] SD: Add support for `NS` records to DNS SD. #13219
* [FEATURE] UI: Add heatmap visualization setting in the Graph tab, useful for histograms. #13096 #13371
* [FEATURE] Scraping: Add `scrape_config.enable_compression` (default true) to disable gzip compression when scraping the target. #13166
* [FEATURE] PromQL: Add a `promql-experimental-functions` feature flag containing some new experimental PromQL functions. #13103 NOTE: More experimental functions might be added behind the same feature flag in the future. Added functions:
* Experimental `mad_over_time` (median absolute deviation around the median) function. #13059
* Experimental `sort_by_label` and `sort_by_label_desc` functions allowing sorting returned series by labels. #11299
* [FEATURE] SD: Add `__meta_linode_gpus` label to Linode SD. #13097
* [FEATURE] API: Add `exclude_alerts` query parameter to `/api/v1/rules` to only return recording rules. #12999
* [FEATURE] TSDB: The `--storage.tsdb.retention.time` flag value is now exposed as a `prometheus_tsdb_retention_limit_seconds` metric. #12986
* [FEATURE] Scraping: Add ability to specify priority of scrape protocols to accept during scrape (e.g. to scrape Prometheus proto format for certain jobs). This can be changed by setting `global.scrape_protocols` and `scrape_config.scrape_protocols`. #12738
* [ENHANCEMENT] Scraping: Automated handling of scraping histograms that violate `scrape_config.native_histogram_bucket_limit` setting. #13129
* [ENHANCEMENT] Scraping: Optimized memory allocations when scraping. #12992
* [ENHANCEMENT] SD: Added cache for Azure SD to avoid rate-limits. #12622
* [ENHANCEMENT] TSDB: Various improvements to OOO exemplar scraping. E.g. allowing ingestion of exemplars with the same timestamp, but with different labels. #13021
* [ENHANCEMENT] API: Optimize `/api/v1/labels` and `/api/v1/label/<label_name>/values` when one set of matchers is used. #12888
* [ENHANCEMENT] TSDB: Various optimizations for TSDB block index, head mmap chunks and WAL, reducing latency and memory allocations (improving API calls, compaction queries etc). #12997 #13058 #13056 #13040
* [ENHANCEMENT] PromQL: Optimize memory allocations and latency when querying float histograms. #12954
* [ENHANCEMENT] Rules: Instrument TraceID in log lines for rule evaluations. #13034
* [ENHANCEMENT] PromQL: Optimize memory allocations in query_range calls. #13043
* [ENHANCEMENT] Promtool: unittest interval now defaults to evaluation_intervals when not set. #12729
* [BUGFIX] SD: Fix Azure SD public IP reporting. #13241
* [BUGFIX] API: Fix inaccuracies in posting cardinality statistics. #12653
* [BUGFIX] PromQL: Fix inaccuracies of `histogram_quantile` with classic histograms. #13153
* [BUGFIX] TSDB: Fix rare failures or inaccurate queries with OOO samples. #13115
* [BUGFIX] TSDB: Fix rare panics on append commit when exemplars are used. #13092
* [BUGFIX] TSDB: Fix exemplar WAL storage, so remote write can send/receive samples before exemplars. #13113
* [BUGFIX] Mixins: Fix `url` filter on remote write dashboards. #10721
* [BUGFIX] PromQL/TSDB: Various fixes to float histogram operations. #12891 #12977 #12609 #13190 #13189 #13191 #13201 #13212 #13208
* [BUGFIX] Promtool: Fix int32 overflow issues for 32-bit architectures. #12978
* [BUGFIX] SD: Fix Azure VM Scale Set NIC issue. #13283

## 2.48.1 / 2023-12-07

* [BUGFIX] TSDB: Make the wlog watcher read segments synchronously when not tailing. #13224
* [BUGFIX] Agent: Participate in notify calls (fixes slow down in remote write handling introduced in 2.45). #13223

## 2.48.0 / 2023-11-16

* [CHANGE] Remote-write: respect Retry-After header on 5xx errors. #12677

@@ -54,7 +54,8 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) |
| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) |
| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) |
-| v2.50 | 2024-01-16 | **searching for volunteer** |
+| v2.50 | 2024-01-16 | Augustin Husson (GitHub: @nexucis) |
+| v2.51 | 2024-02-13 | **searching for volunteer** |

If you are interested in volunteering, please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

@@ -1 +1 @@
-2.48.0
+2.49.0

@@ -1646,6 +1646,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown,
EnableNativeHistograms: opts.EnableNativeHistograms,
OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow,
+EnableOverlappingCompaction: true,
}
}

@@ -12,7 +12,6 @@
// limitations under the License.
//
//go:build !windows
-// +build !windows
package main

cmd/promtool/analyze.go (new file)

@@ -0,0 +1,370 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"errors"
"fmt"
"io"
"math"
"net/http"
"net/url"
"os"
"sort"
"strconv"
"strings"
"time"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
)
var (
errNotNativeHistogram = fmt.Errorf("not a native histogram")
errNotEnoughData = fmt.Errorf("not enough data")
outputHeader = `Bucket stats for each histogram series over time
------------------------------------------------
First the min, avg, and max number of populated buckets, followed by the total
number of buckets (only if different from the max number of populated buckets
which is typical for classic but not native histograms).`
outputFooter = `Aggregated bucket stats
-----------------------
Each line shows min/avg/max over the series above.`
)
type QueryAnalyzeConfig struct {
metricType string
duration time.Duration
time string
matchers []string
}
// run retrieves metrics that look like conventional histograms (i.e. have _bucket
// suffixes) or native histograms, depending on metricType flag.
func (c *QueryAnalyzeConfig) run(url *url.URL, roundtripper http.RoundTripper) error {
if c.metricType != "histogram" {
return fmt.Errorf("analyze type is %s, must be 'histogram'", c.metricType)
}
ctx := context.Background()
api, err := newAPI(url, roundtripper, nil)
if err != nil {
return err
}
var endTime time.Time
if c.time != "" {
endTime, err = parseTime(c.time)
if err != nil {
return fmt.Errorf("error parsing time '%s': %w", c.time, err)
}
} else {
endTime = time.Now()
}
return c.getStatsFromMetrics(ctx, api, endTime, os.Stdout, c.matchers)
}
func (c *QueryAnalyzeConfig) getStatsFromMetrics(ctx context.Context, api v1.API, endTime time.Time, out io.Writer, matchers []string) error {
fmt.Fprintf(out, "%s\n\n", outputHeader)
metastatsNative := newMetaStatistics()
metastatsClassic := newMetaStatistics()
for _, matcher := range matchers {
seriesSel := seriesSelector(matcher, c.duration)
matrix, err := querySamples(ctx, api, seriesSel, endTime)
if err != nil {
return err
}
matrices := make(map[string]model.Matrix)
for _, series := range matrix {
// We do not handle mixed types. If there are float values, we assume it is a
// classic histogram, otherwise we assume it is a native histogram, and we
// ignore series with errors if they do not match the expected type.
if len(series.Values) == 0 {
stats, err := calcNativeBucketStatistics(series)
if err != nil {
if errors.Is(err, errNotNativeHistogram) || errors.Is(err, errNotEnoughData) {
continue
}
return err
}
fmt.Fprintf(out, "- %s (native): %v\n", series.Metric, *stats)
metastatsNative.update(stats)
} else {
lbs := model.LabelSet(series.Metric).Clone()
if _, ok := lbs["le"]; !ok {
continue
}
metricName := string(lbs[labels.MetricName])
if !strings.HasSuffix(metricName, "_bucket") {
continue
}
delete(lbs, labels.MetricName)
delete(lbs, "le")
key := formatSeriesName(metricName, lbs)
matrices[key] = append(matrices[key], series)
}
}
for key, matrix := range matrices {
stats, err := calcClassicBucketStatistics(matrix)
if err != nil {
if errors.Is(err, errNotEnoughData) {
continue
}
return err
}
fmt.Fprintf(out, "- %s (classic): %v\n", key, *stats)
metastatsClassic.update(stats)
}
}
fmt.Fprintf(out, "\n%s\n", outputFooter)
if metastatsNative.Count() > 0 {
fmt.Fprintf(out, "\nNative %s\n", metastatsNative)
}
if metastatsClassic.Count() > 0 {
fmt.Fprintf(out, "\nClassic %s\n", metastatsClassic)
}
return nil
}
func seriesSelector(metricName string, duration time.Duration) string {
builder := strings.Builder{}
builder.WriteString(metricName)
builder.WriteRune('[')
builder.WriteString(duration.String())
builder.WriteRune(']')
return builder.String()
}
func formatSeriesName(metricName string, lbs model.LabelSet) string {
builder := strings.Builder{}
builder.WriteString(metricName)
builder.WriteString(lbs.String())
return builder.String()
}
func querySamples(ctx context.Context, api v1.API, query string, end time.Time) (model.Matrix, error) {
values, _, err := api.Query(ctx, query, end)
if err != nil {
return nil, err
}
matrix, ok := values.(model.Matrix)
if !ok {
return nil, fmt.Errorf("query of buckets resulted in non-Matrix")
}
return matrix, nil
}
// minPop/avgPop/maxPop is for the number of populated (non-zero) buckets.
// total is the total number of buckets across all samples in the series,
// populated or not.
type statistics struct {
minPop, maxPop, total int
avgPop float64
}
func (s statistics) String() string {
if s.maxPop == s.total {
return fmt.Sprintf("%d/%.3f/%d", s.minPop, s.avgPop, s.maxPop)
}
return fmt.Sprintf("%d/%.3f/%d/%d", s.minPop, s.avgPop, s.maxPop, s.total)
}
func calcClassicBucketStatistics(matrix model.Matrix) (*statistics, error) {
numBuckets := len(matrix)
stats := &statistics{
minPop: math.MaxInt,
total: numBuckets,
}
if numBuckets == 0 || len(matrix[0].Values) < 2 {
return stats, errNotEnoughData
}
numSamples := len(matrix[0].Values)
sortMatrix(matrix)
totalPop := 0
for timeIdx := 0; timeIdx < numSamples; timeIdx++ {
curr, err := getBucketCountsAtTime(matrix, numBuckets, timeIdx)
if err != nil {
return stats, err
}
countPop := 0
for _, b := range curr {
if b != 0 {
countPop++
}
}
totalPop += countPop
if stats.minPop > countPop {
stats.minPop = countPop
}
if stats.maxPop < countPop {
stats.maxPop = countPop
}
}
stats.avgPop = float64(totalPop) / float64(numSamples)
return stats, nil
}
func sortMatrix(matrix model.Matrix) {
sort.SliceStable(matrix, func(i, j int) bool {
return getLe(matrix[i]) < getLe(matrix[j])
})
}
func getLe(series *model.SampleStream) float64 {
lbs := model.LabelSet(series.Metric)
le, _ := strconv.ParseFloat(string(lbs["le"]), 64)
return le
}
func getBucketCountsAtTime(matrix model.Matrix, numBuckets, timeIdx int) ([]int, error) {
counts := make([]int, numBuckets)
if timeIdx >= len(matrix[0].Values) {
// Just return zeroes instead of erroring out so we can get partial results.
return counts, nil
}
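// Classic histogram buckets are cumulative and the matrix is sorted by le
// (see sortMatrix), so each bucket's own count is its value minus the
// previous bucket's value; the lowest bucket is taken as-is.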
counts[0] = int(matrix[0].Values[timeIdx].Value)
for i, bucket := range matrix[1:] {
if timeIdx >= len(bucket.Values) {
// Just return zeroes instead of erroring out so we can get partial results.
return counts, nil
}
curr := bucket.Values[timeIdx]
prev := matrix[i].Values[timeIdx]
// Assume the results are nicely aligned.
if curr.Timestamp != prev.Timestamp {
return counts, fmt.Errorf("matrix result is not time aligned")
}
counts[i+1] = int(curr.Value - prev.Value)
}
return counts, nil
}
type bucketBounds struct {
boundaries int32
upper, lower float64
}
func makeBucketBounds(b *model.HistogramBucket) bucketBounds {
return bucketBounds{
boundaries: b.Boundaries,
upper: float64(b.Upper),
lower: float64(b.Lower),
}
}
func calcNativeBucketStatistics(series *model.SampleStream) (*statistics, error) {
stats := &statistics{
minPop: math.MaxInt,
}
overall := make(map[bucketBounds]struct{})
totalPop := 0
if len(series.Histograms) == 0 {
return nil, errNotNativeHistogram
}
if len(series.Histograms) == 1 {
return nil, errNotEnoughData
}
for _, histogram := range series.Histograms {
for _, bucket := range histogram.Histogram.Buckets {
bb := makeBucketBounds(bucket)
overall[bb] = struct{}{}
}
countPop := len(histogram.Histogram.Buckets)
totalPop += countPop
if stats.minPop > countPop {
stats.minPop = countPop
}
if stats.maxPop < countPop {
stats.maxPop = countPop
}
}
stats.avgPop = float64(totalPop) / float64(len(series.Histograms))
stats.total = len(overall)
return stats, nil
}
type distribution struct {
min, max, count int
avg float64
}
func newDistribution() distribution {
return distribution{
min: math.MaxInt,
}
}
func (d *distribution) update(num int) {
if d.min > num {
d.min = num
}
if d.max < num {
d.max = num
}
d.count++
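// Incremental mean: equivalent to avg += (num - avg) / count, so no separate sum is kept.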
d.avg += float64(num)/float64(d.count) - d.avg/float64(d.count)
}
func (d distribution) String() string {
return fmt.Sprintf("%d/%.3f/%d", d.min, d.avg, d.max)
}
type metaStatistics struct {
minPop, avgPop, maxPop, total distribution
}
func newMetaStatistics() *metaStatistics {
return &metaStatistics{
minPop: newDistribution(),
avgPop: newDistribution(),
maxPop: newDistribution(),
total: newDistribution(),
}
}
func (ms metaStatistics) Count() int {
return ms.minPop.count
}
func (ms metaStatistics) String() string {
if ms.maxPop == ms.total {
return fmt.Sprintf("histogram series (%d in total):\n- min populated: %v\n- avg populated: %v\n- max populated: %v", ms.Count(), ms.minPop, ms.avgPop, ms.maxPop)
}
return fmt.Sprintf("histogram series (%d in total):\n- min populated: %v\n- avg populated: %v\n- max populated: %v\n- total: %v", ms.Count(), ms.minPop, ms.avgPop, ms.maxPop, ms.total)
}
func (ms *metaStatistics) update(s *statistics) {
ms.minPop.update(s.minPop)
ms.avgPop.update(int(s.avgPop))
ms.maxPop.update(s.maxPop)
ms.total.update(s.total)
}
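A note on `distribution.update` above: it maintains the mean incrementally instead of keeping a running sum. A minimal standalone sketch (the `main` harness is illustrative; the type and update arithmetic mirror the listing) checking the incremental form against the plain sum/count mean:

```go
package main

import (
	"fmt"
	"math"
)

// distribution mirrors the helper in cmd/promtool/analyze.go.
type distribution struct {
	min, max, count int
	avg             float64
}

func (d *distribution) update(num int) {
	if d.min > num {
		d.min = num
	}
	if d.max < num {
		d.max = num
	}
	d.count++
	// avg_n = avg_{n-1} + (num - avg_{n-1}) / n, written as in analyze.go.
	d.avg += float64(num)/float64(d.count) - d.avg/float64(d.count)
}

func main() {
	d := distribution{min: math.MaxInt}
	nums := []int{4, 7, 1, 9}
	sum := 0
	for _, n := range nums {
		d.update(n)
		sum += n
	}
	fmt.Printf("min=%d avg=%.3f max=%d\n", d.min, d.avg, d.max) // min=1 avg=5.250 max=9
	fmt.Printf("plain mean=%.3f\n", float64(sum)/float64(len(nums)))
}
```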

@@ -0,0 +1,170 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/common/model"
)
var (
exampleMatrix = model.Matrix{
&model.SampleStream{
Metric: model.Metric{
"le": "+Inf",
},
Values: []model.SamplePair{
{
Value: 31,
Timestamp: 100,
},
{
Value: 32,
Timestamp: 200,
},
{
Value: 40,
Timestamp: 300,
},
},
},
&model.SampleStream{
Metric: model.Metric{
"le": "0.5",
},
Values: []model.SamplePair{
{
Value: 10,
Timestamp: 100,
},
{
Value: 11,
Timestamp: 200,
},
{
Value: 11,
Timestamp: 300,
},
},
},
&model.SampleStream{
Metric: model.Metric{
"le": "10",
},
Values: []model.SamplePair{
{
Value: 30,
Timestamp: 100,
},
{
Value: 31,
Timestamp: 200,
},
{
Value: 37,
Timestamp: 300,
},
},
},
&model.SampleStream{
Metric: model.Metric{
"le": "2",
},
Values: []model.SamplePair{
{
Value: 25,
Timestamp: 100,
},
{
Value: 26,
Timestamp: 200,
},
{
Value: 27,
Timestamp: 300,
},
},
},
}
exampleMatrixLength = len(exampleMatrix)
)
func init() {
sortMatrix(exampleMatrix)
}
func TestGetBucketCountsAtTime(t *testing.T) {
cases := []struct {
matrix model.Matrix
length int
timeIdx int
expected []int
}{
{
exampleMatrix,
exampleMatrixLength,
0,
[]int{10, 15, 5, 1},
},
{
exampleMatrix,
exampleMatrixLength,
1,
[]int{11, 15, 5, 1},
},
{
exampleMatrix,
exampleMatrixLength,
2,
[]int{11, 16, 10, 3},
},
}
for _, c := range cases {
t.Run(fmt.Sprintf("exampleMatrix@%d", c.timeIdx), func(t *testing.T) {
res, err := getBucketCountsAtTime(c.matrix, c.length, c.timeIdx)
require.NoError(t, err)
require.Equal(t, c.expected, res)
})
}
}
func TestCalcClassicBucketStatistics(t *testing.T) {
cases := []struct {
matrix model.Matrix
expected *statistics
}{
{
exampleMatrix,
&statistics{
minPop: 4,
avgPop: 4,
maxPop: 4,
total: 4,
},
},
}
for i, c := range cases {
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
res, err := calcClassicBucketStatistics(c.matrix)
require.NoError(t, err)
require.Equal(t, c.expected, res)
})
}
}

@@ -35,9 +35,7 @@ import (
"github.com/go-kit/log"
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -185,6 +183,14 @@ func main() {
queryLabelsEnd := queryLabelsCmd.Flag("end", "End time (RFC3339 or Unix timestamp).").String()
queryLabelsMatch := queryLabelsCmd.Flag("match", "Series selector. Can be specified multiple times.").Strings()
+queryAnalyzeCfg := &QueryAnalyzeConfig{}
+queryAnalyzeCmd := queryCmd.Command("analyze", "Run queries against your Prometheus to analyze the usage pattern of certain metrics.")
+queryAnalyzeCmd.Flag("server", "Prometheus server to query.").Required().URLVar(&serverURL)
+queryAnalyzeCmd.Flag("type", "Type of metric: histogram.").Required().StringVar(&queryAnalyzeCfg.metricType)
+queryAnalyzeCmd.Flag("duration", "Time frame to analyze.").Default("1h").DurationVar(&queryAnalyzeCfg.duration)
+queryAnalyzeCmd.Flag("time", "Query time (RFC3339 or Unix timestamp), defaults to now.").StringVar(&queryAnalyzeCfg.time)
+queryAnalyzeCmd.Flag("match", "Series selector. Can be specified multiple times.").Required().StringsVar(&queryAnalyzeCfg.matchers)
pushCmd := app.Command("push", "Push to a Prometheus server.")
pushCmd.Flag("http.config.file", "HTTP client configuration file for promtool to connect to Prometheus.").PlaceHolder("<filename>").ExistingFileVar(&httpConfigFilePath)
pushMetricsCmd := pushCmd.Command("metrics", "Push metrics to a prometheus remote write (for testing purpose only).")
@@ -204,6 +210,7 @@ func main() {
"test-rule-file",
"The unit test file.",
).Required().ExistingFiles()
+testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool()
defaultDBPath := "data/"
tsdbCmd := app.Command("tsdb", "Run tsdb commands.")
@@ -230,7 +237,7 @@ func main() {
dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
-dumpMatch := tsdbDumpCmd.Flag("match", "Series selector.").Default("{__name__=~'(?s:.*)'}").String()
+dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.")
importHumanReadable := importCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool()
@@ -369,6 +376,7 @@ func main() {
EnableNegativeOffset: true,
},
*testRulesRun,
+*testRulesDiff,
*testRulesFiles...),
)
@@ -390,6 +398,9 @@ func main() {
case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...)))
+case queryAnalyzeCmd.FullCommand():
+	os.Exit(checkErr(queryAnalyzeCfg.run(serverURL, httpRoundTripper)))
case documentationCmd.FullCommand():
os.Exit(checkErr(documentcli.GenerateMarkdown(app.Model(), os.Stdout)))
@@ -997,246 +1008,6 @@ func checkMetricsExtended(r io.Reader) ([]metricStat, int, error) {
return stats, total, nil
}
// QueryInstant performs an instant query against a Prometheus server.
func QueryInstant(url *url.URL, roundTripper http.RoundTripper, query, evalTime string, p printer) int {
if url.Scheme == "" {
url.Scheme = "http"
}
config := api.Config{
Address: url.String(),
RoundTripper: roundTripper,
}
// Create new client.
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
eTime := time.Now()
if evalTime != "" {
eTime, err = parseTime(evalTime)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing evaluation time:", err)
return failureExitCode
}
}
// Run query against client.
api := v1.NewAPI(c)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, _, err := api.Query(ctx, query, eTime) // Ignoring warnings for now.
cancel()
if err != nil {
return handleAPIError(err)
}
p.printValue(val)
return successExitCode
}
// QueryRange performs a range query against a Prometheus server.
func QueryRange(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, query, start, end string, step time.Duration, p printer) int {
if url.Scheme == "" {
url.Scheme = "http"
}
config := api.Config{
Address: url.String(),
RoundTripper: roundTripper,
}
if len(headers) > 0 {
config.RoundTripper = promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) {
for key, value := range headers {
req.Header.Add(key, value)
}
return roundTripper.RoundTrip(req)
})
}
// Create new client.
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
var stime, etime time.Time
if end == "" {
etime = time.Now()
} else {
etime, err = parseTime(end)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing end time:", err)
return failureExitCode
}
}
if start == "" {
stime = etime.Add(-5 * time.Minute)
} else {
stime, err = parseTime(start)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing start time:", err)
return failureExitCode
}
}
if !stime.Before(etime) {
fmt.Fprintln(os.Stderr, "start time is not before end time")
return failureExitCode
}
if step == 0 {
resolution := math.Max(math.Floor(etime.Sub(stime).Seconds()/250), 1)
// Convert seconds to nanoseconds such that time.Duration parses correctly.
step = time.Duration(resolution) * time.Second
}
// Run query against client.
api := v1.NewAPI(c)
r := v1.Range{Start: stime, End: etime, Step: step}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, _, err := api.QueryRange(ctx, query, r) // Ignoring warnings for now.
cancel()
if err != nil {
return handleAPIError(err)
}
p.printValue(val)
return successExitCode
}
// QuerySeries queries for a series against a Prometheus server.
func QuerySeries(url *url.URL, roundTripper http.RoundTripper, matchers []string, start, end string, p printer) int {
if url.Scheme == "" {
url.Scheme = "http"
}
config := api.Config{
Address: url.String(),
RoundTripper: roundTripper,
}
// Create new client.
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
stime, etime, err := parseStartTimeAndEndTime(start, end)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
// Run query against client.
api := v1.NewAPI(c)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, _, err := api.Series(ctx, matchers, stime, etime) // Ignoring warnings for now.
cancel()
if err != nil {
return handleAPIError(err)
}
p.printSeries(val)
return successExitCode
}
// QueryLabels queries for label values against a Prometheus server.
func QueryLabels(url *url.URL, roundTripper http.RoundTripper, matchers []string, name, start, end string, p printer) int {
if url.Scheme == "" {
url.Scheme = "http"
}
config := api.Config{
Address: url.String(),
RoundTripper: roundTripper,
}
// Create new client.
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
stime, etime, err := parseStartTimeAndEndTime(start, end)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
// Run query against client.
api := v1.NewAPI(c)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, warn, err := api.LabelValues(ctx, name, matchers, stime, etime)
cancel()
for _, v := range warn {
fmt.Fprintln(os.Stderr, "query warning:", v)
}
if err != nil {
return handleAPIError(err)
}
p.printLabelValues(val)
return successExitCode
}
func handleAPIError(err error) int {
var apiErr *v1.Error
if errors.As(err, &apiErr) && apiErr.Detail != "" {
fmt.Fprintf(os.Stderr, "query error: %v (detail: %s)\n", apiErr, strings.TrimSpace(apiErr.Detail))
} else {
fmt.Fprintln(os.Stderr, "query error:", err)
}
return failureExitCode
}
func parseStartTimeAndEndTime(start, end string) (time.Time, time.Time, error) {
var (
minTime = time.Now().Add(-9999 * time.Hour)
maxTime = time.Now().Add(9999 * time.Hour)
err error
)
stime := minTime
etime := maxTime
if start != "" {
stime, err = parseTime(start)
if err != nil {
return stime, etime, fmt.Errorf("error parsing start time: %w", err)
}
}
if end != "" {
etime, err = parseTime(end)
if err != nil {
return stime, etime, fmt.Errorf("error parsing end time: %w", err)
}
}
return stime, etime, nil
}
func parseTime(s string) (time.Time, error) {
if t, err := strconv.ParseFloat(s, 64); err == nil {
s, ns := math.Modf(t)
return time.Unix(int64(s), int64(ns*float64(time.Second))).UTC(), nil
}
if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
return t, nil
}
return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s)
}
type endpointsGroup struct {
urlToFilename map[string]string
postProcess func(b []byte) ([]byte, error)
@@ -1390,15 +1161,12 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu
evalInterval: evalInterval,
maxBlockDuration: maxBlockDuration,
}
-client, err := api.NewClient(api.Config{
-	Address:      url.String(),
-	RoundTripper: roundTripper,
-})
+api, err := newAPI(url, roundTripper, nil)
if err != nil {
return fmt.Errorf("new api client error: %w", err)
}
-ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, v1.NewAPI(client))
+ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api)
errs := ruleImporter.loadGroups(ctx, files)
for _, err := range errs {
if err != nil {

cmd/promtool/query.go (new file)

@@ -0,0 +1,251 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"errors"
"fmt"
"math"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"time"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/client_golang/prometheus/promhttp"
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
)
func newAPI(url *url.URL, roundTripper http.RoundTripper, headers map[string]string) (v1.API, error) {
if url.Scheme == "" {
url.Scheme = "http"
}
config := api.Config{
Address: url.String(),
RoundTripper: roundTripper,
}
if len(headers) > 0 {
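// Wrap the base round tripper so every request carries the extra headers.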
config.RoundTripper = promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) {
for key, value := range headers {
req.Header.Add(key, value)
}
return roundTripper.RoundTrip(req)
})
}
// Create new client.
client, err := api.NewClient(config)
if err != nil {
return nil, err
}
api := v1.NewAPI(client)
return api, nil
}
// QueryInstant performs an instant query against a Prometheus server.
func QueryInstant(url *url.URL, roundTripper http.RoundTripper, query, evalTime string, p printer) int {
api, err := newAPI(url, roundTripper, nil)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
eTime := time.Now()
if evalTime != "" {
eTime, err = parseTime(evalTime)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing evaluation time:", err)
return failureExitCode
}
}
// Run query against client.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, _, err := api.Query(ctx, query, eTime) // Ignoring warnings for now.
cancel()
if err != nil {
return handleAPIError(err)
}
p.printValue(val)
return successExitCode
}
// QueryRange performs a range query against a Prometheus server.
func QueryRange(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, query, start, end string, step time.Duration, p printer) int {
api, err := newAPI(url, roundTripper, headers)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
var stime, etime time.Time
if end == "" {
etime = time.Now()
} else {
etime, err = parseTime(end)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing end time:", err)
return failureExitCode
}
}
if start == "" {
stime = etime.Add(-5 * time.Minute)
} else {
stime, err = parseTime(start)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing start time:", err)
return failureExitCode
}
}
if !stime.Before(etime) {
fmt.Fprintln(os.Stderr, "start time is not before end time")
return failureExitCode
}
if step == 0 {
resolution := math.Max(math.Floor(etime.Sub(stime).Seconds()/250), 1)
// Convert seconds to nanoseconds such that time.Duration parses correctly.
step = time.Duration(resolution) * time.Second
}
// Run query against client.
r := v1.Range{Start: stime, End: etime, Step: step}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, _, err := api.QueryRange(ctx, query, r) // Ignoring warnings for now.
cancel()
if err != nil {
return handleAPIError(err)
}
p.printValue(val)
return successExitCode
}
// QuerySeries queries for a series against a Prometheus server.
func QuerySeries(url *url.URL, roundTripper http.RoundTripper, matchers []string, start, end string, p printer) int {
api, err := newAPI(url, roundTripper, nil)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
stime, etime, err := parseStartTimeAndEndTime(start, end)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
// Run query against client.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, _, err := api.Series(ctx, matchers, stime, etime) // Ignoring warnings for now.
cancel()
if err != nil {
return handleAPIError(err)
}
p.printSeries(val)
return successExitCode
}
// QueryLabels queries for label values against a Prometheus server.
func QueryLabels(url *url.URL, roundTripper http.RoundTripper, matchers []string, name, start, end string, p printer) int {
api, err := newAPI(url, roundTripper, nil)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return failureExitCode
}
stime, etime, err := parseStartTimeAndEndTime(start, end)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
// Run query against client.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
val, warn, err := api.LabelValues(ctx, name, matchers, stime, etime)
cancel()
for _, v := range warn {
fmt.Fprintln(os.Stderr, "query warning:", v)
}
if err != nil {
return handleAPIError(err)
}
p.printLabelValues(val)
return successExitCode
}
func handleAPIError(err error) int {
var apiErr *v1.Error
if errors.As(err, &apiErr) && apiErr.Detail != "" {
fmt.Fprintf(os.Stderr, "query error: %v (detail: %s)\n", apiErr, strings.TrimSpace(apiErr.Detail))
} else {
fmt.Fprintln(os.Stderr, "query error:", err)
}
return failureExitCode
}
func parseStartTimeAndEndTime(start, end string) (time.Time, time.Time, error) {
var (
minTime = time.Now().Add(-9999 * time.Hour)
maxTime = time.Now().Add(9999 * time.Hour)
err error
)
stime := minTime
etime := maxTime
if start != "" {
stime, err = parseTime(start)
if err != nil {
return stime, etime, fmt.Errorf("error parsing start time: %w", err)
}
}
if end != "" {
etime, err = parseTime(end)
if err != nil {
return stime, etime, fmt.Errorf("error parsing end time: %w", err)
}
}
return stime, etime, nil
}
func parseTime(s string) (time.Time, error) {
if t, err := strconv.ParseFloat(s, 64); err == nil {
s, ns := math.Modf(t)
return time.Unix(int64(s), int64(ns*float64(time.Second))).UTC(), nil
}
if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
return t, nil
}
return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s)
}
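`parseTime` accepts either a Unix timestamp (optionally with fractional seconds) or an RFC 3339 string. A small self-contained sketch of the same parsing order (the harness and sample inputs are illustrative):

```go
package main

import (
	"fmt"
	"math"
	"strconv"
	"time"
)

// parseTime mirrors the helper in cmd/promtool/query.go: try Unix
// seconds first, then RFC 3339.
func parseTime(s string) (time.Time, error) {
	if t, err := strconv.ParseFloat(s, 64); err == nil {
		sec, frac := math.Modf(t)
		return time.Unix(int64(sec), int64(frac*float64(time.Second))).UTC(), nil
	}
	if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
		return t, nil
	}
	return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s)
}

func main() {
	for _, s := range []string{"1705595764.5", "2024-01-18T11:36:04-05:00", "nonsense"} {
		t, err := parseTime(s)
		fmt.Println(s, "->", t, err)
	}
}
```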

cmd/promtool/testdata/dump-test-1.prom (new file)

@@ -0,0 +1,15 @@
{__name__="heavy_metric", foo="bar"} 5 0
{__name__="heavy_metric", foo="bar"} 4 60000
{__name__="heavy_metric", foo="bar"} 3 120000
{__name__="heavy_metric", foo="bar"} 2 180000
{__name__="heavy_metric", foo="bar"} 1 240000
{__name__="heavy_metric", foo="foo"} 5 0
{__name__="heavy_metric", foo="foo"} 4 60000
{__name__="heavy_metric", foo="foo"} 3 120000
{__name__="heavy_metric", foo="foo"} 2 180000
{__name__="heavy_metric", foo="foo"} 1 240000
{__name__="metric", baz="abc", foo="bar"} 1 0
{__name__="metric", baz="abc", foo="bar"} 2 60000
{__name__="metric", baz="abc", foo="bar"} 3 120000
{__name__="metric", baz="abc", foo="bar"} 4 180000
{__name__="metric", baz="abc", foo="bar"} 5 240000

cmd/promtool/testdata/dump-test-2.prom (new file)

@@ -0,0 +1,10 @@
{__name__="heavy_metric", foo="foo"} 5 0
{__name__="heavy_metric", foo="foo"} 4 60000
{__name__="heavy_metric", foo="foo"} 3 120000
{__name__="heavy_metric", foo="foo"} 2 180000
{__name__="heavy_metric", foo="foo"} 1 240000
{__name__="metric", baz="abc", foo="bar"} 1 0
{__name__="metric", baz="abc", foo="bar"} 2 60000
{__name__="metric", baz="abc", foo="bar"} 3 120000
{__name__="metric", baz="abc", foo="bar"} 4 180000
{__name__="metric", baz="abc", foo="bar"} 5 240000

cmd/promtool/testdata/dump-test-3.prom (new file)

@@ -0,0 +1,2 @@
{__name__="metric", baz="abc", foo="bar"} 2 60000
{__name__="metric", baz="abc", foo="bar"} 3 120000

@@ -706,7 +706,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
return nil
}
-func dumpSamples(ctx context.Context, path string, mint, maxt int64, match string) (err error) {
+func dumpSamples(ctx context.Context, path string, mint, maxt int64, match []string) (err error) {
db, err := tsdb.OpenDBReadOnly(path, nil)
if err != nil {
return err
@@ -720,11 +720,21 @@ func dumpSamples(ctx context.Context, path string, mint, maxt int64, match strin
}
defer q.Close()
-matchers, err := parser.ParseMetricSelector(match)
+matcherSets, err := parser.ParseMetricSelectors(match)
if err != nil {
return err
}
-ss := q.Select(ctx, false, nil, matchers...)
+var ss storage.SeriesSet
+if len(matcherSets) > 1 {
+	var sets []storage.SeriesSet
+	for _, mset := range matcherSets {
+		sets = append(sets, q.Select(ctx, true, nil, mset...))
+	}
+	ss = storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
+} else {
+	ss = q.Select(ctx, false, nil, matcherSets[0]...)
+}
for ss.Next() {
series := ss.At()

@@ -14,9 +14,18 @@
package main
import (
"bytes"
"context"
"io"
"math"
"os"
"runtime"
"strings"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/promql"
)
func TestGenerateBucket(t *testing.T) {
@@ -41,3 +50,101 @@ func TestGenerateBucket(t *testing.T) {
require.Equal(t, tc.step, step)
}
}
// getDumpedSamples dumps samples and returns them.
func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []string) string {
t.Helper()
oldStdout := os.Stdout
r, w, _ := os.Pipe()
os.Stdout = w
err := dumpSamples(
context.Background(),
path,
mint,
maxt,
match,
)
require.NoError(t, err)
w.Close()
os.Stdout = oldStdout
var buf bytes.Buffer
io.Copy(&buf, r)
return buf.String()
}
func TestTSDBDump(t *testing.T) {
storage := promql.LoadedStorage(t, `
load 1m
metric{foo="bar", baz="abc"} 1 2 3 4 5
heavy_metric{foo="bar"} 5 4 3 2 1
heavy_metric{foo="foo"} 5 4 3 2 1
`)
tests := []struct {
name string
mint int64
maxt int64
match []string
expectedDump string
}{
{
name: "default match",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "same matcher twice",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{foo=~'.+'}", "{foo=~'.+'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "no duplication",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__=~'(?s:.*)'}", "{baz='abc'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "well merged",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__='heavy_metric'}", "{baz='abc'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "multi matchers",
mint: math.MinInt64,
maxt: math.MaxInt64,
match: []string{"{__name__='heavy_metric',foo='foo'}", "{__name__='metric'}"},
expectedDump: "testdata/dump-test-2.prom",
},
{
name: "with reduced mint and maxt",
mint: int64(60000),
maxt: int64(120000),
match: []string{"{__name__='metric'}"},
expectedDump: "testdata/dump-test-3.prom",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.mint, tt.maxt, tt.match)
expectedMetrics, err := os.ReadFile(tt.expectedDump)
require.NoError(t, err)
if strings.Contains(runtime.GOOS, "windows") {
// We use "\n" line endings while dumping on Windows as well.
expectedMetrics = bytes.ReplaceAll(expectedMetrics, []byte("\r\n"), []byte("\n"))
}
// Even though samples are not sorted in the single-matcher case, the order in the cases above should stay the same.
require.Equal(t, string(expectedMetrics), dumpedMetrics)
})
}
}

@@ -15,6 +15,7 @@ package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
@@ -27,6 +28,7 @@ import (
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/nsf/jsondiff"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
@@ -40,7 +42,7 @@
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
-func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files ...string) int {
+func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
failed := false
var run *regexp.Regexp
@@ -49,7 +51,7 @@
}
for _, f := range files {
-if errs := ruleUnitTest(f, queryOpts, run); errs != nil {
+if errs := ruleUnitTest(f, queryOpts, run, diffFlag); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@@ -67,7 +69,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files .
return successExitCode
}
-func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp) []error {
+func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error {
fmt.Println("Unit Testing: ", filename)
b, err := os.ReadFile(filename)
@@ -109,7 +111,7 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
-ers := t.test(evalInterval, groupOrderMap, queryOpts, unitTestInp.RuleFiles...)
+ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...)
if ers != nil {
errs = append(errs, ers...)
}
@@ -173,7 +175,7 @@
}
// test performs the unit tests.
-func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promql.LazyLoaderOpts, ruleFiles ...string) []error {
+func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promql.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) []error {
// Setup testing suite.
suite, err := promql.NewLazyLoader(nil, tg.seriesLoadingString(), queryOpts)
if err != nil {
@@ -345,8 +347,44 @@
}
expString := indentLines(expAlerts.String(), " ")
gotString := indentLines(gotAlerts.String(), " ")
-errs = append(errs, fmt.Errorf("%s alertname: %s, time: %s, \n exp:%v, \n got:%v",
-	testName, testcase.Alertname, testcase.EvalTime.String(), expString, gotString))
+if diffFlag {
+	// If empty, populates an empty value
+	if gotAlerts.Len() == 0 {
+		gotAlerts = append(gotAlerts, labelAndAnnotation{
+			Labels:      labels.Labels{},
+			Annotations: labels.Labels{},
+		})
+	}
+	// If empty, populates an empty value
+	if expAlerts.Len() == 0 {
+		expAlerts = append(expAlerts, labelAndAnnotation{
+			Labels:      labels.Labels{},
+			Annotations: labels.Labels{},
+		})
+	}
+	diffOpts := jsondiff.DefaultConsoleOptions()
+	expAlertsJSON, err := json.Marshal(expAlerts)
+	if err != nil {
+		errs = append(errs, fmt.Errorf("error marshaling expected %s alert: [%s]", tg.TestGroupName, err.Error()))
+		continue
+	}
+	gotAlertsJSON, err := json.Marshal(gotAlerts)
+	if err != nil {
+		errs = append(errs, fmt.Errorf("error marshaling received %s alert: [%s]", tg.TestGroupName, err.Error()))
+		continue
+	}
+	res, diff := jsondiff.Compare(expAlertsJSON, gotAlertsJSON, &diffOpts)
+	if res != jsondiff.FullMatch {
+		errs = append(errs, fmt.Errorf("%s alertname: %s, time: %s, \n diff: %v",
+			testName, testcase.Alertname, testcase.EvalTime.String(), indentLines(diff, "  ")))
+	}
+} else {
+	errs = append(errs, fmt.Errorf("%s alertname: %s, time: %s, \n exp:%v, \n got:%v",
+		testName, testcase.Alertname, testcase.EvalTime.String(), expString, gotString))
+}
}
}

@@ -125,7 +125,7 @@ func TestRulesUnitTest(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
-if got := RulesUnitTest(tt.queryOpts, nil, tt.args.files...); got != tt.want {
+if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
@@ -178,7 +178,7 @@ func TestRulesUnitTestRun(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
-if got := RulesUnitTest(tt.queryOpts, tt.args.run, tt.args.files...); got != tt.want {
+if got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})

@@ -610,9 +610,12 @@ type ScrapeConfig struct {
// More than this label value length post metric-relabeling will cause the
// scrape to fail. 0 means no limit.
LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"`
-// More than this many buckets in a native histogram will cause the scrape to
-// fail.
+// If there are more than this many buckets in a native histogram,
+// buckets will be merged to stay within the limit.
NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"`
+// If the growth factor of one bucket to the next is smaller than this,
+// buckets will be merged to increase the factor sufficiently.
+NativeHistogramMinBucketFactor float64 `yaml:"native_histogram_min_bucket_factor,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
@@ -1124,6 +1127,9 @@ type QueueConfig struct {
MinBackoff model.Duration `yaml:"min_backoff,omitempty"`
MaxBackoff model.Duration `yaml:"max_backoff,omitempty"`
RetryOnRateLimit bool `yaml:"retry_on_http_429,omitempty"`
+// Samples older than the limit will be dropped.
+SampleAgeLimit model.Duration `yaml:"sample_age_limit,omitempty"`
}
// MetadataConfig is the configuration for sending metadata to remote

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows
-// +build !windows
package config

@@ -420,10 +420,20 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
} else {
ch <- target{labelSet: nil, err: err}
}
// Get out of this routine because we cannot continue without a network interface.
return
+	d.addToCache(nicID, networkInterface)
+} else {
+	networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID)
+	if err != nil {
+		if errors.Is(err, errorNotFound) {
+			level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
+		} else {
+			ch <- target{labelSet: nil, err: err}
+		}
+		// Get out of this routine because we cannot continue without a network interface.
+		return
+	}
+	d.addToCache(nicID, networkInterface)
+}
-d.addToCache(nicID, networkInterface)
}
if networkInterface.Properties == nil {

@@ -193,9 +193,10 @@ func (t *testRunner) targets() []*targetgroup.Group {
func (t *testRunner) requireUpdate(ref time.Time, expected []*targetgroup.Group) {
t.Helper()
+timeout := time.After(defaultWait)
for {
select {
-case <-time.After(defaultWait):
+case <-timeout:
t.Fatalf("Expected update but got none")
return
case <-time.After(defaultWait / 10):

@@ -369,6 +369,11 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
// For all seen pods, check all container ports. If they were not covered
// by one of the service endpoints, generate targets for them.
for _, pe := range seenPods {
+// PodIP can be empty when a pod is starting or has been evicted.
+if len(pe.pod.Status.PodIP) == 0 {
+	continue
+}
for _, c := range pe.pod.Spec.Containers {
for _, cport := range c.Ports {
hasSeenPort := func() bool {
@@ -383,21 +388,18 @@
continue
}
-// PodIP can be empty when a pod is starting or has been evicted.
-if len(pe.pod.Status.PodIP) != 0 {
-	a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
-	ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
+a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
+ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
-	target := model.LabelSet{
-		model.AddressLabel:            lv(a),
-		podContainerNameLabel:         lv(c.Name),
-		podContainerImageLabel:        lv(c.Image),
-		podContainerPortNameLabel:     lv(cport.Name),
-		podContainerPortNumberLabel:   lv(ports),
-		podContainerPortProtocolLabel: lv(string(cport.Protocol)),
-	}
-	tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
+target := model.LabelSet{
+	model.AddressLabel:            lv(a),
+	podContainerNameLabel:         lv(c.Name),
+	podContainerImageLabel:        lv(c.Image),
+	podContainerPortNameLabel:     lv(cport.Name),
+	podContainerPortNumberLabel:   lv(ports),
+	podContainerPortProtocolLabel: lv(string(cport.Protocol)),
+}
+tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
}
}

@@ -405,6 +405,11 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
// For all seen pods, check all container ports. If they were not covered
// by one of the service endpoints, generate targets for them.
for _, pe := range seenPods {
+// PodIP can be empty when a pod is starting or has been evicted.
+if len(pe.pod.Status.PodIP) == 0 {
+	continue
+}
for _, c := range pe.pod.Spec.Containers {
for _, cport := range c.Ports {
hasSeenPort := func() bool {
@@ -422,21 +427,18 @@
continue
}
-// PodIP can be empty when a pod is starting or has been evicted.
-if len(pe.pod.Status.PodIP) != 0 {
-	a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
-	ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
+a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
+ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
-	target := model.LabelSet{
-		model.AddressLabel:            lv(a),
-		podContainerNameLabel:         lv(c.Name),
-		podContainerImageLabel:        lv(c.Image),
-		podContainerPortNameLabel:     lv(cport.Name),
-		podContainerPortNumberLabel:   lv(ports),
-		podContainerPortProtocolLabel: lv(string(cport.Protocol)),
-	}
-	tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
+target := model.LabelSet{
+	model.AddressLabel:            lv(a),
+	podContainerNameLabel:         lv(c.Name),
+	podContainerImageLabel:        lv(c.Image),
+	podContainerPortNameLabel:     lv(cport.Name),
+	podContainerPortNumberLabel:   lv(ports),
+	podContainerPortProtocolLabel: lv(string(cport.Protocol)),
+}
+tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
}
}

@@ -324,6 +324,25 @@ Run labels query.
##### `promtool query analyze`

Run queries against your Prometheus to analyze the usage pattern of certain metrics.

###### Flags

| Flag | Description | Default |
| --- | --- | --- |
| <code class="text-nowrap">--server</code> | Prometheus server to query. | |
| <code class="text-nowrap">--type</code> | Type of metric: histogram. | |
| <code class="text-nowrap">--duration</code> | Time frame to analyze. | `1h` |
| <code class="text-nowrap">--time</code> | Query time (RFC3339 or Unix timestamp), defaults to now. | |
| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | |
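For example (metric name illustrative): `promtool query analyze --server http://localhost:9090 --type histogram --duration 1h --match http_request_duration_seconds_bucket` prints bucket statistics for each matching series, followed by the aggregated min/avg/max lines.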
### `promtool debug`

Fetch debug information.
@@ -431,9 +450,10 @@ Unit tests for rules.
###### Flags
-| Flag | Description |
-| --- | --- |
-| <code class="text-nowrap">--run</code> | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. |
+| Flag | Description | Default |
+| --- | --- | --- |
+| <code class="text-nowrap">--run</code> | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | |
+| <code class="text-nowrap">--diff</code> | [Experimental] Print colored differential output between expected & received output. | `false` |
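For example, `promtool test rules --diff rules_test.yml` (file name illustrative) reports mismatching alerts as a colored JSON diff instead of the plain exp/got dump.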
@@ -548,7 +568,7 @@ Dump samples from a TSDB.
| --- | --- | --- |
| <code class="text-nowrap">--min-time</code> | Minimum timestamp to dump. | `-9223372036854775808` |
| <code class="text-nowrap">--max-time</code> | Maximum timestamp to dump. | `9223372036854775807` |
-| <code class="text-nowrap">--match</code> | Series selector. | `{__name__=~'(?s:.*)'}` |
+| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
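For example, `promtool tsdb dump --match='{__name__="heavy_metric"}' --match='{baz="abc"}' data/` dumps the union of both selectors, merged and deduplicated as exercised by the TestTSDBDump cases above.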

@@ -451,6 +451,46 @@
# native histogram. If this is exceeded, the entire scrape will be treated as
# failed. 0 means no limit.
[ native_histogram_bucket_limit: <int> | default = 0 ]
+# Lower limit for the growth factor of one bucket to the next in each native
+# histogram. The resolution of a histogram with a lower growth factor will be
+# reduced until it is within the limit.
+# To set an upper limit for the schema (equivalent to "scale" in OTel's
+# exponential histograms), use the following factor limits:
+#
+# +---------------+----------------------------+
+# | growth factor | resulting schema AKA scale |
+# +---------------+----------------------------+
+# | 65536         | -4                         |
+# +---------------+----------------------------+
+# | 256           | -3                         |
+# +---------------+----------------------------+
+# | 16            | -2                         |
+# +---------------+----------------------------+
+# | 4             | -1                         |
+# +---------------+----------------------------+
+# | 2             | 0                          |
+# +---------------+----------------------------+
+# | 1.4           | 1                          |
+# +---------------+----------------------------+
+# | 1.1           | 2                          |
+# +---------------+----------------------------+
+# | 1.09          | 3                          |
+# +---------------+----------------------------+
+# | 1.04          | 4                          |
+# +---------------+----------------------------+
+# | 1.02          | 5                          |
+# +---------------+----------------------------+
+# | 1.01          | 6                          |
+# +---------------+----------------------------+
+# | 1.005         | 7                          |
+# +---------------+----------------------------+
+# | 1.002         | 8                          |
+# +---------------+----------------------------+
+#
+# 0 results in the smallest supported factor (which is currently ~1.0027 or
+# schema 8, but might change in the future).
+[ native_histogram_min_bucket_factor: <float> | default = 0 ]
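# For example, a min bucket factor of 1.1 caps native histogram
# resolution at schema 2 (see the table above).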
```
Where `<job_name>` must be unique across all scrape configurations.
@@ -3619,6 +3659,10 @@ queue_config:
# Retry upon receiving a 429 status code from the remote-write storage.
# This is experimental and might change in the future.
[ retry_on_http_429: <boolean> | default = false ]
+# If set, any sample that is older than sample_age_limit
+# will not be sent to the remote storage. The default value is 0s,
+# which means that all samples are sent.
+[ sample_age_limit: <duration> | default = 0s ]
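# For example, sample_age_limit: 5m means samples older than five
# minutes at send time are not sent to the remote storage.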
# Configures the sending of series metadata to remote storage.
# Metadata configuration is subject to change at any point

@@ -212,8 +212,8 @@ While the following would be *incorrect*:
sum(http_requests_total{method="GET"}) offset 5m // INVALID.
-The same works for range vectors. This returns the 5-minute rate that
-`http_requests_total` had a week ago:
+The same works for range vectors. This returns the 5-minute [rate](./functions.md#rate)
+that `http_requests_total` had a week ago:
rate(http_requests_total[5m] offset 1w)

@@ -39,7 +39,7 @@ To select all HTTP status codes except 4xx ones, you could run:
## Subquery
-Return the 5-minute rate of the `http_requests_total` metric for the past 30 minutes, with a resolution of 1 minute.
+Return the 5-minute [rate](./functions.md#rate) of the `http_requests_total` metric for the past 30 minutes, with a resolution of 1 minute.
rate(http_requests_total[5m])[30m:1m]

@@ -594,6 +594,8 @@ Same as `sort`, but sorts in descending order.
Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.
+This function uses [natural sort order](https://en.wikipedia.org/wiki/Natural_sort_order).
## `sort_by_label_desc()`
**This function has to be enabled via the [feature flag](../feature_flags/) `--enable-feature=promql-experimental-functions`.**
@@ -602,6 +604,8 @@ Same as `sort_by_label`, but sorts in descending order.
Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.
This function uses [natural sort order](https://en.wikipedia.org/wiki/Natural_sort_order).
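A minimal sketch of what natural sort order means here, using the `facette/natsort` package this change adds as a dependency: numeric runs inside label values compare by magnitude rather than byte by byte.

```go
package main

import (
	"fmt"

	"github.com/facette/natsort"
)

func main() {
	// natsort.Compare reports whether a sorts before b in natural order,
	// so "cpu2" precedes "cpu10" even though it follows it lexicographically.
	fmt.Println(natsort.Compare("cpu2", "cpu10"))  // true
	fmt.Println(natsort.Compare("cpu10", "cpu2"))  // false
	fmt.Println(natsort.Compare("cpu10", "cpu10")) // false (equal values)
}
```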
## `sqrt()`
`sqrt(v instant-vector)` calculates the square root of all elements in `v`.

View file

@ -7,10 +7,10 @@ require (
github.com/go-kit/log v0.2.1
github.com/gogo/protobuf v1.3.2
github.com/golang/snappy v0.0.4
github.com/influxdata/influxdb v1.11.2
github.com/prometheus/client_golang v1.17.0
github.com/influxdata/influxdb v1.11.4
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/common v0.45.0
github.com/prometheus/prometheus v0.48.0
github.com/prometheus/prometheus v0.48.1
github.com/stretchr/testify v1.8.4
)
@ -48,7 +48,7 @@ require (
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common/sigv4 v0.1.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/xhit/go-str2duration/v2 v2.1.0 // indirect
go.opentelemetry.io/collector/pdata v1.0.0-rcv0016 // indirect
go.opentelemetry.io/collector/semconv v0.87.0 // indirect
@ -62,7 +62,7 @@ require (
golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.13.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect

View file

@ -123,8 +123,8 @@ github.com/hashicorp/nomad/api v0.0.0-20230721134942-515895c7690c h1:Nc3Mt2BAnq0
github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY=
github.com/hetznercloud/hcloud-go/v2 v2.4.0 h1:MqlAE+w125PLvJRCpAJmEwrIxoVdUdOyuFUhE/Ukbok=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/influxdata/influxdb v1.11.2 h1:qOF3uQN1mDfJNEKwbAgJsqehf8IXgKok2vlGm736oGo=
github.com/influxdata/influxdb v1.11.2/go.mod h1:eUMkLTE2vQwvSk6KGMrTBLKPaqSuczuelGbggigMPFw=
github.com/influxdata/influxdb v1.11.4 h1:H3pVW+/tWQ4lkHhZxVQ13Ov1hmhHYaAzz8L5aq3ZNtw=
github.com/influxdata/influxdb v1.11.4/go.mod h1:VO6X2zlamfmEf+Esc9dR+7UQhdE/krspWNEZPwxCrp0=
github.com/ionos-cloud/sdk-go/v6 v6.1.9 h1:Iq3VIXzeEbc8EbButuACgfLMiY5TPVWUPNrF+Vsddo4=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
@ -194,8 +194,8 @@ github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@ -213,10 +213,10 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
github.com/prometheus/prometheus v0.48.0 h1:yrBloImGQ7je4h8M10ujGh4R6oxYQJQKlMuETwNskGk=
github.com/prometheus/prometheus v0.48.0/go.mod h1:SRw624aMAxTfryAcP8rOjg4S/sHHaetx2lyJJ2nM83g=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/prometheus/prometheus v0.48.1 h1:CTszphSNTXkuCG6O0IfpKdHcJkvvnAAE1GbELKS+NFk=
github.com/prometheus/prometheus v0.48.1/go.mod h1:SRw624aMAxTfryAcP8rOjg4S/sHHaetx2lyJJ2nM83g=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.21 h1:yWfiTPwYxB0l5fGMhl/G+liULugVIHD9AU77iNLrURQ=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
@ -317,8 +317,8 @@ golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=

View file

@ -13,16 +13,18 @@ local template = grafana.template;
g.dashboard(
'%(prefix)sOverview' % $._config.grafanaPrometheus
)
.addMultiTemplate('job', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'job')
.addMultiTemplate('instance', 'prometheus_build_info{job=~"$job"}', 'instance')
.addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'cluster')
.addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job')
.addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance')
.addRow(
g.row('Prometheus Stats')
.addPanel(
g.panel('Prometheus Stats') +
g.tablePanel([
'count by (job, instance, version) (prometheus_build_info{job=~"$job", instance=~"$instance"})',
'max by (job, instance) (time() - process_start_time_seconds{job=~"$job", instance=~"$instance"})',
'count by (cluster, job, instance, version) (prometheus_build_info{cluster=~"$cluster", job=~"$job", instance=~"$instance"})',
'max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~"$cluster", job=~"$job", instance=~"$instance"})',
], {
cluster: { alias: 'Cluster' },
job: { alias: 'Job' },
instance: { alias: 'Instance' },
version: { alias: 'Version' },
@ -35,12 +37,12 @@ local template = grafana.template;
g.row('Discovery')
.addPanel(
g.panel('Target Sync') +
g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', '{{scrape_job}}') +
g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3', '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}') +
{ yaxes: g.yaxes('ms') }
)
.addPanel(
g.panel('Targets') +
g.queryPanel('sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', 'Targets') +
g.queryPanel('sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})', '{{cluster}}:{{job}}:{{instance}}') +
g.stack
)
)
@ -48,29 +50,29 @@ local template = grafana.template;
g.row('Retrieval')
.addPanel(
g.panel('Average Scrape Interval Duration') +
g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{interval}} configured') +
g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{cluster}}:{{job}}:{{instance}} {{interval}} configured') +
{ yaxes: g.yaxes('ms') }
)
.addPanel(
g.panel('Scrape failures') +
g.queryPanel([
'sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))',
'sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))',
'sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))',
'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))',
'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))',
'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))',
'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))',
'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))',
'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))',
'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))',
], [
'exceeded body size limit: {{job}}',
'exceeded sample limit: {{job}}',
'duplicate timestamp: {{job}}',
'out of bounds: {{job}}',
'out of order: {{job}}',
'exceeded body size limit: {{cluster}} {{job}} {{instance}}',
'exceeded sample limit: {{cluster}} {{job}} {{instance}}',
'duplicate timestamp: {{cluster}} {{job}} {{instance}}',
'out of bounds: {{cluster}} {{job}} {{instance}}',
'out of order: {{cluster}} {{job}} {{instance}}',
]) +
g.stack
)
.addPanel(
g.panel('Appended Samples') +
g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', '{{job}} {{instance}}') +
g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])', '{{cluster}} {{job}} {{instance}}') +
g.stack
)
)
@ -78,12 +80,12 @@ local template = grafana.template;
g.row('Storage')
.addPanel(
g.panel('Head Series') +
g.queryPanel('prometheus_tsdb_head_series{job=~"$job",instance=~"$instance"}', '{{job}} {{instance}} head series') +
g.queryPanel('prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head series') +
g.stack
)
.addPanel(
g.panel('Head Chunks') +
g.queryPanel('prometheus_tsdb_head_chunks{job=~"$job",instance=~"$instance"}', '{{job}} {{instance}} head chunks') +
g.queryPanel('prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head chunks') +
g.stack
)
)
@ -91,12 +93,12 @@ local template = grafana.template;
g.row('Query')
.addPanel(
g.panel('Query Rate') +
g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{job}} {{instance}}') +
g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{cluster}} {{job}} {{instance}}') +
g.stack,
)
.addPanel(
g.panel('Stage Duration') +
g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",job=~"$job",instance=~"$instance"}) * 1e3', '{{slice}}') +
g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3', '{{slice}}') +
{ yaxes: g.yaxes('ms') } +
g.stack,
)

go.mod
View file

@ -19,6 +19,7 @@ require (
github.com/edsrzf/mmap-go v1.1.0
github.com/envoyproxy/go-control-plane v0.11.1
github.com/envoyproxy/protoc-gen-validate v1.0.2
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb
github.com/fsnotify/fsnotify v1.7.0
github.com/go-kit/log v0.2.1
github.com/go-logfmt/logfmt v0.6.0
@ -33,20 +34,21 @@ require (
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/consul/api v1.26.1
github.com/hashicorp/nomad/api v0.0.0-20230721134942-515895c7690c // Not upgrading this for now due to https://github.com/prometheus/prometheus/pull/13255#issuecomment-1845237409
github.com/hetznercloud/hcloud-go/v2 v2.4.0
github.com/hetznercloud/hcloud-go/v2 v2.6.0
github.com/ionos-cloud/sdk-go/v6 v6.1.10
github.com/json-iterator/go v1.1.12
github.com/klauspost/compress v1.17.4
github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b
github.com/linode/linodego v1.25.0
github.com/linode/linodego v1.27.1
github.com/miekg/dns v1.1.57
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f
github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1
github.com/oklog/run v1.1.0
github.com/oklog/ulid v1.3.1
github.com/ovh/go-ovh v1.4.3
github.com/prometheus/alertmanager v0.26.0
github.com/prometheus/client_golang v1.17.0
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/client_model v0.5.0
github.com/prometheus/common v0.45.1-0.20231122191551-832cd6e99f99
github.com/prometheus/common/assets v0.2.0
@ -71,10 +73,10 @@ require (
go.uber.org/goleak v1.3.0
go.uber.org/multierr v1.11.0
golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb
golang.org/x/net v0.19.0
golang.org/x/net v0.20.0
golang.org/x/oauth2 v0.15.0
golang.org/x/sync v0.5.0
golang.org/x/sys v0.15.0
golang.org/x/sys v0.16.0
golang.org/x/time v0.5.0
golang.org/x/tools v0.16.0
google.golang.org/api v0.153.0
@ -122,7 +124,7 @@ require (
github.com/go-openapi/spec v0.20.9 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/go-openapi/validate v0.22.1 // indirect
github.com/go-resty/resty/v2 v2.10.0 // indirect
github.com/go-resty/resty/v2 v2.11.0 // indirect
github.com/golang-jwt/jwt/v5 v5.0.0 // indirect
github.com/golang/glog v1.1.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
@ -168,7 +170,7 @@ require (
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.0 // indirect
github.com/xhit/go-str2duration/v2 v2.1.0 // indirect
@ -176,9 +178,9 @@ require (
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/metric v1.21.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.16.0 // indirect
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/term v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20231120223509-83a465c0220f // indirect

go.sum
View file

@ -166,6 +166,8 @@ github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBF
github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE=
github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U=
github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1SCxNI1/Tieq/NFvh6dzLdgi7eu0tM=
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb/go.mod h1:bH6Xx7IW64qjjJq8M2u4dxNaBiDfKK+z/3eGDpXEQhc=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
@ -238,8 +240,8 @@ github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogB
github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-openapi/validate v0.22.1 h1:G+c2ub6q47kfX1sOBLwIQwzBVt8qmOAARyo/9Fqs9NU=
github.com/go-openapi/validate v0.22.1/go.mod h1:rjnrwK57VJ7A8xqfpAOEKRH8yQSGUriMu5/zuPSQ1hg=
github.com/go-resty/resty/v2 v2.10.0 h1:Qla4W/+TMmv0fOeeRqzEpXPLfTUnR5HZ1+lGs+CkiCo=
github.com/go-resty/resty/v2 v2.10.0/go.mod h1:iiP/OpA0CkcL3IGt1O0+/SIItFUbkkyw5BGXiVdTu+A=
github.com/go-resty/resty/v2 v2.11.0 h1:i7jMfNOJYMp69lq7qozJP+bjgzfAzeOhuGlyDrqxT/8=
github.com/go-resty/resty/v2 v2.11.0/go.mod h1:iiP/OpA0CkcL3IGt1O0+/SIItFUbkkyw5BGXiVdTu+A=
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
@ -439,8 +441,8 @@ github.com/hashicorp/nomad/api v0.0.0-20230721134942-515895c7690c/go.mod h1:O23q
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY=
github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4=
github.com/hetznercloud/hcloud-go/v2 v2.4.0 h1:MqlAE+w125PLvJRCpAJmEwrIxoVdUdOyuFUhE/Ukbok=
github.com/hetznercloud/hcloud-go/v2 v2.4.0/go.mod h1:l7fA5xsncFBzQTyw29/dw5Yr88yEGKKdc6BHf24ONS0=
github.com/hetznercloud/hcloud-go/v2 v2.6.0 h1:RJOA2hHZ7rD1pScA4O1NF6qhkHyUdbbxjHgFNot8928=
github.com/hetznercloud/hcloud-go/v2 v2.6.0/go.mod h1:4J1cSE57+g0WS93IiHLV7ubTHItcp+awzeBp5bM9mfA=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
@ -501,8 +503,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
github.com/linode/linodego v1.25.0 h1:zYMz0lTasD503jBu3tSRhzEmXHQN1zptCw5o71ibyyU=
github.com/linode/linodego v1.25.0/go.mod h1:BMZI0pMM/YGjBis7pIXDPbcgYfCZLH0/UvzqtsGtG1c=
github.com/linode/linodego v1.27.1 h1:KoQm5g2fppw8qIClJqUEL0yKH0+f+7te3Mewagb5QKE=
github.com/linode/linodego v1.27.1/go.mod h1:5oAsx+uinHtVo6U77nXXXtox7MWzUW6aEkTOKXxA9uo=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
@ -577,6 +579,8 @@ github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxzi
github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 h1:dOYG7LS/WK00RWZc8XGgcUTlTxpp3mKhdR2Q9z9HbXM=
github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs=
github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA=
github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA=
@ -636,8 +640,8 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@ -667,8 +671,8 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
@ -817,8 +821,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY=
golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -900,8 +904,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -993,15 +997,15 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4=
golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -469,8 +469,8 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool {
// is a counter reset or not.
// We do the same if the CounterResetHint is GaugeType, which should not happen, but PromQL still
// allows the user to apply functions to gauge histograms that are only meant for counter histograms.
// In this case, we treat the gauge histograms as a counter histograms
// (and we plan to return a warning about it to the user).
// In this case, we treat the gauge histograms as counter histograms. A warning should be returned
// to the user in this case.
if h.Count < previous.Count {
return true
}

View file

@ -15,10 +15,10 @@ package histogram
import (
"math"
"slices"
"testing"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
)
func TestGetBound(t *testing.T) {

View file

@ -137,7 +137,6 @@ func (p *OpenMetricsParser) Type() ([]byte, model.MetricType) {
// Must only be called after Next returned a unit entry.
// The returned byte slices become invalid after the next call to Next.
func (p *OpenMetricsParser) Unit() ([]byte, []byte) {
// The Prometheus format does not have units.
return p.l.b[p.offsets[0]:p.offsets[1]], p.text
}

View file

@ -269,10 +269,11 @@ func (p *ProtobufParser) Type() ([]byte, model.MetricType) {
return n, model.MetricTypeUnknown
}
// Unit always returns (nil, nil) because units aren't supported by the protobuf
// format.
// Unit returns the metric unit in the current entry.
// Must only be called after Next returned a unit entry.
// The returned byte slices become invalid after the next call to Next.
func (p *ProtobufParser) Unit() ([]byte, []byte) {
return nil, nil
return p.metricBytes.Bytes(), []byte(p.mf.GetUnit())
}
// Comment always returns nil because comments aren't supported by the protobuf
@ -422,6 +423,16 @@ func (p *ProtobufParser) Next() (Entry, error) {
default:
return EntryInvalid, fmt.Errorf("unknown metric type for metric %q: %s", name, p.mf.GetType())
}
unit := p.mf.GetUnit()
if len(unit) > 0 {
if p.mf.GetType() == dto.MetricType_COUNTER && strings.HasSuffix(name, "_total") {
if !strings.HasSuffix(name[:len(name)-6], unit) || len(name)-6 < len(unit)+1 || name[len(name)-6-len(unit)-1] != '_' {
return EntryInvalid, fmt.Errorf("unit %q not a suffix of counter %q", unit, name)
}
} else if !strings.HasSuffix(name, unit) || len(name) < len(unit)+1 || name[len(name)-len(unit)-1] != '_' {
return EntryInvalid, fmt.Errorf("unit %q not a suffix of metric %q", unit, name)
}
}
p.metricBytes.Reset()
p.metricBytes.WriteString(name)

View file

@ -58,6 +58,7 @@ metric: <
`name: "go_memstats_alloc_bytes_total"
help: "Total number of bytes allocated, even if freed."
type: COUNTER
unit: "bytes"
metric: <
counter: <
value: 1.546544e+06
@ -665,6 +666,7 @@ func TestProtobufParse(t *testing.T) {
{
m: "go_memstats_alloc_bytes_total",
help: "Total number of bytes allocated, even if freed.",
unit: "bytes",
},
{
m: "go_memstats_alloc_bytes_total",

View file

@ -12,7 +12,6 @@
// limitations under the License.
//go:build plugins
// +build plugins
package main

View file

@ -24,6 +24,7 @@ import (
"runtime"
"sort"
"strconv"
"strings"
"sync"
"time"
@ -1543,6 +1544,18 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
}
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
if e.Func.Name == "rate" || e.Func.Name == "increase" {
samples := inMatrix[0]
metricName := samples.Metric.Get(labels.MetricName)
if metricName != "" && len(samples.Floats) > 0 &&
!strings.HasSuffix(metricName, "_total") &&
!strings.HasSuffix(metricName, "_sum") &&
!strings.HasSuffix(metricName, "_count") &&
!strings.HasSuffix(metricName, "_bucket") {
warnings.Add(annotations.NewPossibleNonCounterInfo(metricName, e.Args[0].PositionRange()))
}
}
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
@ -2494,22 +2507,12 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram
switch op {
case parser.ADD:
if hlhs != nil && hrhs != nil {
// The histogram being added must have the larger schema
// code (i.e. the higher resolution).
if hrhs.Schema >= hlhs.Schema {
return 0, hlhs.Copy().Add(hrhs).Compact(0), true
}
return 0, hrhs.Copy().Add(hlhs).Compact(0), true
return 0, hlhs.Copy().Add(hrhs).Compact(0), true
}
return lhs + rhs, nil, true
case parser.SUB:
if hlhs != nil && hrhs != nil {
// The histogram being subtracted must have the larger schema
// code (i.e. the higher resolution).
if hrhs.Schema >= hlhs.Schema {
return 0, hlhs.Copy().Sub(hrhs).Compact(0), true
}
return 0, hrhs.Copy().Mul(-1).Add(hlhs).Compact(0), true
return 0, hlhs.Copy().Sub(hrhs).Compact(0), true
}
return lhs - rhs, nil, true
case parser.MUL:
@ -2694,13 +2697,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, grouping []string, par
if s.H != nil {
group.hasHistogram = true
if group.histogramValue != nil {
// The histogram being added must have
// an equal or larger schema.
if s.H.Schema >= group.histogramValue.Schema {
group.histogramValue.Add(s.H)
} else {
group.histogramValue = s.H.Copy().Add(group.histogramValue)
}
group.histogramValue.Add(s.H)
}
// Otherwise the aggregation contained floats
// previously and will be invalid anyway. No
@ -2717,15 +2714,8 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, grouping []string, par
if group.histogramMean != nil {
left := s.H.Copy().Div(float64(group.groupCount))
right := group.histogramMean.Copy().Div(float64(group.groupCount))
// The histogram being added/subtracted must have
// an equal or larger schema.
if s.H.Schema >= group.histogramMean.Schema {
toAdd := right.Mul(-1).Add(left)
group.histogramMean.Add(toAdd)
} else {
toAdd := left.Sub(right)
group.histogramMean = toAdd.Add(group.histogramMean)
}
toAdd := left.Sub(right)
group.histogramMean.Add(toAdd)
}
// Otherwise the aggregation contained floats
// previously and will be invalid anyway. No
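For reference, the incremental update above is the standard running-mean recurrence: with `count` = n, `left` = x_n/n and `right` = mean/n, so `toAdd` = (x_n - mean)/n and the update computes mean <- mean + (x_n - mean)/n. This is algebraically equal to the mean of all n histograms seen so far, but avoids accumulating a large sum before dividing.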

View file

@ -21,6 +21,7 @@ import (
"strings"
"time"
"github.com/facette/natsort"
"github.com/grafana/regexp"
"github.com/prometheus/common/model"
"golang.org/x/exp/slices"
@ -186,6 +187,8 @@ func histogramRate(points []HPoint, isCounter bool, metricName string, pos posra
minSchema = last.Schema
}
var annos annotations.Annotations
// First iteration to find out two things:
// - What's the smallest relevant schema?
// - Are all data points histograms?
@ -196,10 +199,12 @@ func histogramRate(points []HPoint, isCounter bool, metricName string, pos posra
if curr == nil {
return nil, annotations.New().Add(annotations.NewMixedFloatsHistogramsWarning(metricName, pos))
}
// TODO(trevorwhitney): Check if isCounter is consistent with curr.CounterResetHint.
if !isCounter {
continue
}
if curr.CounterResetHint == histogram.GaugeType {
annos.Add(annotations.NewNativeHistogramNotCounterWarning(metricName, pos))
}
if curr.Schema < minSchema {
minSchema = curr.Schema
}
@ -217,6 +222,8 @@ func histogramRate(points []HPoint, isCounter bool, metricName string, pos posra
}
prev = curr
}
} else if points[0].H.CounterResetHint != histogram.GaugeType || points[len(points)-1].H.CounterResetHint != histogram.GaugeType {
annos.Add(annotations.NewNativeHistogramNotGaugeWarning(metricName, pos))
}
h.CounterResetHint = histogram.GaugeType
@ -380,15 +387,16 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode
for _, label := range labels {
lv1 := a.Metric.Get(label)
lv2 := b.Metric.Get(label)
// 0 if a == b, -1 if a < b, and +1 if a > b.
switch strings.Compare(lv1, lv2) {
case -1:
return -1
case +1:
return +1
default:
if lv1 == lv2 {
continue
}
if natsort.Compare(lv1, lv2) {
return -1
}
return +1
}
return 0
@ -409,19 +417,16 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval
for _, label := range labels {
lv1 := a.Metric.Get(label)
lv2 := b.Metric.Get(label)
// If label values are the same, continue to the next label
if lv1 == lv2 {
continue
}
// 0 if a == b, -1 if a < b, and +1 if a > b.
switch strings.Compare(lv1, lv2) {
case -1:
if natsort.Compare(lv1, lv2) {
return +1
case +1:
return -1
default:
continue
}
return -1
}
return 0
@ -532,15 +537,8 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode
count++
left := h.H.Copy().Div(float64(count))
right := mean.Copy().Div(float64(count))
// The histogram being added/subtracted must have
// an equal or larger schema.
if h.H.Schema >= mean.Schema {
toAdd := right.Mul(-1).Add(left)
mean.Add(toAdd)
} else {
toAdd := left.Sub(right)
mean = toAdd.Add(mean)
}
toAdd := left.Sub(right)
mean.Add(toAdd)
}
return mean
}), nil
@ -680,13 +678,7 @@ func funcSumOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode
return aggrHistOverTime(vals, enh, func(s Series) *histogram.FloatHistogram {
sum := s.Histograms[0].H.Copy()
for _, h := range s.Histograms[1:] {
// The histogram being added must have
// an equal or larger schema.
if h.H.Schema >= sum.Schema {
sum.Add(h.H)
} else {
sum = h.H.Copy().Add(sum)
}
sum.Add(h.H)
}
return sum
}), nil

View file

@ -13,7 +13,6 @@
// Only build when go-fuzz is in use
//go:build gofuzz
// +build gofuzz
package promql

View file

@ -13,7 +13,6 @@
// Only build when go-fuzz is in use
//go:build gofuzz
// +build gofuzz
package promql

View file

@ -208,6 +208,20 @@ func ParseMetricSelector(input string) (m []*labels.Matcher, err error) {
return m, err
}
// ParseMetricSelectors parses a list of provided textual metric selectors into lists of
// label matchers.
func ParseMetricSelectors(matchers []string) (m [][]*labels.Matcher, err error) {
var matcherSets [][]*labels.Matcher
for _, s := range matchers {
matchers, err := ParseMetricSelector(s)
if err != nil {
return nil, err
}
matcherSets = append(matcherSets, matchers)
}
return matcherSets, nil
}
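A hypothetical usage sketch of the new helper, assuming the `promql/parser` import path; each returned entry is one `[]*labels.Matcher` parsed from the corresponding selector string:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/promql/parser"
)

func main() {
	sets, err := parser.ParseMetricSelectors([]string{
		`up{job="prometheus"}`,
		`go_goroutines{instance=~"localhost:.*"}`,
	})
	if err != nil {
		panic(err)
	}
	for _, matchers := range sets {
		fmt.Println(matchers) // one list of label matchers per selector
	}
}
```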
// SequenceValue is an omittable value in a sequence of time series values.
type SequenceValue struct {
Value float64

View file

@ -482,6 +482,19 @@ load 5m
http_requests{job="app-server", instance="0", group="canary"} 0+70x10
http_requests{job="app-server", instance="1", group="canary"} 0+80x10
http_requests{job="api-server", instance="2", group="production"} 0+10x10
cpu_time_total{job="cpu", cpu="0"} 0+10x10
cpu_time_total{job="cpu", cpu="1"} 0+10x10
cpu_time_total{job="cpu", cpu="2"} 0+10x10
cpu_time_total{job="cpu", cpu="3"} 0+10x10
cpu_time_total{job="cpu", cpu="10"} 0+10x10
cpu_time_total{job="cpu", cpu="11"} 0+10x10
cpu_time_total{job="cpu", cpu="12"} 0+10x10
cpu_time_total{job="cpu", cpu="20"} 0+10x10
cpu_time_total{job="cpu", cpu="21"} 0+10x10
cpu_time_total{job="cpu", cpu="100"} 0+10x10
node_uname_info{job="node_exporter", instance="4m600", release="1.2.3"} 0+10x10
node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 0+10x10
node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 0+10x10
eval_ordered instant at 50m sort_by_label(http_requests, "instance")
http_requests{group="production", instance="0", job="api-server"} 100
@ -579,6 +592,28 @@ eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance", "group
http_requests{group="canary", instance="0", job="app-server"} 700
http_requests{group="canary", instance="0", job="api-server"} 300
eval_ordered instant at 50m sort_by_label(cpu_time_total, "cpu")
cpu_time_total{job="cpu", cpu="0"} 100
cpu_time_total{job="cpu", cpu="1"} 100
cpu_time_total{job="cpu", cpu="2"} 100
cpu_time_total{job="cpu", cpu="3"} 100
cpu_time_total{job="cpu", cpu="10"} 100
cpu_time_total{job="cpu", cpu="11"} 100
cpu_time_total{job="cpu", cpu="12"} 100
cpu_time_total{job="cpu", cpu="20"} 100
cpu_time_total{job="cpu", cpu="21"} 100
cpu_time_total{job="cpu", cpu="100"} 100
eval_ordered instant at 50m sort_by_label(node_uname_info, "instance")
node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100
node_uname_info{job="node_exporter", instance="4m600", release="1.2.3"} 100
node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100
eval_ordered instant at 50m sort_by_label(node_uname_info, "release")
node_uname_info{job="node_exporter", instance="4m600", release="1.2.3"} 100
node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100
node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100
# Tests for holt_winters
clear

View file

@ -99,6 +99,7 @@ type scrapeLoopOptions struct {
scraper scraper
sampleLimit int
bucketLimit int
maxSchema int32
labelLimits *labelLimits
honorLabels bool
honorTimestamps bool
@ -165,6 +166,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
opts.enableCompression,
opts.sampleLimit,
opts.bucketLimit,
opts.maxSchema,
opts.labelLimits,
opts.interval,
opts.timeout,
@ -270,6 +272,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
bodySizeLimit = int64(sp.config.BodySizeLimit)
sampleLimit = int(sp.config.SampleLimit)
bucketLimit = int(sp.config.NativeHistogramBucketLimit)
maxSchema = pickSchema(sp.config.NativeHistogramMinBucketFactor)
labelLimits = &labelLimits{
labelLimit: int(sp.config.LabelLimit),
labelNameLengthLimit: int(sp.config.LabelNameLengthLimit),
@ -310,6 +313,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
scraper: s,
sampleLimit: sampleLimit,
bucketLimit: bucketLimit,
maxSchema: maxSchema,
labelLimits: labelLimits,
honorLabels: honorLabels,
honorTimestamps: honorTimestamps,
@ -613,7 +617,7 @@ func mutateReportSampleLabels(lset labels.Labels, target *Target) labels.Labels
}
// appender returns an appender for ingested samples from the target.
func appender(app storage.Appender, sampleLimit, bucketLimit int) storage.Appender {
func appender(app storage.Appender, sampleLimit, bucketLimit int, maxSchema int32) storage.Appender {
app = &timeLimitAppender{
Appender: app,
maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)),
@ -633,6 +637,14 @@ func appender(app storage.Appender, sampleLimit, bucketLimit int) storage.Append
limit: bucketLimit,
}
}
if maxSchema < nativeHistogramMaxSchema {
app = &maxSchemaAppender{
Appender: app,
maxSchema: maxSchema,
}
}
return app
}
@ -786,6 +798,7 @@ type scrapeLoop struct {
forcedErrMtx sync.Mutex
sampleLimit int
bucketLimit int
maxSchema int32
labelLimits *labelLimits
interval time.Duration
timeout time.Duration
@ -1078,6 +1091,7 @@ func newScrapeLoop(ctx context.Context,
enableCompression bool,
sampleLimit int,
bucketLimit int,
maxSchema int32,
labelLimits *labelLimits,
interval time.Duration,
timeout time.Duration,
@ -1128,6 +1142,7 @@ func newScrapeLoop(ctx context.Context,
enableCompression: enableCompression,
sampleLimit: sampleLimit,
bucketLimit: bucketLimit,
maxSchema: maxSchema,
labelLimits: labelLimits,
interval: interval,
timeout: timeout,
@ -1458,7 +1473,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string,
}
// Take an appender with limits.
app = appender(app, sl.sampleLimit, sl.bucketLimit)
app = appender(app, sl.sampleLimit, sl.bucketLimit, sl.maxSchema)
defer func() {
if err != nil {
@ -1906,3 +1921,18 @@ func TargetFromContext(ctx context.Context) (*Target, bool) {
t, ok := ctx.Value(ctxKeyTarget).(*Target)
return t, ok
}
func pickSchema(bucketFactor float64) int32 {
if bucketFactor <= 1 {
bucketFactor = 1.00271
}
floor := math.Floor(-math.Log2(math.Log2(bucketFactor)))
switch {
case floor >= float64(nativeHistogramMaxSchema):
return nativeHistogramMaxSchema
case floor <= float64(nativeHistogramMinSchema):
return nativeHistogramMinSchema
default:
return int32(floor)
}
}

View file

@ -513,7 +513,7 @@ func TestScrapePoolAppender(t *testing.T) {
appl, ok := loop.(*scrapeLoop)
require.True(t, ok, "Expected scrapeLoop but got %T", loop)
wrapped := appender(appl.appender(context.Background()), 0, 0)
wrapped := appender(appl.appender(context.Background()), 0, 0, nativeHistogramMaxSchema)
tl, ok := wrapped.(*timeLimitAppender)
require.True(t, ok, "Expected timeLimitAppender but got %T", wrapped)
@ -529,7 +529,7 @@ func TestScrapePoolAppender(t *testing.T) {
appl, ok = loop.(*scrapeLoop)
require.True(t, ok, "Expected scrapeLoop but got %T", loop)
wrapped = appender(appl.appender(context.Background()), sampleLimit, 0)
wrapped = appender(appl.appender(context.Background()), sampleLimit, 0, nativeHistogramMaxSchema)
sl, ok := wrapped.(*limitAppender)
require.True(t, ok, "Expected limitAppender but got %T", wrapped)
@ -540,7 +540,7 @@ func TestScrapePoolAppender(t *testing.T) {
_, ok = tl.Appender.(nopAppender)
require.True(t, ok, "Expected base appender but got %T", tl.Appender)
wrapped = appender(appl.appender(context.Background()), sampleLimit, 100)
wrapped = appender(appl.appender(context.Background()), sampleLimit, 100, nativeHistogramMaxSchema)
bl, ok := wrapped.(*bucketLimitAppender)
require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped)
@ -553,6 +553,23 @@ func TestScrapePoolAppender(t *testing.T) {
_, ok = tl.Appender.(nopAppender)
require.True(t, ok, "Expected base appender but got %T", tl.Appender)
wrapped = appender(appl.appender(context.Background()), sampleLimit, 100, 0)
ml, ok := wrapped.(*maxSchemaAppender)
require.True(t, ok, "Expected maxSchemaAppender but got %T", wrapped)
bl, ok = ml.Appender.(*bucketLimitAppender)
require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped)
sl, ok = bl.Appender.(*limitAppender)
require.True(t, ok, "Expected limitAppender but got %T", bl)
tl, ok = sl.Appender.(*timeLimitAppender)
require.True(t, ok, "Expected timeLimitAppender but got %T", sl.Appender)
_, ok = tl.Appender.(nopAppender)
require.True(t, ok, "Expected base appender but got %T", tl.Appender)
}
func TestScrapePoolRaces(t *testing.T) {
@ -653,7 +670,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app
true,
false,
true,
0, 0,
0, 0, nativeHistogramMaxSchema,
nil,
interval,
time.Hour,
@ -796,7 +813,7 @@ func TestScrapeLoopRun(t *testing.T) {
true,
false,
true,
0, 0,
0, 0, nativeHistogramMaxSchema,
nil,
time.Second,
time.Hour,
@ -942,7 +959,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
true,
false,
true,
0, 0,
0, 0, nativeHistogramMaxSchema,
nil,
0,
0,
@ -1194,7 +1211,9 @@ func TestScrapeLoopCache(t *testing.T) {
)
ctx, cancel := context.WithCancel(context.Background())
sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond)
// Decreasing the scrape interval could make the test fail, as multiple scrapes might be initiated at identical millisecond timestamps.
// See https://github.com/prometheus/prometheus/issues/12727.
sl := newBasicScrapeLoop(t, ctx, scraper, app, 100*time.Millisecond)
numScrapes := 0
@ -3465,3 +3484,73 @@ func TestScrapeLoopCompression(t *testing.T) {
})
}
}
func TestPickSchema(t *testing.T) {
tcs := []struct {
factor float64
schema int32
}{
{
factor: 65536,
schema: -4,
},
{
factor: 256,
schema: -3,
},
{
factor: 16,
schema: -2,
},
{
factor: 4,
schema: -1,
},
{
factor: 2,
schema: 0,
},
{
factor: 1.4,
schema: 1,
},
{
factor: 1.1,
schema: 2,
},
{
factor: 1.09,
schema: 3,
},
{
factor: 1.04,
schema: 4,
},
{
factor: 1.02,
schema: 5,
},
{
factor: 1.01,
schema: 6,
},
{
factor: 1.005,
schema: 7,
},
{
factor: 1.002,
schema: 8,
},
// The default value of native_histogram_min_bucket_factor
{
factor: 0,
schema: 8,
},
}
for _, tc := range tcs {
schema := pickSchema(tc.factor)
require.Equal(t, tc.schema, schema)
}
}

View file

@ -387,6 +387,35 @@ func (app *bucketLimitAppender) AppendHistogram(ref storage.SeriesRef, lset labe
return ref, nil
}
const (
nativeHistogramMaxSchema int32 = 8
nativeHistogramMinSchema int32 = -4
)
type maxSchemaAppender struct {
storage.Appender
maxSchema int32
}
func (app *maxSchemaAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
if h != nil {
if h.Schema > app.maxSchema {
h = h.ReduceResolution(app.maxSchema)
}
}
if fh != nil {
if fh.Schema > app.maxSchema {
fh = fh.ReduceResolution(app.maxSchema)
}
}
ref, err := app.Appender.AppendHistogram(ref, lset, t, h, fh)
if err != nil {
return 0, err
}
return ref, nil
}
// PopulateLabels builds a label set from the given label set and scrape configuration.
// It returns a label set before relabeling was applied as the second return value.
// Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling.

View file

@ -590,3 +590,64 @@ func TestBucketLimitAppender(t *testing.T) {
}
}
}
func TestMaxSchemaAppender(t *testing.T) {
example := histogram.Histogram{
Schema: 0,
Count: 21,
Sum: 33,
ZeroThreshold: 0.001,
ZeroCount: 3,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 3},
},
PositiveBuckets: []int64{3, 0, 0},
NegativeSpans: []histogram.Span{
{Offset: 0, Length: 3},
},
NegativeBuckets: []int64{3, 0, 0},
}
cases := []struct {
h histogram.Histogram
maxSchema int32
expectSchema int32
}{
{
h: example,
maxSchema: -1,
expectSchema: -1,
},
{
h: example,
maxSchema: 0,
expectSchema: 0,
},
}
resApp := &collectResultAppender{}
for _, c := range cases {
for _, floatHisto := range []bool{true, false} {
t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) {
app := &maxSchemaAppender{Appender: resApp, maxSchema: c.maxSchema}
ts := int64(10 * time.Minute / time.Millisecond)
lbls := labels.FromStrings("__name__", "sparse_histogram_series")
var err error
if floatHisto {
fh := c.h.Copy().ToFloat(nil)
_, err = app.AppendHistogram(0, lbls, ts, nil, fh)
require.Equal(t, c.expectSchema, fh.Schema)
require.NoError(t, err)
} else {
h := c.h.Copy()
_, err = app.AppendHistogram(0, lbls, ts, h, nil)
require.Equal(t, c.expectSchema, h.Schema)
require.NoError(t, err)
}
require.NoError(t, app.Commit())
})
}
}
}

View file

@ -12,5 +12,5 @@ GZIP_OPTS="-fk"
if ! gzip -k -h &>/dev/null; then GZIP_OPTS="-f"; fi
find static -type f -name '*.gz' -delete
find static -type f -exec gzip $GZIP_OPTS '{}' \; -print0 | xargs -0 -I % echo %.gz | xargs echo //go:embed >> embed.go
find static -type f -exec gzip $GZIP_OPTS '{}' \; -print0 | xargs -0 -I % echo %.gz | sort | xargs echo //go:embed >> embed.go
echo var EmbedFS embed.FS >> embed.go

View file

@ -26,7 +26,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: install Go
uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0
uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
with:
go-version: 1.21.x
- name: Install snmp_exporter/generator dependencies

View file

@ -12,7 +12,6 @@
// limitations under the License.
//go:build tools
// +build tools
// Package tools tracks dependencies for tools that are required to generate the protobuf code.
// See https://github.com/golang/go/issues/25922

View file

@ -517,7 +517,7 @@ func TestMetricTypeToMetricTypeProto(t *testing.T) {
}
func TestDecodeWriteRequest(t *testing.T) {
buf, _, err := buildWriteRequest(writeRequestFixture.Timeseries, nil, nil, nil)
buf, _, _, err := buildWriteRequest(nil, writeRequestFixture.Timeseries, nil, nil, nil, nil)
require.NoError(t, err)
actual, err := DecodeWriteRequest(bytes.NewReader(buf))

View file

@ -1,3 +1,5 @@
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
@ -25,6 +27,7 @@ var dropSanitizationGate = featuregate.GlobalRegistry().MustRegister(
//
// Exception is made for double-underscores which are allowed
func NormalizeLabel(label string) string {
// Trivial case
if len(label) == 0 {
return label

View file

@ -1,3 +1,5 @@
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

View file

@ -1,3 +1,5 @@
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

View file

@ -1,4 +1,4 @@
// DO NOT EDIT. COPIED AS-IS. SEE README.md
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

View file

@ -1,4 +1,4 @@
// DO NOT EDIT. COPIED AS-IS. SEE README.md
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

View file

@ -1,4 +1,4 @@
// DO NOT EDIT. COPIED AS-IS. SEE README.md
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

View file

@ -1,4 +1,4 @@
// DO NOT EDIT. COPIED AS-IS. SEE README.md
// DO NOT EDIT. COPIED AS-IS. SEE ../README.md
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

View file

@ -1,4 +1,5 @@
#!/bin/bash
set -xe
OTEL_VERSION=v0.88.0
@ -6,11 +7,16 @@ git clone https://github.com/open-telemetry/opentelemetry-collector-contrib ./tm
cd ./tmp
git checkout $OTEL_VERSION
cd ..
rm -rf ./prometheusremotewrite/*
cp -r ./tmp/pkg/translator/prometheusremotewrite/*.go ./prometheusremotewrite
rm -rf ./prometheusremotewrite/*_test.go
rm -rf ./prometheus/*
cp -r ./tmp/pkg/translator/prometheus/*.go ./prometheus
rm -rf ./prometheus/*_test.go
rm -rf ./tmp
sed -i '' 's#github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus#github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus#g' ./prometheusremotewrite/*.go
sed -i '' '1s#^#// DO NOT EDIT. COPIED AS-IS. SEE README.md\n\n#g' ./prometheusremotewrite/*.go
sed -i '' '1s#^#// DO NOT EDIT. COPIED AS-IS. SEE ../README.md\n\n#g' ./prometheusremotewrite/*.go ./prometheus/*.go

View file

@ -36,6 +36,7 @@ import (
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/tsdb/chunks"
@ -51,6 +52,10 @@ const (
// Allow 30% too many shards before scaling down.
shardToleranceFraction = 0.3
reasonTooOld = "too_old"
reasonDroppedSeries = "dropped_series"
reasonUnintentionalDroppedSeries = "unintentionally_dropped_series"
)
type queueManagerMetrics struct {
@ -68,9 +73,9 @@ type queueManagerMetrics struct {
retriedExemplarsTotal prometheus.Counter
retriedHistogramsTotal prometheus.Counter
retriedMetadataTotal prometheus.Counter
droppedSamplesTotal prometheus.Counter
droppedExemplarsTotal prometheus.Counter
droppedHistogramsTotal prometheus.Counter
droppedSamplesTotal *prometheus.CounterVec
droppedExemplarsTotal *prometheus.CounterVec
droppedHistogramsTotal *prometheus.CounterVec
enqueueRetriesTotal prometheus.Counter
sentBatchDuration prometheus.Histogram
highestSentTimestamp *maxTimestamp
@ -180,27 +185,27 @@ func newQueueManagerMetrics(r prometheus.Registerer, rn, e string) *queueManager
Help: "Total number of metadata entries which failed on send to remote storage but were retried because the send error was recoverable.",
ConstLabels: constLabels,
})
m.droppedSamplesTotal = prometheus.NewCounter(prometheus.CounterOpts{
m.droppedSamplesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "samples_dropped_total",
Help: "Total number of samples which were dropped after being read from the WAL before being sent via remote write, either via relabelling or unintentionally because of an unknown reference ID.",
Help: "Total number of samples which were dropped after being read from the WAL before being sent via remote write, either via relabelling, due to being too old or unintentionally because of an unknown reference ID.",
ConstLabels: constLabels,
})
m.droppedExemplarsTotal = prometheus.NewCounter(prometheus.CounterOpts{
}, []string{"reason"})
m.droppedExemplarsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "exemplars_dropped_total",
Help: "Total number of exemplars which were dropped after being read from the WAL before being sent via remote write, either via relabelling or unintentionally because of an unknown reference ID.",
Help: "Total number of exemplars which were dropped after being read from the WAL before being sent via remote write, either via relabelling, due to being too old or unintentionally because of an unknown reference ID.",
ConstLabels: constLabels,
})
m.droppedHistogramsTotal = prometheus.NewCounter(prometheus.CounterOpts{
}, []string{"reason"})
m.droppedHistogramsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "histograms_dropped_total",
Help: "Total number of histograms which were dropped after being read from the WAL before being sent via remote write, either via relabelling or unintentionally because of an unknown reference ID.",
Help: "Total number of histograms which were dropped after being read from the WAL before being sent via remote write, either via relabelling, due to being too old or unintentionally because of an unknown reference ID.",
ConstLabels: constLabels,
})
}, []string{"reason"})
m.enqueueRetriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
@ -391,7 +396,8 @@ type WriteClient interface {
// indicated by the provided WriteClient. Implements writeTo interface
// used by WAL Watcher.
type QueueManager struct {
lastSendTimestamp atomic.Int64
lastSendTimestamp atomic.Int64
buildRequestLimitTimestamp atomic.Int64
logger log.Logger
flushDeadline time.Duration
@ -529,7 +535,7 @@ func (t *QueueManager) AppendMetadata(ctx context.Context, metadata []scrape.Met
func (t *QueueManager) sendMetadataWithBackoff(ctx context.Context, metadata []prompb.MetricMetadata, pBuf *proto.Buffer) error {
// Build the WriteRequest with no samples.
req, _, err := buildWriteRequest(nil, metadata, pBuf, nil)
req, _, _, err := buildWriteRequest(t.logger, nil, metadata, pBuf, nil, nil)
if err != nil {
return err
}
@ -575,18 +581,65 @@ func (t *QueueManager) sendMetadataWithBackoff(ctx context.Context, metadata []p
return nil
}
func isSampleOld(baseTime time.Time, sampleAgeLimit time.Duration, ts int64) bool {
if sampleAgeLimit == 0 {
// If sampleAgeLimit is unset, then we never skip samples due to their age.
return false
}
limitTs := baseTime.Add(-sampleAgeLimit)
sampleTs := timestamp.Time(ts)
return sampleTs.Before(limitTs)
}
func isTimeSeriesOldFilter(metrics *queueManagerMetrics, baseTime time.Time, sampleAgeLimit time.Duration) func(ts prompb.TimeSeries) bool {
return func(ts prompb.TimeSeries) bool {
if sampleAgeLimit == 0 {
// If sampleAgeLimit is unset, then we never skip samples due to their age.
return false
}
switch {
// Only the first element should be set in the series, therefore we only check the first element.
case len(ts.Samples) > 0:
if isSampleOld(baseTime, sampleAgeLimit, ts.Samples[0].Timestamp) {
metrics.droppedSamplesTotal.WithLabelValues(reasonTooOld).Inc()
return true
}
case len(ts.Histograms) > 0:
if isSampleOld(baseTime, sampleAgeLimit, ts.Histograms[0].Timestamp) {
metrics.droppedHistogramsTotal.WithLabelValues(reasonTooOld).Inc()
return true
}
case len(ts.Exemplars) > 0:
if isSampleOld(baseTime, sampleAgeLimit, ts.Exemplars[0].Timestamp) {
metrics.droppedExemplarsTotal.WithLabelValues(reasonTooOld).Inc()
return true
}
default:
return false
}
return false
}
}
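For reference, a minimal, self-contained sketch of the age cutoff these hunks introduce (assuming millisecond Unix timestamps, as Prometheus uses; the helper below is a local stand-in, not the package function):
package main

import (
	"fmt"
	"time"
)

// isSampleOld mirrors the cutoff above: a sample is "old" when its
// timestamp falls before baseTime minus the configured age limit.
func isSampleOld(baseTime time.Time, sampleAgeLimit time.Duration, tsMillis int64) bool {
	if sampleAgeLimit == 0 {
		return false // unset limit: never drop by age
	}
	limitTs := baseTime.Add(-sampleAgeLimit)
	return time.UnixMilli(tsMillis).Before(limitTs)
}

func main() {
	now := time.Now()
	fmt.Println(isSampleOld(now, time.Minute, now.Add(-61*time.Second).UnixMilli())) // true: past the limit
	fmt.Println(isSampleOld(now, time.Minute, now.Add(-59*time.Second).UnixMilli())) // false: within the limit
}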
// Append queues a sample to be sent to the remote storage. Blocks until all samples are
// enqueued on their shards or a shutdown signal is received.
func (t *QueueManager) Append(samples []record.RefSample) bool {
currentTime := time.Now()
outer:
for _, s := range samples {
if isSampleOld(currentTime, time.Duration(t.cfg.SampleAgeLimit), s.T) {
t.metrics.droppedSamplesTotal.WithLabelValues(reasonTooOld).Inc()
continue
}
t.seriesMtx.Lock()
lbls, ok := t.seriesLabels[s.Ref]
if !ok {
t.metrics.droppedSamplesTotal.Inc()
t.dataDropped.incr(1)
if _, ok := t.droppedSeries[s.Ref]; !ok {
level.Info(t.logger).Log("msg", "Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref)
t.metrics.droppedSamplesTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc()
} else {
t.metrics.droppedSamplesTotal.WithLabelValues(reasonDroppedSeries).Inc()
}
t.seriesMtx.Unlock()
continue
@@ -629,17 +682,23 @@ func (t *QueueManager) AppendExemplars(exemplars []record.RefExemplar) bool {
if !t.sendExemplars {
return true
}
currentTime := time.Now()
outer:
for _, e := range exemplars {
if isSampleOld(currentTime, time.Duration(t.cfg.SampleAgeLimit), e.T) {
t.metrics.droppedExemplarsTotal.WithLabelValues(reasonTooOld).Inc()
continue
}
t.seriesMtx.Lock()
lbls, ok := t.seriesLabels[e.Ref]
if !ok {
t.metrics.droppedExemplarsTotal.Inc()
// Track dropped exemplars in the same EWMA for sharding calc.
t.dataDropped.incr(1)
if _, ok := t.droppedSeries[e.Ref]; !ok {
level.Info(t.logger).Log("msg", "Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref)
t.metrics.droppedExemplarsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc()
} else {
t.metrics.droppedExemplarsTotal.WithLabelValues(reasonDroppedSeries).Inc()
}
t.seriesMtx.Unlock()
continue
@@ -678,16 +737,22 @@ func (t *QueueManager) AppendHistograms(histograms []record.RefHistogramSample)
if !t.sendNativeHistograms {
return true
}
currentTime := time.Now()
outer:
for _, h := range histograms {
if isSampleOld(currentTime, time.Duration(t.cfg.SampleAgeLimit), h.T) {
t.metrics.droppedHistogramsTotal.WithLabelValues(reasonTooOld).Inc()
continue
}
t.seriesMtx.Lock()
lbls, ok := t.seriesLabels[h.Ref]
if !ok {
t.metrics.droppedHistogramsTotal.Inc()
t.dataDropped.incr(1)
if _, ok := t.droppedSeries[h.Ref]; !ok {
level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref)
t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc()
} else {
t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc()
}
t.seriesMtx.Unlock()
continue
@@ -725,16 +790,22 @@ func (t *QueueManager) AppendFloatHistograms(floatHistograms []record.RefFloatHi
if !t.sendNativeHistograms {
return true
}
currentTime := time.Now()
outer:
for _, h := range floatHistograms {
if isSampleOld(currentTime, time.Duration(t.cfg.SampleAgeLimit), h.T) {
t.metrics.droppedHistogramsTotal.WithLabelValues(reasonTooOld).Inc()
continue
}
t.seriesMtx.Lock()
lbls, ok := t.seriesLabels[h.Ref]
if !ok {
t.metrics.droppedHistogramsTotal.Inc()
t.dataDropped.incr(1)
if _, ok := t.droppedSeries[h.Ref]; !ok {
level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref)
t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc()
} else {
t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc()
}
t.seriesMtx.Unlock()
continue
@@ -1490,7 +1561,8 @@ func (s *shards) sendSamples(ctx context.Context, samples []prompb.TimeSeries, s
// sendSamples to the remote storage with backoff for recoverable errors.
func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.TimeSeries, sampleCount, exemplarCount, histogramCount int, pBuf *proto.Buffer, buf *[]byte) error {
// Build the WriteRequest with no metadata.
req, highest, err := buildWriteRequest(samples, nil, pBuf, *buf)
req, highest, lowest, err := buildWriteRequest(s.qm.logger, samples, nil, pBuf, *buf, nil)
s.qm.buildRequestLimitTimestamp.Store(lowest)
if err != nil {
// Failing to build the write request is non-recoverable, since it will
// only error if marshaling the proto to bytes fails.
@@ -1504,6 +1576,25 @@ func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.Ti
// without causing a memory leak, and it has the nice effect of not propagating any
// parameters for sendSamplesWithBackoff/3.
attemptStore := func(try int) error {
currentTime := time.Now()
lowest := s.qm.buildRequestLimitTimestamp.Load()
if isSampleOld(currentTime, time.Duration(s.qm.cfg.SampleAgeLimit), lowest) {
// This will filter out old samples during retries.
req, _, lowest, err := buildWriteRequest(
s.qm.logger,
samples,
nil,
pBuf,
*buf,
isTimeSeriesOldFilter(s.qm.metrics, currentTime, time.Duration(s.qm.cfg.SampleAgeLimit)),
)
s.qm.buildRequestLimitTimestamp.Store(lowest)
if err != nil {
return err
}
*buf = req
}
ctx, span := otel.Tracer("").Start(ctx, "Remote Send Batch")
defer span.End()
@@ -1608,9 +1699,27 @@ func sendWriteRequestWithBackoff(ctx context.Context, cfg config.QueueConfig, l
}
}
func buildWriteRequest(samples []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf []byte) ([]byte, int64, error) {
func buildTimeSeries(timeSeries []prompb.TimeSeries, filter func(prompb.TimeSeries) bool) (int64, int64, []prompb.TimeSeries, int, int, int) {
var highest int64
for _, ts := range samples {
var lowest int64
var droppedSamples, droppedExemplars, droppedHistograms int
keepIdx := 0
lowest = math.MaxInt64
for i, ts := range timeSeries {
if filter != nil && filter(ts) {
if len(ts.Samples) > 0 {
droppedSamples++
}
if len(ts.Exemplars) > 0 {
droppedExemplars++
}
if len(ts.Histograms) > 0 {
droppedHistograms++
}
continue
}
// At the moment we only ever append a TimeSeries with a single sample or exemplar in it.
if len(ts.Samples) > 0 && ts.Samples[0].Timestamp > highest {
highest = ts.Samples[0].Timestamp
@@ -1621,10 +1730,37 @@ func buildWriteRequest(samples []prompb.TimeSeries, metadata []prompb.MetricMeta
if len(ts.Histograms) > 0 && ts.Histograms[0].Timestamp > highest {
highest = ts.Histograms[0].Timestamp
}
// Get lowest timestamp
if len(ts.Samples) > 0 && ts.Samples[0].Timestamp < lowest {
lowest = ts.Samples[0].Timestamp
}
if len(ts.Exemplars) > 0 && ts.Exemplars[0].Timestamp < lowest {
lowest = ts.Exemplars[0].Timestamp
}
if len(ts.Histograms) > 0 && ts.Histograms[0].Timestamp < lowest {
lowest = ts.Histograms[0].Timestamp
}
// Move the current element to the write position and increment the write pointer
timeSeries[keepIdx] = timeSeries[i]
keepIdx++
}
timeSeries = timeSeries[:keepIdx]
return highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms
}
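The keepIdx loop above is the standard in-place filter idiom: surviving elements are compacted to the front of the slice and the tail is truncated, so no second allocation is needed. A generic sketch of the same idiom (illustrative only, not part of the diff):
package main

import "fmt"

// filterInPlace keeps the elements for which keep returns true, reusing
// the slice's backing array the way buildTimeSeries does with keepIdx.
func filterInPlace(xs []int64, keep func(int64) bool) []int64 {
	w := 0 // write position for the next kept element
	for _, x := range xs {
		if keep(x) {
			xs[w] = x
			w++
		}
	}
	return xs[:w]
}

func main() {
	ts := []int64{100, 5, 200, 7, 300}
	fmt.Println(filterInPlace(ts, func(t int64) bool { return t >= 100 })) // [100 200 300]
}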
func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf []byte, filter func(prompb.TimeSeries) bool) ([]byte, int64, int64, error) {
highest, lowest, timeSeries,
droppedSamples, droppedExemplars, droppedHistograms := buildTimeSeries(timeSeries, filter)
if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 {
level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms)
}
req := &prompb.WriteRequest{
Timeseries: samples,
Timeseries: timeSeries,
Metadata: metadata,
}
@@ -1635,7 +1771,7 @@ func buildWriteRequest(samples []prompb.TimeSeries, metadata []prompb.MetricMeta
}
err := pBuf.Marshal(req)
if err != nil {
return nil, highest, err
return nil, highest, lowest, err
}
// snappy uses len() to see if it needs to allocate a new slice. Make the
@@ -1644,5 +1780,5 @@ func buildWriteRequest(samples []prompb.TimeSeries, metadata []prompb.MetricMeta
buf = buf[0:cap(buf)]
}
compressed := snappy.Encode(buf, pBuf.Bytes())
return compressed, highest, nil
return compressed, highest, lowest, nil
}

View file

@@ -548,7 +548,7 @@ func TestShouldReshard(t *testing.T) {
func createTimeseries(numSamples, numSeries int, extraLabels ...labels.Label) ([]record.RefSample, []record.RefSeries) {
samples := make([]record.RefSample, 0, numSamples)
series := make([]record.RefSeries, 0, numSeries)
b := labels.ScratchBuilder{}
lb := labels.ScratchBuilder{}
for i := 0; i < numSeries; i++ {
name := fmt.Sprintf("test_metric_%d", i)
for j := 0; j < numSamples; j++ {
@@ -559,15 +559,15 @@ func createTimeseries(numSamples, numSeries int, extraLabels ...labels.Label) ([
})
}
// Create labels that are the name of the series plus any extra labels supplied.
b.Reset()
b.Add(labels.MetricName, name)
lb.Reset()
lb.Add(labels.MetricName, name)
for _, l := range extraLabels {
b.Add(l.Name, l.Value)
lb.Add(l.Name, l.Value)
}
b.Sort()
lb.Sort()
series = append(series, record.RefSeries{
Ref: chunks.HeadSeriesRef(i),
Labels: b.Labels(),
Labels: lb.Labels(),
})
}
return samples, series
@@ -1321,3 +1321,263 @@ func TestQueue_FlushAndShutdownDoesNotDeadlock(t *testing.T) {
t.FailNow()
}
}
func TestDropOldTimeSeries(t *testing.T) {
size := 10
nSeries := 6
nSamples := config.DefaultQueueConfig.Capacity * size
samples, newSamples, series := createTimeseriesWithOldSamples(nSamples, nSeries)
c := NewTestWriteClient()
c.expectSamples(newSamples, series)
cfg := config.DefaultQueueConfig
mcfg := config.DefaultMetadataConfig
cfg.MaxShards = 1
cfg.SampleAgeLimit = model.Duration(60 * time.Second)
dir := t.TempDir()
metrics := newQueueManagerMetrics(nil, "", "")
m := NewQueueManager(metrics, nil, nil, nil, dir, newEWMARate(ewmaWeight, shardUpdateDuration), cfg, mcfg, labels.EmptyLabels(), nil, c, defaultFlushDeadline, newPool(), newHighestTimestampMetric(), nil, false, false)
m.StoreSeries(series, 0)
m.Start()
defer m.Stop()
m.Append(samples)
c.waitForExpectedData(t)
}
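For context, enabling the limit from code looks roughly like the snippet below (a sketch; the import paths assume this repository's module layout, and the one-minute value is illustrative):
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/config"
)

func main() {
	// Drop WAL data older than one minute instead of retrying it forever.
	cfg := config.DefaultQueueConfig
	cfg.SampleAgeLimit = model.Duration(60 * time.Second)
	fmt.Println(time.Duration(cfg.SampleAgeLimit)) // 1m0s
}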
func TestIsSampleOld(t *testing.T) {
currentTime := time.Now()
require.True(t, isSampleOld(currentTime, 60*time.Second, timestamp.FromTime(currentTime.Add(-61*time.Second))))
require.False(t, isSampleOld(currentTime, 60*time.Second, timestamp.FromTime(currentTime.Add(-59*time.Second))))
}
func createTimeseriesWithOldSamples(numSamples, numSeries int, extraLabels ...labels.Label) ([]record.RefSample, []record.RefSample, []record.RefSeries) {
newSamples := make([]record.RefSample, 0, numSamples)
samples := make([]record.RefSample, 0, numSamples)
series := make([]record.RefSeries, 0, numSeries)
lb := labels.ScratchBuilder{}
for i := 0; i < numSeries; i++ {
name := fmt.Sprintf("test_metric_%d", i)
// We create half of the samples in the past.
past := timestamp.FromTime(time.Now().Add(-5 * time.Minute))
for j := 0; j < numSamples/2; j++ {
samples = append(samples, record.RefSample{
Ref: chunks.HeadSeriesRef(i),
T: past + int64(j),
V: float64(i),
})
}
for j := 0; j < numSamples/2; j++ {
sample := record.RefSample{
Ref: chunks.HeadSeriesRef(i),
T: int64(int(time.Now().UnixMilli()) + j),
V: float64(i),
}
samples = append(samples, sample)
newSamples = append(newSamples, sample)
}
// Create labels that are the name of the series plus any extra labels supplied.
lb.Reset()
lb.Add(labels.MetricName, name)
for _, l := range extraLabels {
lb.Add(l.Name, l.Value)
}
lb.Sort()
series = append(series, record.RefSeries{
Ref: chunks.HeadSeriesRef(i),
Labels: lb.Labels(),
})
}
return samples, newSamples, series
}
func filterTsLimit(limit int64, ts prompb.TimeSeries) bool {
return limit > ts.Samples[0].Timestamp
}
func TestBuildTimeSeries(t *testing.T) {
testCases := []struct {
name string
ts []prompb.TimeSeries
filter func(ts prompb.TimeSeries) bool
lowestTs int64
highestTs int64
droppedSamples int
responseLen int
}{
{
name: "No filter applied",
ts: []prompb.TimeSeries{
{
Samples: []prompb.Sample{
{
Timestamp: 1234567890,
Value: 1.23,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567891,
Value: 2.34,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567892,
Value: 3.34,
},
},
},
},
filter: nil,
responseLen: 3,
lowestTs: 1234567890,
highestTs: 1234567892,
},
{
name: "Filter applied, samples in order",
ts: []prompb.TimeSeries{
{
Samples: []prompb.Sample{
{
Timestamp: 1234567890,
Value: 1.23,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567891,
Value: 2.34,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567892,
Value: 3.45,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567893,
Value: 3.45,
},
},
},
},
filter: func(ts prompb.TimeSeries) bool { return filterTsLimit(1234567892, ts) },
responseLen: 2,
lowestTs: 1234567892,
highestTs: 1234567893,
droppedSamples: 2,
},
{
name: "Filter applied, samples out of order",
ts: []prompb.TimeSeries{
{
Samples: []prompb.Sample{
{
Timestamp: 1234567892,
Value: 3.45,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567890,
Value: 1.23,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567893,
Value: 3.45,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567891,
Value: 2.34,
},
},
},
},
filter: func(ts prompb.TimeSeries) bool { return filterTsLimit(1234567892, ts) },
responseLen: 2,
lowestTs: 1234567892,
highestTs: 1234567893,
droppedSamples: 2,
},
{
name: "Filter applied, samples not consecutive",
ts: []prompb.TimeSeries{
{
Samples: []prompb.Sample{
{
Timestamp: 1234567890,
Value: 1.23,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567892,
Value: 3.45,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567895,
Value: 6.78,
},
},
},
{
Samples: []prompb.Sample{
{
Timestamp: 1234567897,
Value: 6.78,
},
},
},
},
filter: func(ts prompb.TimeSeries) bool { return filterTsLimit(1234567895, ts) },
responseLen: 2,
lowestTs: 1234567895,
highestTs: 1234567897,
droppedSamples: 2,
},
}
// Run the test cases
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
highest, lowest, result, droppedSamples, _, _ := buildTimeSeries(tc.ts, tc.filter)
require.NotNil(t, result)
require.Len(t, result, tc.responseLen)
require.Equal(t, tc.highestTs, highest)
require.Equal(t, tc.lowestTs, lowest)
require.Equal(t, tc.droppedSamples, droppedSamples)
})
}
}

View file

@@ -68,7 +68,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
err = h.write(r.Context(), req)
switch {
case err == nil:
case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrOutOfBounds), errors.Is(err, storage.ErrDuplicateSampleForTimestamp):
case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrOutOfBounds), errors.Is(err, storage.ErrDuplicateSampleForTimestamp), errors.Is(err, storage.ErrTooOldSample):
// Indicate that an out-of-order sample is a bad request to prevent retries.
http.Error(w, err.Error(), http.StatusBadRequest)
return

View file

@@ -38,7 +38,7 @@ import (
)
func TestRemoteWriteHandler(t *testing.T) {
buf, _, err := buildWriteRequest(writeRequestFixture.Timeseries, nil, nil, nil)
buf, _, _, err := buildWriteRequest(nil, writeRequestFixture.Timeseries, nil, nil, nil, nil)
require.NoError(t, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
@@ -84,10 +84,10 @@ }
}
func TestOutOfOrderSample(t *testing.T) {
buf, _, err := buildWriteRequest([]prompb.TimeSeries{{
buf, _, _, err := buildWriteRequest(nil, []prompb.TimeSeries{{
Labels: []prompb.Label{{Name: "__name__", Value: "test_metric"}},
Samples: []prompb.Sample{{Value: 1, Timestamp: 0}},
}}, nil, nil, nil)
}}, nil, nil, nil, nil)
require.NoError(t, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
@@ -109,10 +109,10 @@ }
// don't fail on ingestion errors since the exemplar storage is
// still experimental.
func TestOutOfOrderExemplar(t *testing.T) {
buf, _, err := buildWriteRequest([]prompb.TimeSeries{{
buf, _, _, err := buildWriteRequest(nil, []prompb.TimeSeries{{
Labels: []prompb.Label{{Name: "__name__", Value: "test_metric"}},
Exemplars: []prompb.Exemplar{{Labels: []prompb.Label{{Name: "foo", Value: "bar"}}, Value: 1, Timestamp: 0}},
}}, nil, nil, nil)
}}, nil, nil, nil, nil)
require.NoError(t, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
@@ -132,10 +132,10 @@ }
}
func TestOutOfOrderHistogram(t *testing.T) {
buf, _, err := buildWriteRequest([]prompb.TimeSeries{{
buf, _, _, err := buildWriteRequest(nil, []prompb.TimeSeries{{
Labels: []prompb.Label{{Name: "__name__", Value: "test_metric"}},
Histograms: []prompb.Histogram{HistogramToHistogramProto(0, &testHistogram), FloatHistogramToHistogramProto(1, testHistogram.ToFloat(nil))},
}}, nil, nil, nil)
}}, nil, nil, nil, nil)
require.NoError(t, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
@@ -158,13 +158,13 @@ func BenchmarkRemoteWritehandler(b *testing.B) {
reqs := []*http.Request{}
for i := 0; i < b.N; i++ {
num := strings.Repeat(strconv.Itoa(i), 16)
buf, _, err := buildWriteRequest([]prompb.TimeSeries{{
buf, _, _, err := buildWriteRequest(nil, []prompb.TimeSeries{{
Labels: []prompb.Label{
{Name: "__name__", Value: "test_metric"},
{Name: "test_label_name_" + num, Value: labelValue + num},
},
Histograms: []prompb.Histogram{HistogramToHistogramProto(0, &testHistogram)},
}}, nil, nil, nil)
}}, nil, nil, nil, nil)
require.NoError(b, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
require.NoError(b, err)
@@ -182,7 +182,7 @@ }
}
func TestCommitErr(t *testing.T) {
buf, _, err := buildWriteRequest(writeRequestFixture.Timeseries, nil, nil, nil)
buf, _, _, err := buildWriteRequest(nil, writeRequestFixture.Timeseries, nil, nil, nil, nil)
require.NoError(t, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
@@ -219,7 +219,7 @@ func BenchmarkRemoteWriteOOOSamples(b *testing.B) {
handler := NewWriteHandler(log.NewNopLogger(), nil, db.Head())
buf, _, err := buildWriteRequest(genSeriesWithSample(1000, 200*time.Minute.Milliseconds()), nil, nil, nil)
buf, _, _, err := buildWriteRequest(nil, genSeriesWithSample(1000, 200*time.Minute.Milliseconds()), nil, nil, nil, nil)
require.NoError(b, err)
req, err := http.NewRequest("", "", bytes.NewReader(buf))
@@ -232,7 +232,7 @@ func BenchmarkRemoteWriteOOOSamples(b *testing.B) {
var bufRequests [][]byte
for i := 0; i < 100; i++ {
buf, _, err = buildWriteRequest(genSeriesWithSample(1000, int64(80+i)*time.Minute.Milliseconds()), nil, nil, nil)
buf, _, _, err = buildWriteRequest(nil, genSeriesWithSample(1000, int64(80+i)*time.Minute.Milliseconds()), nil, nil, nil, nil)
require.NoError(b, err)
bufRequests = append(bufRequests, buf)
}

View file

@@ -84,7 +84,7 @@ func TestDB_InvalidSeries(t *testing.T) {
})
}
func createTestAgentDB(t *testing.T, reg prometheus.Registerer, opts *Options) *DB {
func createTestAgentDB(t testing.TB, reg prometheus.Registerer, opts *Options) *DB {
t.Helper()
dbDir := t.TempDir()
@@ -878,3 +878,21 @@ func TestDBAllowOOOSamples(t *testing.T) {
require.Equal(t, float64(80), m.Metric[1].Counter.GetValue(), "agent wal mismatch of total appended histograms")
require.NoError(t, db.Close())
}
func BenchmarkCreateSeries(b *testing.B) {
s := createTestAgentDB(b, nil, DefaultOptions())
defer s.Close()
app := s.Appender(context.Background()).(*appender)
lbls := make([]labels.Labels, b.N)
for i, l := range labelsForTest("benchmark", b.N) {
lbls[i] = labels.New(l...)
}
b.ResetTimer()
for _, l := range lbls {
app.getOrCreate(l)
}
}

View file

@@ -45,14 +45,23 @@ func (m *memSeries) updateTimestamp(newTs int64) bool {
return false
}
// seriesHashmap is a simple hashmap for memSeries by their label set.
// It is built on top of a regular hashmap and holds a slice of series to
// resolve hash collisions. Its methods require the hash to be submitted
// seriesHashmap lets the agent find a memSeries by its label set, via a 64-bit hash.
// There is one map for the common case where the hash value is unique, and a
// second map for the case that two series have the same hash value.
// Each series is in only one of the maps. Its methods require the hash to be submitted
// with the label set to avoid re-computing hash throughout the code.
type seriesHashmap map[uint64][]*memSeries
type seriesHashmap struct {
unique map[uint64]*memSeries
conflicts map[uint64][]*memSeries
}
func (m seriesHashmap) Get(hash uint64, lset labels.Labels) *memSeries {
for _, s := range m[hash] {
func (m *seriesHashmap) Get(hash uint64, lset labels.Labels) *memSeries {
if s, found := m.unique[hash]; found {
if labels.Equal(s.lset, lset) {
return s
}
}
for _, s := range m.conflicts[hash] {
if labels.Equal(s.lset, lset) {
return s
}
@@ -60,28 +69,49 @@ func (m seriesHashmap) Get(hash uint64, lset labels.Labels) *memSeries {
return nil
}
func (m seriesHashmap) Set(hash uint64, s *memSeries) {
seriesSet := m[hash]
func (m *seriesHashmap) Set(hash uint64, s *memSeries) {
if existing, found := m.unique[hash]; !found || labels.Equal(existing.lset, s.lset) {
m.unique[hash] = s
return
}
if m.conflicts == nil {
m.conflicts = make(map[uint64][]*memSeries)
}
seriesSet := m.conflicts[hash]
for i, prev := range seriesSet {
if labels.Equal(prev.lset, s.lset) {
seriesSet[i] = s
return
}
}
m[hash] = append(seriesSet, s)
m.conflicts[hash] = append(seriesSet, s)
}
func (m seriesHashmap) Delete(hash uint64, ref chunks.HeadSeriesRef) {
func (m *seriesHashmap) Delete(hash uint64, ref chunks.HeadSeriesRef) {
var rem []*memSeries
for _, s := range m[hash] {
if s.ref != ref {
rem = append(rem, s)
unique, found := m.unique[hash]
switch {
case !found: // Supplied hash is not stored.
return
case unique.ref == ref:
conflicts := m.conflicts[hash]
if len(conflicts) == 0 { // Exactly one series with this hash was stored
delete(m.unique, hash)
return
}
m.unique[hash] = conflicts[0] // First remaining series goes in 'unique'.
rem = conflicts[1:] // Keep the rest.
default: // The series to delete is somewhere in 'conflicts'. Keep all the ones that don't match.
for _, s := range m.conflicts[hash] {
if s.ref != ref {
rem = append(rem, s)
}
}
}
if len(rem) == 0 {
delete(m, hash)
delete(m.conflicts, hash)
} else {
m[hash] = rem
m.conflicts[hash] = rem
}
}
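A toy illustration of the unique/conflicts split (a sketch only: strings stand in for label sets, and the caller supplies the hash, as in the real code):
package main

import "fmt"

// toyHashmap keeps the common case (one entry per hash) in 'unique' and
// spills genuine collisions into 'conflicts', mirroring seriesHashmap.
type toyHashmap struct {
	unique    map[uint64]string
	conflicts map[uint64][]string
}

func (m *toyHashmap) set(hash uint64, v string) {
	if existing, found := m.unique[hash]; !found || existing == v {
		m.unique[hash] = v
		return
	}
	if m.conflicts == nil {
		m.conflicts = make(map[uint64][]string) // allocated on demand, as above
	}
	m.conflicts[hash] = append(m.conflicts[hash], v)
}

func (m *toyHashmap) get(hash uint64, v string) bool {
	if s, found := m.unique[hash]; found && s == v {
		return true
	}
	for _, s := range m.conflicts[hash] {
		if s == v {
			return true
		}
	}
	return false
}

func main() {
	m := &toyHashmap{unique: make(map[uint64]string)}
	m.set(7, "series-a")
	m.set(7, "series-b") // same hash, different series: a collision
	fmt.Println(m.get(7, "series-a"), m.get(7, "series-b")) // true true
}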
@@ -117,7 +147,10 @@ func newStripeSeries(stripeSize int) *stripeSeries {
s.series[i] = map[chunks.HeadSeriesRef]*memSeries{}
}
for i := range s.hashes {
s.hashes[i] = seriesHashmap{}
s.hashes[i] = seriesHashmap{
unique: map[uint64]*memSeries{},
conflicts: nil, // Initialized on demand in Set().
}
}
for i := range s.exemplars {
s.exemplars[i] = map[chunks.HeadSeriesRef]*exemplar.Exemplar{}
@@ -136,40 +169,49 @@ func (s *stripeSeries) GC(mint int64) map[chunks.HeadSeriesRef]struct{} {
defer s.gcMut.Unlock()
deleted := map[chunks.HeadSeriesRef]struct{}{}
// For one series, truncate old chunks and check whether any chunks are left. If not, mark it as deleted and collect the ID.
check := func(hashLock int, hash uint64, series *memSeries) {
series.Lock()
// Any series that has received a write since mint is still alive.
if series.lastTs >= mint {
series.Unlock()
return
}
// The series is stale. We need to obtain a second lock for the
// ref if it's different than the hash lock.
refLock := int(series.ref) & (s.size - 1)
if hashLock != refLock {
s.locks[refLock].Lock()
}
deleted[series.ref] = struct{}{}
delete(s.series[refLock], series.ref)
s.hashes[hashLock].Delete(hash, series.ref)
// Since the series is gone, we'll also delete
// the latest stored exemplar.
delete(s.exemplars[refLock], series.ref)
if hashLock != refLock {
s.locks[refLock].Unlock()
}
series.Unlock()
}
for hashLock := 0; hashLock < s.size; hashLock++ {
s.locks[hashLock].Lock()
for hash, all := range s.hashes[hashLock] {
for hash, all := range s.hashes[hashLock].conflicts {
for _, series := range all {
series.Lock()
// Any series that has received a write since mint is still alive.
if series.lastTs >= mint {
series.Unlock()
continue
}
// The series is stale. We need to obtain a second lock for the
// ref if it's different than the hash lock.
refLock := int(series.ref) & (s.size - 1)
if hashLock != refLock {
s.locks[refLock].Lock()
}
deleted[series.ref] = struct{}{}
delete(s.series[refLock], series.ref)
s.hashes[hashLock].Delete(hash, series.ref)
// Since the series is gone, we'll also delete
// the latest stored exemplar.
delete(s.exemplars[refLock], series.ref)
if hashLock != refLock {
s.locks[refLock].Unlock()
}
series.Unlock()
check(hashLock, hash, series)
}
}
for hash, series := range s.hashes[hashLock].unique {
check(hashLock, hash, series)
}
s.locks[hashLock].Unlock()
}

View file

@@ -74,3 +74,63 @@ func TestNoDeadlock(t *testing.T) {
require.FailNow(t, "deadlock detected")
}
}
func labelsWithHashCollision() (labels.Labels, labels.Labels) {
// These two series have the same XXHash; thanks to https://github.com/pstibrany/labels_hash_collisions
ls1 := labels.FromStrings("__name__", "metric", "lbl1", "value", "lbl2", "l6CQ5y")
ls2 := labels.FromStrings("__name__", "metric", "lbl1", "value", "lbl2", "v7uDlF")
if ls1.Hash() != ls2.Hash() {
// These ones are the same when using -tags stringlabels
ls1 = labels.FromStrings("__name__", "metric", "lbl", "HFnEaGl")
ls2 = labels.FromStrings("__name__", "metric", "lbl", "RqcXatm")
}
if ls1.Hash() != ls2.Hash() {
panic("This code needs to be updated: find new labels with colliding hash values.")
}
return ls1, ls2
}
// stripeSeriesWithCollidingSeries returns a stripeSeries with two memSeries having the same, colliding, hash.
func stripeSeriesWithCollidingSeries(_ *testing.T) (*stripeSeries, *memSeries, *memSeries) {
lbls1, lbls2 := labelsWithHashCollision()
ms1 := memSeries{
lset: lbls1,
}
ms2 := memSeries{
lset: lbls2,
}
hash := lbls1.Hash()
s := newStripeSeries(1)
s.Set(hash, &ms1)
s.Set(hash, &ms2)
return s, &ms1, &ms2
}
func TestStripeSeries_Get(t *testing.T) {
s, ms1, ms2 := stripeSeriesWithCollidingSeries(t)
hash := ms1.lset.Hash()
// Verify that we can get both of the series despite the hash collision
got := s.GetByHash(hash, ms1.lset)
require.Same(t, ms1, got)
got = s.GetByHash(hash, ms2.lset)
require.Same(t, ms2, got)
}
func TestStripeSeries_gc(t *testing.T) {
s, ms1, ms2 := stripeSeriesWithCollidingSeries(t)
hash := ms1.lset.Hash()
s.GC(1)
// Verify that we can get neither ms1 nor ms2 after gc-ing corresponding series
got := s.GetByHash(hash, ms1.lset)
require.Nil(t, got)
got = s.GetByHash(hash, ms2.lset)
require.Nil(t, got)
}

View file

@@ -235,6 +235,13 @@ func TestLabelValuesWithMatchers(t *testing.T) {
require.NoError(t, err)
defer func() { require.NoError(t, indexReader.Close()) }()
var uniqueWithout30s []string
for i := 0; i < 100; i++ {
if i/10 != 3 {
uniqueWithout30s = append(uniqueWithout30s, fmt.Sprintf("value%d", i))
}
}
sort.Strings(uniqueWithout30s)
testCases := []struct {
name string
labelName string
@@ -261,6 +268,14 @@
labelName: "tens",
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotEqual, "unique", "")},
expectedValues: []string{"value0", "value1", "value2", "value3", "value4", "value5", "value6", "value7", "value8", "value9"},
}, {
name: "get unique IDs based on tens not being equal to a certain value, while not empty",
labelName: "unique",
matchers: []*labels.Matcher{
labels.MustNewMatcher(labels.MatchNotEqual, "tens", "value3"),
labels.MustNewMatcher(labels.MatchNotEqual, "tens", ""),
},
expectedValues: uniqueWithout30s,
},
}

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows
// +build !windows
package chunks

View file

@@ -75,13 +75,15 @@ type Compactor interface {
// LeveledCompactor implements the Compactor interface.
type LeveledCompactor struct {
metrics *CompactorMetrics
logger log.Logger
ranges []int64
chunkPool chunkenc.Pool
ctx context.Context
maxBlockChunkSegmentSize int64
mergeFunc storage.VerticalChunkSeriesMergeFunc
metrics *CompactorMetrics
logger log.Logger
ranges []int64
chunkPool chunkenc.Pool
ctx context.Context
maxBlockChunkSegmentSize int64
mergeFunc storage.VerticalChunkSeriesMergeFunc
postingsEncoder index.PostingsEncoder
enableOverlappingCompaction bool
}
type CompactorMetrics struct {
@@ -144,12 +146,35 @@ func newCompactorMetrics(r prometheus.Registerer) *CompactorMetrics {
return m
}
// NewLeveledCompactor returns a LeveledCompactor.
func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) {
return NewLeveledCompactorWithChunkSize(ctx, r, l, ranges, pool, chunks.DefaultChunkSegmentSize, mergeFunc)
type LeveledCompactorOptions struct {
// PE specifies the postings encoder. It is called when the compactor is writing out the postings for a label name/value pair during compaction.
// If it is nil then the default encoder is used. At the moment that is the "raw" encoder. See index.EncodePostingsRaw for more.
PE index.PostingsEncoder
// MaxBlockChunkSegmentSize is the max block chunk segment size. If it is 0 then the default chunks.DefaultChunkSegmentSize is used.
MaxBlockChunkSegmentSize int64
// MergeFunc is used for merging series together in vertical compaction. By default storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) is used.
MergeFunc storage.VerticalChunkSeriesMergeFunc
// EnableOverlappingCompaction enables compaction of overlapping blocks. In Prometheus it is always enabled.
// It is useful for downstream projects like Mimir, Cortex, and Thanos, which have a separate component that does compaction.
EnableOverlappingCompaction bool
}
func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) {
return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{
MaxBlockChunkSegmentSize: maxBlockChunkSegmentSize,
MergeFunc: mergeFunc,
EnableOverlappingCompaction: true,
})
}
func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) {
return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{
MergeFunc: mergeFunc,
EnableOverlappingCompaction: true,
})
}
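A hypothetical caller-side sketch of the new options-based constructor (import paths assume this repository's module layout; the registerer, ranges, and option values are illustrative):
package main

import (
	"context"

	"github.com/go-kit/log"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/chunkenc"
	"github.com/prometheus/prometheus/tsdb/index"
)

func main() {
	// A downstream project can keep overlapping blocks for a separate
	// compaction component while still choosing the postings encoder.
	compactor, err := tsdb.NewLeveledCompactorWithOptions(
		context.Background(), nil, log.NewNopLogger(),
		tsdb.ExponentialBlockRanges(tsdb.DefaultBlockDuration, 3, 5),
		chunkenc.NewPool(),
		tsdb.LeveledCompactorOptions{
			PE:                          index.EncodePostingsRaw,
			EnableOverlappingCompaction: false,
		},
	)
	if err != nil {
		panic(err)
	}
	_ = compactor
}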
func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) {
if len(ranges) == 0 {
return nil, fmt.Errorf("at least one range must be provided")
}
@@ -159,17 +184,28 @@ func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Register
if l == nil {
l = log.NewNopLogger()
}
mergeFunc := opts.MergeFunc
if mergeFunc == nil {
mergeFunc = storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge)
}
maxBlockChunkSegmentSize := opts.MaxBlockChunkSegmentSize
if maxBlockChunkSegmentSize == 0 {
maxBlockChunkSegmentSize = chunks.DefaultChunkSegmentSize
}
pe := opts.PE
if pe == nil {
pe = index.EncodePostingsRaw
}
return &LeveledCompactor{
ranges: ranges,
chunkPool: pool,
logger: l,
metrics: newCompactorMetrics(r),
ctx: ctx,
maxBlockChunkSegmentSize: maxBlockChunkSegmentSize,
mergeFunc: mergeFunc,
ranges: ranges,
chunkPool: pool,
logger: l,
metrics: newCompactorMetrics(r),
ctx: ctx,
maxBlockChunkSegmentSize: maxBlockChunkSegmentSize,
mergeFunc: mergeFunc,
postingsEncoder: pe,
enableOverlappingCompaction: opts.EnableOverlappingCompaction,
}, nil
}
@@ -288,6 +324,9 @@ func (c *LeveledCompactor) selectDirs(ds []dirMeta) []dirMeta {
// selectOverlappingDirs returns all dirs with overlapping time ranges.
// It expects sorted input by mint and returns the overlapping dirs in the same order as received.
func (c *LeveledCompactor) selectOverlappingDirs(ds []dirMeta) []string {
if !c.enableOverlappingCompaction {
return nil
}
if len(ds) < 2 {
return nil
}
@@ -599,7 +638,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl
}
}
indexw, err := index.NewWriter(c.ctx, filepath.Join(tmp, indexFilename))
indexw, err := index.NewWriterWithEncoder(c.ctx, filepath.Join(tmp, indexFilename), c.postingsEncoder)
if err != nil {
return fmt.Errorf("open index writer: %w", err)
}

View file

@@ -70,19 +70,20 @@ var ErrNotReady = errors.New("TSDB not ready")
// millisecond precision timestamps.
func DefaultOptions() *Options {
return &Options{
WALSegmentSize: wlog.DefaultSegmentSize,
MaxBlockChunkSegmentSize: chunks.DefaultChunkSegmentSize,
RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
MinBlockDuration: DefaultBlockDuration,
MaxBlockDuration: DefaultBlockDuration,
NoLockfile: false,
SamplesPerChunk: DefaultSamplesPerChunk,
WALCompression: wlog.CompressionNone,
StripeSize: DefaultStripeSize,
HeadChunksWriteBufferSize: chunks.DefaultWriteBufferSize,
IsolationDisabled: defaultIsolationDisabled,
HeadChunksWriteQueueSize: chunks.DefaultWriteQueueSize,
OutOfOrderCapMax: DefaultOutOfOrderCapMax,
WALSegmentSize: wlog.DefaultSegmentSize,
MaxBlockChunkSegmentSize: chunks.DefaultChunkSegmentSize,
RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
MinBlockDuration: DefaultBlockDuration,
MaxBlockDuration: DefaultBlockDuration,
NoLockfile: false,
SamplesPerChunk: DefaultSamplesPerChunk,
WALCompression: wlog.CompressionNone,
StripeSize: DefaultStripeSize,
HeadChunksWriteBufferSize: chunks.DefaultWriteBufferSize,
IsolationDisabled: defaultIsolationDisabled,
HeadChunksWriteQueueSize: chunks.DefaultWriteQueueSize,
OutOfOrderCapMax: DefaultOutOfOrderCapMax,
EnableOverlappingCompaction: true,
}
}
@@ -177,6 +178,14 @@ type Options struct {
// OutOfOrderCapMax is the maximum capacity for OOO chunks (in samples).
// If it is <=0, the default value is assumed.
OutOfOrderCapMax int64
// Compaction of overlapping blocks is allowed if EnableOverlappingCompaction is true.
// This flag exists because various users of the TSDB do not want
// vertical compaction to happen at ingest time. Instead, they would rather keep
// overlapping blocks and let another component do the overlapping compaction later.
// For Prometheus, this will always be true.
EnableOverlappingCompaction bool
}
type BlocksToDeleteFunc func(blocks []*Block) map[ulid.ULID]struct{}
@@ -442,8 +451,7 @@ func (db *DBReadOnly) FlushWAL(dir string) (returnErr error) {
nil,
db.logger,
ExponentialBlockRanges(DefaultOptions().MinBlockDuration, 3, 5),
chunkenc.NewPool(),
nil,
chunkenc.NewPool(), nil,
)
if err != nil {
return fmt.Errorf("create leveled compactor: %w", err)
@@ -817,7 +825,10 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs
}
ctx, cancel := context.WithCancel(context.Background())
db.compactor, err = NewLeveledCompactorWithChunkSize(ctx, r, l, rngs, db.chunkPool, opts.MaxBlockChunkSegmentSize, nil)
db.compactor, err = NewLeveledCompactorWithOptions(ctx, r, l, rngs, db.chunkPool, LeveledCompactorOptions{
MaxBlockChunkSegmentSize: opts.MaxBlockChunkSegmentSize,
EnableOverlappingCompaction: opts.EnableOverlappingCompaction,
})
if err != nil {
cancel()
return nil, fmt.Errorf("create leveled compactor: %w", err)

View file

@@ -13,7 +13,6 @@
// limitations under the License.
//go:build !windows
// +build !windows
package fileutil

View file

@@ -13,7 +13,6 @@
// limitations under the License.
//go:build windows
// +build windows
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build js
// +build js
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build solaris
// +build solaris
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
// +build darwin dragonfly freebsd linux netbsd openbsd
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build windows
// +build windows
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build windows
// +build windows
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build windows
// +build windows
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build js
// +build js
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows && !plan9 && !js
// +build !windows,!plan9,!js
package fileutil

View file

@@ -13,7 +13,6 @@
// limitations under the License.
//go:build !linux && !darwin
// +build !linux,!darwin
package fileutil

View file

@@ -13,7 +13,6 @@
// limitations under the License.
//go:build !linux && !darwin
// +build !linux,!darwin
package fileutil

View file

@@ -13,7 +13,6 @@
// limitations under the License.
//go:build darwin
// +build darwin
package fileutil

View file

@@ -13,7 +13,6 @@
// limitations under the License.
//go:build linux
// +build linux
package fileutil

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build go1.12
// +build go1.12
// Package goversion enforces the go version supported by the tsdb module.
package goversion

View file

@@ -110,6 +110,8 @@ type symbolCacheEntry struct {
lastValue string
}
type PostingsEncoder func(*encoding.Encbuf, []uint32) error
// Writer implements the IndexWriter interface for the standard
// serialization format.
type Writer struct {
@@ -148,6 +150,8 @@ type Writer struct {
crc32 hash.Hash
Version int
postingsEncoder PostingsEncoder
}
// TOC represents index Table Of Content that states where each section of index starts.
@@ -186,7 +190,8 @@ func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) {
}
// NewWriter returns a new Writer to the given filename. It serializes data in format version 2.
func NewWriter(ctx context.Context, fn string) (*Writer, error) {
// It uses the given encoder to encode each postings list.
func NewWriterWithEncoder(ctx context.Context, fn string, encoder PostingsEncoder) (*Writer, error) {
dir := filepath.Dir(fn)
df, err := fileutil.OpenDir(dir)
@@ -229,9 +234,10 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) {
buf1: encoding.Encbuf{B: make([]byte, 0, 1<<22)},
buf2: encoding.Encbuf{B: make([]byte, 0, 1<<22)},
symbolCache: make(map[string]symbolCacheEntry, 1<<8),
labelNames: make(map[string]uint64, 1<<8),
crc32: newCRC32(),
symbolCache: make(map[string]symbolCacheEntry, 1<<8),
labelNames: make(map[string]uint64, 1<<8),
crc32: newCRC32(),
postingsEncoder: encoder,
}
if err := iw.writeMeta(); err != nil {
return nil, err
@@ -239,6 +245,12 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) {
return iw, nil
}
// NewWriter creates a new index writer using the default encoder. See
// NewWriterWithEncoder.
func NewWriter(ctx context.Context, fn string) (*Writer, error) {
return NewWriterWithEncoder(ctx, fn, EncodePostingsRaw)
}
func (w *Writer) write(bufs ...[]byte) error {
return w.f.Write(bufs...)
}
@@ -941,6 +953,20 @@ func (w *Writer) writePostingsToTmpFiles() error {
return nil
}
// EncodePostingsRaw uses the "basic" postings list encoding format with no compression:
// <BE uint32 len X><BE uint32 0><BE uint32 1>...<BE uint32 X-1>.
func EncodePostingsRaw(e *encoding.Encbuf, offs []uint32) error {
e.PutBE32int(len(offs))
for _, off := range offs {
if off > (1<<32)-1 {
return fmt.Errorf("series offset %d exceeds 4 bytes", off)
}
e.PutBE32(off)
}
return nil
}
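The raw layout is easy to reproduce by hand with encoding/binary; a standalone sketch of the format documented above (illustrative only, not the package code):
package main

import (
	"encoding/binary"
	"fmt"
)

// encodePostingsRaw writes a big-endian uint32 count followed by each
// offset as a big-endian uint32, matching the format described above.
func encodePostingsRaw(offs []uint32) []byte {
	buf := make([]byte, 0, 4+4*len(offs))
	buf = binary.BigEndian.AppendUint32(buf, uint32(len(offs)))
	for _, off := range offs {
		buf = binary.BigEndian.AppendUint32(buf, off)
	}
	return buf
}

func main() {
	fmt.Printf("%x\n", encodePostingsRaw([]uint32{7, 42})) // 00000002000000070000002a
}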
func (w *Writer) writePosting(name, value string, offs []uint32) error {
// Align beginning to 4 bytes for more efficient postings list scans.
if err := w.fP.AddPadding(4); err != nil {
@@ -959,13 +985,8 @@ func (w *Writer) writePosting(name, value string, offs []uint32) error {
w.cntPO++
w.buf1.Reset()
w.buf1.PutBE32int(len(offs))
for _, off := range offs {
if off > (1<<32)-1 {
return fmt.Errorf("series offset %d exceeds 4 bytes", off)
}
w.buf1.PutBE32(off)
if err := w.postingsEncoder(&w.buf1, offs); err != nil {
return err
}
w.buf2.Reset()

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows
// +build !windows
package tsdb

View file

@@ -107,6 +107,8 @@ var (
BadBucketLabelWarning = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel)
MixedFloatsHistogramsWarning = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning)
MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning)
NativeHistogramNotCounterWarning = fmt.Errorf("%w: this native histogram metric is not a counter:", PromQLWarning)
NativeHistogramNotGaugeWarning = fmt.Errorf("%w: this native histogram metric is not a gauge:", PromQLWarning)
PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count/_bucket:", PromQLInfo)
HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name", PromQLInfo)
@@ -166,6 +168,24 @@ func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.Posi
}
}
// NewNativeHistogramNotCounterWarning is used when histogramRate is called
// with isCounter set to true on a gauge histogram.
func NewNativeHistogramNotCounterWarning(metricName string, pos posrange.PositionRange) error {
return annoErr{
PositionRange: pos,
Err: fmt.Errorf("%w %q", NativeHistogramNotCounterWarning, metricName),
}
}
// NewNativeHistogramNotGaugeWarning is used when histogramRate is called
// with isCounter set to false on a counter histogram.
func NewNativeHistogramNotGaugeWarning(metricName string, pos posrange.PositionRange) error {
return annoErr{
PositionRange: pos,
Err: fmt.Errorf("%w %q", NativeHistogramNotGaugeWarning, metricName),
}
}
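These constructors follow the package's sentinel-wrapping pattern: the category is a sentinel error wrapped with %w, so callers can test it with errors.Is while the message carries the metric name. A toy version with local names (a sketch, not the package API):
package main

import (
	"errors"
	"fmt"
)

// Wrapping the sentinel with %w keeps the warning category testable via
// errors.Is while the formatted message carries the metric name.
var warnNotCounter = errors.New("this native histogram metric is not a counter:")

func newNotCounterWarning(metricName string) error {
	return fmt.Errorf("%w %q", warnNotCounter, metricName)
}

func main() {
	err := newNotCounterWarning("my_histogram")
	fmt.Println(errors.Is(err, warnNotCounter)) // true
	fmt.Println(err)
}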
// NewPossibleNonCounterInfo is used when a named counter metric with only float samples does not
// have the suffixes _total, _sum, _count, or _bucket.
func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) error {

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows
// +build !windows
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build windows
// +build windows
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build openbsd || windows || netbsd || solaris
// +build openbsd windows netbsd solaris
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows && !openbsd && !netbsd && !solaris && !386
// +build !windows,!openbsd,!netbsd,!solaris,!386
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build linux && 386
// +build linux,386
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build (386 && darwin) || (386 && freebsd)
// +build 386,darwin 386,freebsd
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !linux
// +build !linux
package runtime

View file

@@ -12,7 +12,6 @@
// limitations under the License.
//go:build !windows && !openbsd
// +build !windows,!openbsd
package runtime

Some files were not shown because too many files have changed in this diff.