From 91de19fbeffe19429f10c501a2f4364ae01711ea Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 7 Oct 2024 13:50:01 +0100 Subject: [PATCH 01/11] [BUGFIX] TSDB: Don't read in-order chunks from before head MinTime Because we are reimplementing the `IndexReader` to fetch in-order and out-of-order chunks together, we must reproduce the behaviour of `Head.indexRange()`, which floors the minimum time queried at `head.MinTime()`. Signed-off-by: Bryan Boreham --- tsdb/db.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsdb/db.go b/tsdb/db.go index 3b1dee27d4..ce43942554 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -2060,7 +2060,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.Querier - inoMint := mint + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) var err error @@ -2138,7 +2138,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.ChunkQuerier - inoMint := mint + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt) From 96236815435a8b99986f2dbbd5d34592e81662f6 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 8 Oct 2024 17:27:10 +0100 Subject: [PATCH 02/11] Prepare 3.0.0-beta.1 Signed-off-by: Bryan Boreham --- CHANGELOG.md | 17 +++++++++++++++++ VERSION | 2 +- web/ui/mantine-ui/package.json | 4 ++-- web/ui/module/codemirror-promql/package.json | 4 ++-- web/ui/module/lezer-promql/package.json | 2 +- web/ui/package-lock.json | 14 +++++++------- web/ui/package.json | 2 +- 7 files changed, 31 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8874d254f5..a6b00997db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,26 @@ ## unreleased +## 3.0.0-beta.1 / 2024-09-25 + +* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 * [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930 * [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894 +* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160 +* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994 +* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 +* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 +* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 +* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 +* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975 +* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932 +* [PERF] TSDB: Grow postings by doubling. #14721 +* [PERF] Relabeling: Optimize adding a constant label pair. #12180 * [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910 +* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854 +* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884 +* [BUGFIX] PromQL: Unary negation of native histograms. #14821 +* [BUGFIX] Autoreload: Reload invalid yaml files. #14947 ## 3.0.0-beta.0 / 2024-09-05 diff --git a/VERSION b/VERSION index 7e9b524994..1941d52827 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0-beta.0 +3.0.0-beta.1 diff --git a/web/ui/mantine-ui/package.json b/web/ui/mantine-ui/package.json index b10520e825..4e886e482f 100644 --- a/web/ui/mantine-ui/package.json +++ b/web/ui/mantine-ui/package.json @@ -1,7 +1,7 @@ { "name": "@prometheus-io/mantine-ui", "private": true, - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "type": "module", "scripts": { "start": "vite", @@ -28,7 +28,7 @@ "@microsoft/fetch-event-source": "^2.0.1", "@nexucis/fuzzy": "^0.5.1", "@nexucis/kvsearch": "^0.9.1", - "@prometheus-io/codemirror-promql": "0.300.0-beta.0", + "@prometheus-io/codemirror-promql": "0.300.0-beta.1", "@reduxjs/toolkit": "^2.2.1", "@tabler/icons-react": "^3.19.0", "@tanstack/react-query": "^5.59.0", diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index 3e459e83a0..bcc5464795 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.300.0-beta.0", + "@prometheus-io/lezer-promql": "0.300.0-beta.1", "lru-cache": "^11.0.1" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 125c37b96a..3eadc3a536 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 2a37ff13ce..10b14a8bc7 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "prometheus-io", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "prometheus-io", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "workspaces": [ "mantine-ui", "module/*" @@ -24,7 +24,7 @@ }, "mantine-ui": { "name": "@prometheus-io/mantine-ui", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "dependencies": { "@codemirror/autocomplete": "^6.18.1", "@codemirror/language": "^6.10.2", @@ -42,7 +42,7 @@ "@microsoft/fetch-event-source": "^2.0.1", "@nexucis/fuzzy": "^0.5.1", "@nexucis/kvsearch": "^0.9.1", - "@prometheus-io/codemirror-promql": "0.300.0-beta.0", + "@prometheus-io/codemirror-promql": "0.300.0-beta.1", "@reduxjs/toolkit": "^2.2.1", "@tabler/icons-react": "^3.19.0", "@tanstack/react-query": "^5.59.0", @@ -155,10 +155,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.300.0-beta.0", + "@prometheus-io/lezer-promql": "0.300.0-beta.1", "lru-cache": "^11.0.1" }, "devDependencies": { @@ -188,7 +188,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.7.1", diff --git a/web/ui/package.json b/web/ui/package.json index 8b05c690aa..6d6e280108 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -1,7 +1,7 @@ { "name": "prometheus-io", "description": "Monorepo for the Prometheus UI", - "version": "0.300.0-beta.0", + "version": "0.300.0-beta.1", "private": true, "scripts": { "build": "bash build_ui.sh --all", From eb90f238b719ee9db48adde62a89e3823d369397 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 8 Oct 2024 17:29:08 +0100 Subject: [PATCH 03/11] Update CHANGELOG to commit 90f7832447d Signed-off-by: Bryan Boreham --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6b00997db..476ff61f0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,25 +2,31 @@ ## unreleased -## 3.0.0-beta.1 / 2024-09-25 +## 3.0.0-beta.1 / 2024-10-09 * [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 * [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930 * [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894 * [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160 -* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994 +* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906 +* [CHANGE]: Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion' is enabled. #14738 +* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096 +* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082 * [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 * [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 * [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 * [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 +* [ENHANCEMENT] Consul SD: Support catalog filters. #11224 * [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975 * [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932 * [PERF] TSDB: Grow postings by doubling. #14721 * [PERF] Relabeling: Optimize adding a constant label pair. #12180 +* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095 * [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910 * [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854 * [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884 * [BUGFIX] PromQL: Unary negation of native histograms. #14821 +* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025 * [BUGFIX] Autoreload: Reload invalid yaml files. #14947 ## 3.0.0-beta.0 / 2024-09-05 From eaf98049e503929dc4aa906023986b4d8a8e820f Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 8 Oct 2024 18:15:55 +0100 Subject: [PATCH 04/11] Few more 3.0-beta.1 CHANGELOGs Signed-off-by: Bryan Boreham --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 476ff61f0e..a8e7621186 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,15 @@ * [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894 * [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160 * [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906 -* [CHANGE]: Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion' is enabled. #14738 +* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion' is enabled. #14738 +* [CHANGE] Float literals and time durations being the same is now a stable fetaure. #15111 * [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096 * [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082 * [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 * [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 * [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 * [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 +* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929 * [ENHANCEMENT] Consul SD: Support catalog filters. #11224 * [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975 * [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932 From 78de9bd10fc8c454840e71147f4006d2973ba1c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 14 Oct 2024 16:38:56 +0200 Subject: [PATCH 05/11] convertnhcb: use CutSuffix instead of regex replace for histogram name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is much quicker. Signed-off-by: György Krajcsovits --- util/convertnhcb/convertnhcb.go | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go index acbb4bec85..5e08422aa0 100644 --- a/util/convertnhcb/convertnhcb.go +++ b/util/convertnhcb/convertnhcb.go @@ -19,30 +19,10 @@ import ( "sort" "strings" - "github.com/grafana/regexp" - "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" ) -var histogramNameSuffixReplacements = []struct { - pattern *regexp.Regexp - repl string -}{ - { - pattern: regexp.MustCompile(`_bucket$`), - repl: "", - }, - { - pattern: regexp.MustCompile(`_sum$`), - repl: "", - }, - { - pattern: regexp.MustCompile(`_count$`), - repl: "", - }, -} - // TempHistogram is used to collect information about classic histogram // samples incrementally before creating a histogram.Histogram or // histogram.FloatHistogram based on the values collected. @@ -176,9 +156,18 @@ func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { Labels() } +// GetHistogramMetricBaseName removes the suffixes _bucket, _sum, _count from +// the metric name. We specifically do not remove the _created suffix as that +// should be removed by the caller. func GetHistogramMetricBaseName(s string) string { - for _, rep := range histogramNameSuffixReplacements { - s = rep.pattern.ReplaceAllString(s, rep.repl) + if r, ok := strings.CutSuffix(s, "_bucket"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_sum"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_count"); ok { + return r } return s } From d4b1f9eb338c7ff54787674cad8d2fefa7eb0404 Mon Sep 17 00:00:00 2001 From: Neeraj Gartia <80708727+NeerajGartia21@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:14:36 +0530 Subject: [PATCH 06/11] Corrects the behaviour of binary opperators between histogram and float (#14726) promql: corrects binary operators functioning for mixed sample with histogram and float For invalid pairings of sample types, an annotation is added now. Signed-off-by: Neeraj Gartia --------- Signed-off-by: Neeraj Gartia --- model/histogram/float_histogram.go | 8 ++++ model/histogram/float_histogram_test.go | 12 ++--- promql/engine.go | 45 ++++++++++++++----- .../testdata/native_histograms.test | 30 +++++++++++-- util/annotations/annotations.go | 10 +++++ 5 files changed, 83 insertions(+), 22 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index b0deb6cc4d..a6ad47acd3 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -304,6 +304,14 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { h.ZeroCount /= scalar h.Count /= scalar h.Sum /= scalar + // Division by zero removes all buckets. + if scalar == 0 { + h.PositiveBuckets = nil + h.NegativeBuckets = nil + h.PositiveSpans = nil + h.NegativeSpans = nil + return h + } for i := range h.PositiveBuckets { h.PositiveBuckets[i] /= scalar } diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index cf370a313e..34988e9d39 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -399,14 +399,10 @@ func TestFloatHistogramDiv(t *testing.T) { }, 0, &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: math.Inf(1), - Count: math.Inf(1), - Sum: math.Inf(1), - PositiveSpans: []Span{{-2, 1}, {2, 3}}, - PositiveBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, - NegativeSpans: []Span{{3, 2}, {3, 2}}, - NegativeBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, + ZeroThreshold: 0.01, + Count: math.Inf(1), + Sum: math.Inf(1), + ZeroCount: math.Inf(1), }, }, { diff --git a/promql/engine.go b/promql/engine.go index e1beb2d910..60b4b81384 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -43,6 +43,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/annotations" @@ -1929,20 +1930,20 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, }, e.LHS, e.RHS) default: return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh) + vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case lt == parser.ValueTypeVector && rt == parser.ValueTypeScalar: return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh) + vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) case lt == parser.ValueTypeScalar && rt == parser.ValueTypeVector: return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh) + vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } @@ -2534,7 +2535,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi } // VectorBinop evaluates a binary operation between two Vectors, excluding set operators. -func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { if matching.Card == parser.CardManyToMany { panic("many-to-many only allowed for set operators") } @@ -2608,7 +2609,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * fl, fr = fr, fl hl, hr = hr, hl } - floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr) + floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr, pos) if err != nil { lastErr = err } @@ -2717,7 +2718,7 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V } // VectorscalarBinop evaluates a binary operation between a Vector and a Scalar. -func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { var lastErr error for _, lhsSample := range lhs { lf, rf := lhsSample.F, rhs.V @@ -2729,7 +2730,7 @@ func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scala lf, rf = rf, lf lh, rh = rh, lh } - float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh) + float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh, pos) if err != nil { lastErr = err } @@ -2796,7 +2797,7 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 { } // vectorElemBinop evaluates a binary operation between two Vector elements. -func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram) (float64, *histogram.FloatHistogram, bool, error) { +func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram, pos posrange.PositionRange) (float64, *histogram.FloatHistogram, bool, error) { switch op { case parser.ADD: if hlhs != nil && hrhs != nil { @@ -2806,7 +2807,13 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram } return 0, res.Compact(0), true, nil } - return lhs + rhs, nil, true, nil + if hlhs == nil && hrhs == nil { + return lhs + rhs, nil, true, nil + } + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "+", "float", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "+", "histogram", pos) case parser.SUB: if hlhs != nil && hrhs != nil { res, err := hlhs.Copy().Sub(hrhs) @@ -2815,7 +2822,13 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram } return 0, res.Compact(0), true, nil } - return lhs - rhs, nil, true, nil + if hlhs == nil && hrhs == nil { + return lhs - rhs, nil, true, nil + } + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "-", "float", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "-", "histogram", pos) case parser.MUL: if hlhs != nil && hrhs == nil { return 0, hlhs.Copy().Mul(rhs), true, nil @@ -2823,11 +2836,20 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram if hlhs == nil && hrhs != nil { return 0, hrhs.Copy().Mul(lhs), true, nil } + if hlhs != nil && hrhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "*", "histogram", pos) + } return lhs * rhs, nil, true, nil case parser.DIV: if hlhs != nil && hrhs == nil { return 0, hlhs.Copy().Div(rhs), true, nil } + if hrhs != nil { + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "/", "histogram", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "/", "histogram", pos) + } return lhs / rhs, nil, true, nil case parser.POW: return math.Pow(lhs, rhs), nil, true, nil @@ -3365,6 +3387,9 @@ func handleVectorBinopError(err error, e *parser.BinaryExpr) annotations.Annotat } metricName := "" pos := e.PositionRange() + if errors.Is(err, annotations.PromQLInfo) || errors.Is(err, annotations.PromQLWarning) { + return annotations.New().Add(err) + } if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos)) } else if errors.Is(err, histogram.ErrHistogramsIncompatibleBounds) { diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index ca4993660f..8c5814ae8a 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -948,18 +948,40 @@ eval instant at 10m histogram_mul_div*float_series_0 eval instant at 10m float_series_0*histogram_mul_div {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} -# TODO: (NeerajGartia21) remove all the histogram buckets in case of division with zero. See: https://github.com/prometheus/prometheus/issues/13934 eval instant at 10m histogram_mul_div/0 - {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}} eval instant at 10m histogram_mul_div/float_series_0 - {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}} eval instant at 10m histogram_mul_div*0/0 - {} {{schema:0 count:NaN sum:NaN z_bucket:NaN z_bucket_w:0.001 buckets:[NaN NaN NaN] n_buckets:[NaN NaN NaN]}} + {} {{schema:0 count:NaN sum:NaN z_bucket_w:0.001 z_bucket:NaN}} + +eval_info instant at 10m histogram_mul_div*histogram_mul_div + +eval_info instant at 10m histogram_mul_div/histogram_mul_div + +eval_info instant at 10m float_series_3/histogram_mul_div + +eval_info instant at 10m 0/histogram_mul_div clear +# Apply binary operators to mixed histogram and float samples. +# TODO:(NeerajGartia21) move these tests to their respective locations when tests from engine_test.go are be moved here. + +load 10m + histogram_sample {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + float_sample 0x1 + +eval_info instant at 10m float_sample+histogram_sample + +eval_info instant at 10m histogram_sample+float_sample + +eval_info instant at 10m float_sample-histogram_sample + +eval_info instant at 10m histogram_sample-float_sample + # Counter reset only noticeable in a single bucket. load 5m reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}} diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index b0272b7fee..ebe74ecd11 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -146,6 +146,7 @@ var ( PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count/_bucket:", PromQLInfo) HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name", PromQLInfo) + IncompatibleTypesInBinOpInfo = fmt.Errorf("%w: incompatible sample types encountered for binary operator", PromQLInfo) ) type annoErr struct { @@ -273,3 +274,12 @@ func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange. Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName), } } + +// NewIncompatibleTypesInBinOpInfo is used if binary operators act on a +// combination of types that doesn't work and therefore returns no result. +func NewIncompatibleTypesInBinOpInfo(lhsType, operator, rhsType string, pos posrange.PositionRange) error { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %q: %s %s %s", IncompatibleTypesInBinOpInfo, operator, lhsType, operator, rhsType), + } +} From ab2475c4268fee1aeb9b1b01a1d6684bec973219 Mon Sep 17 00:00:00 2001 From: machine424 Date: Tue, 6 Aug 2024 12:42:35 +0200 Subject: [PATCH 07/11] test(tsdb): add a reproducer for https://github.com/prometheus/prometheus/issues/14422 Signed-off-by: machine424 --- tsdb/db_test.go | 116 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 8c216956d6..08417e889e 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -15,6 +15,7 @@ package tsdb import ( "bufio" + "bytes" "context" "encoding/binary" "flag" @@ -23,6 +24,8 @@ import ( "log/slog" "math" "math/rand" + "net/http" + "net/http/httptest" "os" "path" "path/filepath" @@ -41,6 +44,12 @@ import ( "go.uber.org/atomic" "go.uber.org/goleak" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage/remote" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -8857,3 +8866,110 @@ func TestGenerateCompactionDelay(t *testing.T) { assertDelay(db.generateCompactionDelay()) } } + +type blockedResponseRecorder struct { + r *httptest.ResponseRecorder + + // writeblocked is used to block writing until the test wants it to resume. + writeBlocked chan struct{} + // writeStarted is closed by blockedResponseRecorder to signal that writing has started. + writeStarted chan struct{} +} + +func (br *blockedResponseRecorder) Write(buf []byte) (int, error) { + select { + case <-br.writeStarted: + default: + close(br.writeStarted) + } + + <-br.writeBlocked + return br.r.Write(buf) +} + +func (br *blockedResponseRecorder) Header() http.Header { return br.r.Header() } + +func (br *blockedResponseRecorder) WriteHeader(code int) { br.r.WriteHeader(code) } + +func (br *blockedResponseRecorder) Flush() { br.r.Flush() } + +// TestBlockClosingBlockedDuringRemoteRead ensures that a TSDB Block is not closed while it is being queried +// through remote read. This is a regression test for https://github.com/prometheus/prometheus/issues/14422. +// TODO: Ideally, this should reside in storage/remote/read_handler_test.go once the necessary TSDB utils are accessible there. +func TestBlockClosingBlockedDuringRemoteRead(t *testing.T) { + dir := t.TempDir() + + createBlock(t, dir, genSeries(2, 1, 0, 10)) + db, err := Open(dir, nil, nil, nil, nil) + require.NoError(t, err) + // No error checking as manually closing the block is supposed to make this fail. + defer db.Close() + + readAPI := remote.NewReadHandler(nil, nil, db, func() config.Config { + return config.Config{} + }, + 0, 1, 0, + ) + + matcher, err := labels.NewMatcher(labels.MatchRegexp, "__name__", ".*") + require.NoError(t, err) + + query, err := remote.ToQuery(0, 10, []*labels.Matcher{matcher}, nil) + require.NoError(t, err) + + req := &prompb.ReadRequest{ + Queries: []*prompb.Query{query}, + AcceptedResponseTypes: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS}, + } + data, err := proto.Marshal(req) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(snappy.Encode(nil, data))) + require.NoError(t, err) + + blockedRecorder := &blockedResponseRecorder{ + r: httptest.NewRecorder(), + writeBlocked: make(chan struct{}), + writeStarted: make(chan struct{}), + } + + readDone := make(chan struct{}) + go func() { + readAPI.ServeHTTP(blockedRecorder, request) + require.Equal(t, http.StatusOK, blockedRecorder.r.Code) + close(readDone) + }() + + // Wait for the read API to start streaming data. + <-blockedRecorder.writeStarted + + // Try to close the queried block. + blockClosed := make(chan struct{}) + go func() { + for _, block := range db.Blocks() { + block.Close() + } + close(blockClosed) + }() + + // Closing the queried block should block. + // Wait a little bit to make sure of that. + select { + case <-time.After(100 * time.Millisecond): + case <-readDone: + require.Fail(t, "read API should still be streaming data.") + case <-blockClosed: + require.Fail(t, "Block shouldn't get closed while being queried.") + } + + // Resume the read API data streaming. + close(blockedRecorder.writeBlocked) + <-readDone + + // The block should be no longer needed and closing it should end. + select { + case <-time.After(10 * time.Millisecond): + require.Fail(t, "Closing the block timed out.") + case <-blockClosed: + } +} From 08a716250222301d6990050ba033ea4d9024ac9b Mon Sep 17 00:00:00 2001 From: akunszt <32456696+akunszt@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:36:37 +0200 Subject: [PATCH 08/11] discovery: aws/ec2 unit tests (#14364) * discovery: add aws/ec2 unit tests * discovery: initial skeleton for aws/ec2 unit tests This is a - very likely - not too useful unit test for the AWS SD. It is commited so other people can check the basic logic and the implementation. Signed-off-by: Arpad Kunszt * discovery: fix linter complains about ec2_test.go Signed-off-by: Arpad Kunszt * discovery: add basic unit test for aws This tests only the basic labelling, not including the VPC related information. Signed-off-by: Arpad Kunszt * discovery: fix linter complains about ec2_test.go Signed-off-by: Arpad Kunszt * discovery: other linter fixes in aws/ec2_test.go Signed-off-by: Arpad Kunszt * discovery: implement remaining tests for aws/ec2 The coverage is not 100% but I think it is a good starting point if someone wants to improve that. Currently it covers all the AWS API calls. Signed-off-by: Arpad Kunszt * discovery: make linter happy in aws/ec2_test.go Signed-off-by: Arpad Kunszt * discovery: make utility funtcions private Signed-off-by: Arpad Kunszt * discover: no global variable in the aws/ec2 test Signed-off-by: Arpad Kunszt * discovery: common body for some tests in ec2 Signed-off-by: Arpad Kunszt * discovery: try to make golangci-lint happy Signed-off-by: Arpad Kunszt * discovery: make every non-test function private Signed-off-by: Arpad Kunszt * discovery: test for errors first in TestRefresh Signed-off-by: Arpad Kunszt * discovery: move refresh tests into the function This way people can find both the test cases and the execution of the test at the same place. Signed-off-by: Arpad Kunszt * discovery: fix copyright date Signed-off-by: Arpad Kunszt * discovery: remove misleading comment Signed-off-by: Arpad Kunszt * discovery: rename test for easier identification Signed-off-by: Arpad Kunszt * discovery: use static values for the test cases Signed-off-by: Arpad Kunszt * discover: try to make the linter happy Signed-off-by: Arpad Kunszt * discovery: drop redundant data from ec2 and use common ptr functions Signed-off-by: Arpad Kunszt * discovery: use Error instead of Equal Signed-off-by: Arpad Kunszt * discovery: merge refreshAZIDs tests into one Signed-off-by: Arpad Kunszt --------- Signed-off-by: Arpad Kunszt --- discovery/aws/ec2.go | 5 +- discovery/aws/ec2_test.go | 434 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 437 insertions(+), 2 deletions(-) create mode 100644 discovery/aws/ec2_test.go diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index 51eec8dba4..5a725cb48f 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -30,6 +30,7 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -148,7 +149,7 @@ type EC2Discovery struct { *refresh.Discovery logger *slog.Logger cfg *EC2SDConfig - ec2 *ec2.EC2 + ec2 ec2iface.EC2API // azToAZID maps this account's availability zones to their underlying AZ // ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so @@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.D return d, nil } -func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) { +func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) { if d.ec2 != nil { return d.ec2, nil } diff --git a/discovery/aws/ec2_test.go b/discovery/aws/ec2_test.go new file mode 100644 index 0000000000..f34065c23e --- /dev/null +++ b/discovery/aws/ec2_test.go @@ -0,0 +1,434 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aws + +import ( + "context" + "errors" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +// Helper function to get pointers on literals. +// NOTE: this is common between a few tests. In the future it might worth to move this out into a separate package. +func strptr(str string) *string { + return &str +} + +func boolptr(b bool) *bool { + return &b +} + +func int64ptr(i int64) *int64 { + return &i +} + +// Struct for test data. +type ec2DataStore struct { + region string + + azToAZID map[string]string + + ownerID string + + instances []*ec2.Instance +} + +// The tests itself. +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + +func TestEC2DiscoveryRefreshAZIDs(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + shouldFail bool + ec2Data *ec2DataStore + }{ + { + name: "Normal", + shouldFail: false, + ec2Data: &ec2DataStore{ + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + }, + }, + { + name: "HandleError", + shouldFail: true, + ec2Data: &ec2DataStore{}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + } + + err := d.refreshAZIDs(ctx) + if tt.shouldFail { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, client.ec2Data.azToAZID, d.azToAZID) + } + }) + } +} + +func TestEC2DiscoveryRefresh(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + ec2Data *ec2DataStore + expected []*targetgroup.Group + }{ + { + name: "NoPrivateIp", + ec2Data: &ec2DataStore{ + region: "region-noprivateip", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + InstanceId: strptr("instance-id-noprivateip"), + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-noprivateip", + }, + }, + }, + { + name: "NoVpc", + ec2Data: &ec2DataStore{ + region: "region-novpc", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + ownerID: "owner-id-novpc", + instances: []*ec2.Instance{ + { + // set every possible options and test them here + Architecture: strptr("architecture-novpc"), + ImageId: strptr("ami-novpc"), + InstanceId: strptr("instance-id-novpc"), + InstanceLifecycle: strptr("instance-lifecycle-novpc"), + InstanceType: strptr("instance-type-novpc"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + Platform: strptr("platform-novpc"), + PrivateDnsName: strptr("private-dns-novpc"), + PrivateIpAddress: strptr("1.2.3.4"), + PublicDnsName: strptr("public-dns-novpc"), + PublicIpAddress: strptr("42.42.42.2"), + State: &ec2.InstanceState{Name: strptr("running")}, + // test tags once and for all + Tags: []*ec2.Tag{ + {Key: strptr("tag-1-key"), Value: strptr("tag-1-value")}, + {Key: strptr("tag-2-key"), Value: strptr("tag-2-value")}, + nil, + {Value: strptr("tag-4-value")}, + {Key: strptr("tag-5-key")}, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-novpc", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("1.2.3.4:4242"), + "__meta_ec2_ami": model.LabelValue("ami-novpc"), + "__meta_ec2_architecture": model.LabelValue("architecture-novpc"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"), + "__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"), + "__meta_ec2_platform": model.LabelValue("platform-novpc"), + "__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"), + "__meta_ec2_private_ip": model.LabelValue("1.2.3.4"), + "__meta_ec2_public_dns_name": model.LabelValue("public-dns-novpc"), + "__meta_ec2_public_ip": model.LabelValue("42.42.42.2"), + "__meta_ec2_region": model.LabelValue("region-novpc"), + "__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"), + "__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"), + }, + }, + }, + }, + }, + { + name: "Ipv4", + ec2Data: &ec2DataStore{ + region: "region-ipv4", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv4"), + InstanceId: strptr("instance-id-ipv4"), + InstanceType: strptr("instance-type-ipv4"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")}, + PrivateIpAddress: strptr("5.6.7.8"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-3"), + VpcId: strptr("vpc-ipv4"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without subnet -> should be ignored + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:1::1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + }, + // interface with subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + // interface with another subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-1"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv4", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("5.6.7.8:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv4"), + "__meta_ec2_availability_zone": model.LabelValue("azname-c"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-3"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"), + "__meta_ec2_private_ip": model.LabelValue("5.6.7.8"), + "__meta_ec2_region": model.LabelValue("region-ipv4"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"), + }, + }, + }, + }, + }, + { + name: "Ipv6", + ec2Data: &ec2DataStore{ + region: "region-ipv6", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv6"), + InstanceId: strptr("instance-id-ipv6"), + InstanceType: strptr("instance-type-ipv6"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + PrivateIpAddress: strptr("9.10.11.12"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-2"), + VpcId: strptr("vpc-ipv6"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without primary IPv6, index 2 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::1:1"), + IsPrimaryIpv6: boolptr(false), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 1 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(1), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::2:1"), + IsPrimaryIpv6: boolptr(false), + }, + { + Ipv6Address: strptr("2001:db8:2::2:2"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 3 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::3:1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-1"), + }, + // interface without primary IPv6, index 0 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(0), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv6", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("9.10.11.12:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv6"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"), + "__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"), + "__meta_ec2_private_ip": model.LabelValue("9.10.11.12"), + "__meta_ec2_region": model.LabelValue("region-ipv6"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"), + }, + }, + }, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + cfg: &EC2SDConfig{ + Port: 4242, + Region: client.ec2Data.region, + }, + } + + g, err := d.refresh(ctx) + require.NoError(t, err) + require.Equal(t, tt.expected, g) + }) + } +} + +// EC2 client mock. +type mockEC2Client struct { + ec2iface.EC2API + ec2Data ec2DataStore +} + +func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client { + client := mockEC2Client{ + ec2Data: *ec2Data, + } + return &client +} + +func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) { + if len(m.ec2Data.azToAZID) == 0 { + return nil, errors.New("No AZs found") + } + + azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID)) + + i := 0 + for k, v := range m.ec2Data.azToAZID { + azs[i] = &ec2.AvailabilityZone{ + ZoneName: strptr(k), + ZoneId: strptr(v), + } + i++ + } + + return &ec2.DescribeAvailabilityZonesOutput{ + AvailabilityZones: azs, + }, nil +} + +func (m *mockEC2Client) DescribeInstancesPagesWithContext(ctx aws.Context, input *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, opts ...request.Option) error { + r := ec2.Reservation{} + r.SetInstances(m.ec2Data.instances) + r.SetOwnerId(m.ec2Data.ownerID) + + o := ec2.DescribeInstancesOutput{} + o.SetReservations([]*ec2.Reservation{&r}) + + _ = fn(&o, true) + + return nil +} From 5a4e4f69366ce123d803675802d72651212e5996 Mon Sep 17 00:00:00 2001 From: Joshua Hesketh Date: Thu, 17 Oct 2024 00:00:46 +1100 Subject: [PATCH 09/11] Fix stddev/stdvar when aggregating histograms, NaNs, and infinities (#14941) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit promql: Fix stddev/stdvar when aggregating histograms, NaNs, and Infs Native histograms are ignored when calculating stddev or stdvar. However, for the first series of each group, a `groupedAggregation` is always created. If the first series that was encountered is a histogram then it acts as the equivalent of a 0 point. This change creates the first `groupedAggregation` with the `seen` field set to `false` if the point is a histogram, thus ignoring it like the rest of the aggregation function does. A new `groupedAggregation` will then be created once an actual float value is encountered. This commit also sets the `floatValue` field of the `groupedAggregation` to `NaN`, if the first float value of a group is `NaN` or `±Inf`, so that the outcome is consistently `NaN` once those values are in the mix. (The added tests fail without this change). Signed-off-by: Joshua Hesketh Signed-off-by: beorn7 --------- Signed-off-by: Joshua Hesketh Signed-off-by: beorn7 Co-authored-by: beorn7 --- CHANGELOG.md | 3 + promql/engine.go | 10 +- promql/promqltest/testdata/aggregators.test | 157 ++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8e7621186..cc3a68d8b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## unreleased +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 + ## 3.0.0-beta.1 / 2024-10-09 * [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 diff --git a/promql/engine.go b/promql/engine.go index 60b4b81384..5435db0fc4 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2926,7 +2926,15 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix group.hasHistogram = true } case parser.STDVAR, parser.STDDEV: - group.floatValue = 0 + switch { + case h != nil: + // Ignore histograms for STDVAR and STDDEV. + group.seen = false + case math.IsNaN(f), math.IsInf(f, 0): + group.floatValue = math.NaN() + default: + group.floatValue = 0 + } case parser.QUANTILE: group.heap = make(vectorByValueHeap, 1) group.heap[0] = Sample{F: f} diff --git a/promql/promqltest/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test index 3c91883960..e2eb381dbc 100644 --- a/promql/promqltest/testdata/aggregators.test +++ b/promql/promqltest/testdata/aggregators.test @@ -572,3 +572,160 @@ clear # #eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without())) # {} 1 + +clear + +# Test stddev produces consistent results regardless the order the data is loaded in. +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + +clear + +load 5m + series{label="a"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} NaN + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} inf + +eval instant at 0m stddev (series) + {} NaN + +eval instant at 0m stdvar (series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} inf + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series inf + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN From 4271670a829de5e8d6ff47089578eba0eada76e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 16 Oct 2024 15:02:13 +0200 Subject: [PATCH 10/11] chore(deps): update client_golang from 1.20.4 to 1.20.5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: https://github.com/prometheus/client_golang/releases/tag/v1.20.5 Signed-off-by: György Krajcsovits --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e019aabf63..6d33d2ed23 100644 --- a/go.mod +++ b/go.mod @@ -50,7 +50,7 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.60.0 github.com/prometheus/common/assets v0.2.0 diff --git a/go.sum b/go.sum index b601aab9ba..3d415cf344 100644 --- a/go.sum +++ b/go.sum @@ -500,8 +500,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= -github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= From 2cabd1b707b0569f65fbc2bafa899cabf47cfe09 Mon Sep 17 00:00:00 2001 From: Yi <38248129+jyz0309@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:25:05 +0800 Subject: [PATCH 11/11] config: remove expand-external-labels flag in release 3.0 (#14657) remove expand-external-labels feature flag and enabled env arg expansion for external labels by default. Signed-off-by: jyz0309 <45495947@qq.com> --- cmd/prometheus/main.go | 26 +++++------- cmd/promtool/main.go | 2 +- cmd/promtool/sd.go | 2 +- config/config.go | 14 +++---- config/config_test.go | 65 ++++++++++++++--------------- docs/configuration/configuration.md | 6 ++- docs/feature_flags.md | 9 ---- scrape/scrape_test.go | 2 +- 8 files changed, 56 insertions(+), 70 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index b84b4edf68..4a70d63bf8 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -200,11 +200,10 @@ type flagConfig struct { memlimitRatio float64 // These options are extracted from featureList // for ease of use. - enableExpandExternalLabels bool - enablePerStepStats bool - enableAutoGOMAXPROCS bool - enableAutoGOMEMLIMIT bool - enableConcurrentRuleEval bool + enablePerStepStats bool + enableAutoGOMAXPROCS bool + enableAutoGOMEMLIMIT bool + enableConcurrentRuleEval bool prometheusURL string corsRegexString string @@ -220,9 +219,6 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { opts := strings.Split(f, ",") for _, o := range opts { switch o { - case "expand-external-labels": - c.enableExpandExternalLabels = true - logger.Info("Experimental expand-external-labels enabled") case "exemplar-storage": c.tsdb.EnableExemplarStorage = true logger.Info("Experimental in-memory exemplar storage enabled") @@ -595,7 +591,7 @@ func main() { // Throw error for invalid config before starting other components. var cfgFile *config.Config - if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, promslog.NewNopLogger()); err != nil { + if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil { absPath, pathErr := filepath.Abs(cfg.configFile) if pathErr != nil { absPath = cfg.configFile @@ -1145,7 +1141,7 @@ func main() { for { select { case <-hup: - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { logger.Error("Error reloading config", "err", err) } else if cfg.enableAutoReload { if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { @@ -1155,7 +1151,7 @@ func main() { } } case rc := <-webHandler.Reload(): - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { logger.Error("Error reloading config", "err", err) rc <- err } else { @@ -1180,7 +1176,7 @@ func main() { } logger.Info("Configuration file change detected, reloading the configuration.") - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { logger.Error("Error reloading config", "err", err) } else { checksum = currentChecksum @@ -1210,7 +1206,7 @@ func main() { return nil } - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err) } @@ -1437,7 +1433,7 @@ type reloader struct { reloader func(*config.Config) error } -func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { +func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { start := time.Now() timingsLogger := logger logger.Info("Loading configuration file", "filename", filename) @@ -1453,7 +1449,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b } }() - conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger) + conf, err := config.LoadFile(filename, agentMode, logger) if err != nil { return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err) } diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 159fae764d..6e4eb88437 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -575,7 +575,7 @@ func checkFileExists(fn string) error { func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) { fmt.Println("Checking", filename) - cfg, err := config.LoadFile(filename, agentMode, false, promslog.NewNopLogger()) + cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger()) if err != nil { return nil, err } diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index 5c00dab03a..5e005bca8b 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -41,7 +41,7 @@ type sdCheckResult struct { func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, registerer prometheus.Registerer) int { logger := promslog.New(&promslog.Config{}) - cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) + cfg, err := config.LoadFile(sdConfigFiles, false, logger) if err != nil { fmt.Fprintln(os.Stderr, "Cannot load config", err) return failureExitCode diff --git a/config/config.go b/config/config.go index 3f35a195d0..3eb6898d5c 100644 --- a/config/config.go +++ b/config/config.go @@ -72,7 +72,7 @@ const ( ) // Load parses the YAML input s into a Config. -func Load(s string, expandExternalLabels bool, logger *slog.Logger) (*Config, error) { +func Load(s string, logger *slog.Logger) (*Config, error) { cfg := &Config{} // If the entire config body is empty the UnmarshalYAML method is // never called. We thus have to set the DefaultConfig at the entry @@ -84,10 +84,6 @@ func Load(s string, expandExternalLabels bool, logger *slog.Logger) (*Config, er return nil, err } - if !expandExternalLabels { - return cfg, nil - } - b := labels.NewScratchBuilder(0) cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) { newV := os.Expand(v.Value, func(s string) string { @@ -106,17 +102,19 @@ func Load(s string, expandExternalLabels bool, logger *slog.Logger) (*Config, er // Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024 b.Add(v.Name, newV) }) - cfg.GlobalConfig.ExternalLabels = b.Labels() + if !b.Labels().IsEmpty() { + cfg.GlobalConfig.ExternalLabels = b.Labels() + } return cfg, nil } // LoadFile parses the given YAML file into a Config. -func LoadFile(filename string, agentMode, expandExternalLabels bool, logger *slog.Logger) (*Config, error) { +func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) { content, err := os.ReadFile(filename) if err != nil { return nil, err } - cfg, err := Load(string(content), expandExternalLabels, logger) + cfg, err := Load(string(content), logger) if err != nil { return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err) } diff --git a/config/config_test.go b/config/config_test.go index 07f071ffee..547070dacc 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1501,7 +1501,7 @@ var expectedConf = &Config{ } func TestYAMLRoundtrip(t *testing.T) { - want, err := LoadFile("testdata/roundtrip.good.yml", false, false, promslog.NewNopLogger()) + want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1514,7 +1514,7 @@ func TestYAMLRoundtrip(t *testing.T) { } func TestRemoteWriteRetryOnRateLimit(t *testing.T) { - want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, promslog.NewNopLogger()) + want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1529,7 +1529,7 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) { func TestOTLPSanitizeResourceAttributes(t *testing.T) { t.Run("good config", func(t *testing.T) { - want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, promslog.NewNopLogger()) + want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1541,7 +1541,7 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { }) t.Run("bad config", func(t *testing.T) { - _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, promslog.NewNopLogger()) + _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, promslog.NewNopLogger()) require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`) require.ErrorContains(t, err, `empty promoted OTel resource attribute`) }) @@ -1550,16 +1550,17 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. - _, err := LoadFile("testdata/global_timeout.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - c, err := LoadFile("testdata/conf.good.yml", false, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) + require.NoError(t, err) require.Equal(t, expectedConf, c) } func TestScrapeIntervalLarger(t *testing.T) { - c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.ScrapeConfigs, 1) for _, sc := range c.ScrapeConfigs { @@ -1569,7 +1570,7 @@ func TestScrapeIntervalLarger(t *testing.T) { // YAML marshaling must not reveal authentication credentials. func TestElideSecrets(t *testing.T) { - c, err := LoadFile("testdata/conf.good.yml", false, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) secretRe := regexp.MustCompile(`\\u003csecret\\u003e|`) @@ -1586,31 +1587,31 @@ func TestElideSecrets(t *testing.T) { func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets a rule files with an absolute path - c, err := LoadFile(ruleFilesConfigFile, false, false, promslog.NewNopLogger()) + c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger()) require.NoError(t, err) require.Equal(t, ruleFilesExpectedConf, c) } func TestKubernetesEmptyAPIServer(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesWithKubeConfig(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesSelectors(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } @@ -2094,7 +2095,7 @@ func TestBadConfigs(t *testing.T) { model.NameValidationScheme = model.UTF8Validation }() for _, ee := range expectedErrors { - _, err := LoadFile("testdata/"+ee.filename, false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger()) require.ErrorContains(t, err, ee.errMsg, "Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err) } @@ -2125,7 +2126,7 @@ func TestBadStaticConfigsYML(t *testing.T) { } func TestEmptyConfig(t *testing.T) { - c, err := Load("", false, promslog.NewNopLogger()) + c, err := Load("", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig require.Equal(t, exp, *c) @@ -2135,38 +2136,34 @@ func TestExpandExternalLabels(t *testing.T) { // Cleanup ant TEST env variable that could exist on the system. os.Setenv("TEST", "") - c, err := LoadFile("testdata/external_labels.good.yml", false, false, promslog.NewNopLogger()) - require.NoError(t, err) - testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels) - - c, err = LoadFile("testdata/external_labels.good.yml", false, true, promslog.NewNopLogger()) + c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) os.Setenv("TEST", "TestValue") - c, err = LoadFile("testdata/external_labels.good.yml", false, true, promslog.NewNopLogger()) + c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) } func TestAgentMode(t *testing.T) { - _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field rule_files is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field remote_read is not allowed in agent mode") - c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Empty(t, c.RemoteWriteConfigs) - c, err = LoadFile("testdata/agent_mode.good.yml", true, false, promslog.NewNopLogger()) + c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.RemoteWriteConfigs, 1) require.Equal( @@ -2177,7 +2174,7 @@ func TestAgentMode(t *testing.T) { } func TestEmptyGlobalBlock(t *testing.T) { - c, err := Load("global:\n", false, promslog.NewNopLogger()) + c, err := Load("global:\n", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig exp.Runtime = DefaultRuntimeConfig @@ -2332,7 +2329,7 @@ func TestGetScrapeConfigs(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c, err := LoadFile(tc.configFile, false, false, promslog.NewNopLogger()) + c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger()) require.NoError(t, err) scfgs, err := c.GetScrapeConfigs() @@ -2350,7 +2347,7 @@ func kubernetesSDHostURL() config.URL { } func TestScrapeConfigDisableCompression(t *testing.T) { - want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, promslog.NewNopLogger()) + want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2397,7 +2394,7 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, promslog.NewNopLogger()) + want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 46d3251418..6e0cf431cb 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -79,7 +79,11 @@ global: [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). + # external systems (federation, remote storage, Alertmanager). + # Environment variable references `${var}` or `$var` are replaced according + # to the values of the current environment variables. + # References to undefined variables are replaced by the empty string. + # The `$` character can be escaped by using `$$`. external_labels: [ : ... ] diff --git a/docs/feature_flags.md b/docs/feature_flags.md index a3e2c0b9e9..65eb60eaf1 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -11,15 +11,6 @@ Their behaviour can change in future releases which will be communicated via the You can enable them using the `--enable-feature` flag with a comma separated list of features. They may be enabled by default in future versions. -## Expand environment variables in external labels - -`--enable-feature=expand-external-labels` - -Replace `${var}` or `$var` in the [`external_labels`](configuration/configuration.md#configuration-file) -values according to the values of the current environment variables. References -to undefined variables are replaced by the empty string. -The `$` character can be escaped by using `$$`. - ## Exemplars storage `--enable-feature=exemplar-storage` diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index bdf85a451c..e0e094da54 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3835,7 +3835,7 @@ scrape_configs: mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, nil, nil, s, reg) require.NoError(t, err) - cfg, err := config.Load(configStr, false, promslog.NewNopLogger()) + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) mng.ApplyConfig(cfg) tsets := make(chan map[string][]*targetgroup.Group)