From 6100e756a8ffb8aec255f0964e5471aafbe87de3 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Fri, 26 Jul 2024 09:49:57 +0200 Subject: [PATCH 01/10] Ignore stale histograms for counter reset detection The histogram stats decoder keeps track of the last seen histogram sample in order to properly detect counter resets. We are seeing an issue where a histogram with UnknownResetHint gets treated as a counter reset when it follows a stale histogram sample. I believe that this is incorrect since stale samples should be completely ignored in PromQL. As a result, they should not be stored in the histogram stats iterator and the counter reset detection needs to be done against the last non-stale sample. Signed-off-by: Filip Petkovski --- promql/histogram_stats_iterator.go | 2 - promql/histogram_stats_iterator_test.go | 123 +++++++++++++++--------- tsdb/tsdbutil/histogram.go | 10 +- 3 files changed, 84 insertions(+), 51 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index dfafea5f8..0a5f67ae7 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -48,7 +48,6 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi var t int64 t, f.currentH = f.Iterator.AtHistogram(f.currentH) if value.IsStaleNaN(f.currentH.Sum) { - f.setLastH(f.currentH) h = &histogram.Histogram{Sum: f.currentH.Sum} return t, h } @@ -77,7 +76,6 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) var t int64 t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH) if value.IsStaleNaN(f.currentFH.Sum) { - f.setLastFH(f.currentFH) return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum} } diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index b71a9d602..d5c081348 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -14,62 +14,99 @@ package promql import ( + "fmt" + "math" "testing" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/tsdbutil" ) func TestHistogramStatsDecoding(t *testing.T) { - histograms := []*histogram.Histogram{ - tsdbutil.GenerateTestHistogram(0), - tsdbutil.GenerateTestHistogram(1), - tsdbutil.GenerateTestHistogram(2), - tsdbutil.GenerateTestHistogram(2), - } - histograms[0].CounterResetHint = histogram.NotCounterReset - histograms[1].CounterResetHint = histogram.UnknownCounterReset - histograms[2].CounterResetHint = histogram.CounterReset - histograms[3].CounterResetHint = histogram.UnknownCounterReset - - expectedHints := []histogram.CounterResetHint{ - histogram.NotCounterReset, - histogram.NotCounterReset, - histogram.CounterReset, - histogram.NotCounterReset, + cases := []struct { + name string + histograms []*histogram.Histogram + expectedHints []histogram.CounterResetHint + }{ + { + name: "unknown counter reset triggers detection", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset), + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + tsdbutil.GenerateTestHistogramWithHint(2, histogram.CounterReset), + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.NotCounterReset, + histogram.CounterReset, + histogram.NotCounterReset, + }, + }, + { + name: "stale sample before unknown reset hint", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset), + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + {Sum: math.Float64frombits(value.StaleNaN)}, + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.NotCounterReset, + histogram.UnknownCounterReset, + histogram.NotCounterReset, + }, + }, } - t.Run("histogram_stats", func(t *testing.T) { - decodedStats := make([]*histogram.Histogram, 0) - statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) - for statsIterator.Next() != chunkenc.ValNone { - _, h := statsIterator.AtHistogram(nil) - decodedStats = append(decodedStats, h) - } - for i := 0; i < len(histograms); i++ { - require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) - require.Equal(t, histograms[i].Count, decodedStats[i].Count) - require.Equal(t, histograms[i].Sum, decodedStats[i].Sum) - } - }) - t.Run("float_histogram_stats", func(t *testing.T) { - decodedStats := make([]*histogram.FloatHistogram, 0) - statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) - for statsIterator.Next() != chunkenc.ValNone { - _, h := statsIterator.AtFloatHistogram(nil) - decodedStats = append(decodedStats, h) - } - for i := 0; i < len(histograms); i++ { - fh := histograms[i].ToFloat(nil) - require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) - require.Equal(t, fh.Count, decodedStats[i].Count) - require.Equal(t, fh.Sum, decodedStats[i].Sum) - } - }) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Run("histogram_stats", func(t *testing.T) { + decodedStats := make([]*histogram.Histogram, 0) + statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil)) + for statsIterator.Next() != chunkenc.ValNone { + _, h := statsIterator.AtHistogram(nil) + decodedStats = append(decodedStats, h) + } + for i := 0; i < len(tc.histograms); i++ { + require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, fmt.Sprintf("mismatch in counter reset hint for histogram %d", i)) + h := tc.histograms[i] + if value.IsStaleNaN(h.Sum) { + require.True(t, value.IsStaleNaN(decodedStats[i].Sum)) + require.Equal(t, uint64(0), decodedStats[i].Count) + } else { + require.Equal(t, tc.histograms[i].Count, decodedStats[i].Count) + require.Equal(t, tc.histograms[i].Sum, decodedStats[i].Sum) + } + } + }) + t.Run("float_histogram_stats", func(t *testing.T) { + decodedStats := make([]*histogram.FloatHistogram, 0) + statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil)) + for statsIterator.Next() != chunkenc.ValNone { + _, h := statsIterator.AtFloatHistogram(nil) + decodedStats = append(decodedStats, h) + } + for i := 0; i < len(tc.histograms); i++ { + require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint) + fh := tc.histograms[i].ToFloat(nil) + if value.IsStaleNaN(fh.Sum) { + require.True(t, value.IsStaleNaN(decodedStats[i].Sum)) + require.Equal(t, float64(0), decodedStats[i].Count) + } else { + require.Equal(t, fh.Count, decodedStats[i].Count) + require.Equal(t, fh.Sum, decodedStats[i].Sum) + } + } + }) + }) + } } type histogramSeries struct { diff --git a/tsdb/tsdbutil/histogram.go b/tsdb/tsdbutil/histogram.go index 3c7349cf7..ce934a638 100644 --- a/tsdb/tsdbutil/histogram.go +++ b/tsdb/tsdbutil/histogram.go @@ -30,12 +30,10 @@ func GenerateTestHistograms(n int) (r []*histogram.Histogram) { return r } -func GenerateTestHistogramsWithUnknownResetHint(n int) []*histogram.Histogram { - hs := GenerateTestHistograms(n) - for i := range hs { - hs[i].CounterResetHint = histogram.UnknownCounterReset - } - return hs +func GenerateTestHistogramWithHint(n int, hint histogram.CounterResetHint) *histogram.Histogram { + h := GenerateTestHistogram(n) + h.CounterResetHint = hint + return h } // GenerateTestHistogram but it is up to the user to set any known counter reset hint. From 02d9d874a2f02d09201f77c7d122b042a3da4ab9 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 29 Jul 2024 14:53:32 +0200 Subject: [PATCH 02/10] Add more test cases Signed-off-by: Filip Petkovski --- promql/histogram_stats_iterator_test.go | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index d5c081348..7a2953d3e 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -63,6 +63,39 @@ func TestHistogramStatsDecoding(t *testing.T) { histogram.NotCounterReset, }, }, + { + name: "unknown counter reset at the beginning", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + }, + }, + { + name: "detect real counter reset", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.CounterReset, + }, + }, + { + name: "detect real counter reset after stale NaN", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + {Sum: math.Float64frombits(value.StaleNaN)}, + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.UnknownCounterReset, + histogram.CounterReset, + }, + }, } for _, tc := range cases { From 8f9751b00ddef4b06aa91b7213442726076c193f Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Wed, 31 Jul 2024 11:17:09 +0200 Subject: [PATCH 03/10] Use CopyTo when resetting histogram in stats iterator The histogram stats iterator does not fully clear the histogram object and is not resilient to new fields being added to the histogram type. To resolve the issue, the commit uses the CopyTo methods which should be future proof to new fields being added. Signed-off-by: Filip Petkovski --- promql/histogram_stats_iterator.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index dfafea5f8..c1b7746ec 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -63,9 +63,13 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi return t, h } - h.CounterResetHint = f.getResetHint(f.currentH) - h.Count = f.currentH.Count - h.Sum = f.currentH.Sum + returnValue := histogram.Histogram{ + CounterResetHint: f.getResetHint(f.currentH), + Count: f.currentH.Count, + Sum: f.currentH.Sum, + } + returnValue.CopyTo(h) + f.setLastH(f.currentH) return t, h } @@ -91,9 +95,13 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) return t, fh } - fh.CounterResetHint = f.getFloatResetHint(f.currentFH.CounterResetHint) - fh.Count = f.currentFH.Count - fh.Sum = f.currentFH.Sum + returnValue := histogram.FloatHistogram{ + CounterResetHint: f.getFloatResetHint(f.currentFH.CounterResetHint), + Count: f.currentFH.Count, + Sum: f.currentFH.Sum, + } + returnValue.CopyTo(fh) + f.setLastFH(f.currentFH) return t, fh } From 909785b97fd4b48317739377283c699d81ffe5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 6 Aug 2024 07:46:26 +0200 Subject: [PATCH 04/10] Fix histogram pool poisoning bu chunkenc.Iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chunkenc.Iterator.AtFloatHistogram may do a shallow copy if it receives nil as input pointer. This can in turn share the span slice with multiple histograms in the matrixSelectorHPool, leading to unexpected errors. Signed-off-by: György Krajcsovits --- promql/engine.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index 25e67db63..621c2116e 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2356,6 +2356,11 @@ loop: } else { histograms = append(histograms, HPoint{H: &histogram.FloatHistogram{}}) } + if histograms[n].H == nil { + // Initialize to non zero to AtFloatHistogram does a copy for sure. + // Not an issue in the loop above since that uses an intermediate buffer. + histograms[n].H = &histogram.FloatHistogram{} + } histograms[n].T, histograms[n].H = it.AtFloatHistogram(histograms[n].H) if value.IsStaleNaN(histograms[n].H.Sum) { histograms = histograms[:n] From 1fb0ff7e4516d5993e84c2bee0f4e303b7f0c8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 6 Aug 2024 20:48:16 +0200 Subject: [PATCH 05/10] Add unit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- promql/engine_test.go | 59 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/promql/engine_test.go b/promql/engine_test.go index 523c0613d..9ed3dd939 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3798,3 +3798,62 @@ func makeInt64Pointer(val int64) *int64 { *valp = val return valp } + +func TestHistogramCopyFromIteratorRegression(t *testing.T) { + // Loading the following histograms creates two chunks because there's a + // counter reset. Not only the counter is lower in the last histogram + // but also there's missing buckets. + // This in turns means that chunk iterators will have different spans. + load := `load 1m +histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum:1 count:1 buckets:[1]}} +` + storage := promqltest.LoadedStorage(t, load) + t.Cleanup(func() { storage.Close() }) + engine := promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery) + + verify := func(t *testing.T, qry promql.Query, expected []histogram.FloatHistogram) { + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + + m, ok := res.Value.(promql.Matrix) + require.True(t, ok) + + require.Len(t, m, 1) + series := m[0] + + require.Empty(t, series.Floats) + require.Len(t, series.Histograms, len(expected)) + for i, e := range expected { + series.Histograms[i].H.CounterResetHint = histogram.UnknownCounterReset // Don't care. + require.Equal(t, &e, series.Histograms[i].H) + } + } + + qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[60s])", time.Unix(0, 0), time.Unix(0, 0).Add(1*time.Minute), time.Minute) + require.NoError(t, err) + verify(t, qry, []histogram.FloatHistogram{ + { + Count: 2, + Sum: 2, // Increase from 4 to 6 is 2. + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, // Two buckets changed between the first and second histogram. + PositiveBuckets: []float64{1, 1}, // Increase from 2 to 3 is 1 in both buckets. + }, + }) + + qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[60s]", time.Unix(0, 0).Add(2*time.Minute)) + require.NoError(t, err) + verify(t, qry, []histogram.FloatHistogram{ + { + Count: 6, + Sum: 6, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []float64{3, 3}, + }, + { + Count: 1, + Sum: 1, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []float64{1}, + }, + }) +} From 1d7fe4be5c581265a5c102f3a4fbdc26af3fd357 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 7 Aug 2024 20:15:46 +0200 Subject: [PATCH 06/10] Update promql/engine.go Signed-off-by: George Krajcsovits --- promql/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promql/engine.go b/promql/engine.go index 621c2116e..f6b79f3a4 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2357,7 +2357,7 @@ loop: histograms = append(histograms, HPoint{H: &histogram.FloatHistogram{}}) } if histograms[n].H == nil { - // Initialize to non zero to AtFloatHistogram does a copy for sure. + // Make sure to pass non zero H to AtFloatHistogram so that it does a deep-copy. // Not an issue in the loop above since that uses an intermediate buffer. histograms[n].H = &histogram.FloatHistogram{} } From 8978f3ef71fb8d92d9b430cd3a6591b6ca231c7f Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 9 Aug 2024 12:07:28 +0100 Subject: [PATCH 07/10] Cut release 2.54.0 Signed-off-by: Bryan Boreham --- CHANGELOG.md | 8 ++------ VERSION | 2 +- web/ui/module/codemirror-promql/package.json | 4 ++-- web/ui/module/lezer-promql/package.json | 2 +- web/ui/package-lock.json | 14 +++++++------- web/ui/package.json | 2 +- web/ui/react-app/package.json | 4 ++-- 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e338d9c5c..885749504 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,6 @@ # Changelog -## 2.54.0-rc.1 / 2024-08-05 - -* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). #14584 - -## 2.54.0-rc.0 / 2024-07-19 +## 2.54.0 / 2024-08-09 Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/). This is experimental at this time and may still change. @@ -22,7 +18,7 @@ Remote-write v2 is enabled by default, but can be disabled via feature-flag `web * [ENHANCEMENT] TSDB: Optimise seek within index. #14393 * [ENHANCEMENT] TSDB: Optimise deletion of stale series. #14307 * [ENHANCEMENT] TSDB: Reduce locking to optimise adding and removing series. #13286,#14286 -* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396 +* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396,#14584 * [ENHANCEMENT] Regexps: Optimize patterns with multiple prefixes. #13843,#14368 * [ENHANCEMENT] Regexps: Optimize patterns containing multiple literal strings. #14173 * [ENHANCEMENT] AWS SD: expose Primary IPv6 addresses as __meta_ec2_primary_ipv6_addresses. #14156 diff --git a/VERSION b/VERSION index 149ab4732..99aed793a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.54.0-rc.1 +2.54.0 diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index 26823cdfd..6307f12f3 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0-rc.1", + "@prometheus-io/lezer-promql": "0.54.0", "lru-cache": "^7.18.3" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 774c95967..14d263241 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 3243fe6aa..0f5db9f6d 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "prometheus-io", - "version": "0.54.0-rc.1", + "version": "0.54.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "prometheus-io", - "version": "0.54.0-rc.1", + "version": "0.54.0", "workspaces": [ "react-app", "module/*" @@ -30,10 +30,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0-rc.1", + "@prometheus-io/lezer-promql": "0.54.0", "lru-cache": "^7.18.3" }, "devDependencies": { @@ -69,7 +69,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.7.0", @@ -19332,7 +19332,7 @@ }, "react-app": { "name": "@prometheus-io/app", - "version": "0.54.0-rc.1", + "version": "0.54.0", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -19350,7 +19350,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0-rc.1", + "@prometheus-io/codemirror-promql": "0.54.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", diff --git a/web/ui/package.json b/web/ui/package.json index fe1e77ef5..01dd71f34 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -28,5 +28,5 @@ "ts-jest": "^29.2.2", "typescript": "^4.9.5" }, - "version": "0.54.0-rc.1" + "version": "0.54.0" } diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index b65354730..01620531a 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.54.0-rc.1", + "version": "0.54.0", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.17.0", @@ -19,7 +19,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0-rc.1", + "@prometheus-io/codemirror-promql": "0.54.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", From 144470c7b011be44ab73e4fb3d7fe228f401a016 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 19 Aug 2024 10:58:35 +0100 Subject: [PATCH 08/10] [BUGFIX] Scraping: allow multiple samples on same series (#14685) So long as they specify timestamps. We don't check that the timestamps are different. Extend test, and use client_golang/prometheus/testutil to simplify metric check. Signed-off-by: Bryan Boreham --- scrape/scrape.go | 2 +- scrape/scrape_test.go | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index 68411a62e..ccb068b68 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1631,7 +1631,7 @@ loop: updateMetadata(lset, true) } - if seriesAlreadyScraped { + if seriesAlreadyScraped && parsedTimestamp == nil { err = storage.ErrDuplicateSampleForTimestamp } else { if ctMs := p.CreatedTimestamp(); sl.enableCTZeroIngestion && ctMs != nil { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index a3fe6ac1a..c5a0b8b83 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -35,6 +35,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -3627,6 +3628,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { require.Equal(t, 3, total) require.Equal(t, 3, added) require.Equal(t, 1, seriesAdded) + require.Equal(t, 2.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) slApp = sl.appender(ctx) total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\ntest_metric 1\ntest_metric 1\n"), "", time.Time{}) @@ -3635,12 +3637,18 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { require.Equal(t, 3, total) require.Equal(t, 3, added) require.Equal(t, 0, seriesAdded) + require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) - metric := dto.Metric{} - err = sl.metrics.targetScrapeSampleDuplicate.Write(&metric) + // When different timestamps are supplied, multiple samples are accepted. + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1 1001\ntest_metric 1 1002\ntest_metric 1 1003\n"), "", time.Time{}) require.NoError(t, err) - value := metric.GetCounter().GetValue() - require.Equal(t, 4.0, value) + require.NoError(t, slApp.Commit()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 0, seriesAdded) + // Metric is not higher than last time. + require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) } // This tests running a full scrape loop and checking that the scrape option From 89dee48cc81df4091ae1f9c1e402f32b47183440 Mon Sep 17 00:00:00 2001 From: "ouyang1204@gmail.com" Date: Sun, 11 Aug 2024 15:31:11 +0800 Subject: [PATCH 09/10] fix the issue of failing to match the first network when the container is reconnected to a new network Signed-off-by: ouyang1204@gmail.com --- discovery/moby/docker.go | 33 +++-- discovery/moby/docker_test.go | 119 ++++++++++++++++-- .../testdata/dockerprom/containers/json.json | 69 ++++++++++ docs/configuration/configuration.md | 4 +- 4 files changed, 198 insertions(+), 27 deletions(-) diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 11445092e..68f6fe3cc 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -19,6 +19,7 @@ import ( "net" "net/http" "net/url" + "sort" "strconv" "time" @@ -251,28 +252,26 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er } if d.matchFirstNetwork && len(networks) > 1 { - // Match user defined network - if containerNetworkMode.IsUserDefined() { - networkMode := string(containerNetworkMode) - networks = map[string]*network.EndpointSettings{networkMode: networks[networkMode]} - } else { - // Get first network if container network mode has "none" value. - // This case appears under certain condition: - // 1. Container created with network set to "--net=none". - // 2. Disconnect network "none". - // 3. Reconnect network with user defined networks. - var first string - for k, n := range networks { - if n != nil { - first = k - break - } + // Sort networks by name and take first non-nil network. + keys := make([]string, 0, len(networks)) + for k, n := range networks { + if n != nil { + keys = append(keys, k) } - networks = map[string]*network.EndpointSettings{first: networks[first]} + } + if len(keys) > 0 { + sort.Strings(keys) + firstNetworkMode := keys[0] + firstNetwork := networks[firstNetworkMode] + networks = map[string]*network.EndpointSettings{firstNetworkMode: firstNetwork} } } for _, n := range networks { + if n == nil { + continue + } + var added bool for _, p := range c.Ports { diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go index c108ddf58..398393a15 100644 --- a/discovery/moby/docker_test.go +++ b/discovery/moby/docker_test.go @@ -60,9 +60,9 @@ host: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 6) + require.Len(t, tg.Targets, 8) - for i, lbls := range []model.LabelSet{ + expected := []model.LabelSet{ { "__address__": "172.19.0.2:9100", "__meta_docker_container_id": "c301b928faceb1a18fe379f6bc178727ef920bb30b0f9b8592b32b36255a0eca", @@ -163,7 +163,43 @@ host: %s "__meta_docker_network_scope": "local", "__meta_docker_port_private": "9104", }, - } { + { + "__address__": "172.20.0.3:3306", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "3306", + }, + { + "__address__": "172.20.0.3:33060", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "33060", + }, + } + sortFunc(expected) + sortFunc(tg.Targets) + + for i, lbls := range expected { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { require.Equal(t, lbls, tg.Targets[i]) }) @@ -202,13 +238,8 @@ host: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 9) + require.Len(t, tg.Targets, 13) - sortFunc := func(labelSets []model.LabelSet) { - sort.Slice(labelSets, func(i, j int) bool { - return labelSets[i]["__address__"] < labelSets[j]["__address__"] - }) - } expected := []model.LabelSet{ { "__address__": "172.19.0.2:9100", @@ -359,6 +390,70 @@ host: %s "__meta_docker_network_scope": "local", "__meta_docker_port_private": "9104", }, + { + "__address__": "172.20.0.3:3306", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "3306", + }, + { + "__address__": "172.20.0.3:33060", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "33060", + }, + { + "__address__": "172.21.0.3:3306", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.21.0.3", + "__meta_docker_network_name": "dockersd_private1", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "3306", + }, + { + "__address__": "172.21.0.3:33060", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.21.0.3", + "__meta_docker_network_name": "dockersd_private1", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "33060", + }, } sortFunc(expected) @@ -370,3 +465,9 @@ host: %s }) } } + +func sortFunc(labelSets []model.LabelSet) { + sort.Slice(labelSets, func(i, j int) bool { + return labelSets[i]["__address__"] < labelSets[j]["__address__"] + }) +} diff --git a/discovery/moby/testdata/dockerprom/containers/json.json b/discovery/moby/testdata/dockerprom/containers/json.json index ebfc56b6d..33406bf9a 100644 --- a/discovery/moby/testdata/dockerprom/containers/json.json +++ b/discovery/moby/testdata/dockerprom/containers/json.json @@ -228,5 +228,74 @@ "Networks": {} }, "Mounts": [] + }, + { + "Id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "Names": [ + "/dockersd_multi_networks" + ], + "Image": "mysql:5.7.29", + "ImageID": "sha256:16ae2f4625ba63a250462bedeece422e741de9f0caf3b1d89fd5b257aca80cd1", + "Command": "mysqld", + "Created": 1616273136, + "Ports": [ + { + "PrivatePort": 3306, + "Type": "tcp" + }, + { + "PrivatePort": 33060, + "Type": "tcp" + } + ], + "Labels": { + "com.docker.compose.project": "dockersd", + "com.docker.compose.service": "mysql", + "com.docker.compose.version": "2.2.2" + }, + "State": "running", + "Status": "Up 40 seconds", + "HostConfig": { + "NetworkMode": "dockersd_private_none" + }, + "NetworkSettings": { + "Networks": { + "dockersd_private": { + "IPAMConfig": null, + "Links": null, + "Aliases": null, + "NetworkID": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "EndpointID": "972d6807997369605ace863af58de6cb90c787a5bf2ffc4105662d393ae539b7", + "Gateway": "172.20.0.1", + "IPAddress": "172.20.0.3", + "IPPrefixLen": 16, + "IPv6Gateway": "", + "GlobalIPv6Address": "", + "GlobalIPv6PrefixLen": 0, + "MacAddress": "02:42:ac:14:00:02", + "DriverOpts": null + }, + "dockersd_private1": { + "IPAMConfig": {}, + "Links": null, + "Aliases": [ + "mysql", + "mysql", + "f9ade4b83199" + ], + "NetworkID": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8", + "EndpointID": "91a98405344ee1cb7d977cafabe634837876651544b32da20a5e0155868e6f5f", + "Gateway": "172.21.0.1", + "IPAddress": "172.21.0.3", + "IPPrefixLen": 24, + "IPv6Gateway": "", + "GlobalIPv6Address": "", + "GlobalIPv6PrefixLen": 0, + "MacAddress": "02:42:ac:15:00:02", + "DriverOpts": null + } + } + }, + "Mounts": [] } ] diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index ff24082e4..6ce3d9c48 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -943,7 +943,9 @@ tls_config: # The host to use if the container is in host networking mode. [ host_networking_host: | default = "localhost" ] -# Match the first network if the container has multiple networks defined, thus avoiding collecting duplicate targets. +# Sort all non-nil networks in ascending order based on network name and +# get the first network if the container has multiple networks defined, +# thus avoiding collecting duplicate targets. [ match_first_network: | default = true ] # Optional filters to limit the discovery process to a subset of available From da28f88910660f852aa37017dcc240afc83359c0 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 27 Aug 2024 10:14:16 +0100 Subject: [PATCH 10/10] Cut release 2.54.1 Signed-off-by: Bryan Boreham --- CHANGELOG.md | 8 ++++++++ VERSION | 2 +- web/ui/module/codemirror-promql/package.json | 4 ++-- web/ui/module/lezer-promql/package.json | 2 +- web/ui/package-lock.json | 14 +++++++------- web/ui/package.json | 2 +- web/ui/react-app/package.json | 4 ++-- 7 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 885749504..c07e5b578 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 2.54.1 / 2024-08-27 + +* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685 +* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654 +* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538 +* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514 +* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605 + ## 2.54.0 / 2024-08-09 Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/). diff --git a/VERSION b/VERSION index 99aed793a..3a40665f5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.54.0 +2.54.1 diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index 6307f12f3..1c459b090 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0", + "version": "0.54.1", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0", + "@prometheus-io/lezer-promql": "0.54.1", "lru-cache": "^7.18.3" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 14d263241..8f9c0b1c8 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0", + "version": "0.54.1", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 0f5db9f6d..e305ee964 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "prometheus-io", - "version": "0.54.0", + "version": "0.54.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "prometheus-io", - "version": "0.54.0", + "version": "0.54.1", "workspaces": [ "react-app", "module/*" @@ -30,10 +30,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0", + "version": "0.54.1", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0", + "@prometheus-io/lezer-promql": "0.54.1", "lru-cache": "^7.18.3" }, "devDependencies": { @@ -69,7 +69,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0", + "version": "0.54.1", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.7.0", @@ -19332,7 +19332,7 @@ }, "react-app": { "name": "@prometheus-io/app", - "version": "0.54.0", + "version": "0.54.1", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -19350,7 +19350,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0", + "@prometheus-io/codemirror-promql": "0.54.1", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", diff --git a/web/ui/package.json b/web/ui/package.json index 01dd71f34..f97d7098b 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -28,5 +28,5 @@ "ts-jest": "^29.2.2", "typescript": "^4.9.5" }, - "version": "0.54.0" + "version": "0.54.1" } diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index 01620531a..ace3ef264 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.54.0", + "version": "0.54.1", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.17.0", @@ -19,7 +19,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0", + "@prometheus-io/codemirror-promql": "0.54.1", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6",