diff --git a/CHANGELOG.md b/CHANGELOG.md index eb519e5d3b..40968a8d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 2.45.5 / 2024-05-02 + +* [BUGFIX] tsdb/agent: ensure that new series get written to WAL on rollback. #12592 +* [BUGFIX] Remote write: Avoid a race condition when applying configuration. #13135 + ## 2.45.4 / 2024-03-18 * [BUGFIX] Remote read: Release querier resources before encoding the results. #13777 diff --git a/VERSION b/VERSION index 09f18239ca..0f0796f195 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.45.4 +2.45.5 diff --git a/go.mod b/go.mod index 773b5955d9..d0fc84247a 100644 --- a/go.mod +++ b/go.mod @@ -72,7 +72,7 @@ require ( google.golang.org/api v0.149.0 google.golang.org/genproto/googleapis/api v0.0.0-20231212172506-995d672761c0 google.golang.org/grpc v1.60.0 - google.golang.org/protobuf v1.31.0 + google.golang.org/protobuf v1.34.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.26.2 @@ -137,7 +137,7 @@ require ( github.com/golang-jwt/jwt/v4 v4.5.0 // indirect github.com/golang/glog v1.1.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic v0.6.9 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/go-querystring v1.1.0 // indirect diff --git a/go.sum b/go.sum index 2556db62d6..568e442da4 100644 --- a/go.sum +++ b/go.sum @@ -330,8 +330,8 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= @@ -1204,8 +1204,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= +google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/storage/remote/storage.go b/storage/remote/storage.go index d01f96b3ba..0d0edae828 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -77,10 +77,7 @@ func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCal } func (s *Storage) Notify() { - for _, q := range s.rws.queues { - // These should all be non blocking - q.watcher.Notify() - } + s.rws.Notify() } // ApplyConfig updates the state as the new config requires. diff --git a/storage/remote/storage_test.go b/storage/remote/storage_test.go index 1bca61fdda..1c83afc7e4 100644 --- a/storage/remote/storage_test.go +++ b/storage/remote/storage_test.go @@ -14,7 +14,9 @@ package remote import ( + "fmt" "net/url" + "sync" "testing" common_config "github.com/prometheus/common/config" @@ -147,3 +149,39 @@ func baseRemoteReadConfig(host string) *config.RemoteReadConfig { } return &cfg } + +// TestWriteStorageApplyConfigsDuringCommit helps detecting races when +// ApplyConfig runs concurrently with Notify +// See https://github.com/prometheus/prometheus/issues/12747 +func TestWriteStorageApplyConfigsDuringCommit(t *testing.T) { + s := NewStorage(nil, nil, nil, t.TempDir(), defaultFlushDeadline, nil) + + var wg sync.WaitGroup + wg.Add(2000) + + start := make(chan struct{}) + for i := 0; i < 1000; i++ { + go func(i int) { + <-start + conf := &config.Config{ + GlobalConfig: config.DefaultGlobalConfig, + RemoteWriteConfigs: []*config.RemoteWriteConfig{ + baseRemoteWriteConfig(fmt.Sprintf("http://test-%d.com", i)), + }, + } + require.NoError(t, s.ApplyConfig(conf)) + wg.Done() + }(i) + } + + for i := 0; i < 1000; i++ { + go func() { + <-start + s.Notify() + wg.Done() + }() + } + + close(start) + wg.Wait() +} diff --git a/storage/remote/write.go b/storage/remote/write.go index 4b0a249014..237f8caa91 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -121,6 +121,16 @@ func (rws *WriteStorage) run() { } } +func (rws *WriteStorage) Notify() { + rws.mtx.Lock() + defer rws.mtx.Unlock() + + for _, q := range rws.queues { + // These should all be non blocking + q.watcher.Notify() + } +} + // ApplyConfig updates the state as the new config requires. // Only stop & create queues which have changes. func (rws *WriteStorage) ApplyConfig(conf *config.Config) error { diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 13cad6bfca..45ef299fd0 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -981,11 +981,25 @@ func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Met // Commit submits the collected samples and purges the batch. func (a *appender) Commit() error { + if err := a.log(); err != nil { + return err + } + + a.clearData() + a.appenderPool.Put(a) + return nil +} + +// log logs all pending data to the WAL. +func (a *appender) log() error { a.mtx.RLock() defer a.mtx.RUnlock() var encoder record.Encoder buf := a.bufPool.Get().([]byte) + defer func() { + a.bufPool.Put(buf) //nolint:staticcheck + }() if len(a.pendingSeries) > 0 { buf = encoder.Series(a.pendingSeries, buf) @@ -1047,12 +1061,11 @@ func (a *appender) Commit() error { } } - //nolint:staticcheck - a.bufPool.Put(buf) - return a.Rollback() + return nil } -func (a *appender) Rollback() error { +// clearData clears all pending data. +func (a *appender) clearData() { a.pendingSeries = a.pendingSeries[:0] a.pendingSamples = a.pendingSamples[:0] a.pendingHistograms = a.pendingHistograms[:0] @@ -1061,6 +1074,39 @@ func (a *appender) Rollback() error { a.sampleSeries = a.sampleSeries[:0] a.histogramSeries = a.histogramSeries[:0] a.floatHistogramSeries = a.floatHistogramSeries[:0] +} + +func (a *appender) Rollback() error { + // Series are created in-memory regardless of rollback. This means we must + // log them to the WAL, otherwise subsequent commits may reference a series + // which was never written to the WAL. + if err := a.logSeries(); err != nil { + return err + } + + a.clearData() a.appenderPool.Put(a) return nil } + +// logSeries logs only pending series records to the WAL. +func (a *appender) logSeries() error { + a.mtx.RLock() + defer a.mtx.RUnlock() + + if len(a.pendingSeries) > 0 { + buf := a.bufPool.Get().([]byte) + defer func() { + a.bufPool.Put(buf) //nolint:staticcheck + }() + + var encoder record.Encoder + buf = encoder.Series(a.pendingSeries, buf) + if err := a.wal.Log(buf); err != nil { + return err + } + buf = buf[:0] + } + + return nil +} diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index e284e1b77e..5c71d548f8 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -333,8 +333,8 @@ func TestRollback(t *testing.T) { } } - // Check that the rollback ensured nothing got stored. - require.Equal(t, 0, walSeriesCount, "series should not have been written to WAL") + // Check that only series get stored after calling Rollback. + require.Equal(t, numSeries*3, walSeriesCount, "series should have been written to WAL") require.Equal(t, 0, walSamplesCount, "samples should not have been written to WAL") require.Equal(t, 0, walExemplarsCount, "exemplars should not have been written to WAL") require.Equal(t, 0, walHistogramCount, "histograms should not have been written to WAL") diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index 0aeb16720e..586a35638f 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.45.4", + "version": "0.45.5", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.45.4", + "@prometheus-io/lezer-promql": "0.45.5", "lru-cache": "^6.0.0" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index ac14615c28..63ab4611ac 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.45.4", + "version": "0.45.5", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 6bc5eb970b..a78400c1fb 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -28,10 +28,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.45.4", + "version": "0.45.5", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.45.4", + "@prometheus-io/lezer-promql": "0.45.5", "lru-cache": "^6.0.0" }, "devDependencies": { @@ -61,7 +61,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.45.4", + "version": "0.45.5", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.2.3", @@ -20765,7 +20765,7 @@ }, "react-app": { "name": "@prometheus-io/app", - "version": "0.45.4", + "version": "0.45.5", "dependencies": { "@codemirror/autocomplete": "^6.7.1", "@codemirror/commands": "^6.2.4", @@ -20783,7 +20783,7 @@ "@lezer/lr": "^1.3.6", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.45.4", + "@prometheus-io/codemirror-promql": "0.45.5", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^7.6.0", @@ -23423,7 +23423,7 @@ "@lezer/lr": "^1.3.6", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.45.4", + "@prometheus-io/codemirror-promql": "0.45.5", "@testing-library/react-hooks": "^7.0.2", "@types/enzyme": "^3.10.13", "@types/flot": "0.0.32", @@ -23487,7 +23487,7 @@ "@lezer/common": "^1.0.3", "@lezer/highlight": "^1.1.6", "@lezer/lr": "^1.3.6", - "@prometheus-io/lezer-promql": "0.45.4", + "@prometheus-io/lezer-promql": "0.45.5", "@types/lru-cache": "^5.1.1", "isomorphic-fetch": "^3.0.0", "lru-cache": "^6.0.0", diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index 860f1fda01..9b144bbc3e 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.45.4", + "version": "0.45.5", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.7.1", @@ -19,7 +19,7 @@ "@lezer/common": "^1.0.3", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.45.4", + "@prometheus-io/codemirror-promql": "0.45.5", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^7.6.0",