2017-04-10 11:59:45 -07:00
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2016-11-15 01:34:25 -08:00
// Package tsdb implements a time series storage for float64 sample data.
package tsdb
import (
2018-12-05 08:34:42 -08:00
"context"
2023-11-16 10:54:41 -08:00
"errors"
2016-12-04 04:16:11 -08:00
"fmt"
2017-02-27 01:46:15 -08:00
"io"
2022-04-27 02:24:36 -07:00
"io/fs"
2018-04-05 05:51:33 -07:00
"math"
2016-12-04 04:16:11 -08:00
"os"
2016-12-08 08:43:10 -08:00
"path/filepath"
2016-12-14 23:31:26 -08:00
"strconv"
2018-04-05 05:51:33 -07:00
"strings"
2016-12-08 08:43:10 -08:00
"sync"
2017-01-06 06:18:06 -08:00
"time"
2016-12-14 23:31:26 -08:00
2021-06-11 09:17:59 -07:00
"github.com/go-kit/log"
"github.com/go-kit/log/level"
2017-05-18 07:09:30 -07:00
"github.com/oklog/ulid"
2016-12-31 00:48:49 -08:00
"github.com/prometheus/client_golang/prometheus"
2022-09-20 10:05:50 -07:00
"go.uber.org/atomic"
2023-07-02 15:16:26 -07:00
"golang.org/x/exp/slices"
2020-10-22 02:00:08 -07:00
"golang.org/x/sync/errgroup"
2021-07-19 21:52:57 -07:00
"github.com/prometheus/prometheus/config"
2021-11-08 06:23:17 -08:00
"github.com/prometheus/prometheus/model/labels"
2020-02-06 07:58:38 -08:00
"github.com/prometheus/prometheus/storage"
2019-08-13 01:34:14 -07:00
"github.com/prometheus/prometheus/tsdb/chunkenc"
2020-11-19 05:00:47 -08:00
"github.com/prometheus/prometheus/tsdb/chunks"
2019-08-13 01:34:14 -07:00
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
"github.com/prometheus/prometheus/tsdb/fileutil"
2020-10-22 02:00:08 -07:00
_ "github.com/prometheus/prometheus/tsdb/goversion" // Load the package into main to make sure minium Go version is met.
2021-11-11 08:45:25 -08:00
"github.com/prometheus/prometheus/tsdb/tsdbutil"
2022-10-10 08:08:46 -07:00
"github.com/prometheus/prometheus/tsdb/wlog"
2016-11-15 01:34:25 -08:00
)
2019-11-21 04:10:25 -08:00
const (
2020-02-11 08:34:09 -08:00
// Default duration of a block in milliseconds.
DefaultBlockDuration = int64 ( 2 * time . Hour / time . Millisecond )
2020-08-10 22:56:08 -07:00
// Block dir suffixes to make deletion and creation operations atomic.
// We decided to do suffixes instead of creating meta.json as last (or delete as first) one,
// because in error case you still can recover meta.json from the block content within local TSDB dir.
// TODO(bwplotka): TSDB can end up with various .tmp files (e.g meta.json.tmp, WAL or segment tmp file. Think
// about removing those too on start to save space. Currently only blocks tmp dirs are removed.
tmpForDeletionBlockDirSuffix = ".tmp-for-deletion"
tmpForCreationBlockDirSuffix = ".tmp-for-creation"
2021-01-09 01:02:26 -08:00
// Pre-2.21 tmp dir suffix, used in clean-up functions.
tmpLegacy = ".tmp"
2020-02-06 07:58:38 -08:00
)
2021-10-22 01:06:44 -07:00
// ErrNotReady is returned if the underlying storage is not ready yet.
var ErrNotReady = errors . New ( "TSDB not ready" )
2019-11-21 04:10:25 -08:00
2022-11-28 09:09:18 -08:00
// DefaultOptions used for the DB. They are reasonable for setups using
2018-03-02 03:12:32 -08:00
// millisecond precision timestamps.
2020-02-06 07:58:38 -08:00
func DefaultOptions ( ) * Options {
return & Options {
remote write 2.0: sync with `main` branch (#13510)
* consoles: exclude iowait and steal from CPU Utilisation
'iowait' and 'steal' indicate specific idle/wait states, which shouldn't
be counted into CPU Utilisation. Also see
https://github.com/prometheus-operator/kube-prometheus/pull/796 and
https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/667.
Per the iostat man page:
%idle
Show the percentage of time that the CPU or CPUs were idle and the
system did not have an outstanding disk I/O request.
%iowait
Show the percentage of time that the CPU or CPUs were idle during
which the system had an outstanding disk I/O request.
%steal
Show the percentage of time spent in involuntary wait by the
virtual CPU or CPUs while the hypervisor was servicing another
virtual processor.
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
* tsdb: shrink txRing with smaller integers
4 billion active transactions ought to be enough for anyone.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: create isolation transaction slice on demand
When Prometheus restarts it creates every series read in from the WAL,
but many of those series will be finished, and never receive any more
samples. By defering allocation of the txRing slice to when it is first
needed, we save 32 bytes per stale series.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* add cluster variable to Overview dashboard
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* promql: simplify Native Histogram arithmetics
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
* Cut 2.49.0-rc.0 (#13270)
* Cut 2.49.0-rc.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Removed the duplicate.
Signed-off-by: bwplotka <bwplotka@gmail.com>
---------
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Add unit protobuf parser
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Go on adding protobuf parsing for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* ui: create a reproduction for https://github.com/prometheus/prometheus/issues/13292
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Get conditional right
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Get VM Scale Set NIC (#13283)
Calling `*armnetwork.InterfacesClient.Get()` doesn't work for Scale Set
VM NIC, because these use a different Resource ID format.
Use `*armnetwork.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface()`
instead. This needs both the scale set name and the instance ID, so
add an `InstanceID` field to the `virtualMachine` struct. `InstanceID`
is empty for a VM that isn't a ScaleSetVM.
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
* Cut v2.49.0-rc.1
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Delete debugging lines, amend error message for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Correct order in error message
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Consider storage.ErrTooOldSample as non-retryable
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
* scrape_test.go: Increase scrape interval in TestScrapeLoopCache to reduce potential flakiness
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Avoid creating string for suffix, consider counters without _total suffix
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* build(deps): bump github.com/prometheus/client_golang
Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.17.0 to 1.18.0.
- [Release notes](https://github.com/prometheus/client_golang/releases)
- [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/client_golang/compare/v1.17.0...v1.18.0)
---
updated-dependencies:
- dependency-name: github.com/prometheus/client_golang
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump actions/setup-node from 3.8.1 to 4.0.1
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3.8.1 to 4.0.1.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d...b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8)
---
updated-dependencies:
- dependency-name: actions/setup-node
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
* scripts: sort file list in embed directive
Otherwise the resulting string depends on find, which afaict depends on
the underlying filesystem. A stable file list make it easier to detect
UI changes in downstreams that need to track UI assets.
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
* Fix DataTableProps['data'] for resultType string
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* Fix handling of scalar and string in isHeatmapData
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* build(deps): bump github.com/influxdata/influxdb
Bumps [github.com/influxdata/influxdb](https://github.com/influxdata/influxdb) from 1.11.2 to 1.11.4.
- [Release notes](https://github.com/influxdata/influxdb/releases)
- [Commits](https://github.com/influxdata/influxdb/compare/v1.11.2...v1.11.4)
---
updated-dependencies:
- dependency-name: github.com/influxdata/influxdb
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump github.com/prometheus/prometheus
Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.48.0 to 0.48.1.
- [Release notes](https://github.com/prometheus/prometheus/releases)
- [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/prometheus/compare/v0.48.0...v0.48.1)
---
updated-dependencies:
- dependency-name: github.com/prometheus/prometheus
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* Bump client_golang to v1.18.0 (#13373)
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Drop old inmemory samples (#13002)
* Drop old inmemory samples
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Avoid copying timeseries when the feature is disabled
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Run gofmt
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Clarify docs
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Add more logging info
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Remove loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* optimize function and add tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Simplify filter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Update help info from metrics
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use metrics to keep track of drop elements during buildWriteRequest
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var in tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* pass time.Now as parameter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Change buildwriterequest during retries
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Revert "Remove loggers"
This reverts commit 54f91dfcae20488944162335ab4ad8be459df1ab.
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use log level debug for loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Fix linter
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove noisy debug-level logs; add 'reason' label to drop metrics
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove accidentally committed files
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Propagate logger to buildWriteRequest to log dropped data
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix docs comment
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Make drop reason more specific
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove unnecessary pass of logger
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Use snake_case for reason label
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix dropped samples metric
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
---------
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
* fix(discovery): allow requireUpdate util to timeout in discovery/file/file_test.go.
The loop ran indefinitely if the condition isn't met.
Before, each iteration created a new timer channel which was always outpaced by
the other timer channel with smaller duration.
minor detail: There was a memory leak: resources of the ~10 previous timers were
constantly kept. With the fix, we may keep the resources of one timer around for defaultWait
but this isn't worth the changes to make it right.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Merge pull request #13371 from kevinmingtarja/fix-isHeatmapData
ui: fix handling of scalar and string in isHeatmapData
* tsdb/{index,compact}: allow using custom postings encoding format (#13242)
* tsdb/{index,compact}: allow using custom postings encoding format
We would like to experiment with a different postings encoding format in
Thanos so in this change I am proposing adding another argument to
`NewWriter` which would allow users to change the format if needed.
Also, wire the leveled compactor so that it would be possible to change
the format there too.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb/compact: use a struct for leveled compactor options
As discussed on Slack, let's use a struct for the options in leveled
compactor.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: make changes after Bryan's review
- Make changes less intrusive
- Turn the postings encoder type into a function
- Add NewWriterWithEncoder()
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
---------
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0-rc.2
Signed-off-by: bwplotka <bwplotka@gmail.com>
* build(deps): bump actions/setup-go from 3.5.0 to 5.0.0 in /scripts (#13362)
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3.5.0 to 5.0.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/6edd4406fa81c3da01a34fa6f6343087c207a568...0c52d547c9bc32b1aa3301fd7a9cb496313a4491)
---
updated-dependencies:
- dependency-name: actions/setup-go
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump github/codeql-action from 2.22.8 to 3.22.12 (#13358)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.22.8 to 3.22.12.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/407ffafae6a767df3e0230c3df91b6443ae8df75...012739e5082ff0c22ca6d6ab32e07c36df03c4a4)
---
updated-dependencies:
- dependency-name: github/codeql-action
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* put @nexucis has a release shepherd (#13383)
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
* Add analyze histograms command to promtool (#12331)
Add `query analyze` command to promtool
This command analyzes the buckets of classic and native histograms,
based on data queried from the Prometheus query API, i.e. it
doesn't require direct access to the TSDB files.
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* included instance in all necessary descriptions
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* tsdb/compact: fix passing merge func
Fixing a very small logical problem I've introduced :(.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: add enable overlapping compaction
This functionality is needed in downstream projects because they have a
separate component that does compaction.
Upstreaming
https://github.com/grafana/mimir-prometheus/blob/7c8e9a2a76fc729e9078889782928b2fdfe240e9/tsdb/compact.go#L323-L325.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* promtool: allow setting multiple matchers to "promtool tsdb dump" command. (#13296)
Conditions are ANDed inside the same matcher but matchers are ORed
Including unit tests for "promtool tsdb dump".
Refactor some matchers scraping utils.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Fixed changelog
Signed-off-by: bwplotka <bwplotka@gmail.com>
* tsdb/main: wire "EnableOverlappingCompaction" to tsdb.Options (#13398)
This added the https://github.com/prometheus/prometheus/pull/13393
"EnableOverlappingCompaction" parameter to the compactor code but not to
the tsdb.Options. I forgot about that. Add it to `tsdb.Options` too and
set it to `true` in Prometheus.
Copy/paste the description from
https://github.com/prometheus/prometheus/pull/13393#issuecomment-1891787986
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Issue #13268: fix quality value in accept header
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
* Cut 2.49.1 with scrape q= bugfix.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Cut 2.49.1 web package.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Restore more efficient version of NewPossibleNonCounterInfo annotation (#13022)
Restore more efficient version of NewPossibleNonCounterInfo annotation
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Fix regressions introduced by #13242
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* fix slice copy in 1.20 (#13389)
The slices package is added to the standard library in Go 1.21;
we need to import from the exp area to maintain compatibility with Go 1.20.
Signed-off-by: tyltr <tylitianrui@126.com>
* Docs: Query Basics: link to rate (#10538)
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
* chore(kubernetes): check preconditions earlier and avoid unnecessary checks or iterations
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Examples: link to `rate` for new users (#10535)
* Examples: link to `rate` for new users
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
* promql: use natural sort in sort_by_label and sort_by_label_desc (#13411)
These functions are intended for humans, as robots can already sort the results
however they please. Humans like things sorted "naturally":
* https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
A similar thing has been done to Grafana, which is also used by humans:
* https://github.com/grafana/grafana/pull/78024
* https://github.com/grafana/grafana/pull/78494
Signed-off-by: Ivan Babrou <github@ivan.computer>
* TestLabelValuesWithMatchers: Add test case
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* remove obsolete build tag
Signed-off-by: tyltr <tylitianrui@126.com>
* Upgrade some golang dependencies for resty 2.11
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
* Native Histograms: support `native_histogram_min_bucket_factor` in scrape_config (#13222)
Native Histograms: support native_histogram_min_bucket_factor in scrape_config
---------
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram (#13392)
Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Minor fixes to otlp vendor update script
Signed-off-by: Goutham <gouthamve@gmail.com>
* build(deps): bump github.com/hetznercloud/hcloud-go/v2
Bumps [github.com/hetznercloud/hcloud-go/v2](https://github.com/hetznercloud/hcloud-go) from 2.4.0 to 2.6.0.
- [Release notes](https://github.com/hetznercloud/hcloud-go/releases)
- [Changelog](https://github.com/hetznercloud/hcloud-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hetznercloud/hcloud-go/compare/v2.4.0...v2.6.0)
---
updated-dependencies:
- dependency-name: github.com/hetznercloud/hcloud-go/v2
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* Enhanced visibility for `promtool test rules` with JSON colored formatting (#13342)
* Added diff flag for unit test to improvise readability & debugging
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Removed blank spaces
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed linting error
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Added cli flags to documentation
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Revert unrrelated linting fixes
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed review suggestions
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Cleanup
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
---------
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* storage: skip merging when no remote storage configured
Prometheus is hard-coded to use a fanout storage between TSDB and
a remote storage which by default is empty.
This change detects the empty storage and skips merging between
result sets, which would make `Select()` sort results.
Bottom line: we skip a sort unless there really is some remote storage
configured.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Remove csmarchbanks from remote write owners (#13432)
I have not had the time to keep up with remote write and have no plans
to work on it in the near future so I am withdrawing my maintainership
of that part of the codebase. I continue to focus on client_python.
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
* add more context cancellation check at evaluation time
Signed-off-by: Ben Ye <benye@amazon.com>
* Optimize label values with matchers by taking shortcuts (#13426)
Don't calculate postings beforehand: we may not need them. If all
matchers are for the requested label, we can just filter its values.
Also, if there are no values at all, no need to run any kind of
logic.
Also add more labelValuesWithMatchers benchmarks
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Add automatic memory limit handling
Enable automatic detection of memory limits and configure GOMEMLIMIT to
match.
* Also includes a flag to allow controlling the reserved ratio.
Signed-off-by: SuperQ <superq@gmail.com>
* Update OSSF badge link (#13433)
Provide a more user friendly interface
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
* SD Managers taking over responsibility for registration of debug metrics (#13375)
SD Managers take over responsibility for SD metrics registration
---------
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Optimize histogram iterators (#13340)
Optimize histogram iterators
Histogram iterators allocate new objects in the AtHistogram and
AtFloatHistogram methods, which makes calculating rates over long
ranges expensive.
In #13215 we allowed an existing object to be reused
when converting an integer histogram to a float histogram. This commit follows
the same idea and allows injecting an existing object in the AtHistogram and
AtFloatHistogram methods. When the injected value is nil, iterators allocate
new histograms, otherwise they populate and return the injected object.
The commit also adds a CopyTo method to Histogram and FloatHistogram which
is used in the BufferedIterator to overwrite items in the ring instead of making
new copies.
Note that a specialized HPoint pool is needed for all of this to work
(`matrixSelectorHPool`).
---------
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
* doc: Mark `mad_over_time` as experimental (#13440)
We forgot to do that in
https://github.com/prometheus/prometheus/pull/13059
Signed-off-by: beorn7 <beorn@grafana.com>
* Change metric label for Puppetdb from 'http' to 'puppetdb'
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
* mirror metrics.proto change & generate code
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
* TestHeadLabelValuesWithMatchers: Add test case (#13414)
Add test case to TestHeadLabelValuesWithMatchers, while fixing a couple
of typos in other test cases. Also enclosing some implicit sub-tests in a
`t.Run` call to make them explicitly sub-tests.
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* update all go dependencies (#13438)
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
* build(deps): bump the k8s-io group with 2 updates (#13454)
Bumps the k8s-io group with 2 updates: [k8s.io/api](https://github.com/kubernetes/api) and [k8s.io/client-go](https://github.com/kubernetes/client-go).
Updates `k8s.io/api` from 0.28.4 to 0.29.1
- [Commits](https://github.com/kubernetes/api/compare/v0.28.4...v0.29.1)
Updates `k8s.io/client-go` from 0.28.4 to 0.29.1
- [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md)
- [Commits](https://github.com/kubernetes/client-go/compare/v0.28.4...v0.29.1)
---
updated-dependencies:
- dependency-name: k8s.io/api
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
- dependency-name: k8s.io/client-go
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump the go-opentelemetry-io group with 1 update (#13453)
Bumps the go-opentelemetry-io group with 1 update: [go.opentelemetry.io/collector/semconv](https://github.com/open-telemetry/opentelemetry-collector).
Updates `go.opentelemetry.io/collector/semconv` from 0.92.0 to 0.93.0
- [Release notes](https://github.com/open-telemetry/opentelemetry-collector/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-collector/blob/main/CHANGELOG-API.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-collector/compare/v0.92.0...v0.93.0)
---
updated-dependencies:
- dependency-name: go.opentelemetry.io/collector/semconv
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: go-opentelemetry-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump actions/upload-artifact from 3.1.3 to 4.0.0 (#13355)
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)
---
updated-dependencies:
- dependency-name: actions/upload-artifact
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump bufbuild/buf-push-action (#13357)
Bumps [bufbuild/buf-push-action](https://github.com/bufbuild/buf-push-action) from 342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 to a654ff18effe4641ebea4a4ce242c49800728459.
- [Release notes](https://github.com/bufbuild/buf-push-action/releases)
- [Commits](https://github.com/bufbuild/buf-push-action/compare/342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1...a654ff18effe4641ebea4a4ce242c49800728459)
---
updated-dependencies:
- dependency-name: bufbuild/buf-push-action
dependency-type: direct:production
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Labels: Add DropMetricName function, used in PromQL (#13446)
This function is called very frequently when executing PromQL functions,
and we can do it much more efficiently inside Labels.
In the common case that `__name__` comes first in the labels, we simply
re-point to start at the next label, which is nearly free.
`DropMetricName` is now so cheap I removed the cache - benchmarks show
everything still goes faster.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: simplify internal series delete function (#13261)
Lifting an optimisation from Agent code, `seriesHashmap.del` can use
the unique series reference, doesn't need to check Labels.
Also streamline the logic for deleting from `unique` and `conflicts` maps,
and add some comments to help the next person.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* otlptranslator/update-copy.sh: Fix sed command lines
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Rollback k8s.io requirements (#13462)
Rollback k8s.io Go modules to v0.28.6 to avoid forcing upgrade of Go to
1.21. This allows us to keep compatibility with the currently supported
upstream Go releases.
Signed-off-by: SuperQ <superq@gmail.com>
* Make update-copy.sh work for both OSX and GNU sed
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Name @beorn7 and @krajorama as maintainers for native histograms
I have been the de-facto maintainer for native histograms from the
beginning. So let's put this into MAINTAINERS.md.
In addition, I hereby proposose George Krajcsovits AKA Krajo as a
co-maintainer. He has contributed a lot of native histogram code, but
more importantly, he has contributed substantially to reviewing other
contributors' native histogram code, up to a point where I was merely
rubberstamping the PRs he had already reviewed. I'm confident that he
is ready to to be granted commit rights as outlined in the
"Maintainers" section of the governance:
https://prometheus.io/governance/#maintainers
According to the same section of the governance, I will announce the
proposed change on the developers mailing list and will give some time
for lazy consensus before merging this PR.
Signed-off-by: beorn7 <beorn@grafana.com>
* ui/fix: correct url handling for stacked graphs (#13460)
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
* tsdb: use cheaper Mutex on series
Mutex is 8 bytes; RWMutex is 24 bytes and much more complicated. Since
`RLock` is only used in two places, `UpdateMetadata` and `Delete`,
neither of which are hotspots, we should use the cheaper one.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Fix last_over_time for native histograms
The last_over_time retains a histogram sample without making a copy.
This sample is now coming from the buffered iterator used for windowing functions,
and can be reused for reading subsequent samples as the iterator progresses.
I would propose copying the sample in the last_over_time function, similar to
how it is done for rate, sum_over_time and others.
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
* Implementation
NOTE:
Rebased from main after refactor in #13014
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Add feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactor concurrency control
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Optimising dependencies/dependents funcs to not produce new slices each request
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Rename flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring for performance, and to allow controller to be overridden
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Block until all rules, both sync & async, have completed evaluating
Updated & added tests
Review feedback nits
Return empty map if not indeterminate
Use highWatermark to track inflight requests counter
Appease the linter
Clarify feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Fix typo in CLI flag description
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Fixed auto-generated doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improve doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Simplify the design to update concurrency controller once the rule evaluation has done
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Add more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Added more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improved RuleConcurrencyController interface doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Introduced sequentialRuleEvalController
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Remove superfluous nil check in Group.metrics
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* api: Serialize discovered and target labels into JSON directly (#13469)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* api: Serialize discovered labels into JSON directly in dropped targets (#13484)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* Add ShardedPostings() support to TSDB (#10421)
This PR is a reference implementation of the proposal described in #10420.
In addition to what described in #10420, in this PR I've introduced labels.StableHash(). The idea is to offer an hashing function which doesn't change over time, and that's used by query sharding in order to get a stable behaviour over time. The implementation of labels.StableHash() is the hashing function used by Prometheus before stringlabels, and what's used by Grafana Mimir for query sharding (because built before stringlabels was a thing).
Follow up work
As mentioned in #10420, if this PR is accepted I'm also open to upload another foundamental piece used by Grafana Mimir query sharding to accelerate the query execution: an optional, configurable and fast in-memory cache for the series hashes.
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* storage/remote: document why two benchmarks are skipped
One was silently doing nothing; one was doing something but the work
didn't go up linearly with iteration count.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Pod status changes not discovered by Kube Endpoints SD (#13337)
* fix(discovery/kubernetes/endpoints): react to changes on Pods because some modifications can occur on them without triggering an update on the related Endpoints (The Pod phase changing from Pending to Running e.g.).
---------
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
* Small improvements, add const, remove copypasta (#8106)
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
* Proposal to improve FPointSlice and HPointSlice allocation. (#13448)
* Reusing points slice from previous series when the slice is under utilized
* Adding comments on the bench test
Signed-off-by: Alan Protasio <alanprot@gmail.com>
* lint
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
* go mod tidy
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
---------
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
Signed-off-by: Erik Sommer <ersotech@posteo.de>
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
Signed-off-by: bwplotka <bwplotka@gmail.com>
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Signed-off-by: Marco Pracucci <marco@pracucci.com>
Signed-off-by: tyltr <tylitianrui@126.com>
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Signed-off-by: Ivan Babrou <github@ivan.computer>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Signed-off-by: Goutham <gouthamve@gmail.com>
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
Signed-off-by: Ben Ye <benye@amazon.com>
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Signed-off-by: SuperQ <superq@gmail.com>
Signed-off-by: Ben Kochie <superq@gmail.com>
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Signed-off-by: beorn7 <beorn@grafana.com>
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
Signed-off-by: Alan Protasio <alanprot@gmail.com>
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
Co-authored-by: Julian Wiedmann <jwi@linux.ibm.com>
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
Co-authored-by: Erik Sommer <ersotech@posteo.de>
Co-authored-by: Linas Medziunas <linas.medziunas@gmail.com>
Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
Co-authored-by: Arianna Vespri <arianna.vespri@yahoo.it>
Co-authored-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: daniel-resdiary <109083091+daniel-resdiary@users.noreply.github.com>
Co-authored-by: Daniel Kerbel <nmdanny@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jan Fajerski <jfajersk@redhat.com>
Co-authored-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Marc Tudurí <marctc@protonmail.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Co-authored-by: Augustin Husson <husson.augustin@gmail.com>
Co-authored-by: Björn Rabenstein <beorn@grafana.com>
Co-authored-by: zenador <zenador@users.noreply.github.com>
Co-authored-by: gotjosh <josue.abreu@gmail.com>
Co-authored-by: Ben Kochie <superq@gmail.com>
Co-authored-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Co-authored-by: Marco Pracucci <marco@pracucci.com>
Co-authored-by: tyltr <tylitianrui@126.com>
Co-authored-by: Ted Robertson <10043369+tredondo@users.noreply.github.com>
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
Co-authored-by: Matthias Loibl <mail@matthiasloibl.com>
Co-authored-by: Ivan Babrou <github@ivan.computer>
Co-authored-by: Arve Knudsen <arve.knudsen@gmail.com>
Co-authored-by: Israel Blancas <iblancasa@gmail.com>
Co-authored-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Goutham <gouthamve@gmail.com>
Co-authored-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Co-authored-by: Chris Marchbanks <csmarchbanks@gmail.com>
Co-authored-by: Ben Ye <benye@amazon.com>
Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Co-authored-by: Paulin Todev <paulin.todev@gmail.com>
Co-authored-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
Co-authored-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
Co-authored-by: Mikhail Fesenko <proggga@gmail.com>
Co-authored-by: Alan Protasio <alanprot@gmail.com>
2024-02-02 10:38:50 -08:00
WALSegmentSize : wlog . DefaultSegmentSize ,
MaxBlockChunkSegmentSize : chunks . DefaultChunkSegmentSize ,
RetentionDuration : int64 ( 15 * 24 * time . Hour / time . Millisecond ) ,
MinBlockDuration : DefaultBlockDuration ,
MaxBlockDuration : DefaultBlockDuration ,
NoLockfile : false ,
SamplesPerChunk : DefaultSamplesPerChunk ,
WALCompression : wlog . CompressionNone ,
StripeSize : DefaultStripeSize ,
HeadChunksWriteBufferSize : chunks . DefaultWriteBufferSize ,
IsolationDisabled : defaultIsolationDisabled ,
HeadChunksWriteQueueSize : chunks . DefaultWriteQueueSize ,
OutOfOrderCapMax : DefaultOutOfOrderCapMax ,
EnableOverlappingCompaction : true ,
EnableSharding : false ,
2020-02-06 07:58:38 -08:00
}
2016-11-15 01:34:25 -08:00
}
// Options of the DB storage.
type Options struct {
2019-03-25 16:38:12 -07:00
// Segments (wal files) max size.
// WALSegmentSize = 0, segment size is default size.
// WALSegmentSize > 0, segment size is WALSegmentSize.
// WALSegmentSize < 0, wal is disabled.
2020-02-11 08:34:09 -08:00
WALSegmentSize int
2017-01-28 23:11:47 -08:00
2021-04-15 01:55:01 -07:00
// MaxBlockChunkSegmentSize is the max size of block chunk segment files.
// MaxBlockChunkSegmentSize = 0, chunk segment size is default size.
// MaxBlockChunkSegmentSize > 0, chunk segment size is MaxBlockChunkSegmentSize.
MaxBlockChunkSegmentSize int64
2017-02-09 17:54:26 -08:00
// Duration of persisted data to keep.
2020-02-11 08:34:09 -08:00
// Unit agnostic as long as unit is consistent with MinBlockDuration and MaxBlockDuration.
// Typically it is in milliseconds.
RetentionDuration int64
2017-02-09 17:54:26 -08:00
2019-01-16 02:03:52 -08:00
// Maximum number of bytes in blocks to be retained.
// 0 or less means disabled.
// NOTE: For proper storage calculations need to consider
// the size of the WAL folder which is not added when calculating
// the current size of the database.
2020-02-11 08:34:09 -08:00
MaxBytes int64
2017-01-28 23:11:47 -08:00
2017-05-09 03:52:47 -07:00
// NoLockfile disables creation and consideration of a lock file.
NoLockfile bool
2019-02-26 11:50:37 -08:00
2023-07-11 05:57:57 -07:00
// WALCompression configures the compression type to use on records in the WAL.
WALCompression wlog . CompressionType
2020-01-29 23:12:43 -08:00
2023-03-07 08:41:33 -08:00
// Maximum number of CPUs that can simultaneously processes WAL replay.
// If it is <=0, then GOMAXPROCS is used.
WALReplayConcurrency int
2020-02-17 03:45:11 -08:00
// StripeSize is the size in entries of the series hash map. Reducing the size will save memory but impact performance.
2020-01-29 23:12:43 -08:00
StripeSize int
2016-11-15 01:34:25 -08:00
2020-02-06 07:58:38 -08:00
// The timestamp range of head blocks after which they get persisted.
// It's the minimum duration of any persisted block.
2020-02-11 08:34:09 -08:00
// Unit agnostic as long as unit is consistent with RetentionDuration and MaxBlockDuration.
// Typically it is in milliseconds.
MinBlockDuration int64
2020-02-06 07:58:38 -08:00
// The maximum timestamp range of compacted blocks.
2020-02-11 08:34:09 -08:00
// Unit agnostic as long as unit is consistent with MinBlockDuration and RetentionDuration.
// Typically it is in milliseconds.
MaxBlockDuration int64
2020-05-20 06:22:08 -07:00
2020-11-19 05:00:47 -08:00
// HeadChunksWriteBufferSize configures the write buffer size used by the head chunks mapper.
HeadChunksWriteBufferSize int
2022-01-10 05:36:45 -08:00
// HeadChunksWriteQueueSize configures the size of the chunk write queue used in the head chunks mapper.
HeadChunksWriteQueueSize int
2023-04-12 09:48:35 -07:00
// SamplesPerChunk configures the target number of samples per chunk.
SamplesPerChunk int
2020-05-20 06:22:08 -07:00
// SeriesLifecycleCallback specifies a list of callbacks that will be called during a lifecycle of a series.
// It is always a no-op in Prometheus and mainly meant for external users who import TSDB.
SeriesLifecycleCallback SeriesLifecycleCallback
2020-07-22 08:19:33 -07:00
// BlocksToDelete is a function which returns the blocks which can be deleted.
// It is always the default time and size based retention in Prometheus and
// mainly meant for external users who import TSDB.
BlocksToDelete BlocksToDeleteFunc
2021-03-16 02:47:45 -07:00
2021-11-19 02:11:32 -08:00
// Enables the in memory exemplar storage.
2021-07-19 21:52:57 -07:00
EnableExemplarStorage bool
2021-08-06 09:51:01 -07:00
// Enables the snapshot of in-memory chunks on shutdown. This makes restarts faster.
EnableMemorySnapshotOnShutdown bool
2021-03-16 02:47:45 -07:00
// MaxExemplars sets the size, in # of exemplars stored, of the single circular buffer used to store exemplars in memory.
// See tsdb/exemplar.go, specifically the CircularExemplarStorage struct and it's constructor NewCircularExemplarStorage.
2021-07-19 21:52:57 -07:00
MaxExemplars int64
2021-11-19 02:11:32 -08:00
// Disables isolation between reads and in-flight appends.
IsolationDisabled bool
2022-09-20 10:05:50 -07:00
2022-09-14 05:08:34 -07:00
// EnableNativeHistograms enables the ingestion of native histograms.
EnableNativeHistograms bool
2022-10-05 13:14:49 -07:00
2022-09-20 10:05:50 -07:00
// OutOfOrderTimeWindow specifies how much out of order is allowed, if any.
// This can change during run-time, so this value from here should only be used
// while initialising.
OutOfOrderTimeWindow int64
// OutOfOrderCapMax is maximum capacity for OOO chunks (in samples).
// If it is <=0, the default value is assumed.
OutOfOrderCapMax int64
remote write 2.0: sync with `main` branch (#13510)
* consoles: exclude iowait and steal from CPU Utilisation
'iowait' and 'steal' indicate specific idle/wait states, which shouldn't
be counted into CPU Utilisation. Also see
https://github.com/prometheus-operator/kube-prometheus/pull/796 and
https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/667.
Per the iostat man page:
%idle
Show the percentage of time that the CPU or CPUs were idle and the
system did not have an outstanding disk I/O request.
%iowait
Show the percentage of time that the CPU or CPUs were idle during
which the system had an outstanding disk I/O request.
%steal
Show the percentage of time spent in involuntary wait by the
virtual CPU or CPUs while the hypervisor was servicing another
virtual processor.
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
* tsdb: shrink txRing with smaller integers
4 billion active transactions ought to be enough for anyone.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: create isolation transaction slice on demand
When Prometheus restarts it creates every series read in from the WAL,
but many of those series will be finished, and never receive any more
samples. By defering allocation of the txRing slice to when it is first
needed, we save 32 bytes per stale series.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* add cluster variable to Overview dashboard
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* promql: simplify Native Histogram arithmetics
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
* Cut 2.49.0-rc.0 (#13270)
* Cut 2.49.0-rc.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Removed the duplicate.
Signed-off-by: bwplotka <bwplotka@gmail.com>
---------
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Add unit protobuf parser
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Go on adding protobuf parsing for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* ui: create a reproduction for https://github.com/prometheus/prometheus/issues/13292
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Get conditional right
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Get VM Scale Set NIC (#13283)
Calling `*armnetwork.InterfacesClient.Get()` doesn't work for Scale Set
VM NIC, because these use a different Resource ID format.
Use `*armnetwork.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface()`
instead. This needs both the scale set name and the instance ID, so
add an `InstanceID` field to the `virtualMachine` struct. `InstanceID`
is empty for a VM that isn't a ScaleSetVM.
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
* Cut v2.49.0-rc.1
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Delete debugging lines, amend error message for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Correct order in error message
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Consider storage.ErrTooOldSample as non-retryable
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
* scrape_test.go: Increase scrape interval in TestScrapeLoopCache to reduce potential flakiness
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Avoid creating string for suffix, consider counters without _total suffix
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* build(deps): bump github.com/prometheus/client_golang
Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.17.0 to 1.18.0.
- [Release notes](https://github.com/prometheus/client_golang/releases)
- [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/client_golang/compare/v1.17.0...v1.18.0)
---
updated-dependencies:
- dependency-name: github.com/prometheus/client_golang
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump actions/setup-node from 3.8.1 to 4.0.1
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3.8.1 to 4.0.1.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d...b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8)
---
updated-dependencies:
- dependency-name: actions/setup-node
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
* scripts: sort file list in embed directive
Otherwise the resulting string depends on find, which afaict depends on
the underlying filesystem. A stable file list make it easier to detect
UI changes in downstreams that need to track UI assets.
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
* Fix DataTableProps['data'] for resultType string
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* Fix handling of scalar and string in isHeatmapData
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* build(deps): bump github.com/influxdata/influxdb
Bumps [github.com/influxdata/influxdb](https://github.com/influxdata/influxdb) from 1.11.2 to 1.11.4.
- [Release notes](https://github.com/influxdata/influxdb/releases)
- [Commits](https://github.com/influxdata/influxdb/compare/v1.11.2...v1.11.4)
---
updated-dependencies:
- dependency-name: github.com/influxdata/influxdb
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump github.com/prometheus/prometheus
Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.48.0 to 0.48.1.
- [Release notes](https://github.com/prometheus/prometheus/releases)
- [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/prometheus/compare/v0.48.0...v0.48.1)
---
updated-dependencies:
- dependency-name: github.com/prometheus/prometheus
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* Bump client_golang to v1.18.0 (#13373)
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Drop old inmemory samples (#13002)
* Drop old inmemory samples
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Avoid copying timeseries when the feature is disabled
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Run gofmt
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Clarify docs
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Add more logging info
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Remove loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* optimize function and add tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Simplify filter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Update help info from metrics
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use metrics to keep track of drop elements during buildWriteRequest
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var in tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* pass time.Now as parameter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Change buildwriterequest during retries
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Revert "Remove loggers"
This reverts commit 54f91dfcae20488944162335ab4ad8be459df1ab.
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use log level debug for loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Fix linter
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove noisy debug-level logs; add 'reason' label to drop metrics
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove accidentally committed files
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Propagate logger to buildWriteRequest to log dropped data
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix docs comment
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Make drop reason more specific
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove unnecessary pass of logger
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Use snake_case for reason label
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix dropped samples metric
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
---------
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
* fix(discovery): allow requireUpdate util to timeout in discovery/file/file_test.go.
The loop ran indefinitely if the condition isn't met.
Before, each iteration created a new timer channel which was always outpaced by
the other timer channel with smaller duration.
minor detail: There was a memory leak: resources of the ~10 previous timers were
constantly kept. With the fix, we may keep the resources of one timer around for defaultWait
but this isn't worth the changes to make it right.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Merge pull request #13371 from kevinmingtarja/fix-isHeatmapData
ui: fix handling of scalar and string in isHeatmapData
* tsdb/{index,compact}: allow using custom postings encoding format (#13242)
* tsdb/{index,compact}: allow using custom postings encoding format
We would like to experiment with a different postings encoding format in
Thanos so in this change I am proposing adding another argument to
`NewWriter` which would allow users to change the format if needed.
Also, wire the leveled compactor so that it would be possible to change
the format there too.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb/compact: use a struct for leveled compactor options
As discussed on Slack, let's use a struct for the options in leveled
compactor.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: make changes after Bryan's review
- Make changes less intrusive
- Turn the postings encoder type into a function
- Add NewWriterWithEncoder()
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
---------
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0-rc.2
Signed-off-by: bwplotka <bwplotka@gmail.com>
* build(deps): bump actions/setup-go from 3.5.0 to 5.0.0 in /scripts (#13362)
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3.5.0 to 5.0.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/6edd4406fa81c3da01a34fa6f6343087c207a568...0c52d547c9bc32b1aa3301fd7a9cb496313a4491)
---
updated-dependencies:
- dependency-name: actions/setup-go
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump github/codeql-action from 2.22.8 to 3.22.12 (#13358)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.22.8 to 3.22.12.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/407ffafae6a767df3e0230c3df91b6443ae8df75...012739e5082ff0c22ca6d6ab32e07c36df03c4a4)
---
updated-dependencies:
- dependency-name: github/codeql-action
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* put @nexucis has a release shepherd (#13383)
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
* Add analyze histograms command to promtool (#12331)
Add `query analyze` command to promtool
This command analyzes the buckets of classic and native histograms,
based on data queried from the Prometheus query API, i.e. it
doesn't require direct access to the TSDB files.
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* included instance in all necessary descriptions
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* tsdb/compact: fix passing merge func
Fixing a very small logical problem I've introduced :(.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: add enable overlapping compaction
This functionality is needed in downstream projects because they have a
separate component that does compaction.
Upstreaming
https://github.com/grafana/mimir-prometheus/blob/7c8e9a2a76fc729e9078889782928b2fdfe240e9/tsdb/compact.go#L323-L325.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* promtool: allow setting multiple matchers to "promtool tsdb dump" command. (#13296)
Conditions are ANDed inside the same matcher but matchers are ORed
Including unit tests for "promtool tsdb dump".
Refactor some matchers scraping utils.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Fixed changelog
Signed-off-by: bwplotka <bwplotka@gmail.com>
* tsdb/main: wire "EnableOverlappingCompaction" to tsdb.Options (#13398)
This added the https://github.com/prometheus/prometheus/pull/13393
"EnableOverlappingCompaction" parameter to the compactor code but not to
the tsdb.Options. I forgot about that. Add it to `tsdb.Options` too and
set it to `true` in Prometheus.
Copy/paste the description from
https://github.com/prometheus/prometheus/pull/13393#issuecomment-1891787986
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Issue #13268: fix quality value in accept header
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
* Cut 2.49.1 with scrape q= bugfix.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Cut 2.49.1 web package.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Restore more efficient version of NewPossibleNonCounterInfo annotation (#13022)
Restore more efficient version of NewPossibleNonCounterInfo annotation
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Fix regressions introduced by #13242
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* fix slice copy in 1.20 (#13389)
The slices package is added to the standard library in Go 1.21;
we need to import from the exp area to maintain compatibility with Go 1.20.
Signed-off-by: tyltr <tylitianrui@126.com>
* Docs: Query Basics: link to rate (#10538)
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
* chore(kubernetes): check preconditions earlier and avoid unnecessary checks or iterations
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Examples: link to `rate` for new users (#10535)
* Examples: link to `rate` for new users
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
* promql: use natural sort in sort_by_label and sort_by_label_desc (#13411)
These functions are intended for humans, as robots can already sort the results
however they please. Humans like things sorted "naturally":
* https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
A similar thing has been done to Grafana, which is also used by humans:
* https://github.com/grafana/grafana/pull/78024
* https://github.com/grafana/grafana/pull/78494
Signed-off-by: Ivan Babrou <github@ivan.computer>
* TestLabelValuesWithMatchers: Add test case
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* remove obsolete build tag
Signed-off-by: tyltr <tylitianrui@126.com>
* Upgrade some golang dependencies for resty 2.11
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
* Native Histograms: support `native_histogram_min_bucket_factor` in scrape_config (#13222)
Native Histograms: support native_histogram_min_bucket_factor in scrape_config
---------
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram (#13392)
Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Minor fixes to otlp vendor update script
Signed-off-by: Goutham <gouthamve@gmail.com>
* build(deps): bump github.com/hetznercloud/hcloud-go/v2
Bumps [github.com/hetznercloud/hcloud-go/v2](https://github.com/hetznercloud/hcloud-go) from 2.4.0 to 2.6.0.
- [Release notes](https://github.com/hetznercloud/hcloud-go/releases)
- [Changelog](https://github.com/hetznercloud/hcloud-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hetznercloud/hcloud-go/compare/v2.4.0...v2.6.0)
---
updated-dependencies:
- dependency-name: github.com/hetznercloud/hcloud-go/v2
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* Enhanced visibility for `promtool test rules` with JSON colored formatting (#13342)
* Added diff flag for unit test to improvise readability & debugging
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Removed blank spaces
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed linting error
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Added cli flags to documentation
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Revert unrrelated linting fixes
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed review suggestions
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Cleanup
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
---------
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* storage: skip merging when no remote storage configured
Prometheus is hard-coded to use a fanout storage between TSDB and
a remote storage which by default is empty.
This change detects the empty storage and skips merging between
result sets, which would make `Select()` sort results.
Bottom line: we skip a sort unless there really is some remote storage
configured.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Remove csmarchbanks from remote write owners (#13432)
I have not had the time to keep up with remote write and have no plans
to work on it in the near future so I am withdrawing my maintainership
of that part of the codebase. I continue to focus on client_python.
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
* add more context cancellation check at evaluation time
Signed-off-by: Ben Ye <benye@amazon.com>
* Optimize label values with matchers by taking shortcuts (#13426)
Don't calculate postings beforehand: we may not need them. If all
matchers are for the requested label, we can just filter its values.
Also, if there are no values at all, no need to run any kind of
logic.
Also add more labelValuesWithMatchers benchmarks
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Add automatic memory limit handling
Enable automatic detection of memory limits and configure GOMEMLIMIT to
match.
* Also includes a flag to allow controlling the reserved ratio.
Signed-off-by: SuperQ <superq@gmail.com>
* Update OSSF badge link (#13433)
Provide a more user friendly interface
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
* SD Managers taking over responsibility for registration of debug metrics (#13375)
SD Managers take over responsibility for SD metrics registration
---------
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Optimize histogram iterators (#13340)
Optimize histogram iterators
Histogram iterators allocate new objects in the AtHistogram and
AtFloatHistogram methods, which makes calculating rates over long
ranges expensive.
In #13215 we allowed an existing object to be reused
when converting an integer histogram to a float histogram. This commit follows
the same idea and allows injecting an existing object in the AtHistogram and
AtFloatHistogram methods. When the injected value is nil, iterators allocate
new histograms, otherwise they populate and return the injected object.
The commit also adds a CopyTo method to Histogram and FloatHistogram which
is used in the BufferedIterator to overwrite items in the ring instead of making
new copies.
Note that a specialized HPoint pool is needed for all of this to work
(`matrixSelectorHPool`).
---------
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
* doc: Mark `mad_over_time` as experimental (#13440)
We forgot to do that in
https://github.com/prometheus/prometheus/pull/13059
Signed-off-by: beorn7 <beorn@grafana.com>
* Change metric label for Puppetdb from 'http' to 'puppetdb'
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
* mirror metrics.proto change & generate code
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
* TestHeadLabelValuesWithMatchers: Add test case (#13414)
Add test case to TestHeadLabelValuesWithMatchers, while fixing a couple
of typos in other test cases. Also enclosing some implicit sub-tests in a
`t.Run` call to make them explicitly sub-tests.
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* update all go dependencies (#13438)
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
* build(deps): bump the k8s-io group with 2 updates (#13454)
Bumps the k8s-io group with 2 updates: [k8s.io/api](https://github.com/kubernetes/api) and [k8s.io/client-go](https://github.com/kubernetes/client-go).
Updates `k8s.io/api` from 0.28.4 to 0.29.1
- [Commits](https://github.com/kubernetes/api/compare/v0.28.4...v0.29.1)
Updates `k8s.io/client-go` from 0.28.4 to 0.29.1
- [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md)
- [Commits](https://github.com/kubernetes/client-go/compare/v0.28.4...v0.29.1)
---
updated-dependencies:
- dependency-name: k8s.io/api
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
- dependency-name: k8s.io/client-go
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump the go-opentelemetry-io group with 1 update (#13453)
Bumps the go-opentelemetry-io group with 1 update: [go.opentelemetry.io/collector/semconv](https://github.com/open-telemetry/opentelemetry-collector).
Updates `go.opentelemetry.io/collector/semconv` from 0.92.0 to 0.93.0
- [Release notes](https://github.com/open-telemetry/opentelemetry-collector/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-collector/blob/main/CHANGELOG-API.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-collector/compare/v0.92.0...v0.93.0)
---
updated-dependencies:
- dependency-name: go.opentelemetry.io/collector/semconv
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: go-opentelemetry-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump actions/upload-artifact from 3.1.3 to 4.0.0 (#13355)
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)
---
updated-dependencies:
- dependency-name: actions/upload-artifact
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump bufbuild/buf-push-action (#13357)
Bumps [bufbuild/buf-push-action](https://github.com/bufbuild/buf-push-action) from 342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 to a654ff18effe4641ebea4a4ce242c49800728459.
- [Release notes](https://github.com/bufbuild/buf-push-action/releases)
- [Commits](https://github.com/bufbuild/buf-push-action/compare/342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1...a654ff18effe4641ebea4a4ce242c49800728459)
---
updated-dependencies:
- dependency-name: bufbuild/buf-push-action
dependency-type: direct:production
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Labels: Add DropMetricName function, used in PromQL (#13446)
This function is called very frequently when executing PromQL functions,
and we can do it much more efficiently inside Labels.
In the common case that `__name__` comes first in the labels, we simply
re-point to start at the next label, which is nearly free.
`DropMetricName` is now so cheap I removed the cache - benchmarks show
everything still goes faster.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: simplify internal series delete function (#13261)
Lifting an optimisation from Agent code, `seriesHashmap.del` can use
the unique series reference, doesn't need to check Labels.
Also streamline the logic for deleting from `unique` and `conflicts` maps,
and add some comments to help the next person.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* otlptranslator/update-copy.sh: Fix sed command lines
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Rollback k8s.io requirements (#13462)
Rollback k8s.io Go modules to v0.28.6 to avoid forcing upgrade of Go to
1.21. This allows us to keep compatibility with the currently supported
upstream Go releases.
Signed-off-by: SuperQ <superq@gmail.com>
* Make update-copy.sh work for both OSX and GNU sed
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Name @beorn7 and @krajorama as maintainers for native histograms
I have been the de-facto maintainer for native histograms from the
beginning. So let's put this into MAINTAINERS.md.
In addition, I hereby proposose George Krajcsovits AKA Krajo as a
co-maintainer. He has contributed a lot of native histogram code, but
more importantly, he has contributed substantially to reviewing other
contributors' native histogram code, up to a point where I was merely
rubberstamping the PRs he had already reviewed. I'm confident that he
is ready to to be granted commit rights as outlined in the
"Maintainers" section of the governance:
https://prometheus.io/governance/#maintainers
According to the same section of the governance, I will announce the
proposed change on the developers mailing list and will give some time
for lazy consensus before merging this PR.
Signed-off-by: beorn7 <beorn@grafana.com>
* ui/fix: correct url handling for stacked graphs (#13460)
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
* tsdb: use cheaper Mutex on series
Mutex is 8 bytes; RWMutex is 24 bytes and much more complicated. Since
`RLock` is only used in two places, `UpdateMetadata` and `Delete`,
neither of which are hotspots, we should use the cheaper one.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Fix last_over_time for native histograms
The last_over_time retains a histogram sample without making a copy.
This sample is now coming from the buffered iterator used for windowing functions,
and can be reused for reading subsequent samples as the iterator progresses.
I would propose copying the sample in the last_over_time function, similar to
how it is done for rate, sum_over_time and others.
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
* Implementation
NOTE:
Rebased from main after refactor in #13014
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Add feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactor concurrency control
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Optimising dependencies/dependents funcs to not produce new slices each request
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Rename flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring for performance, and to allow controller to be overridden
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Block until all rules, both sync & async, have completed evaluating
Updated & added tests
Review feedback nits
Return empty map if not indeterminate
Use highWatermark to track inflight requests counter
Appease the linter
Clarify feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Fix typo in CLI flag description
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Fixed auto-generated doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improve doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Simplify the design to update concurrency controller once the rule evaluation has done
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Add more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Added more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improved RuleConcurrencyController interface doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Introduced sequentialRuleEvalController
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Remove superfluous nil check in Group.metrics
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* api: Serialize discovered and target labels into JSON directly (#13469)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* api: Serialize discovered labels into JSON directly in dropped targets (#13484)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* Add ShardedPostings() support to TSDB (#10421)
This PR is a reference implementation of the proposal described in #10420.
In addition to what described in #10420, in this PR I've introduced labels.StableHash(). The idea is to offer an hashing function which doesn't change over time, and that's used by query sharding in order to get a stable behaviour over time. The implementation of labels.StableHash() is the hashing function used by Prometheus before stringlabels, and what's used by Grafana Mimir for query sharding (because built before stringlabels was a thing).
Follow up work
As mentioned in #10420, if this PR is accepted I'm also open to upload another foundamental piece used by Grafana Mimir query sharding to accelerate the query execution: an optional, configurable and fast in-memory cache for the series hashes.
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* storage/remote: document why two benchmarks are skipped
One was silently doing nothing; one was doing something but the work
didn't go up linearly with iteration count.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Pod status changes not discovered by Kube Endpoints SD (#13337)
* fix(discovery/kubernetes/endpoints): react to changes on Pods because some modifications can occur on them without triggering an update on the related Endpoints (The Pod phase changing from Pending to Running e.g.).
---------
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
* Small improvements, add const, remove copypasta (#8106)
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
* Proposal to improve FPointSlice and HPointSlice allocation. (#13448)
* Reusing points slice from previous series when the slice is under utilized
* Adding comments on the bench test
Signed-off-by: Alan Protasio <alanprot@gmail.com>
* lint
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
* go mod tidy
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
---------
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
Signed-off-by: Erik Sommer <ersotech@posteo.de>
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
Signed-off-by: bwplotka <bwplotka@gmail.com>
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Signed-off-by: Marco Pracucci <marco@pracucci.com>
Signed-off-by: tyltr <tylitianrui@126.com>
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Signed-off-by: Ivan Babrou <github@ivan.computer>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Signed-off-by: Goutham <gouthamve@gmail.com>
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
Signed-off-by: Ben Ye <benye@amazon.com>
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Signed-off-by: SuperQ <superq@gmail.com>
Signed-off-by: Ben Kochie <superq@gmail.com>
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Signed-off-by: beorn7 <beorn@grafana.com>
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
Signed-off-by: Alan Protasio <alanprot@gmail.com>
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
Co-authored-by: Julian Wiedmann <jwi@linux.ibm.com>
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
Co-authored-by: Erik Sommer <ersotech@posteo.de>
Co-authored-by: Linas Medziunas <linas.medziunas@gmail.com>
Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
Co-authored-by: Arianna Vespri <arianna.vespri@yahoo.it>
Co-authored-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: daniel-resdiary <109083091+daniel-resdiary@users.noreply.github.com>
Co-authored-by: Daniel Kerbel <nmdanny@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jan Fajerski <jfajersk@redhat.com>
Co-authored-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Marc Tudurí <marctc@protonmail.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Co-authored-by: Augustin Husson <husson.augustin@gmail.com>
Co-authored-by: Björn Rabenstein <beorn@grafana.com>
Co-authored-by: zenador <zenador@users.noreply.github.com>
Co-authored-by: gotjosh <josue.abreu@gmail.com>
Co-authored-by: Ben Kochie <superq@gmail.com>
Co-authored-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Co-authored-by: Marco Pracucci <marco@pracucci.com>
Co-authored-by: tyltr <tylitianrui@126.com>
Co-authored-by: Ted Robertson <10043369+tredondo@users.noreply.github.com>
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
Co-authored-by: Matthias Loibl <mail@matthiasloibl.com>
Co-authored-by: Ivan Babrou <github@ivan.computer>
Co-authored-by: Arve Knudsen <arve.knudsen@gmail.com>
Co-authored-by: Israel Blancas <iblancasa@gmail.com>
Co-authored-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Goutham <gouthamve@gmail.com>
Co-authored-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Co-authored-by: Chris Marchbanks <csmarchbanks@gmail.com>
Co-authored-by: Ben Ye <benye@amazon.com>
Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Co-authored-by: Paulin Todev <paulin.todev@gmail.com>
Co-authored-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
Co-authored-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
Co-authored-by: Mikhail Fesenko <proggga@gmail.com>
Co-authored-by: Alan Protasio <alanprot@gmail.com>
2024-02-02 10:38:50 -08:00
// Compaction of overlapping blocks are allowed if EnableOverlappingCompaction is true.
// This is an optional flag for overlapping blocks.
// The reason why this flag exists is because there are various users of the TSDB
// that do not want vertical compaction happening on ingest time. Instead,
// they'd rather keep overlapping blocks and let another component do the overlapping compaction later.
// For Prometheus, this will always be true.
EnableOverlappingCompaction bool
// EnableSharding enables query sharding support in TSDB.
EnableSharding bool
2016-12-10 09:08:50 -08:00
}
2020-07-22 08:19:33 -07:00
type BlocksToDeleteFunc func ( blocks [ ] * Block ) map [ ulid . ULID ] struct { }
2017-01-06 02:40:09 -08:00
// DB handles reads and writes of time series falling into
// a hashed partition of a seriedb.
type DB struct {
2021-11-11 08:45:25 -08:00
dir string
locker * tsdbutil . DirLocker
2017-03-04 07:50:48 -08:00
2020-07-22 08:19:33 -07:00
logger log . Logger
metrics * dbMetrics
opts * Options
chunkPool chunkenc . Pool
compactor Compactor
blocksToDelete BlocksToDeleteFunc
2016-12-09 01:00:14 -08:00
2023-11-24 03:38:38 -08:00
// Mutex for that must be held when modifying the general block layout or lastGarbageCollectedMmapRef.
2017-03-20 00:41:56 -07:00
mtx sync . RWMutex
2017-10-09 06:21:46 -07:00
blocks [ ] * Block
2017-03-04 07:50:48 -08:00
2023-11-24 03:38:38 -08:00
// The last OOO chunk that was compacted and written to disk. New queriers must not read chunks less
// than or equal to this reference, as these chunks could be garbage collected at any time.
lastGarbageCollectedMmapRef chunks . ChunkDiskMapperRef
2017-08-28 15:39:17 -07:00
head * Head
2017-01-06 03:37:28 -08:00
compactc chan struct { }
donec chan struct { }
stopc chan struct { }
2017-05-20 00:51:10 -07:00
2018-11-20 02:34:26 -08:00
// cmtx ensures that compactions and deletions don't run simultaneously.
cmtx sync . Mutex
// autoCompactMtx ensures that no compaction gets triggered while
// changing the autoCompact var.
autoCompactMtx sync . Mutex
autoCompact bool
2018-12-05 08:34:42 -08:00
// Cancel a running compaction when a shutdown is initiated.
2019-02-06 04:07:35 -08:00
compactCancel context . CancelFunc
2022-09-20 10:05:50 -07:00
// oooWasEnabled is true if out of order support was enabled at least one time
// during the time TSDB was up. In which case we need to keep supporting
// out-of-order compaction and vertical queries.
oooWasEnabled atomic . Bool
2023-05-15 12:31:49 -07:00
writeNotified wlog . WriteNotified
2022-09-20 10:05:50 -07:00
registerer prometheus . Registerer
2016-12-09 01:00:14 -08:00
}
2017-01-06 02:40:09 -08:00
type dbMetrics struct {
2021-11-11 08:45:25 -08:00
loadedBlocks prometheus . GaugeFunc
symbolTableSize prometheus . GaugeFunc
reloads prometheus . Counter
reloadsFailed prometheus . Counter
compactionsFailed prometheus . Counter
compactionsTriggered prometheus . Counter
compactionsSkipped prometheus . Counter
sizeRetentionCount prometheus . Counter
timeRetentionCount prometheus . Counter
startTime prometheus . GaugeFunc
tombCleanTimer prometheus . Histogram
blocksBytes prometheus . Gauge
maxBytes prometheus . Gauge
2023-10-24 04:34:42 -07:00
retentionDuration prometheus . Gauge
2016-12-31 00:48:49 -08:00
}
2020-02-14 10:48:55 -08:00
func newDBMetrics ( db * DB , r prometheus . Registerer ) * dbMetrics {
2017-01-06 02:40:09 -08:00
m := & dbMetrics { }
2017-01-03 06:43:26 -08:00
2017-05-26 06:13:03 -07:00
m . loadedBlocks = prometheus . NewGaugeFunc ( prometheus . GaugeOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_blocks_loaded" ,
2017-05-26 06:13:03 -07:00
Help : "Number of currently loaded data blocks" ,
} , func ( ) float64 {
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
return float64 ( len ( db . blocks ) )
} )
2018-09-08 11:28:36 -07:00
m . symbolTableSize = prometheus . NewGaugeFunc ( prometheus . GaugeOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_symbol_table_size_bytes" ,
2020-10-21 06:35:40 -07:00
Help : "Size of symbol table in memory for loaded blocks" ,
2018-09-08 11:28:36 -07:00
} , func ( ) float64 {
db . mtx . RLock ( )
2023-04-09 00:08:40 -07:00
blocks := db . blocks
2018-09-08 11:28:36 -07:00
db . mtx . RUnlock ( )
2018-09-12 02:09:02 -07:00
symTblSize := uint64 ( 0 )
2018-09-08 11:28:36 -07:00
for _ , b := range blocks {
2018-09-12 02:09:02 -07:00
symTblSize += b . GetSymbolTableSize ( )
2018-09-08 11:28:36 -07:00
}
2018-09-12 02:09:02 -07:00
return float64 ( symTblSize )
2018-09-08 11:28:36 -07:00
} )
2017-05-26 06:13:03 -07:00
m . reloads = prometheus . NewCounter ( prometheus . CounterOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_reloads_total" ,
2017-05-26 06:13:03 -07:00
Help : "Number of times the database reloaded block data from disk." ,
} )
m . reloadsFailed = prometheus . NewCounter ( prometheus . CounterOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_reloads_failures_total" ,
2020-10-19 08:27:08 -07:00
Help : "Number of times the database failed to reloadBlocks block data from disk." ,
2017-05-26 06:13:03 -07:00
} )
2017-01-06 03:37:28 -08:00
m . compactionsTriggered = prometheus . NewCounter ( prometheus . CounterOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_compactions_triggered_total" ,
2017-01-06 03:37:28 -08:00
Help : "Total number of triggered compactions for the partition." ,
} )
2019-05-30 04:57:28 -07:00
m . compactionsFailed = prometheus . NewCounter ( prometheus . CounterOpts {
Name : "prometheus_tsdb_compactions_failed_total" ,
Help : "Total number of compactions that failed for the partition." ,
} )
2019-01-16 02:03:52 -08:00
m . timeRetentionCount = prometheus . NewCounter ( prometheus . CounterOpts {
Name : "prometheus_tsdb_time_retentions_total" ,
Help : "The number of times that blocks were deleted because the maximum time limit was exceeded." ,
} )
2018-11-20 02:34:26 -08:00
m . compactionsSkipped = prometheus . NewCounter ( prometheus . CounterOpts {
Name : "prometheus_tsdb_compactions_skipped_total" ,
Help : "Total number of skipped compactions due to disabled auto compaction." ,
} )
2018-09-14 05:07:45 -07:00
m . startTime = prometheus . NewGaugeFunc ( prometheus . GaugeOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_lowest_timestamp" ,
2018-11-30 10:18:12 -08:00
Help : "Lowest timestamp value stored in the database. The unit is decided by the library consumer." ,
2018-09-14 05:07:45 -07:00
} , func ( ) float64 {
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
if len ( db . blocks ) == 0 {
2019-10-09 08:41:46 -07:00
return float64 ( db . head . MinTime ( ) )
2018-09-14 05:07:45 -07:00
}
return float64 ( db . blocks [ 0 ] . meta . MinTime )
} )
2017-11-22 04:34:50 -08:00
m . tombCleanTimer = prometheus . NewHistogram ( prometheus . HistogramOpts {
2018-09-18 10:17:41 -07:00
Name : "prometheus_tsdb_tombstone_cleanup_seconds" ,
2017-11-22 04:34:50 -08:00
Help : "The time taken to recompact blocks to remove tombstones." ,
} )
2019-01-16 02:03:52 -08:00
m . blocksBytes = prometheus . NewGauge ( prometheus . GaugeOpts {
2019-01-23 05:46:58 -08:00
Name : "prometheus_tsdb_storage_blocks_bytes" ,
2019-01-16 02:03:52 -08:00
Help : "The number of bytes that are currently used for local storage by all blocks." ,
} )
2019-07-27 01:52:25 -07:00
m . maxBytes = prometheus . NewGauge ( prometheus . GaugeOpts {
Name : "prometheus_tsdb_retention_limit_bytes" ,
Help : "Max number of bytes to be retained in the tsdb blocks, configured 0 means disabled" ,
} )
2023-10-24 04:34:42 -07:00
m . retentionDuration = prometheus . NewGauge ( prometheus . GaugeOpts {
Name : "prometheus_tsdb_retention_limit_seconds" ,
Help : "How long to retain samples in storage." ,
} )
2019-01-16 02:03:52 -08:00
m . sizeRetentionCount = prometheus . NewCounter ( prometheus . CounterOpts {
Name : "prometheus_tsdb_size_retentions_total" ,
Help : "The number of times that blocks were deleted because the maximum number of bytes was exceeded." ,
} )
2020-02-11 08:34:09 -08:00
2016-12-31 00:48:49 -08:00
if r != nil {
r . MustRegister (
2017-05-26 06:13:03 -07:00
m . loadedBlocks ,
2018-09-08 11:28:36 -07:00
m . symbolTableSize ,
2017-05-26 06:13:03 -07:00
m . reloads ,
m . reloadsFailed ,
2019-05-30 04:57:28 -07:00
m . compactionsFailed ,
2020-01-13 14:15:45 -08:00
m . compactionsTriggered ,
m . compactionsSkipped ,
m . sizeRetentionCount ,
m . timeRetentionCount ,
2018-09-14 05:07:45 -07:00
m . startTime ,
2017-11-22 04:34:50 -08:00
m . tombCleanTimer ,
2019-01-16 02:03:52 -08:00
m . blocksBytes ,
2019-07-27 01:52:25 -07:00
m . maxBytes ,
2023-10-24 04:34:42 -07:00
m . retentionDuration ,
2016-12-31 00:48:49 -08:00
)
}
return m
}
2022-03-03 04:03:07 -08:00
// DBStats contains statistics about the DB separated by component (eg. head).
2021-06-05 07:29:32 -07:00
// They are available before the DB has finished initializing.
type DBStats struct {
Head * HeadStats
}
// NewDBStats returns a new DBStats object initialized using the
2022-08-27 13:21:41 -07:00
// new function from each component.
2021-06-05 07:29:32 -07:00
func NewDBStats ( ) * DBStats {
return & DBStats {
Head : NewHeadStats ( ) ,
}
}
2019-07-23 01:04:48 -07:00
// ErrClosed is returned when the db is closed.
var ErrClosed = errors . New ( "db already closed" )
// DBReadOnly provides APIs for read only operations on a database.
2019-09-30 08:54:55 -07:00
// Current implementation doesn't support concurrency so
2019-07-23 01:04:48 -07:00
// all API calls should happen in the same go routine.
type DBReadOnly struct {
logger log . Logger
dir string
closers [ ] io . Closer
closed chan struct { }
}
// OpenDBReadOnly opens DB in the given directory for read only operations.
func OpenDBReadOnly ( dir string , l log . Logger ) ( * DBReadOnly , error ) {
if _ , err := os . Stat ( dir ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "opening the db dir: %w" , err )
2019-07-23 01:04:48 -07:00
}
if l == nil {
l = log . NewNopLogger ( )
}
return & DBReadOnly {
logger : l ,
dir : dir ,
closed : make ( chan struct { } ) ,
} , nil
}
2019-09-13 03:25:21 -07:00
// FlushWAL creates a new block containing all data that's currently in the memory buffer/WAL.
// Samples that are in existing blocks will not be written to the new block.
// Note that if the read only database is running concurrently with a
// writable database then writing the WAL to the database directory can race.
2020-03-23 02:19:44 -07:00
func ( db * DBReadOnly ) FlushWAL ( dir string ) ( returnErr error ) {
2019-09-13 03:25:21 -07:00
blockReaders , err := db . Blocks ( )
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "read blocks: %w" , err )
2019-09-13 03:25:21 -07:00
}
maxBlockTime := int64 ( math . MinInt64 )
if len ( blockReaders ) > 0 {
maxBlockTime = blockReaders [ len ( blockReaders ) - 1 ] . Meta ( ) . MaxTime
}
2022-10-10 08:08:46 -07:00
w , err := wlog . Open ( db . logger , filepath . Join ( db . dir , "wal" ) )
2019-09-13 03:25:21 -07:00
if err != nil {
return err
}
2022-10-10 08:08:46 -07:00
var wbl * wlog . WL
wblDir := filepath . Join ( db . dir , wlog . WblDirName )
2022-09-20 10:05:50 -07:00
if _ , err := os . Stat ( wblDir ) ; ! os . IsNotExist ( err ) {
2022-10-10 08:08:46 -07:00
wbl , err = wlog . Open ( db . logger , wblDir )
2022-09-20 10:05:50 -07:00
if err != nil {
return err
}
}
2021-02-09 06:12:48 -08:00
opts := DefaultHeadOptions ( )
opts . ChunkDirRoot = db . dir
2022-09-20 10:05:50 -07:00
head , err := NewHead ( nil , db . logger , w , wbl , opts , NewHeadStats ( ) )
2019-09-13 03:25:21 -07:00
if err != nil {
return err
}
2020-03-23 02:19:44 -07:00
defer func ( ) {
2023-11-16 10:54:41 -08:00
errs := tsdb_errors . NewMulti ( returnErr )
if err := head . Close ( ) ; err != nil {
errs . Add ( fmt . Errorf ( "closing Head: %w" , err ) )
}
returnErr = errs . Err ( )
2020-03-23 02:19:44 -07:00
} ( )
2019-09-13 03:25:21 -07:00
// Set the min valid time for the ingested wal samples
// to be no lower than the maxt of the last block.
if err := head . Init ( maxBlockTime ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "read WAL: %w" , err )
2019-09-13 03:25:21 -07:00
}
mint := head . MinTime ( )
maxt := head . MaxTime ( )
2020-08-13 02:55:35 -07:00
rh := NewRangeHead ( head , mint , maxt )
2020-02-06 07:58:38 -08:00
compactor , err := NewLeveledCompactor (
context . Background ( ) ,
nil ,
db . logger ,
2020-02-11 08:34:09 -08:00
ExponentialBlockRanges ( DefaultOptions ( ) . MinBlockDuration , 3 , 5 ) ,
remote write 2.0: sync with `main` branch (#13510)
* consoles: exclude iowait and steal from CPU Utilisation
'iowait' and 'steal' indicate specific idle/wait states, which shouldn't
be counted into CPU Utilisation. Also see
https://github.com/prometheus-operator/kube-prometheus/pull/796 and
https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/667.
Per the iostat man page:
%idle
Show the percentage of time that the CPU or CPUs were idle and the
system did not have an outstanding disk I/O request.
%iowait
Show the percentage of time that the CPU or CPUs were idle during
which the system had an outstanding disk I/O request.
%steal
Show the percentage of time spent in involuntary wait by the
virtual CPU or CPUs while the hypervisor was servicing another
virtual processor.
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
* tsdb: shrink txRing with smaller integers
4 billion active transactions ought to be enough for anyone.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: create isolation transaction slice on demand
When Prometheus restarts it creates every series read in from the WAL,
but many of those series will be finished, and never receive any more
samples. By defering allocation of the txRing slice to when it is first
needed, we save 32 bytes per stale series.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* add cluster variable to Overview dashboard
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* promql: simplify Native Histogram arithmetics
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
* Cut 2.49.0-rc.0 (#13270)
* Cut 2.49.0-rc.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Removed the duplicate.
Signed-off-by: bwplotka <bwplotka@gmail.com>
---------
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Add unit protobuf parser
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Go on adding protobuf parsing for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* ui: create a reproduction for https://github.com/prometheus/prometheus/issues/13292
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Get conditional right
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Get VM Scale Set NIC (#13283)
Calling `*armnetwork.InterfacesClient.Get()` doesn't work for Scale Set
VM NIC, because these use a different Resource ID format.
Use `*armnetwork.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface()`
instead. This needs both the scale set name and the instance ID, so
add an `InstanceID` field to the `virtualMachine` struct. `InstanceID`
is empty for a VM that isn't a ScaleSetVM.
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
* Cut v2.49.0-rc.1
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Delete debugging lines, amend error message for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Correct order in error message
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Consider storage.ErrTooOldSample as non-retryable
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
* scrape_test.go: Increase scrape interval in TestScrapeLoopCache to reduce potential flakiness
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Avoid creating string for suffix, consider counters without _total suffix
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* build(deps): bump github.com/prometheus/client_golang
Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.17.0 to 1.18.0.
- [Release notes](https://github.com/prometheus/client_golang/releases)
- [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/client_golang/compare/v1.17.0...v1.18.0)
---
updated-dependencies:
- dependency-name: github.com/prometheus/client_golang
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump actions/setup-node from 3.8.1 to 4.0.1
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3.8.1 to 4.0.1.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d...b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8)
---
updated-dependencies:
- dependency-name: actions/setup-node
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
* scripts: sort file list in embed directive
Otherwise the resulting string depends on find, which afaict depends on
the underlying filesystem. A stable file list make it easier to detect
UI changes in downstreams that need to track UI assets.
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
* Fix DataTableProps['data'] for resultType string
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* Fix handling of scalar and string in isHeatmapData
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* build(deps): bump github.com/influxdata/influxdb
Bumps [github.com/influxdata/influxdb](https://github.com/influxdata/influxdb) from 1.11.2 to 1.11.4.
- [Release notes](https://github.com/influxdata/influxdb/releases)
- [Commits](https://github.com/influxdata/influxdb/compare/v1.11.2...v1.11.4)
---
updated-dependencies:
- dependency-name: github.com/influxdata/influxdb
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump github.com/prometheus/prometheus
Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.48.0 to 0.48.1.
- [Release notes](https://github.com/prometheus/prometheus/releases)
- [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/prometheus/compare/v0.48.0...v0.48.1)
---
updated-dependencies:
- dependency-name: github.com/prometheus/prometheus
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* Bump client_golang to v1.18.0 (#13373)
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Drop old inmemory samples (#13002)
* Drop old inmemory samples
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Avoid copying timeseries when the feature is disabled
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Run gofmt
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Clarify docs
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Add more logging info
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Remove loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* optimize function and add tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Simplify filter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Update help info from metrics
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use metrics to keep track of drop elements during buildWriteRequest
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var in tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* pass time.Now as parameter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Change buildwriterequest during retries
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Revert "Remove loggers"
This reverts commit 54f91dfcae20488944162335ab4ad8be459df1ab.
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use log level debug for loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Fix linter
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove noisy debug-level logs; add 'reason' label to drop metrics
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove accidentally committed files
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Propagate logger to buildWriteRequest to log dropped data
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix docs comment
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Make drop reason more specific
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove unnecessary pass of logger
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Use snake_case for reason label
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix dropped samples metric
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
---------
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
* fix(discovery): allow requireUpdate util to timeout in discovery/file/file_test.go.
The loop ran indefinitely if the condition isn't met.
Before, each iteration created a new timer channel which was always outpaced by
the other timer channel with smaller duration.
minor detail: There was a memory leak: resources of the ~10 previous timers were
constantly kept. With the fix, we may keep the resources of one timer around for defaultWait
but this isn't worth the changes to make it right.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Merge pull request #13371 from kevinmingtarja/fix-isHeatmapData
ui: fix handling of scalar and string in isHeatmapData
* tsdb/{index,compact}: allow using custom postings encoding format (#13242)
* tsdb/{index,compact}: allow using custom postings encoding format
We would like to experiment with a different postings encoding format in
Thanos so in this change I am proposing adding another argument to
`NewWriter` which would allow users to change the format if needed.
Also, wire the leveled compactor so that it would be possible to change
the format there too.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb/compact: use a struct for leveled compactor options
As discussed on Slack, let's use a struct for the options in leveled
compactor.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: make changes after Bryan's review
- Make changes less intrusive
- Turn the postings encoder type into a function
- Add NewWriterWithEncoder()
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
---------
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0-rc.2
Signed-off-by: bwplotka <bwplotka@gmail.com>
* build(deps): bump actions/setup-go from 3.5.0 to 5.0.0 in /scripts (#13362)
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3.5.0 to 5.0.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/6edd4406fa81c3da01a34fa6f6343087c207a568...0c52d547c9bc32b1aa3301fd7a9cb496313a4491)
---
updated-dependencies:
- dependency-name: actions/setup-go
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump github/codeql-action from 2.22.8 to 3.22.12 (#13358)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.22.8 to 3.22.12.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/407ffafae6a767df3e0230c3df91b6443ae8df75...012739e5082ff0c22ca6d6ab32e07c36df03c4a4)
---
updated-dependencies:
- dependency-name: github/codeql-action
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* put @nexucis has a release shepherd (#13383)
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
* Add analyze histograms command to promtool (#12331)
Add `query analyze` command to promtool
This command analyzes the buckets of classic and native histograms,
based on data queried from the Prometheus query API, i.e. it
doesn't require direct access to the TSDB files.
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* included instance in all necessary descriptions
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* tsdb/compact: fix passing merge func
Fixing a very small logical problem I've introduced :(.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: add enable overlapping compaction
This functionality is needed in downstream projects because they have a
separate component that does compaction.
Upstreaming
https://github.com/grafana/mimir-prometheus/blob/7c8e9a2a76fc729e9078889782928b2fdfe240e9/tsdb/compact.go#L323-L325.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* promtool: allow setting multiple matchers to "promtool tsdb dump" command. (#13296)
Conditions are ANDed inside the same matcher but matchers are ORed
Including unit tests for "promtool tsdb dump".
Refactor some matchers scraping utils.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Fixed changelog
Signed-off-by: bwplotka <bwplotka@gmail.com>
* tsdb/main: wire "EnableOverlappingCompaction" to tsdb.Options (#13398)
This added the https://github.com/prometheus/prometheus/pull/13393
"EnableOverlappingCompaction" parameter to the compactor code but not to
the tsdb.Options. I forgot about that. Add it to `tsdb.Options` too and
set it to `true` in Prometheus.
Copy/paste the description from
https://github.com/prometheus/prometheus/pull/13393#issuecomment-1891787986
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Issue #13268: fix quality value in accept header
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
* Cut 2.49.1 with scrape q= bugfix.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Cut 2.49.1 web package.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Restore more efficient version of NewPossibleNonCounterInfo annotation (#13022)
Restore more efficient version of NewPossibleNonCounterInfo annotation
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Fix regressions introduced by #13242
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* fix slice copy in 1.20 (#13389)
The slices package is added to the standard library in Go 1.21;
we need to import from the exp area to maintain compatibility with Go 1.20.
Signed-off-by: tyltr <tylitianrui@126.com>
* Docs: Query Basics: link to rate (#10538)
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
* chore(kubernetes): check preconditions earlier and avoid unnecessary checks or iterations
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Examples: link to `rate` for new users (#10535)
* Examples: link to `rate` for new users
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
* promql: use natural sort in sort_by_label and sort_by_label_desc (#13411)
These functions are intended for humans, as robots can already sort the results
however they please. Humans like things sorted "naturally":
* https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
A similar thing has been done to Grafana, which is also used by humans:
* https://github.com/grafana/grafana/pull/78024
* https://github.com/grafana/grafana/pull/78494
Signed-off-by: Ivan Babrou <github@ivan.computer>
* TestLabelValuesWithMatchers: Add test case
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* remove obsolete build tag
Signed-off-by: tyltr <tylitianrui@126.com>
* Upgrade some golang dependencies for resty 2.11
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
* Native Histograms: support `native_histogram_min_bucket_factor` in scrape_config (#13222)
Native Histograms: support native_histogram_min_bucket_factor in scrape_config
---------
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram (#13392)
Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Minor fixes to otlp vendor update script
Signed-off-by: Goutham <gouthamve@gmail.com>
* build(deps): bump github.com/hetznercloud/hcloud-go/v2
Bumps [github.com/hetznercloud/hcloud-go/v2](https://github.com/hetznercloud/hcloud-go) from 2.4.0 to 2.6.0.
- [Release notes](https://github.com/hetznercloud/hcloud-go/releases)
- [Changelog](https://github.com/hetznercloud/hcloud-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hetznercloud/hcloud-go/compare/v2.4.0...v2.6.0)
---
updated-dependencies:
- dependency-name: github.com/hetznercloud/hcloud-go/v2
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* Enhanced visibility for `promtool test rules` with JSON colored formatting (#13342)
* Added diff flag for unit test to improvise readability & debugging
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Removed blank spaces
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed linting error
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Added cli flags to documentation
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Revert unrrelated linting fixes
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed review suggestions
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Cleanup
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
---------
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* storage: skip merging when no remote storage configured
Prometheus is hard-coded to use a fanout storage between TSDB and
a remote storage which by default is empty.
This change detects the empty storage and skips merging between
result sets, which would make `Select()` sort results.
Bottom line: we skip a sort unless there really is some remote storage
configured.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Remove csmarchbanks from remote write owners (#13432)
I have not had the time to keep up with remote write and have no plans
to work on it in the near future so I am withdrawing my maintainership
of that part of the codebase. I continue to focus on client_python.
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
* add more context cancellation check at evaluation time
Signed-off-by: Ben Ye <benye@amazon.com>
* Optimize label values with matchers by taking shortcuts (#13426)
Don't calculate postings beforehand: we may not need them. If all
matchers are for the requested label, we can just filter its values.
Also, if there are no values at all, no need to run any kind of
logic.
Also add more labelValuesWithMatchers benchmarks
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Add automatic memory limit handling
Enable automatic detection of memory limits and configure GOMEMLIMIT to
match.
* Also includes a flag to allow controlling the reserved ratio.
Signed-off-by: SuperQ <superq@gmail.com>
* Update OSSF badge link (#13433)
Provide a more user friendly interface
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
* SD Managers taking over responsibility for registration of debug metrics (#13375)
SD Managers take over responsibility for SD metrics registration
---------
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Optimize histogram iterators (#13340)
Optimize histogram iterators
Histogram iterators allocate new objects in the AtHistogram and
AtFloatHistogram methods, which makes calculating rates over long
ranges expensive.
In #13215 we allowed an existing object to be reused
when converting an integer histogram to a float histogram. This commit follows
the same idea and allows injecting an existing object in the AtHistogram and
AtFloatHistogram methods. When the injected value is nil, iterators allocate
new histograms, otherwise they populate and return the injected object.
The commit also adds a CopyTo method to Histogram and FloatHistogram which
is used in the BufferedIterator to overwrite items in the ring instead of making
new copies.
Note that a specialized HPoint pool is needed for all of this to work
(`matrixSelectorHPool`).
---------
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
* doc: Mark `mad_over_time` as experimental (#13440)
We forgot to do that in
https://github.com/prometheus/prometheus/pull/13059
Signed-off-by: beorn7 <beorn@grafana.com>
* Change metric label for Puppetdb from 'http' to 'puppetdb'
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
* mirror metrics.proto change & generate code
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
* TestHeadLabelValuesWithMatchers: Add test case (#13414)
Add test case to TestHeadLabelValuesWithMatchers, while fixing a couple
of typos in other test cases. Also enclosing some implicit sub-tests in a
`t.Run` call to make them explicitly sub-tests.
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* update all go dependencies (#13438)
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
* build(deps): bump the k8s-io group with 2 updates (#13454)
Bumps the k8s-io group with 2 updates: [k8s.io/api](https://github.com/kubernetes/api) and [k8s.io/client-go](https://github.com/kubernetes/client-go).
Updates `k8s.io/api` from 0.28.4 to 0.29.1
- [Commits](https://github.com/kubernetes/api/compare/v0.28.4...v0.29.1)
Updates `k8s.io/client-go` from 0.28.4 to 0.29.1
- [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md)
- [Commits](https://github.com/kubernetes/client-go/compare/v0.28.4...v0.29.1)
---
updated-dependencies:
- dependency-name: k8s.io/api
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
- dependency-name: k8s.io/client-go
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump the go-opentelemetry-io group with 1 update (#13453)
Bumps the go-opentelemetry-io group with 1 update: [go.opentelemetry.io/collector/semconv](https://github.com/open-telemetry/opentelemetry-collector).
Updates `go.opentelemetry.io/collector/semconv` from 0.92.0 to 0.93.0
- [Release notes](https://github.com/open-telemetry/opentelemetry-collector/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-collector/blob/main/CHANGELOG-API.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-collector/compare/v0.92.0...v0.93.0)
---
updated-dependencies:
- dependency-name: go.opentelemetry.io/collector/semconv
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: go-opentelemetry-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump actions/upload-artifact from 3.1.3 to 4.0.0 (#13355)
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)
---
updated-dependencies:
- dependency-name: actions/upload-artifact
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump bufbuild/buf-push-action (#13357)
Bumps [bufbuild/buf-push-action](https://github.com/bufbuild/buf-push-action) from 342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 to a654ff18effe4641ebea4a4ce242c49800728459.
- [Release notes](https://github.com/bufbuild/buf-push-action/releases)
- [Commits](https://github.com/bufbuild/buf-push-action/compare/342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1...a654ff18effe4641ebea4a4ce242c49800728459)
---
updated-dependencies:
- dependency-name: bufbuild/buf-push-action
dependency-type: direct:production
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Labels: Add DropMetricName function, used in PromQL (#13446)
This function is called very frequently when executing PromQL functions,
and we can do it much more efficiently inside Labels.
In the common case that `__name__` comes first in the labels, we simply
re-point to start at the next label, which is nearly free.
`DropMetricName` is now so cheap I removed the cache - benchmarks show
everything still goes faster.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: simplify internal series delete function (#13261)
Lifting an optimisation from Agent code, `seriesHashmap.del` can use
the unique series reference, doesn't need to check Labels.
Also streamline the logic for deleting from `unique` and `conflicts` maps,
and add some comments to help the next person.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* otlptranslator/update-copy.sh: Fix sed command lines
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Rollback k8s.io requirements (#13462)
Rollback k8s.io Go modules to v0.28.6 to avoid forcing upgrade of Go to
1.21. This allows us to keep compatibility with the currently supported
upstream Go releases.
Signed-off-by: SuperQ <superq@gmail.com>
* Make update-copy.sh work for both OSX and GNU sed
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Name @beorn7 and @krajorama as maintainers for native histograms
I have been the de-facto maintainer for native histograms from the
beginning. So let's put this into MAINTAINERS.md.
In addition, I hereby proposose George Krajcsovits AKA Krajo as a
co-maintainer. He has contributed a lot of native histogram code, but
more importantly, he has contributed substantially to reviewing other
contributors' native histogram code, up to a point where I was merely
rubberstamping the PRs he had already reviewed. I'm confident that he
is ready to to be granted commit rights as outlined in the
"Maintainers" section of the governance:
https://prometheus.io/governance/#maintainers
According to the same section of the governance, I will announce the
proposed change on the developers mailing list and will give some time
for lazy consensus before merging this PR.
Signed-off-by: beorn7 <beorn@grafana.com>
* ui/fix: correct url handling for stacked graphs (#13460)
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
* tsdb: use cheaper Mutex on series
Mutex is 8 bytes; RWMutex is 24 bytes and much more complicated. Since
`RLock` is only used in two places, `UpdateMetadata` and `Delete`,
neither of which are hotspots, we should use the cheaper one.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Fix last_over_time for native histograms
The last_over_time retains a histogram sample without making a copy.
This sample is now coming from the buffered iterator used for windowing functions,
and can be reused for reading subsequent samples as the iterator progresses.
I would propose copying the sample in the last_over_time function, similar to
how it is done for rate, sum_over_time and others.
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
* Implementation
NOTE:
Rebased from main after refactor in #13014
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Add feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactor concurrency control
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Optimising dependencies/dependents funcs to not produce new slices each request
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Rename flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring for performance, and to allow controller to be overridden
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Block until all rules, both sync & async, have completed evaluating
Updated & added tests
Review feedback nits
Return empty map if not indeterminate
Use highWatermark to track inflight requests counter
Appease the linter
Clarify feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Fix typo in CLI flag description
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Fixed auto-generated doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improve doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Simplify the design to update concurrency controller once the rule evaluation has done
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Add more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Added more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improved RuleConcurrencyController interface doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Introduced sequentialRuleEvalController
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Remove superfluous nil check in Group.metrics
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* api: Serialize discovered and target labels into JSON directly (#13469)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* api: Serialize discovered labels into JSON directly in dropped targets (#13484)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* Add ShardedPostings() support to TSDB (#10421)
This PR is a reference implementation of the proposal described in #10420.
In addition to what described in #10420, in this PR I've introduced labels.StableHash(). The idea is to offer an hashing function which doesn't change over time, and that's used by query sharding in order to get a stable behaviour over time. The implementation of labels.StableHash() is the hashing function used by Prometheus before stringlabels, and what's used by Grafana Mimir for query sharding (because built before stringlabels was a thing).
Follow up work
As mentioned in #10420, if this PR is accepted I'm also open to upload another foundamental piece used by Grafana Mimir query sharding to accelerate the query execution: an optional, configurable and fast in-memory cache for the series hashes.
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* storage/remote: document why two benchmarks are skipped
One was silently doing nothing; one was doing something but the work
didn't go up linearly with iteration count.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Pod status changes not discovered by Kube Endpoints SD (#13337)
* fix(discovery/kubernetes/endpoints): react to changes on Pods because some modifications can occur on them without triggering an update on the related Endpoints (The Pod phase changing from Pending to Running e.g.).
---------
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
* Small improvements, add const, remove copypasta (#8106)
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
* Proposal to improve FPointSlice and HPointSlice allocation. (#13448)
* Reusing points slice from previous series when the slice is under utilized
* Adding comments on the bench test
Signed-off-by: Alan Protasio <alanprot@gmail.com>
* lint
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
* go mod tidy
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
---------
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
Signed-off-by: Erik Sommer <ersotech@posteo.de>
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
Signed-off-by: bwplotka <bwplotka@gmail.com>
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Signed-off-by: Marco Pracucci <marco@pracucci.com>
Signed-off-by: tyltr <tylitianrui@126.com>
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Signed-off-by: Ivan Babrou <github@ivan.computer>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Signed-off-by: Goutham <gouthamve@gmail.com>
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
Signed-off-by: Ben Ye <benye@amazon.com>
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Signed-off-by: SuperQ <superq@gmail.com>
Signed-off-by: Ben Kochie <superq@gmail.com>
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Signed-off-by: beorn7 <beorn@grafana.com>
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
Signed-off-by: Alan Protasio <alanprot@gmail.com>
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
Co-authored-by: Julian Wiedmann <jwi@linux.ibm.com>
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
Co-authored-by: Erik Sommer <ersotech@posteo.de>
Co-authored-by: Linas Medziunas <linas.medziunas@gmail.com>
Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
Co-authored-by: Arianna Vespri <arianna.vespri@yahoo.it>
Co-authored-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: daniel-resdiary <109083091+daniel-resdiary@users.noreply.github.com>
Co-authored-by: Daniel Kerbel <nmdanny@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jan Fajerski <jfajersk@redhat.com>
Co-authored-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Marc Tudurí <marctc@protonmail.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Co-authored-by: Augustin Husson <husson.augustin@gmail.com>
Co-authored-by: Björn Rabenstein <beorn@grafana.com>
Co-authored-by: zenador <zenador@users.noreply.github.com>
Co-authored-by: gotjosh <josue.abreu@gmail.com>
Co-authored-by: Ben Kochie <superq@gmail.com>
Co-authored-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Co-authored-by: Marco Pracucci <marco@pracucci.com>
Co-authored-by: tyltr <tylitianrui@126.com>
Co-authored-by: Ted Robertson <10043369+tredondo@users.noreply.github.com>
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
Co-authored-by: Matthias Loibl <mail@matthiasloibl.com>
Co-authored-by: Ivan Babrou <github@ivan.computer>
Co-authored-by: Arve Knudsen <arve.knudsen@gmail.com>
Co-authored-by: Israel Blancas <iblancasa@gmail.com>
Co-authored-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Goutham <gouthamve@gmail.com>
Co-authored-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Co-authored-by: Chris Marchbanks <csmarchbanks@gmail.com>
Co-authored-by: Ben Ye <benye@amazon.com>
Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Co-authored-by: Paulin Todev <paulin.todev@gmail.com>
Co-authored-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
Co-authored-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
Co-authored-by: Mikhail Fesenko <proggga@gmail.com>
Co-authored-by: Alan Protasio <alanprot@gmail.com>
2024-02-02 10:38:50 -08:00
chunkenc . NewPool ( ) , nil ,
2020-02-06 07:58:38 -08:00
)
2019-09-13 03:25:21 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "create leveled compactor: %w" , err )
2019-09-13 03:25:21 -07:00
}
// Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime).
// Because of this block intervals are always +1 than the total samples it includes.
_ , err = compactor . Write ( dir , rh , mint , maxt + 1 , nil )
2023-11-16 10:54:41 -08:00
if err != nil {
return fmt . Errorf ( "writing WAL: %w" , err )
}
return nil
2019-09-13 03:25:21 -07:00
}
2020-07-31 08:03:02 -07:00
func ( db * DBReadOnly ) loadDataAsQueryable ( maxt int64 ) ( storage . SampleAndChunkQueryable , error ) {
2019-07-23 01:04:48 -07:00
select {
case <- db . closed :
return nil , ErrClosed
default :
}
2019-09-13 03:25:21 -07:00
blockReaders , err := db . Blocks ( )
2019-07-23 01:04:48 -07:00
if err != nil {
return nil , err
}
2019-09-13 03:25:21 -07:00
blocks := make ( [ ] * Block , len ( blockReaders ) )
for i , b := range blockReaders {
2019-07-23 01:04:48 -07:00
b , ok := b . ( * Block )
if ! ok {
return nil , errors . New ( "unable to convert a read only block to a normal block" )
}
blocks [ i ] = b
}
2021-02-09 06:12:48 -08:00
opts := DefaultHeadOptions ( )
opts . ChunkDirRoot = db . dir
2022-09-20 10:05:50 -07:00
head , err := NewHead ( nil , db . logger , nil , nil , opts , NewHeadStats ( ) )
2019-07-23 01:04:48 -07:00
if err != nil {
return nil , err
}
maxBlockTime := int64 ( math . MinInt64 )
if len ( blocks ) > 0 {
maxBlockTime = blocks [ len ( blocks ) - 1 ] . Meta ( ) . MaxTime
}
2019-09-30 08:54:55 -07:00
// Also add the WAL if the current blocks don't cover the requests time range.
2019-07-23 01:04:48 -07:00
if maxBlockTime <= maxt {
2020-05-06 08:30:00 -07:00
if err := head . Close ( ) ; err != nil {
return nil , err
}
2022-10-10 08:08:46 -07:00
w , err := wlog . Open ( db . logger , filepath . Join ( db . dir , "wal" ) )
2019-07-23 01:04:48 -07:00
if err != nil {
return nil , err
}
2022-10-10 08:08:46 -07:00
var wbl * wlog . WL
wblDir := filepath . Join ( db . dir , wlog . WblDirName )
2022-09-20 10:05:50 -07:00
if _ , err := os . Stat ( wblDir ) ; ! os . IsNotExist ( err ) {
2022-10-10 08:08:46 -07:00
wbl , err = wlog . Open ( db . logger , wblDir )
2022-09-20 10:05:50 -07:00
if err != nil {
return nil , err
}
}
2021-02-09 06:12:48 -08:00
opts := DefaultHeadOptions ( )
opts . ChunkDirRoot = db . dir
2022-09-20 10:05:50 -07:00
head , err = NewHead ( nil , db . logger , w , wbl , opts , NewHeadStats ( ) )
2019-07-23 01:04:48 -07:00
if err != nil {
return nil , err
}
// Set the min valid time for the ingested wal samples
// to be no lower than the maxt of the last block.
if err := head . Init ( maxBlockTime ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "read WAL: %w" , err )
2019-07-23 01:04:48 -07:00
}
// Set the wal to nil to disable all wal operations.
// This is mainly to avoid blocking when closing the head.
head . wal = nil
}
2020-05-06 08:30:00 -07:00
db . closers = append ( db . closers , head )
2020-07-31 08:03:02 -07:00
return & DB {
2019-07-23 01:04:48 -07:00
dir : db . dir ,
logger : db . logger ,
blocks : blocks ,
head : head ,
2020-07-31 08:03:02 -07:00
} , nil
}
2019-07-23 01:04:48 -07:00
2020-07-31 08:03:02 -07:00
// Querier loads the blocks and wal and returns a new querier over the data partition for the given time range.
// Current implementation doesn't support multiple Queriers.
2023-09-12 03:37:38 -07:00
func ( db * DBReadOnly ) Querier ( mint , maxt int64 ) ( storage . Querier , error ) {
2020-07-31 08:03:02 -07:00
q , err := db . loadDataAsQueryable ( maxt )
if err != nil {
return nil , err
}
2023-09-12 03:37:38 -07:00
return q . Querier ( mint , maxt )
2019-07-23 01:04:48 -07:00
}
2020-07-31 08:03:02 -07:00
// ChunkQuerier loads blocks and the wal and returns a new chunk querier over the data partition for the given time range.
// Current implementation doesn't support multiple ChunkQueriers.
2023-09-12 03:37:38 -07:00
func ( db * DBReadOnly ) ChunkQuerier ( mint , maxt int64 ) ( storage . ChunkQuerier , error ) {
2020-07-31 08:03:02 -07:00
q , err := db . loadDataAsQueryable ( maxt )
if err != nil {
return nil , err
}
2023-09-12 03:37:38 -07:00
return q . ChunkQuerier ( mint , maxt )
2020-06-24 06:41:52 -07:00
}
2019-07-23 01:04:48 -07:00
// Blocks returns a slice of block readers for persisted blocks.
func ( db * DBReadOnly ) Blocks ( ) ( [ ] BlockReader , error ) {
select {
case <- db . closed :
return nil , ErrClosed
default :
}
loadable , corrupted , err := openBlocks ( db . logger , db . dir , nil , nil )
if err != nil {
return nil , err
}
// Corrupted blocks that have been superseded by a loadable block can be safely ignored.
for _ , block := range loadable {
for _ , b := range block . Meta ( ) . Compaction . Parents {
delete ( corrupted , b . ULID )
}
}
if len ( corrupted ) > 0 {
for _ , b := range loadable {
if err := b . Close ( ) ; err != nil {
2020-06-17 07:40:00 -07:00
level . Warn ( db . logger ) . Log ( "msg" , "Closing block failed" , "err" , err , "block" , b )
2019-07-23 01:04:48 -07:00
}
}
2020-10-28 08:24:58 -07:00
errs := tsdb_errors . NewMulti ( )
2020-06-17 07:40:00 -07:00
for ulid , err := range corrupted {
2023-11-16 10:54:41 -08:00
if err != nil {
errs . Add ( fmt . Errorf ( "corrupted block %s: %w" , ulid . String ( ) , err ) )
}
2020-06-17 07:40:00 -07:00
}
2020-10-28 08:24:58 -07:00
return nil , errs . Err ( )
2019-07-23 01:04:48 -07:00
}
if len ( loadable ) == 0 {
2019-09-13 03:25:21 -07:00
return nil , nil
2019-07-23 01:04:48 -07:00
}
2023-09-21 13:53:51 -07:00
slices . SortFunc ( loadable , func ( a , b * Block ) int {
2023-10-16 07:23:26 -07:00
switch {
case a . Meta ( ) . MinTime < b . Meta ( ) . MinTime :
return - 1
case a . Meta ( ) . MinTime > b . Meta ( ) . MinTime :
return 1
default :
return 0
}
2019-07-23 01:04:48 -07:00
} )
blockMetas := make ( [ ] BlockMeta , 0 , len ( loadable ) )
for _ , b := range loadable {
blockMetas = append ( blockMetas , b . Meta ( ) )
}
if overlaps := OverlappingBlocks ( blockMetas ) ; len ( overlaps ) > 0 {
2020-04-11 01:22:18 -07:00
level . Warn ( db . logger ) . Log ( "msg" , "Overlapping blocks found during opening" , "detail" , overlaps . String ( ) )
2019-07-23 01:04:48 -07:00
}
// Close all previously open readers and add the new ones to the cache.
for _ , closer := range db . closers {
closer . Close ( )
}
blockClosers := make ( [ ] io . Closer , len ( loadable ) )
blockReaders := make ( [ ] BlockReader , len ( loadable ) )
for i , b := range loadable {
blockClosers [ i ] = b
blockReaders [ i ] = b
}
db . closers = blockClosers
return blockReaders , nil
}
2023-06-01 04:43:09 -07:00
// LastBlockID returns the BlockID of latest block.
func ( db * DBReadOnly ) LastBlockID ( ) ( string , error ) {
entries , err := os . ReadDir ( db . dir )
if err != nil {
return "" , err
}
max := uint64 ( 0 )
lastBlockID := ""
for _ , e := range entries {
// Check if dir is a block dir or not.
dirName := e . Name ( )
ulidObj , err := ulid . ParseStrict ( dirName )
if err != nil {
continue // Not a block dir.
}
timestamp := ulidObj . Time ( )
if timestamp > max {
max = timestamp
lastBlockID = dirName
}
}
if lastBlockID == "" {
return "" , errors . New ( "no blocks found" )
}
return lastBlockID , nil
}
// Block returns a block reader by given block id.
func ( db * DBReadOnly ) Block ( blockID string ) ( BlockReader , error ) {
select {
case <- db . closed :
return nil , ErrClosed
default :
}
_ , err := os . Stat ( filepath . Join ( db . dir , blockID ) )
if os . IsNotExist ( err ) {
2023-11-14 05:04:31 -08:00
return nil , fmt . Errorf ( "invalid block ID %s" , blockID )
2023-06-01 04:43:09 -07:00
}
block , err := OpenBlock ( db . logger , filepath . Join ( db . dir , blockID ) , nil )
if err != nil {
return nil , err
}
db . closers = append ( db . closers , block )
return block , nil
}
2019-07-23 01:04:48 -07:00
// Close all block readers.
func ( db * DBReadOnly ) Close ( ) error {
select {
case <- db . closed :
return ErrClosed
default :
}
close ( db . closed )
2020-10-28 08:24:58 -07:00
return tsdb_errors . CloseAll ( db . closers )
2019-07-23 01:04:48 -07:00
}
2020-02-07 08:24:17 -08:00
// Open returns a new DB in the given directory. If options are empty, DefaultOptions will be used.
2021-06-05 07:29:32 -07:00
func Open ( dir string , l log . Logger , r prometheus . Registerer , opts * Options , stats * DBStats ) ( db * DB , err error ) {
2020-02-06 07:58:38 -08:00
var rngs [ ] int64
opts , rngs = validateOpts ( opts , nil )
2021-06-05 07:29:32 -07:00
return open ( dir , l , r , opts , rngs , stats )
2020-02-06 07:58:38 -08:00
}
func validateOpts ( opts * Options , rngs [ ] int64 ) ( * Options , [ ] int64 ) {
if opts == nil {
opts = DefaultOptions ( )
}
if opts . StripeSize <= 0 {
opts . StripeSize = DefaultStripeSize
}
2020-11-19 05:00:47 -08:00
if opts . HeadChunksWriteBufferSize <= 0 {
opts . HeadChunksWriteBufferSize = chunks . DefaultWriteBufferSize
}
2022-01-10 05:36:45 -08:00
if opts . HeadChunksWriteQueueSize < 0 {
opts . HeadChunksWriteQueueSize = chunks . DefaultWriteQueueSize
}
2023-05-24 04:00:21 -07:00
if opts . SamplesPerChunk <= 0 {
opts . SamplesPerChunk = DefaultSamplesPerChunk
}
2021-04-15 01:55:01 -07:00
if opts . MaxBlockChunkSegmentSize <= 0 {
opts . MaxBlockChunkSegmentSize = chunks . DefaultChunkSegmentSize
}
2020-02-06 07:58:38 -08:00
if opts . MinBlockDuration <= 0 {
2020-02-11 08:34:09 -08:00
opts . MinBlockDuration = DefaultBlockDuration
2020-02-06 07:58:38 -08:00
}
if opts . MinBlockDuration > opts . MaxBlockDuration {
opts . MaxBlockDuration = opts . MinBlockDuration
}
2022-09-20 10:05:50 -07:00
if opts . OutOfOrderCapMax <= 0 {
opts . OutOfOrderCapMax = DefaultOutOfOrderCapMax
}
if opts . OutOfOrderTimeWindow < 0 {
opts . OutOfOrderTimeWindow = 0
}
2020-02-06 07:58:38 -08:00
if len ( rngs ) == 0 {
// Start with smallest block duration and create exponential buckets until the exceed the
// configured maximum block duration.
2020-02-11 08:34:09 -08:00
rngs = ExponentialBlockRanges ( opts . MinBlockDuration , 10 , 3 )
2020-02-06 07:58:38 -08:00
}
return opts , rngs
}
2021-11-17 02:21:27 -08:00
// open returns a new DB in the given directory.
// It initializes the lockfile, WAL, compactor, and Head (by replaying the WAL), and runs the database.
// It is not safe to open more than one DB in the same directory.
2021-06-05 07:29:32 -07:00
func open ( dir string , l log . Logger , r prometheus . Registerer , opts * Options , rngs [ ] int64 , stats * DBStats ) ( _ * DB , returnedErr error ) {
2021-10-22 01:06:44 -07:00
if err := os . MkdirAll ( dir , 0 o777 ) ; err != nil {
2017-02-19 04:01:19 -08:00
return nil , err
}
2017-02-19 07:04:37 -08:00
if l == nil {
2017-09-13 01:17:20 -07:00
l = log . NewNopLogger ( )
2017-02-19 07:04:37 -08:00
}
2021-06-05 07:29:32 -07:00
if stats == nil {
stats = NewDBStats ( )
}
2020-02-06 07:58:38 -08:00
for i , v := range rngs {
2020-02-11 08:34:09 -08:00
if v > opts . MaxBlockDuration {
2020-02-06 07:58:38 -08:00
rngs = rngs [ : i ]
break
}
2020-01-29 23:12:43 -08:00
}
2020-02-06 07:58:38 -08:00
2018-02-12 02:40:12 -08:00
// Fixup bad format written by Prometheus 2.1.
2018-02-09 04:37:10 -08:00
if err := repairBadIndexVersion ( l , dir ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "repair bad index version: %w" , err )
2018-02-09 04:11:03 -08:00
}
2020-08-14 02:45:08 -07:00
walDir := filepath . Join ( dir , "wal" )
2022-10-10 08:08:46 -07:00
wblDir := filepath . Join ( dir , wlog . WblDirName )
2020-08-14 02:45:08 -07:00
2018-09-17 09:30:56 -07:00
// Migrate old WAL if one exists.
2020-08-14 02:45:08 -07:00
if err := MigrateWAL ( l , walDir ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "migrate WAL: %w" , err )
2018-05-27 10:05:11 -07:00
}
2022-03-24 03:44:14 -07:00
for _ , tmpDir := range [ ] string { walDir , dir } {
// Remove tmp dirs.
if err := removeBestEffortTmpDirs ( l , tmpDir ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "remove tmp dirs: %w" , err )
2022-03-24 03:44:14 -07:00
}
2020-08-10 22:56:08 -07:00
}
2017-01-17 21:18:32 -08:00
2020-10-28 03:09:03 -07:00
db := & DB {
2020-07-22 08:19:33 -07:00
dir : dir ,
logger : l ,
opts : opts ,
compactc : make ( chan struct { } , 1 ) ,
donec : make ( chan struct { } ) ,
stopc : make ( chan struct { } ) ,
autoCompact : true ,
chunkPool : chunkenc . NewPool ( ) ,
blocksToDelete : opts . BlocksToDelete ,
2022-09-20 10:05:50 -07:00
registerer : r ,
2020-07-22 08:19:33 -07:00
}
2020-10-21 08:08:28 -07:00
defer func ( ) {
// Close files if startup fails somewhere.
if returnedErr == nil {
return
}
2020-10-28 03:09:03 -07:00
close ( db . donec ) // DB is never run if it was an error, so close this channel here.
2023-11-16 10:54:41 -08:00
errs := tsdb_errors . NewMulti ( returnedErr )
if err := db . Close ( ) ; err != nil {
errs . Add ( fmt . Errorf ( "close DB after failed startup: %w" , err ) )
}
returnedErr = errs . Err ( )
2020-10-21 08:08:28 -07:00
} ( )
2020-07-22 08:19:33 -07:00
if db . blocksToDelete == nil {
db . blocksToDelete = DefaultBlocksToDelete ( db )
2017-01-06 00:26:39 -08:00
}
2019-07-27 01:52:25 -07:00
2021-11-11 08:45:25 -08:00
var err error
db . locker , err = tsdbutil . NewDirLocker ( dir , "tsdb" , db . logger , r )
if err != nil {
return nil , err
}
2017-05-09 03:52:47 -07:00
if ! opts . NoLockfile {
2021-11-11 08:45:25 -08:00
if err := db . locker . Lock ( ) ; err != nil {
2017-05-18 07:09:30 -07:00
return nil , err
}
2017-05-09 03:52:47 -07:00
}
2019-02-06 04:07:35 -08:00
ctx , cancel := context . WithCancel ( context . Background ( ) )
remote write 2.0: sync with `main` branch (#13510)
* consoles: exclude iowait and steal from CPU Utilisation
'iowait' and 'steal' indicate specific idle/wait states, which shouldn't
be counted into CPU Utilisation. Also see
https://github.com/prometheus-operator/kube-prometheus/pull/796 and
https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/667.
Per the iostat man page:
%idle
Show the percentage of time that the CPU or CPUs were idle and the
system did not have an outstanding disk I/O request.
%iowait
Show the percentage of time that the CPU or CPUs were idle during
which the system had an outstanding disk I/O request.
%steal
Show the percentage of time spent in involuntary wait by the
virtual CPU or CPUs while the hypervisor was servicing another
virtual processor.
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
* tsdb: shrink txRing with smaller integers
4 billion active transactions ought to be enough for anyone.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: create isolation transaction slice on demand
When Prometheus restarts it creates every series read in from the WAL,
but many of those series will be finished, and never receive any more
samples. By defering allocation of the txRing slice to when it is first
needed, we save 32 bytes per stale series.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* add cluster variable to Overview dashboard
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* promql: simplify Native Histogram arithmetics
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
* Cut 2.49.0-rc.0 (#13270)
* Cut 2.49.0-rc.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Removed the duplicate.
Signed-off-by: bwplotka <bwplotka@gmail.com>
---------
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Add unit protobuf parser
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Go on adding protobuf parsing for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* ui: create a reproduction for https://github.com/prometheus/prometheus/issues/13292
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Get conditional right
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Get VM Scale Set NIC (#13283)
Calling `*armnetwork.InterfacesClient.Get()` doesn't work for Scale Set
VM NIC, because these use a different Resource ID format.
Use `*armnetwork.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface()`
instead. This needs both the scale set name and the instance ID, so
add an `InstanceID` field to the `virtualMachine` struct. `InstanceID`
is empty for a VM that isn't a ScaleSetVM.
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
* Cut v2.49.0-rc.1
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Delete debugging lines, amend error message for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Correct order in error message
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Consider storage.ErrTooOldSample as non-retryable
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
* scrape_test.go: Increase scrape interval in TestScrapeLoopCache to reduce potential flakiness
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Avoid creating string for suffix, consider counters without _total suffix
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* build(deps): bump github.com/prometheus/client_golang
Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.17.0 to 1.18.0.
- [Release notes](https://github.com/prometheus/client_golang/releases)
- [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/client_golang/compare/v1.17.0...v1.18.0)
---
updated-dependencies:
- dependency-name: github.com/prometheus/client_golang
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump actions/setup-node from 3.8.1 to 4.0.1
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3.8.1 to 4.0.1.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d...b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8)
---
updated-dependencies:
- dependency-name: actions/setup-node
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
* scripts: sort file list in embed directive
Otherwise the resulting string depends on find, which afaict depends on
the underlying filesystem. A stable file list make it easier to detect
UI changes in downstreams that need to track UI assets.
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
* Fix DataTableProps['data'] for resultType string
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* Fix handling of scalar and string in isHeatmapData
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* build(deps): bump github.com/influxdata/influxdb
Bumps [github.com/influxdata/influxdb](https://github.com/influxdata/influxdb) from 1.11.2 to 1.11.4.
- [Release notes](https://github.com/influxdata/influxdb/releases)
- [Commits](https://github.com/influxdata/influxdb/compare/v1.11.2...v1.11.4)
---
updated-dependencies:
- dependency-name: github.com/influxdata/influxdb
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump github.com/prometheus/prometheus
Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.48.0 to 0.48.1.
- [Release notes](https://github.com/prometheus/prometheus/releases)
- [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/prometheus/compare/v0.48.0...v0.48.1)
---
updated-dependencies:
- dependency-name: github.com/prometheus/prometheus
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* Bump client_golang to v1.18.0 (#13373)
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Drop old inmemory samples (#13002)
* Drop old inmemory samples
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Avoid copying timeseries when the feature is disabled
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Run gofmt
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Clarify docs
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Add more logging info
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Remove loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* optimize function and add tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Simplify filter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Update help info from metrics
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use metrics to keep track of drop elements during buildWriteRequest
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var in tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* pass time.Now as parameter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Change buildwriterequest during retries
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Revert "Remove loggers"
This reverts commit 54f91dfcae20488944162335ab4ad8be459df1ab.
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use log level debug for loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Fix linter
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove noisy debug-level logs; add 'reason' label to drop metrics
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove accidentally committed files
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Propagate logger to buildWriteRequest to log dropped data
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix docs comment
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Make drop reason more specific
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove unnecessary pass of logger
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Use snake_case for reason label
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix dropped samples metric
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
---------
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
* fix(discovery): allow requireUpdate util to timeout in discovery/file/file_test.go.
The loop ran indefinitely if the condition isn't met.
Before, each iteration created a new timer channel which was always outpaced by
the other timer channel with smaller duration.
minor detail: There was a memory leak: resources of the ~10 previous timers were
constantly kept. With the fix, we may keep the resources of one timer around for defaultWait
but this isn't worth the changes to make it right.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Merge pull request #13371 from kevinmingtarja/fix-isHeatmapData
ui: fix handling of scalar and string in isHeatmapData
* tsdb/{index,compact}: allow using custom postings encoding format (#13242)
* tsdb/{index,compact}: allow using custom postings encoding format
We would like to experiment with a different postings encoding format in
Thanos so in this change I am proposing adding another argument to
`NewWriter` which would allow users to change the format if needed.
Also, wire the leveled compactor so that it would be possible to change
the format there too.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb/compact: use a struct for leveled compactor options
As discussed on Slack, let's use a struct for the options in leveled
compactor.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: make changes after Bryan's review
- Make changes less intrusive
- Turn the postings encoder type into a function
- Add NewWriterWithEncoder()
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
---------
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0-rc.2
Signed-off-by: bwplotka <bwplotka@gmail.com>
* build(deps): bump actions/setup-go from 3.5.0 to 5.0.0 in /scripts (#13362)
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3.5.0 to 5.0.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/6edd4406fa81c3da01a34fa6f6343087c207a568...0c52d547c9bc32b1aa3301fd7a9cb496313a4491)
---
updated-dependencies:
- dependency-name: actions/setup-go
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump github/codeql-action from 2.22.8 to 3.22.12 (#13358)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.22.8 to 3.22.12.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/407ffafae6a767df3e0230c3df91b6443ae8df75...012739e5082ff0c22ca6d6ab32e07c36df03c4a4)
---
updated-dependencies:
- dependency-name: github/codeql-action
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* put @nexucis has a release shepherd (#13383)
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
* Add analyze histograms command to promtool (#12331)
Add `query analyze` command to promtool
This command analyzes the buckets of classic and native histograms,
based on data queried from the Prometheus query API, i.e. it
doesn't require direct access to the TSDB files.
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* included instance in all necessary descriptions
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* tsdb/compact: fix passing merge func
Fixing a very small logical problem I've introduced :(.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: add enable overlapping compaction
This functionality is needed in downstream projects because they have a
separate component that does compaction.
Upstreaming
https://github.com/grafana/mimir-prometheus/blob/7c8e9a2a76fc729e9078889782928b2fdfe240e9/tsdb/compact.go#L323-L325.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* promtool: allow setting multiple matchers to "promtool tsdb dump" command. (#13296)
Conditions are ANDed inside the same matcher but matchers are ORed
Including unit tests for "promtool tsdb dump".
Refactor some matchers scraping utils.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Fixed changelog
Signed-off-by: bwplotka <bwplotka@gmail.com>
* tsdb/main: wire "EnableOverlappingCompaction" to tsdb.Options (#13398)
This added the https://github.com/prometheus/prometheus/pull/13393
"EnableOverlappingCompaction" parameter to the compactor code but not to
the tsdb.Options. I forgot about that. Add it to `tsdb.Options` too and
set it to `true` in Prometheus.
Copy/paste the description from
https://github.com/prometheus/prometheus/pull/13393#issuecomment-1891787986
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Issue #13268: fix quality value in accept header
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
* Cut 2.49.1 with scrape q= bugfix.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Cut 2.49.1 web package.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Restore more efficient version of NewPossibleNonCounterInfo annotation (#13022)
Restore more efficient version of NewPossibleNonCounterInfo annotation
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Fix regressions introduced by #13242
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* fix slice copy in 1.20 (#13389)
The slices package is added to the standard library in Go 1.21;
we need to import from the exp area to maintain compatibility with Go 1.20.
Signed-off-by: tyltr <tylitianrui@126.com>
* Docs: Query Basics: link to rate (#10538)
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
* chore(kubernetes): check preconditions earlier and avoid unnecessary checks or iterations
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Examples: link to `rate` for new users (#10535)
* Examples: link to `rate` for new users
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
* promql: use natural sort in sort_by_label and sort_by_label_desc (#13411)
These functions are intended for humans, as robots can already sort the results
however they please. Humans like things sorted "naturally":
* https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
A similar thing has been done to Grafana, which is also used by humans:
* https://github.com/grafana/grafana/pull/78024
* https://github.com/grafana/grafana/pull/78494
Signed-off-by: Ivan Babrou <github@ivan.computer>
* TestLabelValuesWithMatchers: Add test case
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* remove obsolete build tag
Signed-off-by: tyltr <tylitianrui@126.com>
* Upgrade some golang dependencies for resty 2.11
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
* Native Histograms: support `native_histogram_min_bucket_factor` in scrape_config (#13222)
Native Histograms: support native_histogram_min_bucket_factor in scrape_config
---------
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram (#13392)
Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Minor fixes to otlp vendor update script
Signed-off-by: Goutham <gouthamve@gmail.com>
* build(deps): bump github.com/hetznercloud/hcloud-go/v2
Bumps [github.com/hetznercloud/hcloud-go/v2](https://github.com/hetznercloud/hcloud-go) from 2.4.0 to 2.6.0.
- [Release notes](https://github.com/hetznercloud/hcloud-go/releases)
- [Changelog](https://github.com/hetznercloud/hcloud-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hetznercloud/hcloud-go/compare/v2.4.0...v2.6.0)
---
updated-dependencies:
- dependency-name: github.com/hetznercloud/hcloud-go/v2
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* Enhanced visibility for `promtool test rules` with JSON colored formatting (#13342)
* Added diff flag for unit test to improvise readability & debugging
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Removed blank spaces
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed linting error
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Added cli flags to documentation
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Revert unrrelated linting fixes
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed review suggestions
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Cleanup
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
---------
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* storage: skip merging when no remote storage configured
Prometheus is hard-coded to use a fanout storage between TSDB and
a remote storage which by default is empty.
This change detects the empty storage and skips merging between
result sets, which would make `Select()` sort results.
Bottom line: we skip a sort unless there really is some remote storage
configured.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Remove csmarchbanks from remote write owners (#13432)
I have not had the time to keep up with remote write and have no plans
to work on it in the near future so I am withdrawing my maintainership
of that part of the codebase. I continue to focus on client_python.
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
* add more context cancellation check at evaluation time
Signed-off-by: Ben Ye <benye@amazon.com>
* Optimize label values with matchers by taking shortcuts (#13426)
Don't calculate postings beforehand: we may not need them. If all
matchers are for the requested label, we can just filter its values.
Also, if there are no values at all, no need to run any kind of
logic.
Also add more labelValuesWithMatchers benchmarks
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Add automatic memory limit handling
Enable automatic detection of memory limits and configure GOMEMLIMIT to
match.
* Also includes a flag to allow controlling the reserved ratio.
Signed-off-by: SuperQ <superq@gmail.com>
* Update OSSF badge link (#13433)
Provide a more user friendly interface
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
* SD Managers taking over responsibility for registration of debug metrics (#13375)
SD Managers take over responsibility for SD metrics registration
---------
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Optimize histogram iterators (#13340)
Optimize histogram iterators
Histogram iterators allocate new objects in the AtHistogram and
AtFloatHistogram methods, which makes calculating rates over long
ranges expensive.
In #13215 we allowed an existing object to be reused
when converting an integer histogram to a float histogram. This commit follows
the same idea and allows injecting an existing object in the AtHistogram and
AtFloatHistogram methods. When the injected value is nil, iterators allocate
new histograms, otherwise they populate and return the injected object.
The commit also adds a CopyTo method to Histogram and FloatHistogram which
is used in the BufferedIterator to overwrite items in the ring instead of making
new copies.
Note that a specialized HPoint pool is needed for all of this to work
(`matrixSelectorHPool`).
---------
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
* doc: Mark `mad_over_time` as experimental (#13440)
We forgot to do that in
https://github.com/prometheus/prometheus/pull/13059
Signed-off-by: beorn7 <beorn@grafana.com>
* Change metric label for Puppetdb from 'http' to 'puppetdb'
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
* mirror metrics.proto change & generate code
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
* TestHeadLabelValuesWithMatchers: Add test case (#13414)
Add test case to TestHeadLabelValuesWithMatchers, while fixing a couple
of typos in other test cases. Also enclosing some implicit sub-tests in a
`t.Run` call to make them explicitly sub-tests.
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* update all go dependencies (#13438)
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
* build(deps): bump the k8s-io group with 2 updates (#13454)
Bumps the k8s-io group with 2 updates: [k8s.io/api](https://github.com/kubernetes/api) and [k8s.io/client-go](https://github.com/kubernetes/client-go).
Updates `k8s.io/api` from 0.28.4 to 0.29.1
- [Commits](https://github.com/kubernetes/api/compare/v0.28.4...v0.29.1)
Updates `k8s.io/client-go` from 0.28.4 to 0.29.1
- [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md)
- [Commits](https://github.com/kubernetes/client-go/compare/v0.28.4...v0.29.1)
---
updated-dependencies:
- dependency-name: k8s.io/api
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
- dependency-name: k8s.io/client-go
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump the go-opentelemetry-io group with 1 update (#13453)
Bumps the go-opentelemetry-io group with 1 update: [go.opentelemetry.io/collector/semconv](https://github.com/open-telemetry/opentelemetry-collector).
Updates `go.opentelemetry.io/collector/semconv` from 0.92.0 to 0.93.0
- [Release notes](https://github.com/open-telemetry/opentelemetry-collector/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-collector/blob/main/CHANGELOG-API.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-collector/compare/v0.92.0...v0.93.0)
---
updated-dependencies:
- dependency-name: go.opentelemetry.io/collector/semconv
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: go-opentelemetry-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump actions/upload-artifact from 3.1.3 to 4.0.0 (#13355)
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)
---
updated-dependencies:
- dependency-name: actions/upload-artifact
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump bufbuild/buf-push-action (#13357)
Bumps [bufbuild/buf-push-action](https://github.com/bufbuild/buf-push-action) from 342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 to a654ff18effe4641ebea4a4ce242c49800728459.
- [Release notes](https://github.com/bufbuild/buf-push-action/releases)
- [Commits](https://github.com/bufbuild/buf-push-action/compare/342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1...a654ff18effe4641ebea4a4ce242c49800728459)
---
updated-dependencies:
- dependency-name: bufbuild/buf-push-action
dependency-type: direct:production
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Labels: Add DropMetricName function, used in PromQL (#13446)
This function is called very frequently when executing PromQL functions,
and we can do it much more efficiently inside Labels.
In the common case that `__name__` comes first in the labels, we simply
re-point to start at the next label, which is nearly free.
`DropMetricName` is now so cheap I removed the cache - benchmarks show
everything still goes faster.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: simplify internal series delete function (#13261)
Lifting an optimisation from Agent code, `seriesHashmap.del` can use
the unique series reference, doesn't need to check Labels.
Also streamline the logic for deleting from `unique` and `conflicts` maps,
and add some comments to help the next person.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* otlptranslator/update-copy.sh: Fix sed command lines
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Rollback k8s.io requirements (#13462)
Rollback k8s.io Go modules to v0.28.6 to avoid forcing upgrade of Go to
1.21. This allows us to keep compatibility with the currently supported
upstream Go releases.
Signed-off-by: SuperQ <superq@gmail.com>
* Make update-copy.sh work for both OSX and GNU sed
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Name @beorn7 and @krajorama as maintainers for native histograms
I have been the de-facto maintainer for native histograms from the
beginning. So let's put this into MAINTAINERS.md.
In addition, I hereby proposose George Krajcsovits AKA Krajo as a
co-maintainer. He has contributed a lot of native histogram code, but
more importantly, he has contributed substantially to reviewing other
contributors' native histogram code, up to a point where I was merely
rubberstamping the PRs he had already reviewed. I'm confident that he
is ready to to be granted commit rights as outlined in the
"Maintainers" section of the governance:
https://prometheus.io/governance/#maintainers
According to the same section of the governance, I will announce the
proposed change on the developers mailing list and will give some time
for lazy consensus before merging this PR.
Signed-off-by: beorn7 <beorn@grafana.com>
* ui/fix: correct url handling for stacked graphs (#13460)
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
* tsdb: use cheaper Mutex on series
Mutex is 8 bytes; RWMutex is 24 bytes and much more complicated. Since
`RLock` is only used in two places, `UpdateMetadata` and `Delete`,
neither of which are hotspots, we should use the cheaper one.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Fix last_over_time for native histograms
The last_over_time retains a histogram sample without making a copy.
This sample is now coming from the buffered iterator used for windowing functions,
and can be reused for reading subsequent samples as the iterator progresses.
I would propose copying the sample in the last_over_time function, similar to
how it is done for rate, sum_over_time and others.
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
* Implementation
NOTE:
Rebased from main after refactor in #13014
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Add feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactor concurrency control
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Optimising dependencies/dependents funcs to not produce new slices each request
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Rename flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring for performance, and to allow controller to be overridden
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Block until all rules, both sync & async, have completed evaluating
Updated & added tests
Review feedback nits
Return empty map if not indeterminate
Use highWatermark to track inflight requests counter
Appease the linter
Clarify feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Fix typo in CLI flag description
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Fixed auto-generated doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improve doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Simplify the design to update concurrency controller once the rule evaluation has done
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Add more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Added more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improved RuleConcurrencyController interface doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Introduced sequentialRuleEvalController
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Remove superfluous nil check in Group.metrics
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* api: Serialize discovered and target labels into JSON directly (#13469)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* api: Serialize discovered labels into JSON directly in dropped targets (#13484)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* Add ShardedPostings() support to TSDB (#10421)
This PR is a reference implementation of the proposal described in #10420.
In addition to what described in #10420, in this PR I've introduced labels.StableHash(). The idea is to offer an hashing function which doesn't change over time, and that's used by query sharding in order to get a stable behaviour over time. The implementation of labels.StableHash() is the hashing function used by Prometheus before stringlabels, and what's used by Grafana Mimir for query sharding (because built before stringlabels was a thing).
Follow up work
As mentioned in #10420, if this PR is accepted I'm also open to upload another foundamental piece used by Grafana Mimir query sharding to accelerate the query execution: an optional, configurable and fast in-memory cache for the series hashes.
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* storage/remote: document why two benchmarks are skipped
One was silently doing nothing; one was doing something but the work
didn't go up linearly with iteration count.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Pod status changes not discovered by Kube Endpoints SD (#13337)
* fix(discovery/kubernetes/endpoints): react to changes on Pods because some modifications can occur on them without triggering an update on the related Endpoints (The Pod phase changing from Pending to Running e.g.).
---------
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
* Small improvements, add const, remove copypasta (#8106)
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
* Proposal to improve FPointSlice and HPointSlice allocation. (#13448)
* Reusing points slice from previous series when the slice is under utilized
* Adding comments on the bench test
Signed-off-by: Alan Protasio <alanprot@gmail.com>
* lint
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
* go mod tidy
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
---------
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
Signed-off-by: Erik Sommer <ersotech@posteo.de>
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
Signed-off-by: bwplotka <bwplotka@gmail.com>
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Signed-off-by: Marco Pracucci <marco@pracucci.com>
Signed-off-by: tyltr <tylitianrui@126.com>
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Signed-off-by: Ivan Babrou <github@ivan.computer>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Signed-off-by: Goutham <gouthamve@gmail.com>
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
Signed-off-by: Ben Ye <benye@amazon.com>
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Signed-off-by: SuperQ <superq@gmail.com>
Signed-off-by: Ben Kochie <superq@gmail.com>
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Signed-off-by: beorn7 <beorn@grafana.com>
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
Signed-off-by: Alan Protasio <alanprot@gmail.com>
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
Co-authored-by: Julian Wiedmann <jwi@linux.ibm.com>
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
Co-authored-by: Erik Sommer <ersotech@posteo.de>
Co-authored-by: Linas Medziunas <linas.medziunas@gmail.com>
Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
Co-authored-by: Arianna Vespri <arianna.vespri@yahoo.it>
Co-authored-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: daniel-resdiary <109083091+daniel-resdiary@users.noreply.github.com>
Co-authored-by: Daniel Kerbel <nmdanny@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jan Fajerski <jfajersk@redhat.com>
Co-authored-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Marc Tudurí <marctc@protonmail.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Co-authored-by: Augustin Husson <husson.augustin@gmail.com>
Co-authored-by: Björn Rabenstein <beorn@grafana.com>
Co-authored-by: zenador <zenador@users.noreply.github.com>
Co-authored-by: gotjosh <josue.abreu@gmail.com>
Co-authored-by: Ben Kochie <superq@gmail.com>
Co-authored-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Co-authored-by: Marco Pracucci <marco@pracucci.com>
Co-authored-by: tyltr <tylitianrui@126.com>
Co-authored-by: Ted Robertson <10043369+tredondo@users.noreply.github.com>
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
Co-authored-by: Matthias Loibl <mail@matthiasloibl.com>
Co-authored-by: Ivan Babrou <github@ivan.computer>
Co-authored-by: Arve Knudsen <arve.knudsen@gmail.com>
Co-authored-by: Israel Blancas <iblancasa@gmail.com>
Co-authored-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Goutham <gouthamve@gmail.com>
Co-authored-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Co-authored-by: Chris Marchbanks <csmarchbanks@gmail.com>
Co-authored-by: Ben Ye <benye@amazon.com>
Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Co-authored-by: Paulin Todev <paulin.todev@gmail.com>
Co-authored-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
Co-authored-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
Co-authored-by: Mikhail Fesenko <proggga@gmail.com>
Co-authored-by: Alan Protasio <alanprot@gmail.com>
2024-02-02 10:38:50 -08:00
db . compactor , err = NewLeveledCompactorWithOptions ( ctx , r , l , rngs , db . chunkPool , LeveledCompactorOptions {
MaxBlockChunkSegmentSize : opts . MaxBlockChunkSegmentSize ,
EnableOverlappingCompaction : opts . EnableOverlappingCompaction ,
} )
2020-10-28 03:09:03 -07:00
if err != nil {
2019-02-06 04:07:35 -08:00
cancel ( )
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "create leveled compactor: %w" , err )
2017-07-07 04:46:41 -07:00
}
2019-02-06 04:07:35 -08:00
db . compactCancel = cancel
2017-07-07 04:46:41 -07:00
2022-10-10 08:08:46 -07:00
var wal , wbl * wlog . WL
segmentSize := wlog . DefaultSegmentSize
2019-03-25 16:38:12 -07:00
// Wal is enabled.
if opts . WALSegmentSize >= 0 {
// Wal is set to a custom size.
if opts . WALSegmentSize > 0 {
2020-02-11 08:34:09 -08:00
segmentSize = opts . WALSegmentSize
2019-03-25 16:38:12 -07:00
}
2022-10-10 08:08:46 -07:00
wal , err = wlog . NewSize ( l , r , walDir , segmentSize , opts . WALCompression )
2020-10-28 03:09:03 -07:00
if err != nil {
return nil , err
2019-03-25 16:38:12 -07:00
}
2022-09-20 10:05:50 -07:00
// Check if there is a WBL on disk, in which case we should replay that data.
wblSize , err := fileutil . DirSize ( wblDir )
if err != nil && ! os . IsNotExist ( err ) {
return nil , err
}
if opts . OutOfOrderTimeWindow > 0 || wblSize > 0 {
2022-10-10 08:08:46 -07:00
wbl , err = wlog . NewSize ( l , r , wblDir , segmentSize , opts . WALCompression )
2022-09-20 10:05:50 -07:00
if err != nil {
return nil , err
}
}
2017-08-28 15:39:17 -07:00
}
2022-09-20 10:05:50 -07:00
db . oooWasEnabled . Store ( opts . OutOfOrderTimeWindow > 0 )
2021-02-09 06:12:48 -08:00
headOpts := DefaultHeadOptions ( )
headOpts . ChunkRange = rngs [ 0 ]
headOpts . ChunkDirRoot = dir
headOpts . ChunkPool = db . chunkPool
headOpts . ChunkWriteBufferSize = opts . HeadChunksWriteBufferSize
2022-01-10 05:36:45 -08:00
headOpts . ChunkWriteQueueSize = opts . HeadChunksWriteQueueSize
2023-04-12 09:48:35 -07:00
headOpts . SamplesPerChunk = opts . SamplesPerChunk
2021-02-09 06:12:48 -08:00
headOpts . StripeSize = opts . StripeSize
headOpts . SeriesCallback = opts . SeriesLifecycleCallback
2021-07-19 21:52:57 -07:00
headOpts . EnableExemplarStorage = opts . EnableExemplarStorage
headOpts . MaxExemplars . Store ( opts . MaxExemplars )
2021-08-06 09:51:01 -07:00
headOpts . EnableMemorySnapshotOnShutdown = opts . EnableMemorySnapshotOnShutdown
2022-09-14 05:08:34 -07:00
headOpts . EnableNativeHistograms . Store ( opts . EnableNativeHistograms )
2022-09-20 10:05:50 -07:00
headOpts . OutOfOrderTimeWindow . Store ( opts . OutOfOrderTimeWindow )
headOpts . OutOfOrderCapMax . Store ( opts . OutOfOrderCapMax )
remote write 2.0: sync with `main` branch (#13510)
* consoles: exclude iowait and steal from CPU Utilisation
'iowait' and 'steal' indicate specific idle/wait states, which shouldn't
be counted into CPU Utilisation. Also see
https://github.com/prometheus-operator/kube-prometheus/pull/796 and
https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/667.
Per the iostat man page:
%idle
Show the percentage of time that the CPU or CPUs were idle and the
system did not have an outstanding disk I/O request.
%iowait
Show the percentage of time that the CPU or CPUs were idle during
which the system had an outstanding disk I/O request.
%steal
Show the percentage of time spent in involuntary wait by the
virtual CPU or CPUs while the hypervisor was servicing another
virtual processor.
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
* tsdb: shrink txRing with smaller integers
4 billion active transactions ought to be enough for anyone.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: create isolation transaction slice on demand
When Prometheus restarts it creates every series read in from the WAL,
but many of those series will be finished, and never receive any more
samples. By defering allocation of the txRing slice to when it is first
needed, we save 32 bytes per stale series.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* add cluster variable to Overview dashboard
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* promql: simplify Native Histogram arithmetics
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
* Cut 2.49.0-rc.0 (#13270)
* Cut 2.49.0-rc.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Removed the duplicate.
Signed-off-by: bwplotka <bwplotka@gmail.com>
---------
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Add unit protobuf parser
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Go on adding protobuf parsing for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* ui: create a reproduction for https://github.com/prometheus/prometheus/issues/13292
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Get conditional right
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Get VM Scale Set NIC (#13283)
Calling `*armnetwork.InterfacesClient.Get()` doesn't work for Scale Set
VM NIC, because these use a different Resource ID format.
Use `*armnetwork.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface()`
instead. This needs both the scale set name and the instance ID, so
add an `InstanceID` field to the `virtualMachine` struct. `InstanceID`
is empty for a VM that isn't a ScaleSetVM.
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
* Cut v2.49.0-rc.1
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Delete debugging lines, amend error message for unit
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Correct order in error message
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* Consider storage.ErrTooOldSample as non-retryable
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
* scrape_test.go: Increase scrape interval in TestScrapeLoopCache to reduce potential flakiness
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Avoid creating string for suffix, consider counters without _total suffix
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
* build(deps): bump github.com/prometheus/client_golang
Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.17.0 to 1.18.0.
- [Release notes](https://github.com/prometheus/client_golang/releases)
- [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/client_golang/compare/v1.17.0...v1.18.0)
---
updated-dependencies:
- dependency-name: github.com/prometheus/client_golang
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump actions/setup-node from 3.8.1 to 4.0.1
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3.8.1 to 4.0.1.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d...b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8)
---
updated-dependencies:
- dependency-name: actions/setup-node
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
* scripts: sort file list in embed directive
Otherwise the resulting string depends on find, which afaict depends on
the underlying filesystem. A stable file list make it easier to detect
UI changes in downstreams that need to track UI assets.
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
* Fix DataTableProps['data'] for resultType string
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* Fix handling of scalar and string in isHeatmapData
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
* build(deps): bump github.com/influxdata/influxdb
Bumps [github.com/influxdata/influxdb](https://github.com/influxdata/influxdb) from 1.11.2 to 1.11.4.
- [Release notes](https://github.com/influxdata/influxdb/releases)
- [Commits](https://github.com/influxdata/influxdb/compare/v1.11.2...v1.11.4)
---
updated-dependencies:
- dependency-name: github.com/influxdata/influxdb
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* build(deps): bump github.com/prometheus/prometheus
Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.48.0 to 0.48.1.
- [Release notes](https://github.com/prometheus/prometheus/releases)
- [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prometheus/prometheus/compare/v0.48.0...v0.48.1)
---
updated-dependencies:
- dependency-name: github.com/prometheus/prometheus
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
* Bump client_golang to v1.18.0 (#13373)
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Drop old inmemory samples (#13002)
* Drop old inmemory samples
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Avoid copying timeseries when the feature is disabled
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Run gofmt
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Clarify docs
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Add more logging info
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Remove loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* optimize function and add tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Simplify filter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Update help info from metrics
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use metrics to keep track of drop elements during buildWriteRequest
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* rename var in tests
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* pass time.Now as parameter
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Change buildwriterequest during retries
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Revert "Remove loggers"
This reverts commit 54f91dfcae20488944162335ab4ad8be459df1ab.
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* use log level debug for loggers
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
* Fix linter
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove noisy debug-level logs; add 'reason' label to drop metrics
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove accidentally committed files
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Propagate logger to buildWriteRequest to log dropped data
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix docs comment
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Make drop reason more specific
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Remove unnecessary pass of logger
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Use snake_case for reason label
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
* Fix dropped samples metric
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
---------
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
* fix(discovery): allow requireUpdate util to timeout in discovery/file/file_test.go.
The loop ran indefinitely if the condition isn't met.
Before, each iteration created a new timer channel which was always outpaced by
the other timer channel with smaller duration.
minor detail: There was a memory leak: resources of the ~10 previous timers were
constantly kept. With the fix, we may keep the resources of one timer around for defaultWait
but this isn't worth the changes to make it right.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Merge pull request #13371 from kevinmingtarja/fix-isHeatmapData
ui: fix handling of scalar and string in isHeatmapData
* tsdb/{index,compact}: allow using custom postings encoding format (#13242)
* tsdb/{index,compact}: allow using custom postings encoding format
We would like to experiment with a different postings encoding format in
Thanos so in this change I am proposing adding another argument to
`NewWriter` which would allow users to change the format if needed.
Also, wire the leveled compactor so that it would be possible to change
the format there too.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb/compact: use a struct for leveled compactor options
As discussed on Slack, let's use a struct for the options in leveled
compactor.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: make changes after Bryan's review
- Make changes less intrusive
- Turn the postings encoder type into a function
- Add NewWriterWithEncoder()
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
---------
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0-rc.2
Signed-off-by: bwplotka <bwplotka@gmail.com>
* build(deps): bump actions/setup-go from 3.5.0 to 5.0.0 in /scripts (#13362)
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3.5.0 to 5.0.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/6edd4406fa81c3da01a34fa6f6343087c207a568...0c52d547c9bc32b1aa3301fd7a9cb496313a4491)
---
updated-dependencies:
- dependency-name: actions/setup-go
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump github/codeql-action from 2.22.8 to 3.22.12 (#13358)
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.22.8 to 3.22.12.
- [Release notes](https://github.com/github/codeql-action/releases)
- [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/github/codeql-action/compare/407ffafae6a767df3e0230c3df91b6443ae8df75...012739e5082ff0c22ca6d6ab32e07c36df03c4a4)
---
updated-dependencies:
- dependency-name: github/codeql-action
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* put @nexucis has a release shepherd (#13383)
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
* Add analyze histograms command to promtool (#12331)
Add `query analyze` command to promtool
This command analyzes the buckets of classic and native histograms,
based on data queried from the Prometheus query API, i.e. it
doesn't require direct access to the TSDB files.
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* included instance in all necessary descriptions
Signed-off-by: Erik Sommer <ersotech@posteo.de>
* tsdb/compact: fix passing merge func
Fixing a very small logical problem I've introduced :(.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* tsdb: add enable overlapping compaction
This functionality is needed in downstream projects because they have a
separate component that does compaction.
Upstreaming
https://github.com/grafana/mimir-prometheus/blob/7c8e9a2a76fc729e9078889782928b2fdfe240e9/tsdb/compact.go#L323-L325.
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Cut 2.49.0
Signed-off-by: bwplotka <bwplotka@gmail.com>
* promtool: allow setting multiple matchers to "promtool tsdb dump" command. (#13296)
Conditions are ANDed inside the same matcher but matchers are ORed
Including unit tests for "promtool tsdb dump".
Refactor some matchers scraping utils.
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Fixed changelog
Signed-off-by: bwplotka <bwplotka@gmail.com>
* tsdb/main: wire "EnableOverlappingCompaction" to tsdb.Options (#13398)
This added the https://github.com/prometheus/prometheus/pull/13393
"EnableOverlappingCompaction" parameter to the compactor code but not to
the tsdb.Options. I forgot about that. Add it to `tsdb.Options` too and
set it to `true` in Prometheus.
Copy/paste the description from
https://github.com/prometheus/prometheus/pull/13393#issuecomment-1891787986
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
* Issue #13268: fix quality value in accept header
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
* Cut 2.49.1 with scrape q= bugfix.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Cut 2.49.1 web package.
Signed-off-by: bwplotka <bwplotka@gmail.com>
* Restore more efficient version of NewPossibleNonCounterInfo annotation (#13022)
Restore more efficient version of NewPossibleNonCounterInfo annotation
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Fix regressions introduced by #13242
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* fix slice copy in 1.20 (#13389)
The slices package is added to the standard library in Go 1.21;
we need to import from the exp area to maintain compatibility with Go 1.20.
Signed-off-by: tyltr <tylitianrui@126.com>
* Docs: Query Basics: link to rate (#10538)
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
* chore(kubernetes): check preconditions earlier and avoid unnecessary checks or iterations
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
* Examples: link to `rate` for new users (#10535)
* Examples: link to `rate` for new users
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
* promql: use natural sort in sort_by_label and sort_by_label_desc (#13411)
These functions are intended for humans, as robots can already sort the results
however they please. Humans like things sorted "naturally":
* https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
A similar thing has been done to Grafana, which is also used by humans:
* https://github.com/grafana/grafana/pull/78024
* https://github.com/grafana/grafana/pull/78494
Signed-off-by: Ivan Babrou <github@ivan.computer>
* TestLabelValuesWithMatchers: Add test case
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* remove obsolete build tag
Signed-off-by: tyltr <tylitianrui@126.com>
* Upgrade some golang dependencies for resty 2.11
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
* Native Histograms: support `native_histogram_min_bucket_factor` in scrape_config (#13222)
Native Histograms: support native_histogram_min_bucket_factor in scrape_config
---------
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram (#13392)
Add warnings for histogramRate applied with isCounter not matching counter/gauge histogram
---------
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
* Minor fixes to otlp vendor update script
Signed-off-by: Goutham <gouthamve@gmail.com>
* build(deps): bump github.com/hetznercloud/hcloud-go/v2
Bumps [github.com/hetznercloud/hcloud-go/v2](https://github.com/hetznercloud/hcloud-go) from 2.4.0 to 2.6.0.
- [Release notes](https://github.com/hetznercloud/hcloud-go/releases)
- [Changelog](https://github.com/hetznercloud/hcloud-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hetznercloud/hcloud-go/compare/v2.4.0...v2.6.0)
---
updated-dependencies:
- dependency-name: github.com/hetznercloud/hcloud-go/v2
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* Enhanced visibility for `promtool test rules` with JSON colored formatting (#13342)
* Added diff flag for unit test to improvise readability & debugging
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Removed blank spaces
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed linting error
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Added cli flags to documentation
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Revert unrrelated linting fixes
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Fixed review suggestions
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Cleanup
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* Updated flag description
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
---------
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
* storage: skip merging when no remote storage configured
Prometheus is hard-coded to use a fanout storage between TSDB and
a remote storage which by default is empty.
This change detects the empty storage and skips merging between
result sets, which would make `Select()` sort results.
Bottom line: we skip a sort unless there really is some remote storage
configured.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Remove csmarchbanks from remote write owners (#13432)
I have not had the time to keep up with remote write and have no plans
to work on it in the near future so I am withdrawing my maintainership
of that part of the codebase. I continue to focus on client_python.
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
* add more context cancellation check at evaluation time
Signed-off-by: Ben Ye <benye@amazon.com>
* Optimize label values with matchers by taking shortcuts (#13426)
Don't calculate postings beforehand: we may not need them. If all
matchers are for the requested label, we can just filter its values.
Also, if there are no values at all, no need to run any kind of
logic.
Also add more labelValuesWithMatchers benchmarks
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Add automatic memory limit handling
Enable automatic detection of memory limits and configure GOMEMLIMIT to
match.
* Also includes a flag to allow controlling the reserved ratio.
Signed-off-by: SuperQ <superq@gmail.com>
* Update OSSF badge link (#13433)
Provide a more user friendly interface
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
* SD Managers taking over responsibility for registration of debug metrics (#13375)
SD Managers take over responsibility for SD metrics registration
---------
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
* Optimize histogram iterators (#13340)
Optimize histogram iterators
Histogram iterators allocate new objects in the AtHistogram and
AtFloatHistogram methods, which makes calculating rates over long
ranges expensive.
In #13215 we allowed an existing object to be reused
when converting an integer histogram to a float histogram. This commit follows
the same idea and allows injecting an existing object in the AtHistogram and
AtFloatHistogram methods. When the injected value is nil, iterators allocate
new histograms, otherwise they populate and return the injected object.
The commit also adds a CopyTo method to Histogram and FloatHistogram which
is used in the BufferedIterator to overwrite items in the ring instead of making
new copies.
Note that a specialized HPoint pool is needed for all of this to work
(`matrixSelectorHPool`).
---------
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
* doc: Mark `mad_over_time` as experimental (#13440)
We forgot to do that in
https://github.com/prometheus/prometheus/pull/13059
Signed-off-by: beorn7 <beorn@grafana.com>
* Change metric label for Puppetdb from 'http' to 'puppetdb'
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
* mirror metrics.proto change & generate code
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
* TestHeadLabelValuesWithMatchers: Add test case (#13414)
Add test case to TestHeadLabelValuesWithMatchers, while fixing a couple
of typos in other test cases. Also enclosing some implicit sub-tests in a
`t.Run` call to make them explicitly sub-tests.
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* update all go dependencies (#13438)
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
* build(deps): bump the k8s-io group with 2 updates (#13454)
Bumps the k8s-io group with 2 updates: [k8s.io/api](https://github.com/kubernetes/api) and [k8s.io/client-go](https://github.com/kubernetes/client-go).
Updates `k8s.io/api` from 0.28.4 to 0.29.1
- [Commits](https://github.com/kubernetes/api/compare/v0.28.4...v0.29.1)
Updates `k8s.io/client-go` from 0.28.4 to 0.29.1
- [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md)
- [Commits](https://github.com/kubernetes/client-go/compare/v0.28.4...v0.29.1)
---
updated-dependencies:
- dependency-name: k8s.io/api
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
- dependency-name: k8s.io/client-go
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: k8s-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump the go-opentelemetry-io group with 1 update (#13453)
Bumps the go-opentelemetry-io group with 1 update: [go.opentelemetry.io/collector/semconv](https://github.com/open-telemetry/opentelemetry-collector).
Updates `go.opentelemetry.io/collector/semconv` from 0.92.0 to 0.93.0
- [Release notes](https://github.com/open-telemetry/opentelemetry-collector/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-collector/blob/main/CHANGELOG-API.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-collector/compare/v0.92.0...v0.93.0)
---
updated-dependencies:
- dependency-name: go.opentelemetry.io/collector/semconv
dependency-type: direct:production
update-type: version-update:semver-minor
dependency-group: go-opentelemetry-io
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump actions/upload-artifact from 3.1.3 to 4.0.0 (#13355)
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/a8a3f3ad30e3422c9c7b888a15615d19a852ae32...c7d193f32edcb7bfad88892161225aeda64e9392)
---
updated-dependencies:
- dependency-name: actions/upload-artifact
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* build(deps): bump bufbuild/buf-push-action (#13357)
Bumps [bufbuild/buf-push-action](https://github.com/bufbuild/buf-push-action) from 342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 to a654ff18effe4641ebea4a4ce242c49800728459.
- [Release notes](https://github.com/bufbuild/buf-push-action/releases)
- [Commits](https://github.com/bufbuild/buf-push-action/compare/342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1...a654ff18effe4641ebea4a4ce242c49800728459)
---
updated-dependencies:
- dependency-name: bufbuild/buf-push-action
dependency-type: direct:production
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Labels: Add DropMetricName function, used in PromQL (#13446)
This function is called very frequently when executing PromQL functions,
and we can do it much more efficiently inside Labels.
In the common case that `__name__` comes first in the labels, we simply
re-point to start at the next label, which is nearly free.
`DropMetricName` is now so cheap I removed the cache - benchmarks show
everything still goes faster.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* tsdb: simplify internal series delete function (#13261)
Lifting an optimisation from Agent code, `seriesHashmap.del` can use
the unique series reference, doesn't need to check Labels.
Also streamline the logic for deleting from `unique` and `conflicts` maps,
and add some comments to help the next person.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* otlptranslator/update-copy.sh: Fix sed command lines
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Rollback k8s.io requirements (#13462)
Rollback k8s.io Go modules to v0.28.6 to avoid forcing upgrade of Go to
1.21. This allows us to keep compatibility with the currently supported
upstream Go releases.
Signed-off-by: SuperQ <superq@gmail.com>
* Make update-copy.sh work for both OSX and GNU sed
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
* Name @beorn7 and @krajorama as maintainers for native histograms
I have been the de-facto maintainer for native histograms from the
beginning. So let's put this into MAINTAINERS.md.
In addition, I hereby proposose George Krajcsovits AKA Krajo as a
co-maintainer. He has contributed a lot of native histogram code, but
more importantly, he has contributed substantially to reviewing other
contributors' native histogram code, up to a point where I was merely
rubberstamping the PRs he had already reviewed. I'm confident that he
is ready to to be granted commit rights as outlined in the
"Maintainers" section of the governance:
https://prometheus.io/governance/#maintainers
According to the same section of the governance, I will announce the
proposed change on the developers mailing list and will give some time
for lazy consensus before merging this PR.
Signed-off-by: beorn7 <beorn@grafana.com>
* ui/fix: correct url handling for stacked graphs (#13460)
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
* tsdb: use cheaper Mutex on series
Mutex is 8 bytes; RWMutex is 24 bytes and much more complicated. Since
`RLock` is only used in two places, `UpdateMetadata` and `Delete`,
neither of which are hotspots, we should use the cheaper one.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Fix last_over_time for native histograms
The last_over_time retains a histogram sample without making a copy.
This sample is now coming from the buffered iterator used for windowing functions,
and can be reused for reading subsequent samples as the iterator progresses.
I would propose copying the sample in the last_over_time function, similar to
how it is done for rate, sum_over_time and others.
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
* Implementation
NOTE:
Rebased from main after refactor in #13014
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Add feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactor concurrency control
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Optimising dependencies/dependents funcs to not produce new slices each request
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Rename flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Refactoring for performance, and to allow controller to be overridden
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Block until all rules, both sync & async, have completed evaluating
Updated & added tests
Review feedback nits
Return empty map if not indeterminate
Use highWatermark to track inflight requests counter
Appease the linter
Clarify feature flag
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
* Fix typo in CLI flag description
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Fixed auto-generated doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improve doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Simplify the design to update concurrency controller once the rule evaluation has done
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Add more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Added more test cases to TestDependenciesEdgeCases
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Improved RuleConcurrencyController interface doc
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Introduced sequentialRuleEvalController
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* Remove superfluous nil check in Group.metrics
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* api: Serialize discovered and target labels into JSON directly (#13469)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* api: Serialize discovered labels into JSON directly in dropped targets (#13484)
Converted maps into labels.Labels to avoid a lot of copying of data which leads to very high memory consumption while opening the /service-discovery endpoint in the Prometheus UI
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
* Add ShardedPostings() support to TSDB (#10421)
This PR is a reference implementation of the proposal described in #10420.
In addition to what described in #10420, in this PR I've introduced labels.StableHash(). The idea is to offer an hashing function which doesn't change over time, and that's used by query sharding in order to get a stable behaviour over time. The implementation of labels.StableHash() is the hashing function used by Prometheus before stringlabels, and what's used by Grafana Mimir for query sharding (because built before stringlabels was a thing).
Follow up work
As mentioned in #10420, if this PR is accepted I'm also open to upload another foundamental piece used by Grafana Mimir query sharding to accelerate the query execution: an optional, configurable and fast in-memory cache for the series hashes.
Signed-off-by: Marco Pracucci <marco@pracucci.com>
* storage/remote: document why two benchmarks are skipped
One was silently doing nothing; one was doing something but the work
didn't go up linearly with iteration count.
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
* Pod status changes not discovered by Kube Endpoints SD (#13337)
* fix(discovery/kubernetes/endpoints): react to changes on Pods because some modifications can occur on them without triggering an update on the related Endpoints (The Pod phase changing from Pending to Running e.g.).
---------
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
* Small improvements, add const, remove copypasta (#8106)
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
* Proposal to improve FPointSlice and HPointSlice allocation. (#13448)
* Reusing points slice from previous series when the slice is under utilized
* Adding comments on the bench test
Signed-off-by: Alan Protasio <alanprot@gmail.com>
* lint
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
* go mod tidy
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
---------
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
Signed-off-by: Erik Sommer <ersotech@posteo.de>
Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
Signed-off-by: bwplotka <bwplotka@gmail.com>
Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
Signed-off-by: machine424 <ayoubmrini424@gmail.com>
Signed-off-by: Daniel Nicholls <daniel.nicholls@resdiary.com>
Signed-off-by: Daniel Kerbel <nmdanny@gmail.com>
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
Signed-off-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Signed-off-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Signed-off-by: Marc Tuduri <marctc@protonmail.com>
Signed-off-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Signed-off-by: Augustin Husson <augustin.husson@amadeus.com>
Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
Signed-off-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Signed-off-by: Marco Pracucci <marco@pracucci.com>
Signed-off-by: tyltr <tylitianrui@126.com>
Signed-off-by: Ted Robertson 10043369+tredondo@users.noreply.github.com
Signed-off-by: Ivan Babrou <github@ivan.computer>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
Signed-off-by: Israel Blancas <iblancasa@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Signed-off-by: Björn Rabenstein <github@rabenste.in>
Signed-off-by: Goutham <gouthamve@gmail.com>
Signed-off-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
Signed-off-by: Ben Ye <benye@amazon.com>
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Signed-off-by: SuperQ <superq@gmail.com>
Signed-off-by: Ben Kochie <superq@gmail.com>
Signed-off-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Signed-off-by: Paulin Todev <paulin.todev@gmail.com>
Signed-off-by: Filip Petkovski <filip.petkovsky@gmail.com>
Signed-off-by: beorn7 <beorn@grafana.com>
Signed-off-by: Augustin Husson <husson.augustin@gmail.com>
Signed-off-by: Yury Moladau <yurymolodov@gmail.com>
Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
Signed-off-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Signed-off-by: Mikhail Fesenko <proggga@gmail.com>
Signed-off-by: Jesus Vazquez <jesusvzpg@gmail.com>
Signed-off-by: Alan Protasio <alanprot@gmail.com>
Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
Co-authored-by: Julian Wiedmann <jwi@linux.ibm.com>
Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
Co-authored-by: Erik Sommer <ersotech@posteo.de>
Co-authored-by: Linas Medziunas <linas.medziunas@gmail.com>
Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
Co-authored-by: Arianna Vespri <arianna.vespri@yahoo.it>
Co-authored-by: machine424 <ayoubmrini424@gmail.com>
Co-authored-by: daniel-resdiary <109083091+daniel-resdiary@users.noreply.github.com>
Co-authored-by: Daniel Kerbel <nmdanny@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jan Fajerski <jfajersk@redhat.com>
Co-authored-by: Kevin Mingtarja <kevin.mingtarja@gmail.com>
Co-authored-by: Paschalis Tsilias <tpaschalis@users.noreply.github.com>
Co-authored-by: Marc Tudurí <marctc@protonmail.com>
Co-authored-by: Paschalis Tsilias <paschalis.tsilias@grafana.com>
Co-authored-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
Co-authored-by: Augustin Husson <husson.augustin@gmail.com>
Co-authored-by: Björn Rabenstein <beorn@grafana.com>
Co-authored-by: zenador <zenador@users.noreply.github.com>
Co-authored-by: gotjosh <josue.abreu@gmail.com>
Co-authored-by: Ben Kochie <superq@gmail.com>
Co-authored-by: Kumar Kalpadiptya Roy <kalpadiptya.roy@outlook.com>
Co-authored-by: Marco Pracucci <marco@pracucci.com>
Co-authored-by: tyltr <tylitianrui@126.com>
Co-authored-by: Ted Robertson <10043369+tredondo@users.noreply.github.com>
Co-authored-by: Julien Pivotto <roidelapluie@o11y.eu>
Co-authored-by: Matthias Loibl <mail@matthiasloibl.com>
Co-authored-by: Ivan Babrou <github@ivan.computer>
Co-authored-by: Arve Knudsen <arve.knudsen@gmail.com>
Co-authored-by: Israel Blancas <iblancasa@gmail.com>
Co-authored-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Co-authored-by: Goutham <gouthamve@gmail.com>
Co-authored-by: Rewanth Tammana <22347290+rewanthtammana@users.noreply.github.com>
Co-authored-by: Chris Marchbanks <csmarchbanks@gmail.com>
Co-authored-by: Ben Ye <benye@amazon.com>
Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Matthieu MOREL <matthieu.morel35@gmail.com>
Co-authored-by: Paulin Todev <paulin.todev@gmail.com>
Co-authored-by: Filip Petkovski <filip.petkovsky@gmail.com>
Co-authored-by: Yury Molodov <yurymolodov@gmail.com>
Co-authored-by: Danny Kopping <danny.kopping@grafana.com>
Co-authored-by: Leegin <114397475+Leegin-darknight@users.noreply.github.com>
Co-authored-by: Guillermo Sanchez Gavier <gsanchez@newrelic.com>
Co-authored-by: Mikhail Fesenko <proggga@gmail.com>
Co-authored-by: Alan Protasio <alanprot@gmail.com>
2024-02-02 10:38:50 -08:00
headOpts . EnableSharding = opts . EnableSharding
2023-03-07 08:41:33 -08:00
if opts . WALReplayConcurrency > 0 {
headOpts . WALReplayConcurrency = opts . WALReplayConcurrency
}
2021-11-19 02:11:32 -08:00
if opts . IsolationDisabled {
// We only override this flag if isolation is disabled at DB level. We use the default otherwise.
headOpts . IsolationDisabled = opts . IsolationDisabled
}
2022-10-10 08:08:46 -07:00
db . head , err = NewHead ( r , l , wal , wbl , headOpts , stats . Head )
2020-10-28 03:09:03 -07:00
if err != nil {
return nil , err
2017-08-30 08:38:25 -07:00
}
2023-05-15 12:31:49 -07:00
db . head . writeNotified = db . writeNotified
2018-12-04 02:30:49 -08:00
2020-07-04 21:41:42 -07:00
// Register metrics after assigning the head block.
db . metrics = newDBMetrics ( db , r )
maxBytes := opts . MaxBytes
if maxBytes < 0 {
maxBytes = 0
}
db . metrics . maxBytes . Set ( float64 ( maxBytes ) )
2023-10-24 04:34:42 -07:00
db . metrics . retentionDuration . Set ( ( time . Duration ( opts . RetentionDuration ) * time . Millisecond ) . Seconds ( ) )
2020-07-04 21:41:42 -07:00
2018-11-28 01:23:50 -08:00
if err := db . reload ( ) ; err != nil {
return nil , err
}
2018-12-04 02:30:49 -08:00
// Set the min valid time for the ingested samples
// to be no lower than the maxt of the last block.
minValidTime := int64 ( math . MinInt64 )
2022-09-20 10:05:50 -07:00
// We do not consider blocks created from out-of-order samples for Head's minValidTime
// since minValidTime is only for the in-order data and we do not want to discard unnecessary
// samples from the Head.
inOrderMaxTime , ok := db . inOrderBlocksMaxTime ( )
if ok {
minValidTime = inOrderMaxTime
2018-12-04 02:30:49 -08:00
}
2019-06-14 08:39:22 -07:00
if initErr := db . head . Init ( minValidTime ) ; initErr != nil {
db . head . metrics . walCorruptionsTotal . Inc ( )
2023-11-16 10:54:41 -08:00
var e * errLoadWbl
if errors . As ( initErr , & e ) {
2023-10-13 05:21:35 -07:00
level . Warn ( db . logger ) . Log ( "msg" , "Encountered WBL read error, attempting repair" , "err" , initErr )
if err := wbl . Repair ( e . err ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "repair corrupted WBL: %w" , err )
2022-09-20 10:05:50 -07:00
}
2023-10-13 05:21:35 -07:00
level . Info ( db . logger ) . Log ( "msg" , "Successfully repaired WBL" )
2022-09-20 10:05:50 -07:00
} else {
level . Warn ( db . logger ) . Log ( "msg" , "Encountered WAL read error, attempting repair" , "err" , initErr )
2022-10-10 08:08:46 -07:00
if err := wal . Repair ( initErr ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "repair corrupted WAL: %w" , err )
2022-09-20 10:05:50 -07:00
}
2023-03-21 07:03:43 -07:00
level . Info ( db . logger ) . Log ( "msg" , "Successfully repaired WAL" )
2019-06-14 08:39:22 -07:00
}
2018-12-04 02:30:49 -08:00
}
2017-08-28 15:39:17 -07:00
2022-09-20 10:05:50 -07:00
if db . head . MinOOOTime ( ) != int64 ( math . MaxInt64 ) {
// Some OOO data was replayed from the disk that needs compaction and cleanup.
db . oooWasEnabled . Store ( true )
}
2023-09-13 08:45:06 -07:00
go db . run ( ctx )
2017-01-06 03:37:28 -08:00
return db , nil
}
2020-08-10 22:56:08 -07:00
func removeBestEffortTmpDirs ( l log . Logger , dir string ) error {
2022-04-27 02:24:36 -07:00
files , err := os . ReadDir ( dir )
2022-03-24 03:44:14 -07:00
if os . IsNotExist ( err ) {
return nil
}
2020-08-10 22:56:08 -07:00
if err != nil {
return err
}
2022-04-27 02:24:36 -07:00
for _ , f := range files {
if isTmpDir ( f ) {
if err := os . RemoveAll ( filepath . Join ( dir , f . Name ( ) ) ) ; err != nil {
level . Error ( l ) . Log ( "msg" , "failed to delete tmp block dir" , "dir" , filepath . Join ( dir , f . Name ( ) ) , "err" , err )
2020-08-10 22:56:08 -07:00
continue
}
2022-04-27 02:24:36 -07:00
level . Info ( l ) . Log ( "msg" , "Found and deleted tmp block dir" , "dir" , filepath . Join ( dir , f . Name ( ) ) )
2020-08-10 22:56:08 -07:00
}
}
return nil
}
2020-02-06 07:58:38 -08:00
// StartTime implements the Storage interface.
func ( db * DB ) StartTime ( ) ( int64 , error ) {
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
if len ( db . blocks ) > 0 {
return db . blocks [ 0 ] . Meta ( ) . MinTime , nil
}
return db . head . MinTime ( ) , nil
}
2017-06-08 03:14:13 -07:00
// Dir returns the directory of the database.
func ( db * DB ) Dir ( ) string {
return db . dir
}
2023-09-13 08:45:06 -07:00
func ( db * DB ) run ( ctx context . Context ) {
2017-01-06 03:37:28 -08:00
defer close ( db . donec )
2017-08-28 15:39:17 -07:00
backoff := time . Duration ( 0 )
2017-02-28 06:08:52 -08:00
2017-01-19 22:58:19 -08:00
for {
select {
2017-08-28 15:39:17 -07:00
case <- db . stopc :
2017-08-30 09:34:54 -07:00
return
2017-08-28 15:39:17 -07:00
case <- time . After ( backoff ) :
}
select {
case <- time . After ( 1 * time . Minute ) :
2021-01-06 23:30:08 -08:00
db . cmtx . Lock ( )
if err := db . reloadBlocks ( ) ; err != nil {
level . Error ( db . logger ) . Log ( "msg" , "reloadBlocks" , "err" , err )
}
db . cmtx . Unlock ( )
2017-02-28 06:08:52 -08:00
select {
case db . compactc <- struct { } { } :
default :
}
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 02:10:24 -07:00
// We attempt mmapping of head chunks regularly.
db . head . mmapHeadChunks ( )
2017-01-06 03:37:28 -08:00
case <- db . compactc :
db . metrics . compactionsTriggered . Inc ( )
2018-11-20 02:34:26 -08:00
db . autoCompactMtx . Lock ( )
if db . autoCompact {
2023-09-13 08:45:06 -07:00
if err := db . Compact ( ctx ) ; err != nil {
2018-11-20 02:34:26 -08:00
level . Error ( db . logger ) . Log ( "msg" , "compaction failed" , "err" , err )
backoff = exponential ( backoff , 1 * time . Second , 1 * time . Minute )
} else {
backoff = 0
}
2017-08-30 09:34:54 -07:00
} else {
2018-11-20 02:34:26 -08:00
db . metrics . compactionsSkipped . Inc ( )
2017-01-06 03:37:28 -08:00
}
2018-11-20 02:34:26 -08:00
db . autoCompactMtx . Unlock ( )
2017-01-06 03:37:28 -08:00
case <- db . stopc :
return
}
}
}
2017-08-30 09:34:54 -07:00
// Appender opens a new appender against the database.
2020-07-24 07:10:51 -07:00
func ( db * DB ) Appender ( ctx context . Context ) storage . Appender {
2020-07-30 04:11:13 -07:00
return dbAppender { db : db , Appender : db . head . Appender ( ctx ) }
2017-08-30 09:34:54 -07:00
}
2022-09-20 10:05:50 -07:00
// ApplyConfig applies a new config to the DB.
// Behaviour of 'OutOfOrderTimeWindow' is as follows:
// OOO enabled = oooTimeWindow > 0. OOO disabled = oooTimeWindow is 0.
// 1) Before: OOO disabled, Now: OOO enabled =>
// - A new WBL is created for the head block.
// - OOO compaction is enabled.
// - Overlapping queries are enabled.
//
// 2) Before: OOO enabled, Now: OOO enabled =>
// - Only the time window is updated.
//
// 3) Before: OOO enabled, Now: OOO disabled =>
// - Time Window set to 0. So no new OOO samples will be allowed.
// - OOO WBL will stay and will be eventually cleaned up.
// - OOO Compaction and overlapping queries will remain enabled until a restart or until all OOO samples are compacted.
//
// 4) Before: OOO disabled, Now: OOO disabled => no-op.
2021-07-19 21:52:57 -07:00
func ( db * DB ) ApplyConfig ( conf * config . Config ) error {
2022-09-20 10:05:50 -07:00
oooTimeWindow := int64 ( 0 )
if conf . StorageConfig . TSDBConfig != nil {
oooTimeWindow = conf . StorageConfig . TSDBConfig . OutOfOrderTimeWindow
}
if oooTimeWindow < 0 {
oooTimeWindow = 0
}
// Create WBL if it was not present and if OOO is enabled with WAL enabled.
2022-10-10 08:08:46 -07:00
var wblog * wlog . WL
2022-09-20 10:05:50 -07:00
var err error
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
switch {
case db . head . wbl != nil :
2022-09-20 10:05:50 -07:00
// The existing WBL from the disk might have been replayed while OOO was disabled.
wblog = db . head . wbl
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
case ! db . oooWasEnabled . Load ( ) && oooTimeWindow > 0 && db . opts . WALSegmentSize >= 0 :
2022-10-10 08:08:46 -07:00
segmentSize := wlog . DefaultSegmentSize
2022-09-20 10:05:50 -07:00
// Wal is set to a custom size.
if db . opts . WALSegmentSize > 0 {
segmentSize = db . opts . WALSegmentSize
}
2022-10-10 08:08:46 -07:00
oooWalDir := filepath . Join ( db . dir , wlog . WblDirName )
wblog , err = wlog . NewSize ( db . logger , db . registerer , oooWalDir , segmentSize , db . opts . WALCompression )
2022-09-20 10:05:50 -07:00
if err != nil {
return err
}
}
db . opts . OutOfOrderTimeWindow = oooTimeWindow
db . head . ApplyConfig ( conf , wblog )
if ! db . oooWasEnabled . Load ( ) {
db . oooWasEnabled . Store ( oooTimeWindow > 0 )
}
return nil
2021-07-19 21:52:57 -07:00
}
2022-09-14 05:08:34 -07:00
// EnableNativeHistograms enables the native histogram feature.
func ( db * DB ) EnableNativeHistograms ( ) {
db . head . EnableNativeHistograms ( )
}
// DisableNativeHistograms disables the native histogram feature.
func ( db * DB ) DisableNativeHistograms ( ) {
db . head . DisableNativeHistograms ( )
}
2017-08-30 09:34:54 -07:00
// dbAppender wraps the DB's head appender and triggers compactions on commit
// if necessary.
type dbAppender struct {
2020-02-06 07:58:38 -08:00
storage . Appender
2017-08-30 09:34:54 -07:00
db * DB
}
2021-03-19 12:28:55 -07:00
var _ storage . GetRef = dbAppender { }
2022-10-24 01:17:45 -07:00
func ( a dbAppender ) GetRef ( lset labels . Labels , hash uint64 ) ( storage . SeriesRef , labels . Labels ) {
2021-03-19 12:28:55 -07:00
if g , ok := a . Appender . ( storage . GetRef ) ; ok {
2022-10-24 01:17:45 -07:00
return g . GetRef ( lset , hash )
2021-03-19 12:28:55 -07:00
}
2022-03-09 14:17:40 -08:00
return 0 , labels . EmptyLabels ( )
2021-03-19 12:28:55 -07:00
}
2017-08-30 09:34:54 -07:00
func ( a dbAppender ) Commit ( ) error {
err := a . Appender . Commit ( )
2017-09-04 06:07:30 -07:00
// We could just run this check every few minutes practically. But for benchmarks
// and high frequency use cases this is the safer way.
2019-04-01 01:19:06 -07:00
if a . db . head . compactable ( ) {
2017-08-30 09:34:54 -07:00
select {
case a . db . compactc <- struct { } { } :
default :
}
}
return err
}
2018-06-27 09:05:21 -07:00
// Compact data if possible. After successful compaction blocks are reloaded
2020-10-19 08:27:08 -07:00
// which will also delete the blocks that fall out of the retention window.
// Old blocks are only deleted on reloadBlocks based on the new block's parent information.
// See DB.reloadBlocks documentation for further information.
2023-09-13 08:45:06 -07:00
func ( db * DB ) Compact ( ctx context . Context ) ( returnErr error ) {
2017-07-13 07:15:13 -07:00
db . cmtx . Lock ( )
defer db . cmtx . Unlock ( )
2019-05-30 04:57:28 -07:00
defer func ( ) {
2022-06-16 22:51:43 -07:00
if returnErr != nil && ! errors . Is ( returnErr , context . Canceled ) {
// If we got an error because context was canceled then we're most likely
// shutting down TSDB and we don't need to report this on metrics
2019-05-30 04:57:28 -07:00
db . metrics . compactionsFailed . Inc ( )
}
} ( )
2020-10-19 08:27:08 -07:00
lastBlockMaxt := int64 ( math . MinInt64 )
defer func ( ) {
2023-11-16 10:54:41 -08:00
errs := tsdb_errors . NewMulti ( returnErr )
if err := db . head . truncateWAL ( lastBlockMaxt ) ; err != nil {
errs . Add ( fmt . Errorf ( "WAL truncation in Compact defer: %w" , err ) )
}
returnErr = errs . Err ( )
2020-10-19 08:27:08 -07:00
} ( )
2020-12-07 13:29:43 -08:00
start := time . Now ( )
2017-07-13 07:15:13 -07:00
// Check whether we have pending head blocks that are ready to be persisted.
// They have the highest priority.
2017-08-28 15:39:17 -07:00
for {
2017-03-04 07:50:48 -08:00
select {
case <- db . stopc :
2018-09-20 23:24:01 -07:00
return nil
2017-03-04 07:50:48 -08:00
default :
2017-02-02 00:32:06 -08:00
}
2019-04-01 01:19:06 -07:00
if ! db . head . compactable ( ) {
2017-08-28 15:39:17 -07:00
break
}
2018-12-04 02:30:49 -08:00
mint := db . head . MinTime ( )
2020-07-27 21:42:42 -07:00
maxt := rangeForTimestamp ( mint , db . head . chunkRange . Load ( ) )
2017-02-02 00:32:06 -08:00
2017-08-28 15:39:17 -07:00
// Wrap head into a range that bounds all reads to it.
2020-02-14 01:50:24 -08:00
// We remove 1 millisecond from maxt because block
// intervals are half-open: [b.MinTime, b.MaxTime). But
// chunk intervals are closed: [c.MinTime, c.MaxTime];
// so in order to make sure that overlaps are evaluated
// consistently, we explicitly remove the last value
// from the block interval here.
2022-09-27 07:01:23 -07:00
rh := NewRangeHeadWithIsolationDisabled ( db . head , mint , maxt - 1 )
// Compaction runs with isolation disabled, because head.compactable()
// ensures that maxt is more than chunkRange/2 back from now, and
// head.appendableMinValidTime() ensures that no new appends can start within the compaction range.
// We do need to wait for any overlapping appenders that started previously to finish.
db . head . WaitForAppendersOverlapping ( rh . MaxTime ( ) )
if err := db . compactHead ( rh ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "compact head: %w" , err )
2017-03-04 07:50:48 -08:00
}
2020-10-19 08:27:08 -07:00
// Consider only successful compactions for WAL truncation.
lastBlockMaxt = maxt
}
// Clear some disk space before compacting blocks, especially important
// when Head compaction happened over a long time range.
if err := db . head . truncateWAL ( lastBlockMaxt ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "WAL truncation in Compact: %w" , err )
2020-02-14 01:50:24 -08:00
}
2017-08-09 02:10:29 -07:00
2020-12-07 13:29:43 -08:00
compactionDuration := time . Since ( start )
2020-12-25 05:45:23 -08:00
if compactionDuration . Milliseconds ( ) > db . head . chunkRange . Load ( ) {
2020-12-07 13:29:43 -08:00
level . Warn ( db . logger ) . Log (
"msg" , "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up" ,
"duration" , compactionDuration . String ( ) ,
"block_range" , db . head . chunkRange . Load ( ) ,
)
}
2022-09-20 10:05:50 -07:00
if lastBlockMaxt != math . MinInt64 {
// The head was compacted, so we compact OOO head as well.
2023-09-13 08:45:06 -07:00
if err := db . compactOOOHead ( ctx ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "compact ooo head: %w" , err )
2022-09-20 10:05:50 -07:00
}
}
2020-02-14 01:50:24 -08:00
return db . compactBlocks ( )
}
2017-09-08 06:09:24 -07:00
2020-10-19 08:27:08 -07:00
// CompactHead compacts the given RangeHead.
func ( db * DB ) CompactHead ( head * RangeHead ) error {
2020-02-14 01:50:24 -08:00
db . cmtx . Lock ( )
defer db . cmtx . Unlock ( )
2020-10-19 08:27:08 -07:00
if err := db . compactHead ( head ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "compact head: %w" , err )
2020-10-19 08:27:08 -07:00
}
if err := db . head . truncateWAL ( head . BlockMaxTime ( ) ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "WAL truncation: %w" , err )
2020-10-19 08:27:08 -07:00
}
return nil
2020-02-14 01:50:24 -08:00
}
2022-09-20 10:05:50 -07:00
// CompactOOOHead compacts the OOO Head.
2023-09-13 08:45:06 -07:00
func ( db * DB ) CompactOOOHead ( ctx context . Context ) error {
2022-09-20 10:05:50 -07:00
db . cmtx . Lock ( )
defer db . cmtx . Unlock ( )
2023-09-13 08:45:06 -07:00
return db . compactOOOHead ( ctx )
2022-09-20 10:05:50 -07:00
}
2023-09-13 08:45:06 -07:00
func ( db * DB ) compactOOOHead ( ctx context . Context ) error {
2022-09-20 10:05:50 -07:00
if ! db . oooWasEnabled . Load ( ) {
return nil
}
2023-09-13 08:45:06 -07:00
oooHead , err := NewOOOCompactionHead ( ctx , db . head )
2022-09-20 10:05:50 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "get ooo compaction head: %w" , err )
2022-09-20 10:05:50 -07:00
}
ulids , err := db . compactOOO ( db . dir , oooHead )
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "compact ooo head: %w" , err )
2022-09-20 10:05:50 -07:00
}
if err := db . reloadBlocks ( ) ; err != nil {
errs := tsdb_errors . NewMulti ( err )
for _ , uid := range ulids {
if errRemoveAll := os . RemoveAll ( filepath . Join ( db . dir , uid . String ( ) ) ) ; errRemoveAll != nil {
errs . Add ( errRemoveAll )
}
}
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "reloadBlocks blocks after failed compact ooo head: %w" , errs . Err ( ) )
2022-09-20 10:05:50 -07:00
}
lastWBLFile , minOOOMmapRef := oooHead . LastWBLFile ( ) , oooHead . LastMmapRef ( )
if lastWBLFile != 0 || minOOOMmapRef != 0 {
2023-11-24 03:38:38 -08:00
if minOOOMmapRef != 0 {
// Ensure that no more queriers are created that will reference chunks we're about to garbage collect.
// truncateOOO waits for any existing queriers that reference chunks we're about to garbage collect to
// complete before running garbage collection, so we don't need to do that here.
//
// We take mtx to ensure that Querier() and ChunkQuerier() don't miss blocks: without this, they could
// capture the list of blocks before the call to reloadBlocks() above runs, but then capture
// lastGarbageCollectedMmapRef after we update it here, and therefore not query either the blocks we've just
// written or the head chunks those blocks were created from.
db . mtx . Lock ( )
db . lastGarbageCollectedMmapRef = minOOOMmapRef
db . mtx . Unlock ( )
}
2022-09-20 10:05:50 -07:00
if err := db . head . truncateOOO ( lastWBLFile , minOOOMmapRef ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "truncate ooo wbl: %w" , err )
2022-09-20 10:05:50 -07:00
}
}
return nil
}
// compactOOO creates a new block per possible block range in the compactor's directory from the OOO Head given.
// Each ULID in the result corresponds to a block in a unique time range.
func ( db * DB ) compactOOO ( dest string , oooHead * OOOCompactionHead ) ( _ [ ] ulid . ULID , err error ) {
start := time . Now ( )
blockSize := oooHead . ChunkRange ( )
oooHeadMint , oooHeadMaxt := oooHead . MinTime ( ) , oooHead . MaxTime ( )
ulids := make ( [ ] ulid . ULID , 0 )
defer func ( ) {
if err != nil {
// Best effort removal of created block on any error.
for _ , uid := range ulids {
_ = os . RemoveAll ( filepath . Join ( db . dir , uid . String ( ) ) )
}
}
} ( )
2023-04-09 00:08:40 -07:00
for t := blockSize * ( oooHeadMint / blockSize ) ; t <= oooHeadMaxt ; t += blockSize {
2022-09-20 10:05:50 -07:00
mint , maxt := t , t + blockSize
// Block intervals are half-open: [b.MinTime, b.MaxTime). Block intervals are always +1 than the total samples it includes.
uid , err := db . compactor . Write ( dest , oooHead . CloneForTimeRange ( mint , maxt - 1 ) , mint , maxt , nil )
if err != nil {
return nil , err
}
if uid . Compare ( ulid . ULID { } ) != 0 {
ulids = append ( ulids , uid )
blockDir := filepath . Join ( dest , uid . String ( ) )
meta , _ , err := readMetaFile ( blockDir )
if err != nil {
2023-11-16 10:54:41 -08:00
return ulids , fmt . Errorf ( "read meta: %w" , err )
2022-09-20 10:05:50 -07:00
}
meta . Compaction . SetOutOfOrder ( )
_ , err = writeMetaFile ( db . logger , blockDir , meta )
if err != nil {
2023-11-16 10:54:41 -08:00
return ulids , fmt . Errorf ( "write meta: %w" , err )
2022-09-20 10:05:50 -07:00
}
}
}
if len ( ulids ) == 0 {
level . Info ( db . logger ) . Log (
"msg" , "compact ooo head resulted in no blocks" ,
"duration" , time . Since ( start ) ,
)
return nil , nil
}
level . Info ( db . logger ) . Log (
"msg" , "out-of-order compaction completed" ,
"duration" , time . Since ( start ) ,
"ulids" , fmt . Sprintf ( "%v" , ulids ) ,
)
return ulids , nil
}
2020-10-19 08:27:08 -07:00
// compactHead compacts the given RangeHead.
2020-02-14 01:50:24 -08:00
// The compaction mutex should be held before calling this method.
2020-10-19 08:27:08 -07:00
func ( db * DB ) compactHead ( head * RangeHead ) error {
uid , err := db . compactor . Write ( db . dir , head , head . MinTime ( ) , head . BlockMaxTime ( ) , nil )
2020-02-14 01:50:24 -08:00
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "persist head block: %w" , err )
2020-02-14 01:50:24 -08:00
}
2020-10-19 08:27:08 -07:00
if err := db . reloadBlocks ( ) ; err != nil {
2020-10-14 02:35:24 -07:00
if errRemoveAll := os . RemoveAll ( filepath . Join ( db . dir , uid . String ( ) ) ) ; errRemoveAll != nil {
2020-10-28 08:24:58 -07:00
return tsdb_errors . NewMulti (
2023-11-16 10:54:41 -08:00
fmt . Errorf ( "reloadBlocks blocks: %w" , err ) ,
fmt . Errorf ( "delete persisted head block after failed db reloadBlocks:%s: %w" , uid , errRemoveAll ) ,
2020-10-28 08:24:58 -07:00
) . Err ( )
2017-08-09 02:10:29 -07:00
}
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "reloadBlocks blocks: %w" , err )
2020-02-14 01:50:24 -08:00
}
2020-10-19 08:27:08 -07:00
if err = db . head . truncateMemory ( head . BlockMaxTime ( ) ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "head memory truncate: %w" , err )
2017-03-04 07:50:48 -08:00
}
2020-02-14 01:50:24 -08:00
return nil
}
2017-01-06 03:37:28 -08:00
2020-02-14 01:50:24 -08:00
// compactBlocks compacts all the eligible on-disk blocks.
// The compaction mutex should be held before calling this method.
func ( db * DB ) compactBlocks ( ) ( err error ) {
2017-03-02 00:13:29 -08:00
// Check for compactions of multiple blocks.
for {
2017-08-09 02:10:29 -07:00
plan , err := db . compactor . Plan ( db . dir )
2017-03-02 00:13:29 -08:00
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "plan compaction: %w" , err )
2017-03-02 00:13:29 -08:00
}
2017-08-09 02:10:29 -07:00
if len ( plan ) == 0 {
2017-03-21 04:21:02 -07:00
break
}
2017-01-06 03:37:28 -08:00
2017-03-02 00:13:29 -08:00
select {
case <- db . stopc :
2018-09-20 23:24:01 -07:00
return nil
2017-03-02 00:13:29 -08:00
default :
}
2017-03-20 02:41:43 -07:00
2019-01-29 00:26:01 -08:00
uid , err := db . compactor . Compact ( db . dir , plan , db . blocks )
if err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "compact %s: %w" , plan , err )
2017-08-09 02:10:29 -07:00
}
2017-08-28 15:39:17 -07:00
2020-10-19 08:27:08 -07:00
if err := db . reloadBlocks ( ) ; err != nil {
2019-01-29 00:26:01 -08:00
if err := os . RemoveAll ( filepath . Join ( db . dir , uid . String ( ) ) ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "delete compacted block after failed db reloadBlocks:%s: %w" , uid , err )
2019-01-29 00:26:01 -08:00
}
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "reloadBlocks blocks: %w" , err )
2017-08-28 15:39:17 -07:00
}
2017-02-23 01:50:22 -08:00
}
2018-09-20 23:24:01 -07:00
return nil
2017-02-09 17:54:26 -08:00
}
2019-07-23 01:04:48 -07:00
// getBlock iterates a given block range to find a block by a given id.
// If found it returns the block itself and a boolean to indicate that it was found.
func getBlock ( allBlocks [ ] * Block , id ulid . ULID ) ( * Block , bool ) {
for _ , b := range allBlocks {
2017-05-18 07:09:30 -07:00
if b . Meta ( ) . ULID == id {
2017-03-20 00:41:56 -07:00
return b , true
}
}
return nil , false
}
2020-10-19 08:27:08 -07:00
// reload reloads blocks and truncates the head and its WAL.
func ( db * DB ) reload ( ) error {
if err := db . reloadBlocks ( ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "reloadBlocks: %w" , err )
2020-10-19 08:27:08 -07:00
}
2022-09-20 10:05:50 -07:00
maxt , ok := db . inOrderBlocksMaxTime ( )
if ! ok {
2020-10-19 08:27:08 -07:00
return nil
}
2022-09-20 10:05:50 -07:00
if err := db . head . Truncate ( maxt ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "head truncate: %w" , err )
2020-10-19 08:27:08 -07:00
}
return nil
}
// reloadBlocks reloads blocks without touching head.
2018-06-27 06:47:11 -07:00
// Blocks that are obsolete due to replacement or retention will be deleted.
2020-10-19 08:27:08 -07:00
func ( db * DB ) reloadBlocks ( ) ( err error ) {
2017-08-30 08:38:25 -07:00
defer func ( ) {
2017-05-26 06:13:03 -07:00
if err != nil {
db . metrics . reloadsFailed . Inc ( )
}
db . metrics . reloads . Inc ( )
2017-08-30 08:38:25 -07:00
} ( )
2017-05-26 06:13:03 -07:00
2021-02-16 21:32:43 -08:00
// Now that we reload TSDB every minute, there is high chance for race condition with a reload
// triggered by CleanTombstones(). We need to lock the reload to avoid the situation where
// a normal reload and CleanTombstones try to delete the same block.
db . mtx . Lock ( )
defer db . mtx . Unlock ( )
2019-07-23 01:04:48 -07:00
loadable , corrupted , err := openBlocks ( db . logger , db . dir , db . blocks , db . chunkPool )
2017-03-02 00:13:29 -08:00
if err != nil {
2019-01-16 02:03:52 -08:00
return err
}
2018-06-27 09:05:21 -07:00
2020-07-22 08:19:33 -07:00
deletableULIDs := db . blocksToDelete ( loadable )
deletable := make ( map [ ulid . ULID ] * Block , len ( deletableULIDs ) )
2019-01-16 02:03:52 -08:00
2020-08-11 07:53:23 -07:00
// Mark all parents of loaded blocks as deletable (no matter if they exists). This makes it resilient against the process
// crashing towards the end of a compaction but before deletions. By doing that, we can pick up the deletion where it left off during a crash.
2019-01-16 02:03:52 -08:00
for _ , block := range loadable {
2020-07-22 08:19:33 -07:00
if _ , ok := deletableULIDs [ block . meta . ULID ] ; ok {
deletable [ block . meta . ULID ] = block
}
2019-01-16 02:03:52 -08:00
for _ , b := range block . Meta ( ) . Compaction . Parents {
2020-08-11 07:53:23 -07:00
if _ , ok := corrupted [ b . ULID ] ; ok {
delete ( corrupted , b . ULID )
level . Warn ( db . logger ) . Log ( "msg" , "Found corrupted block, but replaced by compacted one so it's safe to delete. This should not happen with atomic deletes." , "block" , b . ULID )
}
2019-01-16 02:03:52 -08:00
deletable [ b . ULID ] = nil
2017-02-09 17:54:26 -08:00
}
2019-01-16 02:03:52 -08:00
}
2020-08-11 07:53:23 -07:00
2019-01-16 02:03:52 -08:00
if len ( corrupted ) > 0 {
2020-08-11 07:53:23 -07:00
// Corrupted but no child loaded for it.
2019-01-30 01:40:40 -08:00
// Close all new blocks to release the lock for windows.
for _ , block := range loadable {
2019-07-23 01:04:48 -07:00
if _ , open := getBlock ( db . blocks , block . Meta ( ) . ULID ) ; ! open {
2019-01-30 01:40:40 -08:00
block . Close ( )
}
}
2020-10-28 08:24:58 -07:00
errs := tsdb_errors . NewMulti ( )
2020-06-17 07:40:00 -07:00
for ulid , err := range corrupted {
2023-11-16 10:54:41 -08:00
if err != nil {
errs . Add ( fmt . Errorf ( "corrupted block %s: %w" , ulid . String ( ) , err ) )
}
2020-06-17 07:40:00 -07:00
}
2020-10-28 08:24:58 -07:00
return errs . Err ( )
2019-01-16 02:03:52 -08:00
}
var (
2020-08-11 07:53:23 -07:00
toLoad [ ] * Block
2019-01-16 02:03:52 -08:00
blocksSize int64
)
2020-08-11 07:53:23 -07:00
// All deletable blocks should be unloaded.
// NOTE: We need to loop through loadable one more time as there might be loadable ready to be removed (replaced by compacted block).
2019-01-16 02:03:52 -08:00
for _ , block := range loadable {
if _ , ok := deletable [ block . Meta ( ) . ULID ] ; ok {
deletable [ block . Meta ( ) . ULID ] = block
2017-11-03 12:34:21 -07:00
continue
}
2019-01-16 02:03:52 -08:00
2020-08-11 07:53:23 -07:00
toLoad = append ( toLoad , block )
blocksSize += block . Size ( )
2019-01-16 02:03:52 -08:00
}
db . metrics . blocksBytes . Set ( float64 ( blocksSize ) )
2023-09-21 13:53:51 -07:00
slices . SortFunc ( toLoad , func ( a , b * Block ) int {
2023-10-16 07:23:26 -07:00
switch {
case a . Meta ( ) . MinTime < b . Meta ( ) . MinTime :
return - 1
case a . Meta ( ) . MinTime > b . Meta ( ) . MinTime :
return 1
default :
return 0
}
2019-01-16 02:03:52 -08:00
} )
// Swap new blocks first for subsequently created readers to be seen.
oldBlocks := db . blocks
2020-08-11 07:53:23 -07:00
db . blocks = toLoad
2019-01-16 02:03:52 -08:00
2020-08-11 07:53:23 -07:00
blockMetas := make ( [ ] BlockMeta , 0 , len ( toLoad ) )
for _ , b := range toLoad {
2019-02-14 05:29:41 -08:00
blockMetas = append ( blockMetas , b . Meta ( ) )
}
if overlaps := OverlappingBlocks ( blockMetas ) ; len ( overlaps ) > 0 {
2020-10-19 08:27:08 -07:00
level . Warn ( db . logger ) . Log ( "msg" , "Overlapping blocks found during reloadBlocks" , "detail" , overlaps . String ( ) )
2019-02-14 05:29:41 -08:00
}
2020-08-11 07:53:23 -07:00
// Append blocks to old, deletable blocks, so we can close them.
2019-01-16 02:03:52 -08:00
for _ , b := range oldBlocks {
if _ , ok := deletable [ b . Meta ( ) . ULID ] ; ok {
deletable [ b . Meta ( ) . ULID ] = b
2018-06-27 09:05:21 -07:00
}
}
2019-01-16 02:03:52 -08:00
if err := db . deleteBlocks ( deletable ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "delete %v blocks: %w" , len ( deletable ) , err )
2019-01-16 02:03:52 -08:00
}
2020-10-19 08:27:08 -07:00
return nil
2019-01-16 02:03:52 -08:00
}
2019-07-23 01:04:48 -07:00
func openBlocks ( l log . Logger , dir string , loaded [ ] * Block , chunkPool chunkenc . Pool ) ( blocks [ ] * Block , corrupted map [ ulid . ULID ] error , err error ) {
bDirs , err := blockDirs ( dir )
2019-01-16 02:03:52 -08:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , nil , fmt . Errorf ( "find blocks: %w" , err )
2019-01-16 02:03:52 -08:00
}
corrupted = make ( map [ ulid . ULID ] error )
2019-07-23 01:04:48 -07:00
for _ , bDir := range bDirs {
meta , _ , err := readMetaFile ( bDir )
2018-06-27 06:47:11 -07:00
if err != nil {
2020-10-19 08:27:08 -07:00
level . Error ( l ) . Log ( "msg" , "Failed to read meta.json for a block during reloadBlocks. Skipping" , "dir" , bDir , "err" , err )
2018-06-27 06:47:11 -07:00
continue
}
2019-01-16 02:03:52 -08:00
2018-06-27 06:47:11 -07:00
// See if we already have the block in memory or open it otherwise.
2019-07-23 01:04:48 -07:00
block , open := getBlock ( loaded , meta . ULID )
if ! open {
block , err = OpenBlock ( l , bDir , chunkPool )
2017-05-18 07:09:30 -07:00
if err != nil {
2019-01-16 02:03:52 -08:00
corrupted [ meta . ULID ] = err
continue
2017-03-02 00:13:29 -08:00
}
}
2019-01-16 02:03:52 -08:00
blocks = append ( blocks , block )
2016-12-09 01:00:14 -08:00
}
2019-01-16 02:03:52 -08:00
return blocks , corrupted , nil
}
2020-07-22 08:19:33 -07:00
// DefaultBlocksToDelete returns a filter which decides time based and size based
// retention from the options of the db.
func DefaultBlocksToDelete ( db * DB ) BlocksToDeleteFunc {
return func ( blocks [ ] * Block ) map [ ulid . ULID ] struct { } {
return deletableBlocks ( db , blocks )
}
}
2020-08-10 22:56:08 -07:00
// deletableBlocks returns all currently loaded blocks past retention policy or already compacted into a new block.
2020-07-22 08:19:33 -07:00
func deletableBlocks ( db * DB , blocks [ ] * Block ) map [ ulid . ULID ] struct { } {
deletable := make ( map [ ulid . ULID ] struct { } )
2019-01-16 02:03:52 -08:00
// Sort the blocks by time - newest to oldest (largest to smallest timestamp).
// This ensures that the retentions will remove the oldest blocks.
2023-09-21 13:53:51 -07:00
slices . SortFunc ( blocks , func ( a , b * Block ) int {
2023-10-16 07:23:26 -07:00
switch {
case b . Meta ( ) . MaxTime < a . Meta ( ) . MaxTime :
return - 1
case b . Meta ( ) . MaxTime > a . Meta ( ) . MaxTime :
return 1
default :
return 0
}
2018-05-28 13:00:36 -07:00
} )
2019-01-16 02:03:52 -08:00
2019-01-18 00:35:16 -08:00
for _ , block := range blocks {
if block . Meta ( ) . Compaction . Deletable {
2020-07-22 08:19:33 -07:00
deletable [ block . Meta ( ) . ULID ] = struct { } { }
2019-01-18 00:35:16 -08:00
}
2017-05-18 07:09:30 -07:00
}
2017-05-26 04:01:45 -07:00
2020-07-22 08:19:33 -07:00
for ulid := range BeyondTimeRetention ( db , blocks ) {
deletable [ ulid ] = struct { } { }
2017-05-18 07:09:30 -07:00
}
2020-07-22 08:19:33 -07:00
for ulid := range BeyondSizeRetention ( db , blocks ) {
deletable [ ulid ] = struct { } { }
2019-01-16 02:03:52 -08:00
}
2017-05-18 07:09:30 -07:00
2019-01-16 02:03:52 -08:00
return deletable
}
2020-07-22 08:19:33 -07:00
// BeyondTimeRetention returns those blocks which are beyond the time retention
// set in the db options.
func BeyondTimeRetention ( db * DB , blocks [ ] * Block ) ( deletable map [ ulid . ULID ] struct { } ) {
2019-01-16 02:03:52 -08:00
// Time retention is disabled or no blocks to work with.
2020-10-19 04:21:54 -07:00
if len ( blocks ) == 0 || db . opts . RetentionDuration == 0 {
2019-01-16 02:03:52 -08:00
return
}
2020-07-22 08:19:33 -07:00
deletable = make ( map [ ulid . ULID ] struct { } )
2019-01-16 02:03:52 -08:00
for i , block := range blocks {
// The difference between the first block and this block is larger than
2020-01-02 06:54:09 -08:00
// the retention period so any blocks after that are added as deletable.
2020-02-11 08:34:09 -08:00
if i > 0 && blocks [ 0 ] . Meta ( ) . MaxTime - block . Meta ( ) . MaxTime > db . opts . RetentionDuration {
2019-01-16 02:03:52 -08:00
for _ , b := range blocks [ i : ] {
2020-07-22 08:19:33 -07:00
deletable [ b . meta . ULID ] = struct { } { }
2019-01-16 02:03:52 -08:00
}
db . metrics . timeRetentionCount . Inc ( )
break
2017-11-03 12:34:21 -07:00
}
2019-01-16 02:03:52 -08:00
}
2020-01-02 06:54:09 -08:00
return deletable
2019-01-16 02:03:52 -08:00
}
2020-07-22 08:19:33 -07:00
// BeyondSizeRetention returns those blocks which are beyond the size retention
// set in the db options.
func BeyondSizeRetention ( db * DB , blocks [ ] * Block ) ( deletable map [ ulid . ULID ] struct { } ) {
2019-01-16 02:03:52 -08:00
// Size retention is disabled or no blocks to work with.
2020-10-19 04:21:54 -07:00
if len ( blocks ) == 0 || db . opts . MaxBytes <= 0 {
2019-01-16 02:03:52 -08:00
return
}
2020-07-22 08:19:33 -07:00
deletable = make ( map [ ulid . ULID ] struct { } )
2019-11-11 18:40:16 -08:00
2020-05-06 08:30:00 -07:00
// Initializing size counter with WAL size and Head chunks
// written to disk, as that is part of the retention strategy.
2020-10-12 14:15:40 -07:00
blocksSize := db . Head ( ) . Size ( )
2019-01-16 02:03:52 -08:00
for i , block := range blocks {
blocksSize += block . Size ( )
2023-04-09 00:08:40 -07:00
if blocksSize > db . opts . MaxBytes {
2019-01-16 02:03:52 -08:00
// Add this and all following blocks for deletion.
for _ , b := range blocks [ i : ] {
2020-07-22 08:19:33 -07:00
deletable [ b . meta . ULID ] = struct { } { }
2019-01-16 02:03:52 -08:00
}
db . metrics . sizeRetentionCount . Inc ( )
break
2017-11-03 12:34:21 -07:00
}
2018-06-27 06:47:11 -07:00
}
2020-01-02 06:54:09 -08:00
return deletable
2019-01-16 02:03:52 -08:00
}
2020-08-11 07:53:23 -07:00
// deleteBlocks closes the block if loaded and deletes blocks from the disk if exists.
2019-01-16 02:03:52 -08:00
// When the map contains a non nil block object it means it is loaded in memory
// so needs to be closed first as it might need to wait for pending readers to complete.
func ( db * DB ) deleteBlocks ( blocks map [ ulid . ULID ] * Block ) error {
for ulid , block := range blocks {
if block != nil {
if err := block . Close ( ) ; err != nil {
2020-06-17 07:40:00 -07:00
level . Warn ( db . logger ) . Log ( "msg" , "Closing block failed" , "err" , err , "block" , ulid )
2019-01-16 02:03:52 -08:00
}
}
2020-08-10 22:56:08 -07:00
2020-08-11 07:53:23 -07:00
toDelete := filepath . Join ( db . dir , ulid . String ( ) )
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
switch _ , err := os . Stat ( toDelete ) ; {
case os . IsNotExist ( err ) :
2020-08-11 07:53:23 -07:00
// Noop.
continue
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
case err != nil :
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "stat dir %v: %w" , toDelete , err )
2020-08-11 07:53:23 -07:00
}
// Replace atomically to avoid partial block when process would crash during deletion.
2020-08-10 22:56:08 -07:00
tmpToDelete := filepath . Join ( db . dir , fmt . Sprintf ( "%s%s" , ulid , tmpForDeletionBlockDirSuffix ) )
2020-08-11 07:53:23 -07:00
if err := fileutil . Replace ( toDelete , tmpToDelete ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "replace of obsolete block for deletion %s: %w" , ulid , err )
2020-08-10 22:56:08 -07:00
}
if err := os . RemoveAll ( tmpToDelete ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "delete obsolete block %s: %w" , ulid , err )
2017-10-23 11:30:03 -07:00
}
2020-08-11 07:53:23 -07:00
level . Info ( db . logger ) . Log ( "msg" , "Deleting obsolete block" , "block" , ulid )
2017-10-23 11:30:03 -07:00
}
2020-08-10 22:56:08 -07:00
2019-01-16 02:03:52 -08:00
return nil
2017-05-18 07:09:30 -07:00
}
2019-02-26 11:50:37 -08:00
2018-04-05 06:15:24 -07:00
// TimeRange specifies minTime and maxTime range.
2018-03-29 04:50:46 -07:00
type TimeRange struct {
2018-04-05 05:51:33 -07:00
Min , Max int64
2018-03-29 04:50:46 -07:00
}
2018-04-05 05:51:33 -07:00
2018-04-05 06:15:24 -07:00
// Overlaps contains overlapping blocks aggregated by overlapping range.
type Overlaps map [ TimeRange ] [ ] BlockMeta
// String returns human readable string form of overlapped blocks.
func ( o Overlaps ) String ( ) string {
var res [ ] string
for r , overlaps := range o {
var groups [ ] string
for _ , m := range overlaps {
groups = append ( groups , fmt . Sprintf (
2018-04-05 08:53:24 -07:00
"<ulid: %s, mint: %d, maxt: %d, range: %s>" ,
2018-04-05 06:15:24 -07:00
m . ULID . String ( ) ,
m . MinTime ,
m . MaxTime ,
( time . Duration ( ( m . MaxTime - m . MinTime ) / 1000 ) * time . Second ) . String ( ) ,
) )
}
2018-04-05 08:01:16 -07:00
res = append ( res , fmt . Sprintf (
2018-04-05 08:53:24 -07:00
"[mint: %d, maxt: %d, range: %s, blocks: %d]: %s" ,
2018-04-05 08:01:16 -07:00
r . Min , r . Max ,
( time . Duration ( ( r . Max - r . Min ) / 1000 ) * time . Second ) . String ( ) ,
len ( overlaps ) ,
2018-04-05 08:53:24 -07:00
strings . Join ( groups , ", " ) ) ,
2018-04-05 08:01:16 -07:00
)
2018-04-05 06:15:24 -07:00
}
2018-04-05 08:01:16 -07:00
return strings . Join ( res , "\n" )
2018-04-05 06:15:24 -07:00
}
// OverlappingBlocks returns all overlapping blocks from given meta files.
func OverlappingBlocks ( bm [ ] BlockMeta ) Overlaps {
2018-03-28 10:33:41 -07:00
if len ( bm ) <= 1 {
2018-03-28 07:50:52 -07:00
return nil
}
2018-03-28 15:50:42 -07:00
var (
2018-04-05 05:51:33 -07:00
overlaps [ ] [ ] BlockMeta
2018-03-28 15:50:42 -07:00
// pending contains not ended blocks in regards to "current" timestamp.
pending = [ ] BlockMeta { bm [ 0 ] }
2018-03-29 04:50:46 -07:00
// continuousPending helps to aggregate same overlaps to single group.
continuousPending = true
2018-03-28 15:50:42 -07:00
)
2018-04-05 06:15:24 -07:00
// We have here blocks sorted by minTime. We iterate over each block and treat its minTime as our "current" timestamp.
// We check if any of the pending block finished (blocks that we have seen before, but their maxTime was still ahead current
// timestamp). If not, it means they overlap with our current block. In the same time current block is assumed pending.
2018-03-28 15:18:24 -07:00
for _ , b := range bm [ 1 : ] {
2018-03-28 15:50:42 -07:00
var newPending [ ] BlockMeta
2018-03-28 10:33:41 -07:00
2018-03-28 15:18:24 -07:00
for _ , p := range pending {
2018-03-28 15:50:42 -07:00
// "b.MinTime" is our current time.
2018-03-28 15:18:24 -07:00
if b . MinTime >= p . MaxTime {
2018-03-29 04:50:46 -07:00
continuousPending = false
2018-03-28 15:18:24 -07:00
continue
2018-03-28 10:33:41 -07:00
}
2018-03-28 15:18:24 -07:00
// "p" overlaps with "b" and "p" is still pending.
newPending = append ( newPending , p )
2017-05-18 07:09:30 -07:00
}
2018-03-28 15:50:42 -07:00
2018-03-28 15:18:24 -07:00
// Our block "b" is now pending.
pending = append ( newPending , b )
if len ( newPending ) == 0 {
2018-03-28 15:50:42 -07:00
// No overlaps.
2018-03-28 15:18:24 -07:00
continue
2018-03-28 10:33:41 -07:00
}
2018-03-29 04:50:46 -07:00
if continuousPending && len ( overlaps ) > 0 {
2018-03-28 15:18:24 -07:00
overlaps [ len ( overlaps ) - 1 ] = append ( overlaps [ len ( overlaps ) - 1 ] , b )
2018-03-28 10:33:41 -07:00
continue
2017-05-18 07:09:30 -07:00
}
2018-03-28 15:18:24 -07:00
overlaps = append ( overlaps , append ( newPending , b ) )
2018-03-29 04:50:46 -07:00
// Start new pendings.
continuousPending = true
2017-05-18 07:09:30 -07:00
}
2018-04-05 05:51:33 -07:00
// Fetch the critical overlapped time range foreach overlap groups.
2018-04-05 06:15:24 -07:00
overlapGroups := Overlaps { }
2018-04-05 05:51:33 -07:00
for _ , overlap := range overlaps {
minRange := TimeRange { Min : 0 , Max : math . MaxInt64 }
for _ , b := range overlap {
if minRange . Max > b . MaxTime {
minRange . Max = b . MaxTime
}
if minRange . Min < b . MinTime {
minRange . Min = b . MinTime
}
}
overlapGroups [ minRange ] = overlap
}
return overlapGroups
2017-01-02 13:24:35 -08:00
}
2017-10-09 06:21:46 -07:00
func ( db * DB ) String ( ) string {
return "HEAD"
}
// Blocks returns the databases persisted blocks.
func ( db * DB ) Blocks ( ) [ ] * Block {
2017-08-29 06:39:27 -07:00
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
return db . blocks
}
2022-09-20 10:05:50 -07:00
// inOrderBlocksMaxTime returns the max time among the blocks that were not totally created
// out of out-of-order data. If the returned boolean is true, it means there is at least
// one such block.
func ( db * DB ) inOrderBlocksMaxTime ( ) ( maxt int64 , ok bool ) {
maxt , ok = int64 ( math . MinInt64 ) , false
// If blocks are overlapping, last block might not have the max time. So check all blocks.
for _ , b := range db . Blocks ( ) {
if ! b . meta . Compaction . FromOutOfOrder ( ) && b . meta . MaxTime > maxt {
ok = true
maxt = b . meta . MaxTime
}
}
return maxt , ok
}
2017-10-09 06:21:46 -07:00
// Head returns the databases's head.
2017-09-25 07:45:24 -07:00
func ( db * DB ) Head ( ) * Head {
return db . head
}
2017-01-05 23:08:02 -08:00
// Close the partition.
2017-01-06 02:40:09 -08:00
func ( db * DB ) Close ( ) error {
2017-01-06 03:37:28 -08:00
close ( db . stopc )
2020-10-28 03:09:03 -07:00
if db . compactCancel != nil {
db . compactCancel ( )
}
2017-01-06 03:37:28 -08:00
<- db . donec
2017-01-06 02:40:09 -08:00
db . mtx . Lock ( )
2017-03-17 04:12:50 -07:00
defer db . mtx . Unlock ( )
2017-03-04 07:50:48 -08:00
2017-03-06 03:13:15 -08:00
var g errgroup . Group
2017-01-02 01:34:55 -08:00
2017-03-20 00:41:56 -07:00
// blocks also contains all head blocks.
for _ , pb := range db . blocks {
2017-03-06 03:13:15 -08:00
g . Go ( pb . Close )
2016-12-14 23:31:26 -08:00
}
2021-11-11 08:45:25 -08:00
errs := tsdb_errors . NewMulti ( g . Wait ( ) , db . locker . Release ( ) )
2020-10-21 08:08:28 -07:00
if db . head != nil {
2020-10-28 08:24:58 -07:00
errs . Add ( db . head . Close ( ) )
2020-10-21 08:08:28 -07:00
}
2020-10-28 08:24:58 -07:00
return errs . Err ( )
2016-12-09 01:00:14 -08:00
}
2018-11-20 02:34:26 -08:00
// DisableCompactions disables auto compactions.
2017-06-06 11:15:23 -07:00
func ( db * DB ) DisableCompactions ( ) {
2018-11-20 02:34:26 -08:00
db . autoCompactMtx . Lock ( )
defer db . autoCompactMtx . Unlock ( )
2017-07-14 01:06:07 -07:00
2018-11-20 02:34:26 -08:00
db . autoCompact = false
2020-04-11 01:22:18 -07:00
level . Info ( db . logger ) . Log ( "msg" , "Compactions disabled" )
2017-06-06 07:53:20 -07:00
}
2018-11-20 02:34:26 -08:00
// EnableCompactions enables auto compactions.
2017-06-06 11:15:23 -07:00
func ( db * DB ) EnableCompactions ( ) {
2018-11-20 02:34:26 -08:00
db . autoCompactMtx . Lock ( )
defer db . autoCompactMtx . Unlock ( )
2017-07-14 01:06:07 -07:00
2018-11-20 02:34:26 -08:00
db . autoCompact = true
2020-04-11 01:22:18 -07:00
level . Info ( db . logger ) . Log ( "msg" , "Compactions enabled" )
2017-06-05 01:18:31 -07:00
}
2023-08-26 02:40:59 -07:00
// ForceHeadMMap is intended for use only in tests and benchmarks.
func ( db * DB ) ForceHeadMMap ( ) {
db . head . mmapHeadChunks ( )
}
2018-02-28 03:04:55 -08:00
// Snapshot writes the current data to the directory. If withHead is set to true it
// will create a new block containing all data that's currently in the memory buffer/WAL.
func ( db * DB ) Snapshot ( dir string , withHead bool ) error {
2017-08-30 09:34:54 -07:00
if dir == db . dir {
2023-11-14 05:04:31 -08:00
return fmt . Errorf ( "cannot snapshot into base directory" )
2017-08-30 09:34:54 -07:00
}
2019-03-18 07:14:10 -07:00
if _ , err := ulid . ParseStrict ( dir ) ; err == nil {
2023-11-14 05:04:31 -08:00
return fmt . Errorf ( "dir must not be a valid ULID" )
2017-08-30 09:34:54 -07:00
}
2017-06-05 01:18:31 -07:00
2017-08-30 09:34:54 -07:00
db . cmtx . Lock ( )
defer db . cmtx . Unlock ( )
2017-10-23 11:30:03 -07:00
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
for _ , b := range db . blocks {
2020-04-11 01:22:18 -07:00
level . Info ( db . logger ) . Log ( "msg" , "Snapshotting block" , "block" , b )
2017-08-30 09:34:54 -07:00
if err := b . Snapshot ( dir ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "error snapshotting block: %s: %w" , b . Dir ( ) , err )
2017-08-30 09:34:54 -07:00
}
}
2018-02-28 03:04:55 -08:00
if ! withHead {
return nil
}
2019-07-03 03:47:31 -07:00
mint := db . head . MinTime ( )
maxt := db . head . MaxTime ( )
2020-08-13 02:55:35 -07:00
head := NewRangeHead ( db . head , mint , maxt )
2019-07-03 03:47:31 -07:00
// Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime).
// Because of this block intervals are always +1 than the total samples it includes.
if _ , err := db . compactor . Write ( dir , head , mint , maxt + 1 , nil ) ; err != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "snapshot head block: %w" , err )
2019-07-03 03:47:31 -07:00
}
return nil
2017-08-28 15:39:17 -07:00
}
2017-07-14 00:00:22 -07:00
2017-08-28 15:39:17 -07:00
// Querier returns a new querier over the data partition for the given time range.
2023-11-24 03:38:38 -08:00
func ( db * DB ) Querier ( mint , maxt int64 ) ( _ storage . Querier , err error ) {
2017-10-09 06:21:46 -07:00
var blocks [ ] BlockReader
2017-08-28 15:39:17 -07:00
2017-10-23 11:30:03 -07:00
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
for _ , b := range db . blocks {
2018-07-02 01:23:36 -07:00
if b . OverlapsClosedInterval ( mint , maxt ) {
2017-10-09 06:21:46 -07:00
blocks = append ( blocks , b )
}
}
2023-11-24 03:38:38 -08:00
blockQueriers := make ( [ ] storage . Querier , 0 , len ( blocks ) + 2 ) // +2 to allow for possible in-order and OOO head queriers
defer func ( ) {
if err != nil {
// If we fail, all previously opened queriers must be closed.
for _ , q := range blockQueriers {
// TODO(bwplotka): Handle error.
_ = q . Close ( )
}
}
} ( )
2017-10-09 06:21:46 -07:00
if maxt >= db . head . MinTime ( ) {
2021-07-20 01:47:20 -07:00
rh := NewRangeHead ( db . head , mint , maxt )
var err error
2023-11-24 03:38:38 -08:00
inOrderHeadQuerier , err := NewBlockQuerier ( rh , mint , maxt )
2021-07-20 01:47:20 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open block querier for head %s: %w" , rh , err )
2021-07-20 01:47:20 -07:00
}
// Getting the querier above registers itself in the queue that the truncation waits on.
// So if the querier is currently not colliding with any truncation, we can continue to use it and still
// won't run into a race later since any truncation that comes after will wait on this querier if it overlaps.
shouldClose , getNew , newMint := db . head . IsQuerierCollidingWithTruncation ( mint , maxt )
if shouldClose {
2022-09-20 10:05:50 -07:00
if err := inOrderHeadQuerier . Close ( ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "closing head block querier %s: %w" , rh , err )
2021-07-20 01:47:20 -07:00
}
2022-09-20 10:05:50 -07:00
inOrderHeadQuerier = nil
2021-07-20 01:47:20 -07:00
}
if getNew {
rh := NewRangeHead ( db . head , newMint , maxt )
2022-09-20 10:05:50 -07:00
inOrderHeadQuerier , err = NewBlockQuerier ( rh , newMint , maxt )
2021-07-20 01:47:20 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open block querier for head while getting new querier %s: %w" , rh , err )
2021-07-20 01:47:20 -07:00
}
}
2023-11-24 03:38:38 -08:00
if inOrderHeadQuerier != nil {
blockQueriers = append ( blockQueriers , inOrderHeadQuerier )
}
2017-10-09 06:21:46 -07:00
}
2017-08-28 15:39:17 -07:00
2022-09-20 10:05:50 -07:00
if overlapsClosedInterval ( mint , maxt , db . head . MinOOOTime ( ) , db . head . MaxOOOTime ( ) ) {
2023-11-24 03:38:38 -08:00
rh := NewOOORangeHead ( db . head , mint , maxt , db . lastGarbageCollectedMmapRef )
2022-09-20 10:05:50 -07:00
var err error
2023-11-24 03:38:38 -08:00
outOfOrderHeadQuerier , err := NewBlockQuerier ( rh , mint , maxt )
2022-09-20 10:05:50 -07:00
if err != nil {
2023-11-24 03:38:38 -08:00
// If NewBlockQuerier() failed, make sure to clean up the pending read created by NewOOORangeHead.
rh . isoState . Close ( )
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open block querier for ooo head %s: %w" , rh , err )
2022-09-20 10:05:50 -07:00
}
2023-11-24 03:38:38 -08:00
blockQueriers = append ( blockQueriers , outOfOrderHeadQuerier )
2022-09-20 10:05:50 -07:00
}
2017-06-06 05:45:54 -07:00
for _ , b := range blocks {
2017-10-09 06:21:46 -07:00
q , err := NewBlockQuerier ( b , mint , maxt )
2023-11-24 03:38:38 -08:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open querier for block %s: %w" , b , err )
2017-10-09 06:21:46 -07:00
}
2023-11-24 03:38:38 -08:00
blockQueriers = append ( blockQueriers , q )
2021-07-20 01:47:20 -07:00
}
2023-11-24 03:38:38 -08:00
2020-07-31 08:03:02 -07:00
return storage . NewMergeQuerier ( blockQueriers , nil , storage . ChainedSeriesMerge ) , nil
}
2023-04-24 23:19:16 -07:00
// blockChunkQuerierForRange returns individual block chunk queriers from the persistent blocks, in-order head block, and the
2022-09-20 10:05:50 -07:00
// out-of-order head block, overlapping with the given time range.
2023-11-24 03:38:38 -08:00
func ( db * DB ) blockChunkQuerierForRange ( mint , maxt int64 ) ( _ [ ] storage . ChunkQuerier , err error ) {
2020-07-31 08:03:02 -07:00
var blocks [ ] BlockReader
2019-02-14 05:29:41 -08:00
2020-07-31 08:03:02 -07:00
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
for _ , b := range db . blocks {
if b . OverlapsClosedInterval ( mint , maxt ) {
blocks = append ( blocks , b )
}
}
2023-11-24 03:38:38 -08:00
blockQueriers := make ( [ ] storage . ChunkQuerier , 0 , len ( blocks ) + 2 ) // +2 to allow for possible in-order and OOO head queriers
defer func ( ) {
if err != nil {
// If we fail, all previously opened queriers must be closed.
for _ , q := range blockQueriers {
// TODO(bwplotka): Handle error.
_ = q . Close ( )
}
}
} ( )
2020-07-31 08:03:02 -07:00
if maxt >= db . head . MinTime ( ) {
2021-07-20 01:47:20 -07:00
rh := NewRangeHead ( db . head , mint , maxt )
2023-11-24 03:38:38 -08:00
inOrderHeadQuerier , err := NewBlockChunkQuerier ( rh , mint , maxt )
2021-07-20 01:47:20 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open querier for head %s: %w" , rh , err )
2021-07-20 01:47:20 -07:00
}
// Getting the querier above registers itself in the queue that the truncation waits on.
// So if the querier is currently not colliding with any truncation, we can continue to use it and still
// won't run into a race later since any truncation that comes after will wait on this querier if it overlaps.
shouldClose , getNew , newMint := db . head . IsQuerierCollidingWithTruncation ( mint , maxt )
if shouldClose {
2022-09-20 10:05:50 -07:00
if err := inOrderHeadQuerier . Close ( ) ; err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "closing head querier %s: %w" , rh , err )
2021-07-20 01:47:20 -07:00
}
2022-09-20 10:05:50 -07:00
inOrderHeadQuerier = nil
2021-07-20 01:47:20 -07:00
}
if getNew {
rh := NewRangeHead ( db . head , newMint , maxt )
2022-09-20 10:05:50 -07:00
inOrderHeadQuerier , err = NewBlockChunkQuerier ( rh , newMint , maxt )
2021-07-20 01:47:20 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open querier for head while getting new querier %s: %w" , rh , err )
2021-07-20 01:47:20 -07:00
}
}
2023-11-24 03:38:38 -08:00
if inOrderHeadQuerier != nil {
blockQueriers = append ( blockQueriers , inOrderHeadQuerier )
}
2019-02-14 05:29:41 -08:00
}
2022-09-20 10:05:50 -07:00
if overlapsClosedInterval ( mint , maxt , db . head . MinOOOTime ( ) , db . head . MaxOOOTime ( ) ) {
2023-11-24 03:38:38 -08:00
rh := NewOOORangeHead ( db . head , mint , maxt , db . lastGarbageCollectedMmapRef )
outOfOrderHeadQuerier , err := NewBlockChunkQuerier ( rh , mint , maxt )
2022-09-20 10:05:50 -07:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open block chunk querier for ooo head %s: %w" , rh , err )
2022-09-20 10:05:50 -07:00
}
2023-11-24 03:38:38 -08:00
blockQueriers = append ( blockQueriers , outOfOrderHeadQuerier )
2022-09-20 10:05:50 -07:00
}
2020-07-31 08:03:02 -07:00
for _ , b := range blocks {
q , err := NewBlockChunkQuerier ( b , mint , maxt )
2023-11-24 03:38:38 -08:00
if err != nil {
2023-11-16 10:54:41 -08:00
return nil , fmt . Errorf ( "open querier for block %s: %w" , b , err )
2020-07-31 08:03:02 -07:00
}
2023-11-24 03:38:38 -08:00
blockQueriers = append ( blockQueriers , q )
2021-07-20 01:47:20 -07:00
}
2017-08-28 15:39:17 -07:00
2022-09-20 10:05:50 -07:00
return blockQueriers , nil
}
// ChunkQuerier returns a new chunk querier over the data partition for the given time range.
2023-09-12 03:37:38 -07:00
func ( db * DB ) ChunkQuerier ( mint , maxt int64 ) ( storage . ChunkQuerier , error ) {
2022-09-20 10:05:50 -07:00
blockQueriers , err := db . blockChunkQuerierForRange ( mint , maxt )
if err != nil {
return nil , err
}
2020-07-31 08:03:02 -07:00
return storage . NewMergeChunkQuerier ( blockQueriers , nil , storage . NewCompactingChunkSeriesMerger ( storage . ChainedSeriesMerge ) ) , nil
2020-06-24 06:41:52 -07:00
}
2021-03-16 02:47:45 -07:00
func ( db * DB ) ExemplarQuerier ( ctx context . Context ) ( storage . ExemplarQuerier , error ) {
return db . head . exemplars . ExemplarQuerier ( ctx )
}
2021-10-22 01:06:44 -07:00
func rangeForTimestamp ( t , width int64 ) ( maxt int64 ) {
2018-12-04 02:30:49 -08:00
return ( t / width ) * width + width
2017-02-01 06:29:48 -08:00
}
2017-08-28 15:39:17 -07:00
// Delete implements deletion of metrics. It only has atomicity guarantees on a per-block basis.
2023-09-13 06:43:06 -07:00
func ( db * DB ) Delete ( ctx context . Context , mint , maxt int64 , ms ... * labels . Matcher ) error {
2017-05-20 00:51:10 -07:00
db . cmtx . Lock ( )
defer db . cmtx . Unlock ( )
2017-07-14 00:00:22 -07:00
2017-05-19 12:05:50 -07:00
var g errgroup . Group
2017-10-23 11:30:03 -07:00
db . mtx . RLock ( )
defer db . mtx . RUnlock ( )
for _ , b := range db . blocks {
2018-07-02 01:23:36 -07:00
if b . OverlapsClosedInterval ( mint , maxt ) {
2017-10-09 06:21:46 -07:00
g . Go ( func ( b * Block ) func ( ) error {
2023-09-13 06:43:06 -07:00
return func ( ) error { return b . Delete ( ctx , mint , maxt , ms ... ) }
2017-08-28 15:39:17 -07:00
} ( b ) )
}
}
2021-09-15 23:50:03 -07:00
if db . head . OverlapsClosedInterval ( mint , maxt ) {
g . Go ( func ( ) error {
2023-09-13 06:43:06 -07:00
return db . head . Delete ( ctx , mint , maxt , ms ... )
2021-09-15 23:50:03 -07:00
} )
}
2018-03-21 14:23:47 -07:00
return g . Wait ( )
2017-05-19 12:05:50 -07:00
}
2017-11-22 04:34:50 -08:00
// CleanTombstones re-writes any blocks with tombstones.
2018-05-30 19:09:30 -07:00
func ( db * DB ) CleanTombstones ( ) ( err error ) {
2017-11-22 04:34:50 -08:00
db . cmtx . Lock ( )
defer db . cmtx . Unlock ( )
start := time . Now ( )
2022-10-25 15:26:12 -07:00
defer func ( ) {
db . metrics . tombCleanTimer . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2017-11-22 04:34:50 -08:00
2021-02-16 21:32:43 -08:00
cleanUpCompleted := false
// Repeat cleanup until there is no tombstones left.
for ! cleanUpCompleted {
cleanUpCompleted = true
for _ , pb := range db . Blocks ( ) {
uid , safeToDelete , cleanErr := pb . CleanTombstones ( db . Dir ( ) , db . compactor )
if cleanErr != nil {
2023-11-16 10:54:41 -08:00
return fmt . Errorf ( "clean tombstones: %s: %w" , pb . Dir ( ) , cleanErr )
2021-02-16 21:32:43 -08:00
}
if ! safeToDelete {
// There was nothing to clean.
continue
}
// In case tombstones of the old block covers the whole block,
// then there would be no resultant block to tell the parent.
// The lock protects against race conditions when deleting blocks
// during an already running reload.
db . mtx . Lock ( )
pb . meta . Compaction . Deletable = safeToDelete
db . mtx . Unlock ( )
cleanUpCompleted = false
if err = db . reloadBlocks ( ) ; err == nil { // Will try to delete old block.
// Successful reload will change the existing blocks.
// We need to loop over the new set of blocks.
break
}
// Delete new block if it was created.
if uid != nil && * uid != ( ulid . ULID { } ) {
2018-05-30 19:09:30 -07:00
dir := filepath . Join ( db . Dir ( ) , uid . String ( ) )
if err := os . RemoveAll ( dir ) ; err != nil {
level . Error ( db . logger ) . Log ( "msg" , "failed to delete block after failed `CleanTombstones`" , "dir" , dir , "err" , err )
}
}
2023-11-16 10:54:41 -08:00
if err != nil {
return fmt . Errorf ( "reload blocks: %w" , err )
}
return nil
2018-05-30 19:09:30 -07:00
}
2020-10-19 08:27:08 -07:00
}
return nil
2017-11-22 04:34:50 -08:00
}
2023-05-15 12:31:49 -07:00
func ( db * DB ) SetWriteNotified ( wn wlog . WriteNotified ) {
db . writeNotified = wn
// It's possible we already created the head struct, so we should also set the WN for that.
db . head . writeNotified = wn
}
2022-04-27 02:24:36 -07:00
func isBlockDir ( fi fs . DirEntry ) bool {
2017-01-19 02:22:47 -08:00
if ! fi . IsDir ( ) {
return false
}
2019-03-18 07:14:10 -07:00
_ , err := ulid . ParseStrict ( fi . Name ( ) )
2017-05-18 07:09:30 -07:00
return err == nil
2017-01-19 02:22:47 -08:00
}
2023-08-08 00:32:51 -07:00
// isTmpDir returns true if the given file-info contains a block ULID, a checkpoint prefix,
// or a chunk snapshot prefix and a tmp extension.
2022-04-27 02:24:36 -07:00
func isTmpDir ( fi fs . DirEntry ) bool {
2020-08-10 22:56:08 -07:00
if ! fi . IsDir ( ) {
return false
}
fn := fi . Name ( )
ext := filepath . Ext ( fn )
2021-01-09 01:02:26 -08:00
if ext == tmpForDeletionBlockDirSuffix || ext == tmpForCreationBlockDirSuffix || ext == tmpLegacy {
2022-03-24 03:44:14 -07:00
if strings . HasPrefix ( fn , "checkpoint." ) {
return true
}
2023-08-08 00:32:51 -07:00
if strings . HasPrefix ( fn , chunkSnapshotPrefix ) {
return true
}
2020-08-10 22:56:08 -07:00
if _ , err := ulid . ParseStrict ( fn [ : len ( fn ) - len ( ext ) ] ) ; err == nil {
return true
}
}
return false
}
2017-01-19 02:22:47 -08:00
func blockDirs ( dir string ) ( [ ] string , error ) {
2022-04-27 02:24:36 -07:00
files , err := os . ReadDir ( dir )
2017-01-19 02:22:47 -08:00
if err != nil {
return nil , err
}
var dirs [ ] string
2022-04-27 02:24:36 -07:00
for _ , f := range files {
if isBlockDir ( f ) {
dirs = append ( dirs , filepath . Join ( dir , f . Name ( ) ) )
2017-01-19 02:22:47 -08:00
}
}
return dirs , nil
2017-01-03 06:43:26 -08:00
}
2016-12-09 04:41:38 -08:00
2017-08-30 09:34:54 -07:00
func sequenceFiles ( dir string ) ( [ ] string , error ) {
2022-04-27 02:24:36 -07:00
files , err := os . ReadDir ( dir )
2017-02-13 23:53:19 -08:00
if err != nil {
return nil , err
}
var res [ ] string
for _ , fi := range files {
2017-08-30 09:34:54 -07:00
if _ , err := strconv . ParseUint ( fi . Name ( ) , 10 , 64 ) ; err != nil {
continue
2017-02-13 23:53:19 -08:00
}
2017-08-30 09:34:54 -07:00
res = append ( res , filepath . Join ( dir , fi . Name ( ) ) )
2017-02-13 23:53:19 -08:00
}
return res , nil
}
2017-08-30 09:34:54 -07:00
func nextSequenceFile ( dir string ) ( string , int , error ) {
2022-04-27 02:24:36 -07:00
files , err := os . ReadDir ( dir )
2017-01-06 00:26:39 -08:00
if err != nil {
2017-01-28 23:11:47 -08:00
return "" , 0 , err
2017-01-06 00:26:39 -08:00
}
2017-01-06 04:13:22 -08:00
2017-01-06 00:26:39 -08:00
i := uint64 ( 0 )
2020-04-06 06:34:20 -07:00
for _ , f := range files {
j , err := strconv . ParseUint ( f . Name ( ) , 10 , 64 )
2017-01-06 04:13:22 -08:00
if err != nil {
continue
2017-01-06 00:26:39 -08:00
}
2017-01-06 04:13:22 -08:00
i = j
2017-01-06 00:26:39 -08:00
}
2017-08-30 09:34:54 -07:00
return filepath . Join ( dir , fmt . Sprintf ( "%0.6d" , i + 1 ) ) , int ( i + 1 ) , nil
2017-01-06 00:26:39 -08:00
}
2016-12-09 04:41:38 -08:00
2017-08-28 15:39:17 -07:00
func exponential ( d , min , max time . Duration ) time . Duration {
d *= 2
if d < min {
d = min
}
if d > max {
d = max
}
return d
}