From 67294b092539163bb8c947a00cae4a983f42524a Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 15 Nov 2022 21:00:15 +0800 Subject: [PATCH 01/52] Tweak line wrappings in docs/storage.md Signed-off-by: Michael --- docs/storage.md | 159 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 118 insertions(+), 41 deletions(-) diff --git a/docs/storage.md b/docs/storage.md index bcb8f7853..b4c5b6ada 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -17,9 +17,9 @@ Ingested samples are grouped into blocks of two hours. Each two-hour block consi of a directory containing a chunks subdirectory containing all the time series samples for that window of time, a metadata file, and an index file (which indexes metric names and labels to time series in the chunks directory). The samples in the chunks directory -are grouped together into one or more segment files of up to 512MB each by default. When series are -deleted via the API, deletion records are stored in separate tombstone files (instead -of deleting the data immediately from the chunk segments). +are grouped together into one or more segment files of up to 512MB each by default. When +series are deleted via the API, deletion records are stored in separate tombstone files +(instead of deleting the data immediately from the chunk segments). The current block for incoming samples is kept in memory and is not fully persisted. It is secured against crashes by a write-ahead log (WAL) that can be @@ -58,15 +58,17 @@ A Prometheus server's data directory looks something like this:    └── 00000000 ``` - Note that a limitation of local storage is that it is not clustered or replicated. Thus, it is not arbitrarily scalable or durable in the face of drive or node outages and should be managed like any other single node -database. The use of RAID is suggested for storage availability, and [snapshots](querying/api.md#snapshot) -are recommended for backups. With proper +database. 
The use of RAID is suggested for storage availability, and +[snapshots](querying/api.md#snapshot) are recommended for backups. With proper architecture, it is possible to retain years of data in local storage. -Alternatively, external storage may be used via the [remote read/write APIs](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). Careful evaluation is required for these systems as they vary greatly in durability, performance, and efficiency. +Alternatively, external storage may be used via the +[remote read/write APIs](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). +Careful evaluation is required for these systems as they vary greatly in durability, +performance, and efficiency. For further details on file format, see [TSDB format](/tsdb/docs/format/README.md). @@ -74,40 +76,61 @@ For further details on file format, see [TSDB format](/tsdb/docs/format/README.m The initial two-hour blocks are eventually compacted into longer blocks in the background. -Compaction will create larger blocks containing data spanning up to 10% of the retention time, or 31 days, whichever is smaller. +Compaction will create larger blocks containing data spanning up to 10% of the retention time, +or 31 days, whichever is smaller. ## Operational aspects Prometheus has several flags that configure local storage. The most important are: -* `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`. -* `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`. Overrides `storage.tsdb.retention` if this flag is set to anything other than default. -* `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. 
Only the persistent blocks are deleted to honor this retention although WAL and m-mapped chunks are counted in the total size. So the minimum requirement for the disk is the peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head` (m-mapped Head chunks) directory combined (peaks every 2 hours). -* `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`. -* `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL). Depending on your data, you can expect the WAL size to be halved with little extra cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0. Note that once enabled, downgrading Prometheus to a version below 2.11.0 will require deleting the WAL. +- `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`. +- `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`. + Overrides `storage.tsdb.retention` if this flag is set to anything other than default. +- `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. + The oldest data will be removed first. Defaults to `0` or disabled. Units supported: + B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only + the persistent blocks are deleted to honor this retention although WAL and m-mapped + chunks are counted in the total size. So the minimum requirement for the disk is the + peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head` + (m-mapped Head chunks) directory combined (peaks every 2 hours). +- `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`. +- `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL). + Depending on your data, you can expect the WAL size to be halved with little extra + cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0. 
+ Note that once enabled, downgrading Prometheus to a version below 2.11.0 will + require deleting the WAL. -Prometheus stores an average of only 1-2 bytes per sample. Thus, to plan the capacity of a Prometheus server, you can use the rough formula: +Prometheus stores an average of only 1-2 bytes per sample. Thus, to plan the +capacity of a Prometheus server, you can use the rough formula: ``` needed_disk_space = retention_time_seconds * ingested_samples_per_second * bytes_per_sample ``` -To lower the rate of ingested samples, you can either reduce the number of time series you scrape (fewer targets or fewer series per target), or you can increase the scrape interval. However, reducing the number of series is likely more effective, due to compression of samples within a series. +To lower the rate of ingested samples, you can either reduce the number of +time series you scrape (fewer targets or fewer series per target), or you +can increase the scrape interval. However, reducing the number of series is +likely more effective, due to compression of samples within a series. If your local storage becomes corrupted for whatever reason, the best strategy to address the problem is to shut down Prometheus then remove the entire storage directory. You can also try removing individual block directories, -or the WAL directory to resolve the problem. Note that this means losing +or the WAL directory to resolve the problem. Note that this means losing approximately two hours data per block directory. Again, Prometheus's local storage is not intended to be durable long-term storage; external solutions offer extended retention and data durability. -CAUTION: Non-POSIX compliant filesystems are not supported for Prometheus' local storage as unrecoverable corruptions may happen. NFS filesystems (including AWS's EFS) are not supported. NFS could be POSIX-compliant, but most implementations are not. It is strongly recommended to use a local filesystem for reliability. 
+CAUTION: Non-POSIX compliant filesystems are not supported for Prometheus' +local storage as unrecoverable corruptions may happen. NFS filesystems +(including AWS's EFS) are not supported. NFS could be POSIX-compliant, +but most implementations are not. It is strongly recommended to use a +local filesystem for reliability. If both time and size retention policies are specified, whichever triggers first will be used. -Expired block cleanup happens in the background. It may take up to two hours to remove expired blocks. Blocks must be fully expired before they are removed. +Expired block cleanup happens in the background. It may take up to two hours +to remove expired blocks. Blocks must be fully expired before they are removed. ## Remote storage integrations @@ -119,59 +142,101 @@ a set of interfaces that allow integrating with remote storage systems. Prometheus integrates with remote storage systems in three ways: -* Prometheus can write samples that it ingests to a remote URL in a standardized format. -* Prometheus can receive samples from other Prometheus servers in a standardized format. -* Prometheus can read (back) sample data from a remote URL in a standardized format. +- Prometheus can write samples that it ingests to a remote URL in a standardized format. +- Prometheus can receive samples from other Prometheus servers in a standardized format. +- Prometheus can read (back) sample data from a remote URL in a standardized format. ![Remote read and write architecture](images/remote_integrations.png) -The read and write protocols both use a snappy-compressed protocol buffer encoding over HTTP. The protocols are not considered as stable APIs yet and may change to use gRPC over HTTP/2 in the future, when all hops between Prometheus and the remote storage can safely be assumed to support HTTP/2. +The read and write protocols both use a snappy-compressed protocol buffer encoding over +HTTP. 
The protocols are not considered as stable APIs yet and may change to use gRPC +over HTTP/2 in the future, when all hops between Prometheus and the remote storage can +safely be assumed to support HTTP/2. -For details on configuring remote storage integrations in Prometheus, see the [remote write](configuration/configuration.md#remote_write) and [remote read](configuration/configuration.md#remote_read) sections of the Prometheus configuration documentation. +For details on configuring remote storage integrations in Prometheus, see the +[remote write](configuration/configuration.md#remote_write) and +[remote read](configuration/configuration.md#remote_read) sections of the Prometheus +configuration documentation. -The built-in remote write receiver can be enabled by setting the `--web.enable-remote-write-receiver` command line flag. When enabled, the remote write receiver endpoint is `/api/v1/write`. +The built-in remote write receiver can be enabled by setting the +`--web.enable-remote-write-receiver` command line flag. When enabled, +the remote write receiver endpoint is `/api/v1/write`. -For details on the request and response messages, see the [remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto). +For details on the request and response messages, see the +[remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto). -Note that on the read path, Prometheus only fetches raw series data for a set of label selectors and time ranges from the remote end. All PromQL evaluation on the raw data still happens in Prometheus itself. This means that remote read queries have some scalability limit, since all necessary data needs to be loaded into the querying Prometheus server first and then processed there. However, supporting fully distributed evaluation of PromQL was deemed infeasible for the time being. 
+Note that on the read path, Prometheus only fetches raw series data for a set of +label selectors and time ranges from the remote end. All PromQL evaluation on the +raw data still happens in Prometheus itself. This means that remote read queries +have some scalability limit, since all necessary data needs to be loaded into the +querying Prometheus server first and then processed there. However, supporting +fully distributed evaluation of PromQL was deemed infeasible for the time being. ### Existing integrations -To learn more about existing integrations with remote storage systems, see the [Integrations documentation](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). +To learn more about existing integrations with remote storage systems, see the +[Integrations documentation](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). ## Backfilling from OpenMetrics format ### Overview -If a user wants to create blocks into the TSDB from data that is in [OpenMetrics](https://openmetrics.io/) format, they can do so using backfilling. However, they should be careful and note that it is not safe to backfill data from the last 3 hours (the current head block) as this time range may overlap with the current head block Prometheus is still mutating. Backfilling will create new TSDB blocks, each containing two hours of metrics data. This limits the memory requirements of block creation. Compacting the two hour blocks into larger blocks is later done by the Prometheus server itself. +If a user wants to create blocks into the TSDB from data that is in +[OpenMetrics](https://openmetrics.io/) format, they can do so using backfilling. +However, they should be careful and note that it is not safe to backfill data +from the last 3 hours (the current head block) as this time range may overlap +with the current head block Prometheus is still mutating. 
Backfilling will +create new TSDB blocks, each containing two hours of metrics data. This limits +the memory requirements of block creation. Compacting the two hour blocks into +larger blocks is later done by the Prometheus server itself. -A typical use case is to migrate metrics data from a different monitoring system or time-series database to Prometheus. To do so, the user must first convert the source data into [OpenMetrics](https://openmetrics.io/) format, which is the input format for the backfilling as described below. +A typical use case is to migrate metrics data from a different monitoring system +or time-series database to Prometheus. To do so, the user must first convert the +source data into [OpenMetrics](https://openmetrics.io/) format, which is the +input format for the backfilling as described below. ### Usage -Backfilling can be used via the Promtool command line. Promtool will write the blocks to a directory. By default this output directory is ./data/, you can change it by using the name of the desired output directory as an optional argument in the sub-command. +Backfilling can be used via the Promtool command line. Promtool will write the blocks +to a directory. By default this output directory is ./data/, you can change it by +using the name of the desired output directory as an optional argument in the sub-command. ``` promtool tsdb create-blocks-from openmetrics [] ``` -After the creation of the blocks, move it to the data directory of Prometheus. If there is an overlap with the existing blocks in Prometheus, the flag `--storage.tsdb.allow-overlapping-blocks` needs to be set for Prometheus versions v2.38 and below. Note that any backfilled data is subject to the retention configured for your Prometheus server (by time or size). +After the creation of the blocks, move it to the data directory of Prometheus. 
+If there is an overlap with the existing blocks in Prometheus, the flag +`--storage.tsdb.allow-overlapping-blocks` needs to be set for Prometheus versions +v2.38 and below. Note that any backfilled data is subject to the retention +configured for your Prometheus server (by time or size). #### Longer Block Durations -By default, the promtool will use the default block duration (2h) for the blocks; this behavior is the most generally applicable and correct. However, when backfilling data over a long range of times, it may be advantageous to use a larger value for the block duration to backfill faster and prevent additional compactions by TSDB later. +By default, the promtool will use the default block duration (2h) for the blocks; +this behavior is the most generally applicable and correct. However, when backfilling +data over a long range of times, it may be advantageous to use a larger value for +the block duration to backfill faster and prevent additional compactions by TSDB later. -The `--max-block-duration` flag allows the user to configure a maximum duration of blocks. The backfilling tool will pick a suitable block duration no larger than this. +The `--max-block-duration` flag allows the user to configure a maximum duration of blocks. +The backfilling tool will pick a suitable block duration no larger than this. -While larger blocks may improve the performance of backfilling large datasets, drawbacks exist as well. Time-based retention policies must keep the entire block around if even one sample of the (potentially large) block is still within the retention policy. Conversely, size-based retention policies will remove the entire block even if the TSDB only goes over the size limit in a minor way. +While larger blocks may improve the performance of backfilling large datasets, +drawbacks exist as well. Time-based retention policies must keep the entire block +around if even one sample of the (potentially large) block is still within the +retention policy. 
Conversely, size-based retention policies will remove the entire +block even if the TSDB only goes over the size limit in a minor way. -Therefore, backfilling with few blocks, thereby choosing a larger block duration, must be done with care and is not recommended for any production instances. +Therefore, backfilling with few blocks, thereby choosing a larger block duration, +must be done with care and is not recommended for any production instances. ## Backfilling for Recording Rules ### Overview -When a new recording rule is created, there is no historical data for it. Recording rule data only exists from the creation time on. `promtool` makes it possible to create historical recording rule data. +When a new recording rule is created, there is no historical data for it. +Recording rule data only exists from the creation time on. +`promtool` makes it possible to create historical recording rule data. ### Usage @@ -187,14 +252,26 @@ $ promtool tsdb create-blocks-from rules \ rules.yaml rules2.yaml ``` -The recording rule files provided should be a normal [Prometheus rules file](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/). +The recording rule files provided should be a normal +[Prometheus rules file](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/). -The output of `promtool tsdb create-blocks-from rules` command is a directory that contains blocks with the historical rule data for all rules in the recording rule files. By default, the output directory is `data/`. In order to make use of this new block data, the blocks must be moved to a running Prometheus instance data dir `storage.tsdb.path` (for Prometheus versions v2.38 and below, the flag `--storage.tsdb.allow-overlapping-blocks` must be enabled). Once moved, the new blocks will merge with existing blocks when the next compaction runs. 
+The output of `promtool tsdb create-blocks-from rules` command is a directory that +contains blocks with the historical rule data for all rules in the recording rule +files. By default, the output directory is `data/`. In order to make use of this +new block data, the blocks must be moved to a running Prometheus instance data dir +`storage.tsdb.path` (for Prometheus versions v2.38 and below, the flag +`--storage.tsdb.allow-overlapping-blocks` must be enabled). Once moved, the new +blocks will merge with existing blocks when the next compaction runs. ### Limitations -- If you run the rule backfiller multiple times with the overlapping start/end times, blocks containing the same data will be created each time the rule backfiller is run. +- If you run the rule backfiller multiple times with the overlapping start/end times, + blocks containing the same data will be created each time the rule backfiller is run. - All rules in the recording rule files will be evaluated. -- If the `interval` is set in the recording rule file that will take priority over the `eval-interval` flag in the rule backfill command. +- If the `interval` is set in the recording rule file that will take priority over + the `eval-interval` flag in the rule backfill command. - Alerts are currently ignored if they are in the recording rule file. -- Rules in the same group cannot see the results of previous rules. Meaning that rules that refer to other rules being backfilled is not supported. A workaround is to backfill multiple times and create the dependent data first (and move dependent data to the Prometheus server data dir so that it is accessible from the Prometheus API). +- Rules in the same group cannot see the results of previous rules. Meaning that rules + that refer to other rules being backfilled is not supported. 
A workaround is to + backfill multiple times and create the dependent data first (and move dependent + data to the Prometheus server data dir so that it is accessible from the Prometheus API). From f095c33da12958b5d92a7d857264fafb8d4c549a Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 20 Nov 2023 19:28:05 +0000 Subject: [PATCH 02/52] scrape: simplify TargetsActive function Since everything was serialized on a single mutex, it's exactly the same if we process targets in sequence without starting goroutines. Signed-off-by: Bryan Boreham --- scrape/manager.go | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/scrape/manager.go b/scrape/manager.go index a0ac38f6b..96cad00c9 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -279,24 +279,10 @@ func (m *Manager) TargetsActive() map[string][]*Target { m.mtxScrape.Lock() defer m.mtxScrape.Unlock() - var ( - wg sync.WaitGroup - mtx sync.Mutex - ) - targets := make(map[string][]*Target, len(m.scrapePools)) - wg.Add(len(m.scrapePools)) for tset, sp := range m.scrapePools { - // Running in parallel limits the blocking time of scrapePool to scrape - // interval when there's an update from SD. - go func(tset string, sp *scrapePool) { - mtx.Lock() - targets[tset] = sp.ActiveTargets() - mtx.Unlock() - wg.Done() - }(tset, sp) + targets[tset] = sp.ActiveTargets() } - wg.Wait() return targets } From 34676a240e21705e8832e7be0b40df785bc06eb8 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Wed, 22 Nov 2023 18:50:57 +0000 Subject: [PATCH 03/52] scrape: consistent function names for metadata Too confusing to have `MetadataList` and `ListMetadata`, etc. I standardised on the ones which are in an interface. 
Signed-off-by: Bryan Boreham --- scrape/metrics.go | 4 ++-- scrape/target.go | 10 +++++----- storage/remote/metadata_watcher.go | 2 +- web/api/v1/api.go | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/scrape/metrics.go b/scrape/metrics.go index d74143185..7082bc743 100644 --- a/scrape/metrics.go +++ b/scrape/metrics.go @@ -286,8 +286,8 @@ func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) { for tset, targets := range mc.TargetsGatherer.TargetsActive() { var size, length int for _, t := range targets { - size += t.MetadataSize() - length += t.MetadataLength() + size += t.SizeMetadata() + length += t.LengthMetadata() } ch <- prometheus.MustNewConstMetric( diff --git a/scrape/target.go b/scrape/target.go index 8cc8597a4..c100e1bee 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -92,7 +92,7 @@ type MetricMetadata struct { Unit string } -func (t *Target) MetadataList() []MetricMetadata { +func (t *Target) ListMetadata() []MetricMetadata { t.mtx.RLock() defer t.mtx.RUnlock() @@ -102,7 +102,7 @@ func (t *Target) MetadataList() []MetricMetadata { return t.metadata.ListMetadata() } -func (t *Target) MetadataSize() int { +func (t *Target) SizeMetadata() int { t.mtx.RLock() defer t.mtx.RUnlock() @@ -113,7 +113,7 @@ func (t *Target) MetadataSize() int { return t.metadata.SizeMetadata() } -func (t *Target) MetadataLength() int { +func (t *Target) LengthMetadata() int { t.mtx.RLock() defer t.mtx.RUnlock() @@ -124,8 +124,8 @@ func (t *Target) MetadataLength() int { return t.metadata.LengthMetadata() } -// Metadata returns type and help metadata for the given metric. -func (t *Target) Metadata(metric string) (MetricMetadata, bool) { +// GetMetadata returns type and help metadata for the given metric. 
+func (t *Target) GetMetadata(metric string) (MetricMetadata, bool) { t.mtx.RLock() defer t.mtx.RUnlock() diff --git a/storage/remote/metadata_watcher.go b/storage/remote/metadata_watcher.go index 21de565ed..abfea3c7b 100644 --- a/storage/remote/metadata_watcher.go +++ b/storage/remote/metadata_watcher.go @@ -136,7 +136,7 @@ func (mw *MetadataWatcher) collect() { metadata := []scrape.MetricMetadata{} for _, tset := range mw.manager.TargetsActive() { for _, target := range tset { - for _, entry := range target.MetadataList() { + for _, entry := range target.ListMetadata() { if _, ok := metadataSet[entry]; !ok { metadata = append(metadata, entry) metadataSet[entry] = struct{}{} diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 671df7887..8fa7ce14a 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -1114,7 +1114,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult { } // If no metric is specified, get the full list for the target. if metric == "" { - for _, md := range t.MetadataList() { + for _, md := range t.ListMetadata() { res = append(res, metricMetadata{ Target: t.Labels(), Metric: md.Metric, @@ -1126,7 +1126,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult { continue } // Get metadata for the specified metric. 
- if md, ok := t.Metadata(metric); ok { + if md, ok := t.GetMetadata(metric); ok { res = append(res, metricMetadata{ Target: t.Labels(), Type: md.Type, @@ -1249,7 +1249,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { for _, tt := range api.targetRetriever(r.Context()).TargetsActive() { for _, t := range tt { if metric == "" { - for _, mm := range t.MetadataList() { + for _, mm := range t.ListMetadata() { m := metadata{Type: mm.Type, Help: mm.Help, Unit: mm.Unit} ms, ok := metrics[mm.Metric] @@ -1266,7 +1266,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { continue } - if md, ok := t.Metadata(metric); ok { + if md, ok := t.GetMetadata(metric); ok { m := metadata{Type: md.Type, Help: md.Help, Unit: md.Unit} ms, ok := metrics[md.Metric] From 048886ae8abeaee79e6081ce39b014a2d49ca0b9 Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Wed, 29 Nov 2023 08:54:05 +0200 Subject: [PATCH 04/52] Histograms: optimize floatBucketIterator for common case Signed-off-by: Linas Medziunas --- model/histogram/float_histogram.go | 120 +++++++++++++++++------------ 1 file changed, 69 insertions(+), 51 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 6fa221354..10690b289 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -765,8 +765,9 @@ func (h *FloatHistogram) floatBucketIterator( schema: h.Schema, positive: positive, }, - targetSchema: targetSchema, - absoluteStartValue: absoluteStartValue, + targetSchema: targetSchema, + absoluteStartValue: absoluteStartValue, + boundReachedStartValue: absoluteStartValue == 0, } if positive { i.spans = h.PositiveSpans @@ -824,55 +825,83 @@ func (i *floatBucketIterator) Next() bool { return false } - // Copy all of these into local variables so that we can forward to the - // next bucket and then roll back if needed. 
- origIdx, spansIdx, idxInSpan := i.origIdx, i.spansIdx, i.idxInSpan - span := i.spans[spansIdx] - firstPass := true - i.currCount = 0 - -mergeLoop: // Merge together all buckets from the original schema that fall into one bucket in the targetSchema. - for { + if i.schema == i.targetSchema { + // Fast path for the common case. + span := i.spans[i.spansIdx] if i.bucketsIdx == 0 { // Seed origIdx for the first bucket. - origIdx = span.Offset + i.currIdx = span.Offset } else { - origIdx++ + i.currIdx++ } - for idxInSpan >= span.Length { + + for i.idxInSpan >= span.Length { // We have exhausted the current span and have to find a new // one. We even handle pathologic spans of length 0 here. - idxInSpan = 0 - spansIdx++ - if spansIdx >= len(i.spans) { - if firstPass { - return false + i.idxInSpan = 0 + i.spansIdx++ + if i.spansIdx >= len(i.spans) { + return false + } + span = i.spans[i.spansIdx] + i.currIdx += span.Offset + } + + i.currCount = i.buckets[i.bucketsIdx] + i.idxInSpan++ + i.bucketsIdx++ + } else { + // Copy all of these into local variables so that we can forward to the + // next bucket and then roll back if needed. + origIdx, spansIdx, idxInSpan := i.origIdx, i.spansIdx, i.idxInSpan + span := i.spans[spansIdx] + firstPass := true + i.currCount = 0 + + mergeLoop: // Merge together all buckets from the original schema that fall into one bucket in the targetSchema. + for { + if i.bucketsIdx == 0 { + // Seed origIdx for the first bucket. + origIdx = span.Offset + } else { + origIdx++ + } + for idxInSpan >= span.Length { + // We have exhausted the current span and have to find a new + // one. We even handle pathologic spans of length 0 here. 
+ idxInSpan = 0 + spansIdx++ + if spansIdx >= len(i.spans) { + if firstPass { + return false + } + break mergeLoop } + span = i.spans[spansIdx] + origIdx += span.Offset + } + currIdx := targetIdx(origIdx, i.schema, i.targetSchema) + switch { + case firstPass: + i.currIdx = currIdx + firstPass = false + case currIdx != i.currIdx: + // Reached next bucket in targetSchema. + // Do not actually forward to the next bucket, but break out. + break mergeLoop + } + i.currCount += i.buckets[i.bucketsIdx] + idxInSpan++ + i.bucketsIdx++ + i.origIdx, i.spansIdx, i.idxInSpan = origIdx, spansIdx, idxInSpan + if i.schema == i.targetSchema { + // Don't need to test the next bucket for mergeability + // if we have no schema change anyway. break mergeLoop } - span = i.spans[spansIdx] - origIdx += span.Offset - } - currIdx := i.targetIdx(origIdx) - switch { - case firstPass: - i.currIdx = currIdx - firstPass = false - case currIdx != i.currIdx: - // Reached next bucket in targetSchema. - // Do not actually forward to the next bucket, but break out. - break mergeLoop - } - i.currCount += i.buckets[i.bucketsIdx] - idxInSpan++ - i.bucketsIdx++ - i.origIdx, i.spansIdx, i.idxInSpan = origIdx, spansIdx, idxInSpan - if i.schema == i.targetSchema { - // Don't need to test the next bucket for mergeability - // if we have no schema change anyway. - break mergeLoop } } + // Skip buckets before absoluteStartValue. // TODO(beorn7): Maybe do something more efficient than this recursive call. if !i.boundReachedStartValue && getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue { @@ -882,17 +911,6 @@ mergeLoop: // Merge together all buckets from the original schema that fall into return true } -// targetIdx returns the bucket index within i.targetSchema for the given bucket -// index within i.schema. -func (i *floatBucketIterator) targetIdx(idx int32) int32 { - if i.schema == i.targetSchema { - // Fast path for the common case. The below would yield the same - // result, just with more effort. 
- return idx - } - return ((idx - 1) >> (i.schema - i.targetSchema)) + 1 -} - type reverseFloatBucketIterator struct { baseBucketIterator[float64, float64] idxInSpan int32 // Changed from uint32 to allow negative values for exhaustion detection. From 9bf4cc993ed091c79452b65a999d134de0c84a07 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Fri, 1 Dec 2023 01:22:58 +0800 Subject: [PATCH 05/52] Add mad_over_time function Signed-off-by: Jeanette Tan --- docs/querying/functions.md | 1 + promql/functions.go | 20 ++++++++ promql/functions_test.go | 50 +++++++++++++++++++ promql/parser/functions.go | 5 ++ .../src/complete/promql.terms.ts | 6 +++ .../src/parser/parser.test.ts | 5 ++ .../codemirror-promql/src/types/function.ts | 7 +++ web/ui/module/lezer-promql/src/promql.grammar | 2 + 8 files changed, 96 insertions(+) diff --git a/docs/querying/functions.md b/docs/querying/functions.md index 8a4b2e80f..dda88fccd 100644 --- a/docs/querying/functions.md +++ b/docs/querying/functions.md @@ -640,6 +640,7 @@ over time and return an instant vector with per-series aggregation results: * `quantile_over_time(scalar, range-vector)`: the φ-quantile (0 ≤ φ ≤ 1) of the values in the specified interval. * `stddev_over_time(range-vector)`: the population standard deviation of the values in the specified interval. * `stdvar_over_time(range-vector)`: the population standard variance of the values in the specified interval. +* `mad_over_time(range-vector)`: the median absolute deviation of all points in the specified interval. * `last_over_time(range-vector)`: the most recent point value in the specified interval. * `present_over_time(range-vector)`: the value 1 for any series in the specified interval. 
diff --git a/promql/functions.go b/promql/functions.go index 06f6f8c71..407a11b50 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -609,6 +609,25 @@ func funcLastOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNod }), nil } +// === mad_over_time(Matrix parser.ValueTypeMatrix) (Vector, Annotations) === +func funcMadOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + if len(vals[0].(Matrix)[0].Floats) == 0 { + return enh.Out, nil + } + return aggrOverTime(vals, enh, func(s Series) float64 { + values := make(vectorByValueHeap, 0, len(s.Floats)) + for _, f := range s.Floats { + values = append(values, Sample{F: f.F}) + } + median := quantile(0.5, values) + values = make(vectorByValueHeap, 0, len(s.Floats)) + for _, f := range s.Floats { + values = append(values, Sample{F: math.Abs(f.F - median)}) + } + return quantile(0.5, values) + }), nil +} + // === max_over_time(Matrix parser.ValueTypeMatrix) (Vector, Annotations) === func funcMaxOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { if len(vals[0].(Matrix)[0].Floats) == 0 { @@ -1538,6 +1557,7 @@ var FunctionCalls = map[string]FunctionCall{ "log10": funcLog10, "log2": funcLog2, "last_over_time": funcLastOverTime, + "mad_over_time": funcMadOverTime, "max_over_time": funcMaxOverTime, "min_over_time": funcMinOverTime, "minute": funcMinute, diff --git a/promql/functions_test.go b/promql/functions_test.go index faf6859e7..abc8f9dc6 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -15,6 +15,7 @@ package promql import ( "context" + "fmt" "math" "testing" "time" @@ -86,3 +87,52 @@ func TestKahanSum(t *testing.T) { expected := 2.0 require.Equal(t, expected, kahanSum(vals)) } + +func TestMadOverTime(t *testing.T) { + cases := []struct { + series []int + expectedRes float64 + }{ + { + series: []int{4, 6, 2, 1, 999, 1, 2}, + expectedRes: 1, + }, + } + + for 
i, c := range cases { + t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { + engine := newTestEngine() + storage := teststorage.New(t) + t.Cleanup(func() { storage.Close() }) + + seriesName := "float_series" + + ts := int64(0) + app := storage.Appender(context.Background()) + lbls := labels.FromStrings("__name__", seriesName) + var err error + for _, num := range c.series { + _, err = app.Append(0, lbls, ts, float64(num)) + require.NoError(t, err) + ts += int64(1 * time.Minute / time.Millisecond) + } + require.NoError(t, app.Commit()) + + queryAndCheck := func(queryString string, exp Vector) { + qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) + require.NoError(t, err) + + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + + vector, err := res.Vector() + require.NoError(t, err) + + require.Equal(t, exp, vector) + } + + queryString := fmt.Sprintf(`mad_over_time(%s[%dm])`, seriesName, len(c.series)) + queryAndCheck(queryString, []Sample{{T: ts, F: c.expectedRes, Metric: labels.EmptyLabels()}}) + }) + } +} diff --git a/promql/parser/functions.go b/promql/parser/functions.go index ee2e90c55..aafb375da 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -254,6 +254,11 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeVector}, ReturnType: ValueTypeVector, }, + "mad_over_time": { + Name: "mad_over_time", + ArgTypes: []ValueType{ValueTypeMatrix}, + ReturnType: ValueTypeVector, + }, "max_over_time": { Name: "max_over_time", ArgTypes: []ValueType{ValueTypeMatrix}, diff --git a/web/ui/module/codemirror-promql/src/complete/promql.terms.ts b/web/ui/module/codemirror-promql/src/complete/promql.terms.ts index 77a87c8cc..963fc95f2 100644 --- a/web/ui/module/codemirror-promql/src/complete/promql.terms.ts +++ b/web/ui/module/codemirror-promql/src/complete/promql.terms.ts @@ -317,6 +317,12 @@ export const functionIdentifierTerms = [ info: 'Calculate base-2 
logarithm of input series', type: 'function', }, + { + label: 'mad_over_time', + detail: 'function', + info: 'Return the median absolute deviation over time for input series', + type: 'function', + }, { label: 'max_over_time', detail: 'function', diff --git a/web/ui/module/codemirror-promql/src/parser/parser.test.ts b/web/ui/module/codemirror-promql/src/parser/parser.test.ts index 5ef9c1f90..78195a5c6 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.test.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.test.ts @@ -95,6 +95,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'mad_over_time(rate(metric_name[5m])[1h:] offset 1m)', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'max_over_time(rate(metric_name[5m])[1h:] offset 1m)', expectedValueType: ValueType.vector, diff --git a/web/ui/module/codemirror-promql/src/types/function.ts b/web/ui/module/codemirror-promql/src/types/function.ts index cceeef90b..369478158 100644 --- a/web/ui/module/codemirror-promql/src/types/function.ts +++ b/web/ui/module/codemirror-promql/src/types/function.ts @@ -56,6 +56,7 @@ import { Ln, Log10, Log2, + MadOverTime, MaxOverTime, MinOverTime, Minute, @@ -370,6 +371,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = { variadic: 0, returnType: ValueType.vector, }, + [MadOverTime]: { + name: 'mad_over_time', + argTypes: [ValueType.matrix], + variadic: 0, + returnType: ValueType.vector, + }, [MaxOverTime]: { name: 'max_over_time', argTypes: [ValueType.matrix], diff --git a/web/ui/module/lezer-promql/src/promql.grammar b/web/ui/module/lezer-promql/src/promql.grammar index 5280ea800..ab627c829 100644 --- a/web/ui/module/lezer-promql/src/promql.grammar +++ b/web/ui/module/lezer-promql/src/promql.grammar @@ -149,6 +149,7 @@ FunctionIdentifier { Ln | Log10 | Log2 | + MadOverTime | MaxOverTime | MinOverTime | Minute | @@ -380,6 +381,7 @@ 
NumberLiteral { Ln { condFn<"ln"> } Log10 { condFn<"log10"> } Log2 { condFn<"log2"> } + MadOverTime { condFn<"mad_over_time"> } MaxOverTime { condFn<"max_over_time"> } MinOverTime { condFn<"min_over_time"> } Minute { condFn<"minute"> } From 2910b48180e4988adf90839c7629de11e2fb6129 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Fri, 1 Dec 2023 01:55:01 +0800 Subject: [PATCH 06/52] Make mad_over_time experimental and move tests Signed-off-by: Jeanette Tan --- promql/functions_test.go | 50 ---------------------------------- promql/parser/functions.go | 7 +++-- promql/testdata/functions.test | 8 ++++++ 3 files changed, 12 insertions(+), 53 deletions(-) diff --git a/promql/functions_test.go b/promql/functions_test.go index abc8f9dc6..faf6859e7 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -15,7 +15,6 @@ package promql import ( "context" - "fmt" "math" "testing" "time" @@ -87,52 +86,3 @@ func TestKahanSum(t *testing.T) { expected := 2.0 require.Equal(t, expected, kahanSum(vals)) } - -func TestMadOverTime(t *testing.T) { - cases := []struct { - series []int - expectedRes float64 - }{ - { - series: []int{4, 6, 2, 1, 999, 1, 2}, - expectedRes: 1, - }, - } - - for i, c := range cases { - t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "float_series" - - ts := int64(0) - app := storage.Appender(context.Background()) - lbls := labels.FromStrings("__name__", seriesName) - var err error - for _, num := range c.series { - _, err = app.Append(0, lbls, ts, float64(num)) - require.NoError(t, err) - ts += int64(1 * time.Minute / time.Millisecond) - } - require.NoError(t, app.Commit()) - - queryAndCheck := func(queryString string, exp Vector) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - 
require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Equal(t, exp, vector) - } - - queryString := fmt.Sprintf(`mad_over_time(%s[%dm])`, seriesName, len(c.series)) - queryAndCheck(queryString, []Sample{{T: ts, F: c.expectedRes, Metric: labels.EmptyLabels()}}) - }) - } -} diff --git a/promql/parser/functions.go b/promql/parser/functions.go index aafb375da..46d50d547 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -255,9 +255,10 @@ var Functions = map[string]*Function{ ReturnType: ValueTypeVector, }, "mad_over_time": { - Name: "mad_over_time", - ArgTypes: []ValueType{ValueTypeMatrix}, - ReturnType: ValueTypeVector, + Name: "mad_over_time", + ArgTypes: []ValueType{ValueTypeMatrix}, + ReturnType: ValueTypeVector, + Experimental: true, }, "max_over_time": { Name: "max_over_time", diff --git a/promql/testdata/functions.test b/promql/testdata/functions.test index b5263a96f..b4547886a 100644 --- a/promql/testdata/functions.test +++ b/promql/testdata/functions.test @@ -739,6 +739,14 @@ eval instant at 1m stdvar_over_time(metric[1m]) eval instant at 1m stddev_over_time(metric[1m]) {} 0 +# Tests for mad_over_time. +clear +load 10s + metric 4 6 2 1 999 1 2 + +eval instant at 70s mad_over_time(metric[70s]) + {} 1 + # Tests for quantile_over_time clear From 2f5f711cf1b07847bcc54711b2d887f00fa74247 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 23:23:47 +0000 Subject: [PATCH 07/52] build(deps): bump dessant/lock-threads from 4.0.1 to 5.0.1 Bumps [dessant/lock-threads](https://github.com/dessant/lock-threads) from 4.0.1 to 5.0.1. 
- [Release notes](https://github.com/dessant/lock-threads/releases) - [Changelog](https://github.com/dessant/lock-threads/blob/main/CHANGELOG.md) - [Commits](https://github.com/dessant/lock-threads/compare/be8aa5be94131386884a6da4189effda9b14aa21...1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771) --- updated-dependencies: - dependency-name: dessant/lock-threads dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/lock.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml index 3f557a089..e7e813e3b 100644 --- a/.github/workflows/lock.yml +++ b/.github/workflows/lock.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: dessant/lock-threads@be8aa5be94131386884a6da4189effda9b14aa21 # v4.0.1 + - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1 with: process-only: 'issues' issue-inactive-days: '180' From 6d8e82df1ec96ec37481e84acc2cdabae3d7b70b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 23:23:56 +0000 Subject: [PATCH 08/52] build(deps): bump github/codeql-action from 2.22.5 to 2.22.8 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.22.5 to 2.22.8. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/74483a38d39275f33fcff5f35b679b5ca4a26a99...407ffafae6a767df3e0230c3df91b6443ae8df75) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/codeql-analysis.yml | 6 +++--- .github/workflows/scorecards.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 19f8cfb55..5e14936a9 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -30,12 +30,12 @@ jobs: go-version: 1.21.x - name: Initialize CodeQL - uses: github/codeql-action/init@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 + uses: github/codeql-action/init@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 + uses: github/codeql-action/autobuild@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 + uses: github/codeql-action/analyze@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 091b50ccd..f71e1331b 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -45,6 +45,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@74483a38d39275f33fcff5f35b679b5ca4a26a99 # tag=v2.22.5 + uses: github/codeql-action/upload-sarif@407ffafae6a767df3e0230c3df91b6443ae8df75 # tag=v2.22.8 with: sarif_file: results.sarif From 59f467ca849a62680e800b62409766facd4ff4e2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 23:24:02 +0000 Subject: [PATCH 09/52] build(deps): bump bufbuild/buf-setup-action from 1.26.1 to 1.28.1 Bumps [bufbuild/buf-setup-action](https://github.com/bufbuild/buf-setup-action) from 1.26.1 to 1.28.1. 
- [Release notes](https://github.com/bufbuild/buf-setup-action/releases) - [Commits](https://github.com/bufbuild/buf-setup-action/compare/eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b...382440cdb8ec7bc25a68d7b4711163d95f7cc3aa) --- updated-dependencies: - dependency-name: bufbuild/buf-setup-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/buf-lint.yml | 2 +- .github/workflows/buf.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 85109b39a..0f3c5d277 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - uses: bufbuild/buf-setup-action@eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b # v1.26.1 + - uses: bufbuild/buf-setup-action@382440cdb8ec7bc25a68d7b4711163d95f7cc3aa # v1.28.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} - uses: bufbuild/buf-lint-action@bd48f53224baaaf0fc55de9a913e7680ca6dbea4 # v1.0.3 diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index c2c9dc070..f6d5c9191 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -13,7 +13,7 @@ jobs: if: github.repository_owner == 'prometheus' steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - uses: bufbuild/buf-setup-action@eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b # v1.26.1 + - uses: bufbuild/buf-setup-action@382440cdb8ec7bc25a68d7b4711163d95f7cc3aa # v1.28.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} - uses: bufbuild/buf-lint-action@bd48f53224baaaf0fc55de9a913e7680ca6dbea4 # v1.0.3 From 237bfea46b59600cca77520de033d8896f6d3aeb Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 28 Nov 2023 16:33:28 +0100 Subject: [PATCH 10/52] `chunks.Reader`: Fix typo in ChunkOrIterable doc string. 
Also fix comment typo in `FloatHistogram.Sub`. Signed-off-by: Arve Knudsen --- model/histogram/float_histogram.go | 2 +- tsdb/chunks/chunks.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 1c2f3ebec..cf20b5080 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -303,7 +303,7 @@ func (h *FloatHistogram) Sub(other *FloatHistogram) *FloatHistogram { ) if other.Schema < h.Schema { - panic(fmt.Errorf("cannot subtract histigram with schema %d to %d", other.Schema, h.Schema)) + panic(fmt.Errorf("cannot subtract histogram with schema %d to %d", other.Schema, h.Schema)) } else if other.Schema > h.Schema { otherPositiveSpans, otherPositiveBuckets = reduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false) otherNegativeSpans, otherNegativeBuckets = reduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false) diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index f22285a0c..2c6db3637 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -671,7 +671,7 @@ func (s *Reader) Size() int64 { return s.size } -// Chunk returns a chunk from a given reference. +// ChunkOrIterable returns a chunk from a given reference. 
func (s *Reader) ChunkOrIterable(meta Meta) (chunkenc.Chunk, chunkenc.Iterable, error) { sgmIndex, chkStart := BlockChunkRef(meta.Ref).Unpack() From bd895baefcafe785c0fd3ee33a5bf2f361985ced Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Thu, 7 Dec 2023 20:50:54 +0200 Subject: [PATCH 11/52] FloatHistogram.Add/Sub: handle any schema change Signed-off-by: Linas Medziunas --- model/histogram/float_histogram.go | 45 +++++++++++------ model/histogram/float_histogram_test.go | 64 +++++++++++++++++++------ 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 6fa221354..6ced7c7c5 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -231,11 +231,8 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { // resulting histogram might have buckets with a population of zero or directly // adjacent spans (offset=0). To normalize those, call the Compact method. // -// The method reconciles differences in the zero threshold and in the schema, -// but the schema of the other histogram must be ≥ the schema of the receiving -// histogram (i.e. must have an equal or higher resolution). This means that the -// schema of the receiving histogram won't change. Its zero threshold, however, -// will change if needed. The other histogram will not be modified in any case. +// The method reconciles differences in the zero threshold and in the schema, and +// changes them if needed. The other histogram will not be modified in any case. // // This method returns a pointer to the receiving histogram for convenience. 
func (h *FloatHistogram) Add(other *FloatHistogram) *FloatHistogram { @@ -269,21 +266,30 @@ func (h *FloatHistogram) Add(other *FloatHistogram) *FloatHistogram { h.Sum += other.Sum var ( + hPositiveSpans = h.PositiveSpans + hPositiveBuckets = h.PositiveBuckets + hNegativeSpans = h.NegativeSpans + hNegativeBuckets = h.NegativeBuckets + otherPositiveSpans = other.PositiveSpans otherPositiveBuckets = other.PositiveBuckets otherNegativeSpans = other.NegativeSpans otherNegativeBuckets = other.NegativeBuckets ) - if other.Schema < h.Schema { - panic(fmt.Errorf("cannot add histogram with schema %d to %d", other.Schema, h.Schema)) - } else if other.Schema > h.Schema { + switch { + case other.Schema < h.Schema: + hPositiveSpans, hPositiveBuckets = reduceResolution(hPositiveSpans, hPositiveBuckets, h.Schema, other.Schema, false, true) + hNegativeSpans, hNegativeBuckets = reduceResolution(hNegativeSpans, hNegativeBuckets, h.Schema, other.Schema, false, true) + h.Schema = other.Schema + + case other.Schema > h.Schema: otherPositiveSpans, otherPositiveBuckets = reduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false) otherNegativeSpans, otherNegativeBuckets = reduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false) } - h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, h.PositiveSpans, h.PositiveBuckets, otherPositiveSpans, otherPositiveBuckets) - h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, h.NegativeSpans, h.NegativeBuckets, otherNegativeSpans, otherNegativeBuckets) + h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, hPositiveSpans, hPositiveBuckets, otherPositiveSpans, otherPositiveBuckets) + h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, hNegativeSpans, hNegativeBuckets, otherNegativeSpans, otherNegativeBuckets) return h } @@ -296,21 +302,30 @@ func (h 
*FloatHistogram) Sub(other *FloatHistogram) *FloatHistogram { h.Sum -= other.Sum var ( + hPositiveSpans = h.PositiveSpans + hPositiveBuckets = h.PositiveBuckets + hNegativeSpans = h.NegativeSpans + hNegativeBuckets = h.NegativeBuckets + otherPositiveSpans = other.PositiveSpans otherPositiveBuckets = other.PositiveBuckets otherNegativeSpans = other.NegativeSpans otherNegativeBuckets = other.NegativeBuckets ) - if other.Schema < h.Schema { - panic(fmt.Errorf("cannot subtract histigram with schema %d to %d", other.Schema, h.Schema)) - } else if other.Schema > h.Schema { + switch { + case other.Schema < h.Schema: + hPositiveSpans, hPositiveBuckets = reduceResolution(hPositiveSpans, hPositiveBuckets, h.Schema, other.Schema, false, true) + hNegativeSpans, hNegativeBuckets = reduceResolution(hNegativeSpans, hNegativeBuckets, h.Schema, other.Schema, false, true) + h.Schema = other.Schema + + case other.Schema > h.Schema: otherPositiveSpans, otherPositiveBuckets = reduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false) otherNegativeSpans, otherNegativeBuckets = reduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false) } - h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, h.PositiveSpans, h.PositiveBuckets, otherPositiveSpans, otherPositiveBuckets) - h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, h.NegativeSpans, h.NegativeBuckets, otherNegativeSpans, otherNegativeBuckets) + h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, hPositiveSpans, hPositiveBuckets, otherPositiveSpans, otherPositiveBuckets) + h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, hNegativeSpans, hNegativeBuckets, otherNegativeSpans, otherNegativeBuckets) return h } diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index b93a6d90d..b2e412e66 100644 --- 
a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -1242,7 +1242,7 @@ func TestFloatHistogramAdd(t *testing.T) { Sum: 1.234, PositiveSpans: []Span{{0, 2}, {3, 3}}, PositiveBuckets: []float64{5, 4, 2, 3, 6}, - NegativeSpans: []Span{{-9, 2}, {3, 2}}, + NegativeSpans: []Span{{-6, 2}, {1, 2}}, NegativeBuckets: []float64{1, 1, 4, 4}, }, &FloatHistogram{ @@ -1262,7 +1262,7 @@ func TestFloatHistogramAdd(t *testing.T) { Sum: 3.579, PositiveSpans: []Span{{-2, 2}, {0, 5}, {0, 3}}, PositiveBuckets: []float64{1, 0, 5, 4, 3, 4, 7, 2, 3, 6}, - NegativeSpans: []Span{{-9, 2}, {3, 2}, {5, 2}, {3, 2}}, + NegativeSpans: []Span{{-6, 2}, {1, 2}, {4, 2}, {3, 2}}, NegativeBuckets: []float64{1, 1, 4, 4, 3, 1, 5, 6}, }, }, @@ -1573,16 +1573,33 @@ func TestFloatHistogramAdd(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { - in2Copy := c.in2.Copy() - require.Equal(t, c.expected, c.in1.Add(c.in2)) - // Has it also happened in-place? - require.Equal(t, c.expected, c.in1) - // Check that the argument was not mutated. - require.Equal(t, in2Copy, c.in2) + testHistogramAdd(t, c.in1, c.in2, c.expected) + testHistogramAdd(t, c.in2, c.in1, c.expected) }) } } +func testHistogramAdd(t *testing.T, a, b, expected *FloatHistogram) { + var ( + aCopy = a.Copy() + bCopy = b.Copy() + expectedCopy = expected.Copy() + ) + + res := aCopy.Add(bCopy) + + res.Compact(0) + expectedCopy.Compact(0) + + require.Equal(t, expectedCopy, res) + + // Has it also happened in-place? + require.Equal(t, expectedCopy, aCopy) + + // Check that the argument was not mutated. + require.Equal(t, b, bCopy) +} + func TestFloatHistogramSub(t *testing.T) { // This has fewer test cases than TestFloatHistogramAdd because Add and // Sub share most of the trickier code. 
@@ -1662,16 +1679,35 @@ func TestFloatHistogramSub(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { - in2Copy := c.in2.Copy() - require.Equal(t, c.expected, c.in1.Sub(c.in2)) - // Has it also happened in-place? - require.Equal(t, c.expected, c.in1) - // Check that the argument was not mutated. - require.Equal(t, in2Copy, c.in2) + testFloatHistogramSub(t, c.in1, c.in2, c.expected) + + expectedNegative := c.expected.Copy().Mul(-1) + testFloatHistogramSub(t, c.in2, c.in1, expectedNegative) }) } } +func testFloatHistogramSub(t *testing.T, a, b, expected *FloatHistogram) { + var ( + aCopy = a.Copy() + bCopy = b.Copy() + expectedCopy = expected.Copy() + ) + + res := aCopy.Sub(bCopy) + + res.Compact(0) + expectedCopy.Compact(0) + + require.Equal(t, expectedCopy, res) + + // Has it also happened in-place? + require.Equal(t, expectedCopy, aCopy) + + // Check that the argument was not mutated. + require.Equal(t, b, bCopy) +} + func TestFloatHistogramCopyToSchema(t *testing.T) { cases := []struct { name string From 48b1818ee808c193bf34e5d6b3fd5447e0f68fc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Dec 2023 09:19:48 +0000 Subject: [PATCH 12/52] build(deps-dev): bump @types/node from 17.0.45 to 20.10.4 in /web/ui (#13260) Bumps [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) from 17.0.45 to 20.10.4. - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node) --- updated-dependencies: - dependency-name: "@types/node" dependency-type: direct:development update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- web/ui/package-lock.json | 30 +++++++++++++++++++++++------- web/ui/package.json | 2 +- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index f6138ff95..51dd60749 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -13,7 +13,7 @@ ], "devDependencies": { "@types/jest": "^29.5.11", - "@types/node": "^17.0.45", + "@types/node": "^20.10.4", "eslint-config-prettier": "^8.10.0", "eslint-config-react-app": "^7.0.1", "eslint-plugin-prettier": "^4.2.1", @@ -4483,9 +4483,12 @@ "devOptional": true }, "node_modules/@types/node": { - "version": "17.0.45", - "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.45.tgz", - "integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw==" + "version": "20.10.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.4.tgz", + "integrity": "sha512-D08YG6rr8X90YB56tSIuBaddy/UXAA9RKJoFvrsnogAum/0pmjkgi4+2nx96A330FmioegBWmEYQ+syqCFaveg==", + "dependencies": { + "undici-types": "~5.26.4" + } }, "node_modules/@types/parse-json": { "version": "4.0.0", @@ -19677,6 +19680,11 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, "node_modules/unicode-canonical-property-names-ecmascript": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-2.0.0.tgz", @@ -24179,9 +24187,12 @@ "devOptional": true }, "@types/node": { - "version": "17.0.45", - "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.45.tgz", - 
"integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw==" + "version": "20.10.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.4.tgz", + "integrity": "sha512-D08YG6rr8X90YB56tSIuBaddy/UXAA9RKJoFvrsnogAum/0pmjkgi4+2nx96A330FmioegBWmEYQ+syqCFaveg==", + "requires": { + "undici-types": "~5.26.4" + } }, "@types/parse-json": { "version": "4.0.0", @@ -35605,6 +35616,11 @@ "which-boxed-primitive": "^1.0.2" } }, + "undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, "unicode-canonical-property-names-ecmascript": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-2.0.0.tgz", diff --git a/web/ui/package.json b/web/ui/package.json index 17d57ee6d..7212ccacb 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -17,7 +17,7 @@ }, "devDependencies": { "@types/jest": "^29.5.11", - "@types/node": "^17.0.45", + "@types/node": "^20.10.4", "eslint-config-prettier": "^8.10.0", "eslint-config-react-app": "^7.0.1", "eslint-plugin-prettier": "^4.2.1", From 10a82f87fd07b89a5badc935b5535120c1122eb2 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Wed, 29 Nov 2023 15:15:57 +0100 Subject: [PATCH 13/52] Enable reusing memory when converting between histogram types The 'ToFloat' method on integer histograms currently allocates new memory each time it is called. This commit adds an optional *FloatHistogram parameter that can be used to reuse span and bucket slices. It is up to the caller to make sure the input float histogram is not used anymore after the call. 
Signed-off-by: Filip Petkovski --- model/histogram/histogram.go | 79 +++++++++++++-------------- model/histogram/histogram_test.go | 58 ++++++++++++++++++-- promql/engine_test.go | 20 +++---- rules/manager_test.go | 4 +- scrape/target_test.go | 2 +- storage/buffer.go | 4 +- storage/buffer_test.go | 6 +- storage/remote/codec_test.go | 4 +- storage/remote/queue_manager_test.go | 2 +- storage/remote/write_handler_test.go | 2 +- tsdb/block_test.go | 4 +- tsdb/chunkenc/float_histogram_test.go | 22 ++++---- tsdb/chunkenc/histogram_test.go | 10 ++-- tsdb/compact_test.go | 4 +- tsdb/db_test.go | 14 ++--- tsdb/head_test.go | 10 ++-- tsdb/record/record_test.go | 2 +- tsdb/wlog/watcher_test.go | 2 +- web/federate.go | 2 +- web/federate_test.go | 4 +- 20 files changed, 151 insertions(+), 104 deletions(-) diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index fb0185a63..1fe342482 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -282,50 +282,49 @@ func (h *Histogram) Compact(maxEmptyBuckets int) *Histogram { return h } -// ToFloat returns a FloatHistogram representation of the Histogram. It is a -// deep copy (e.g. spans are not shared). -func (h *Histogram) ToFloat() *FloatHistogram { - var ( - positiveSpans, negativeSpans []Span - positiveBuckets, negativeBuckets []float64 - ) - if len(h.PositiveSpans) != 0 { - positiveSpans = make([]Span, len(h.PositiveSpans)) - copy(positiveSpans, h.PositiveSpans) +// ToFloat returns a FloatHistogram representation of the Histogram. It is a deep +// copy (e.g. spans are not shared). The function accepts a FloatHistogram as an +// argument whose memory will be reused and overwritten if provided. If this +// argument is nil, a new FloatHistogram will be allocated. 
+func (h *Histogram) ToFloat(fh *FloatHistogram) *FloatHistogram { + if fh == nil { + fh = &FloatHistogram{} } - if len(h.NegativeSpans) != 0 { - negativeSpans = make([]Span, len(h.NegativeSpans)) - copy(negativeSpans, h.NegativeSpans) - } - if len(h.PositiveBuckets) != 0 { - positiveBuckets = make([]float64, len(h.PositiveBuckets)) - var current float64 - for i, b := range h.PositiveBuckets { - current += float64(b) - positiveBuckets[i] = current - } - } - if len(h.NegativeBuckets) != 0 { - negativeBuckets = make([]float64, len(h.NegativeBuckets)) - var current float64 - for i, b := range h.NegativeBuckets { - current += float64(b) - negativeBuckets[i] = current - } + fh.CounterResetHint = h.CounterResetHint + fh.Schema = h.Schema + fh.ZeroThreshold = h.ZeroThreshold + fh.ZeroCount = float64(h.ZeroCount) + fh.Count = float64(h.Count) + fh.Sum = h.Sum + + fh.PositiveSpans = resize(fh.PositiveSpans, len(h.PositiveSpans)) + copy(fh.PositiveSpans, h.PositiveSpans) + + fh.NegativeSpans = resize(fh.NegativeSpans, len(h.NegativeSpans)) + copy(fh.NegativeSpans, h.NegativeSpans) + + fh.PositiveBuckets = resize(fh.PositiveBuckets, len(h.PositiveBuckets)) + var currentPositive float64 + for i, b := range h.PositiveBuckets { + currentPositive += float64(b) + fh.PositiveBuckets[i] = currentPositive } - return &FloatHistogram{ - CounterResetHint: h.CounterResetHint, - Schema: h.Schema, - ZeroThreshold: h.ZeroThreshold, - ZeroCount: float64(h.ZeroCount), - Count: float64(h.Count), - Sum: h.Sum, - PositiveSpans: positiveSpans, - NegativeSpans: negativeSpans, - PositiveBuckets: positiveBuckets, - NegativeBuckets: negativeBuckets, + fh.NegativeBuckets = resize(fh.NegativeBuckets, len(h.NegativeBuckets)) + var currentNegative float64 + for i, b := range h.NegativeBuckets { + currentNegative += float64(b) + fh.NegativeBuckets[i] = currentNegative } + + return fh +} + +func resize[T any](items []T, n int) []T { + if len(items) < n { + return make([]T, n) + } + return items[:n] } // 
Validate validates consistency between span and bucket slices. Also, buckets are checked diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index d5aed112a..9a64faaaa 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -408,9 +408,57 @@ func TestHistogramToFloat(t *testing.T) { }, NegativeBuckets: []int64{1, 2, -2, 1, -1, 0}, } - fh := h.ToFloat() + cases := []struct { + name string + fh *FloatHistogram + }{ + {name: "without prior float histogram"}, + {name: "prior float histogram with more buckets", fh: &FloatHistogram{ + Schema: 2, + Count: 3, + Sum: 5, + ZeroThreshold: 4, + ZeroCount: 1, + PositiveSpans: []Span{ + {Offset: 1, Length: 2}, + {Offset: 1, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}, + NegativeSpans: []Span{ + {Offset: 20, Length: 6}, + {Offset: 12, Length: 7}, + {Offset: 33, Length: 10}, + }, + NegativeBuckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }}, + {name: "prior float histogram with fewer buckets", fh: &FloatHistogram{ + Schema: 2, + Count: 3, + Sum: 5, + ZeroThreshold: 4, + ZeroCount: 1, + PositiveSpans: []Span{ + {Offset: 1, Length: 2}, + {Offset: 1, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []float64{1, 2}, + NegativeSpans: []Span{ + {Offset: 20, Length: 6}, + {Offset: 12, Length: 7}, + {Offset: 33, Length: 10}, + }, + NegativeBuckets: []float64{1, 2}, + }}, + } - require.Equal(t, h.String(), fh.String()) + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + fh := h.ToFloat(c.fh) + require.Equal(t, h.String(), fh.String()) + }) + } } // TestHistogramEquals tests both Histogram and FloatHistogram. 
@@ -436,14 +484,14 @@ func TestHistogramEquals(t *testing.T) { equals := func(h1, h2 Histogram) { require.True(t, h1.Equals(&h2)) require.True(t, h2.Equals(&h1)) - h1f, h2f := h1.ToFloat(), h2.ToFloat() + h1f, h2f := h1.ToFloat(nil), h2.ToFloat(nil) require.True(t, h1f.Equals(h2f)) require.True(t, h2f.Equals(h1f)) } notEquals := func(h1, h2 Histogram) { require.False(t, h1.Equals(&h2)) require.False(t, h2.Equals(&h1)) - h1f, h2f := h1.ToFloat(), h2.ToFloat() + h1f, h2f := h1.ToFloat(nil), h2.ToFloat(nil) require.False(t, h1f.Equals(h2f)) require.False(t, h2f.Equals(h1f)) } @@ -950,7 +998,7 @@ func TestHistogramValidation(t *testing.T) { return } - fh := tc.h.ToFloat() + fh := tc.h.ToFloat(nil) if err := fh.Validate(); tc.errMsg != "" { require.EqualError(t, err, tc.errMsg) } else { diff --git a/promql/engine_test.go b/promql/engine_test.go index b8fd19299..9ab54dd16 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3267,7 +3267,7 @@ func TestNativeHistogram_HistogramCountAndSum(t *testing.T) { app := storage.Appender(context.Background()) var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, h.ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, h, nil) } @@ -3287,7 +3287,7 @@ func TestNativeHistogram_HistogramCountAndSum(t *testing.T) { require.Len(t, vector, 1) require.Nil(t, vector[0].H) if floatHisto { - require.Equal(t, h.ToFloat().Count, vector[0].F) + require.Equal(t, h.ToFloat(nil).Count, vector[0].F) } else { require.Equal(t, float64(h.Count), vector[0].F) } @@ -3305,7 +3305,7 @@ func TestNativeHistogram_HistogramCountAndSum(t *testing.T) { require.Len(t, vector, 1) require.Nil(t, vector[0].H) if floatHisto { - require.Equal(t, h.ToFloat().Sum, vector[0].F) + require.Equal(t, h.ToFloat(nil).Sum, vector[0].F) } else { require.Equal(t, h.Sum, vector[0].F) } @@ -3433,7 +3433,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { app := 
storage.Appender(context.Background()) var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, tc.h.ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, tc.h.ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, tc.h, nil) } @@ -3678,7 +3678,7 @@ func TestNativeHistogram_HistogramQuantile(t *testing.T) { app := storage.Appender(context.Background()) var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) } @@ -4109,7 +4109,7 @@ func TestNativeHistogram_HistogramFraction(t *testing.T) { app := storage.Appender(context.Background()) var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) } @@ -4272,7 +4272,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { // Since we mutate h later, we need to create a copy here. var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) } @@ -4282,7 +4282,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { newTs := ts + int64(idx1)*int64(time.Minute/time.Millisecond) // Since we mutate h later, we need to create a copy here. if floatHisto { - _, err = app.AppendHistogram(0, lbls, newTs, nil, h.Copy().ToFloat()) + _, err = app.AppendHistogram(0, lbls, newTs, nil, h.Copy().ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, newTs, h.Copy(), nil) } @@ -4530,7 +4530,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { // Since we mutate h later, we need to create a copy here. 
var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) } @@ -4687,7 +4687,7 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { // Since we mutate h later, we need to create a copy here. var err error if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat()) + _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) } else { _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) } diff --git a/rules/manager_test.go b/rules/manager_test.go index 0c9a49e46..6418c5a37 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -1390,9 +1390,9 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { require.Equal(t, labels.FromStrings("__name__", "sum:histogram_metric"), s.Labels()) - expHist := hists[0].ToFloat() + expHist := hists[0].ToFloat(nil) for _, h := range hists[1:] { - expHist = expHist.Add(h.ToFloat()) + expHist = expHist.Add(h.ToFloat(nil)) } it := s.Iterator(nil) diff --git a/scrape/target_test.go b/scrape/target_test.go index 6631f328c..f37c75a76 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -565,7 +565,7 @@ func TestBucketLimitAppender(t *testing.T) { lbls := labels.FromStrings("__name__", "sparse_histogram_series") var err error if floatHisto { - fh := c.h.Copy().ToFloat() + fh := c.h.Copy().ToFloat(nil) _, err = app.AppendHistogram(0, lbls, ts, nil, fh) if c.expectError { require.Error(t, err) diff --git a/storage/buffer.go b/storage/buffer.go index d2d89e042..f686796ca 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -202,7 +202,7 @@ func (s hSample) H() *histogram.Histogram { } func (s hSample) FH() *histogram.FloatHistogram { - return s.h.ToFloat() + return s.h.ToFloat(nil) } func (s hSample) Type() chunkenc.ValueType { @@ -376,7 +376,7 @@ func (it *sampleRingIterator) AtHistogram() 
(int64, *histogram.Histogram) { func (it *sampleRingIterator) AtFloatHistogram() (int64, *histogram.FloatHistogram) { if it.fh == nil { - return it.t, it.h.ToFloat() + return it.t, it.h.ToFloat(nil) } return it.t, it.fh } diff --git a/storage/buffer_test.go b/storage/buffer_test.go index c2542f3d9..84f94a008 100644 --- a/storage/buffer_test.go +++ b/storage/buffer_test.go @@ -233,7 +233,7 @@ func TestBufferedSeriesIteratorMixedHistograms(t *testing.T) { histograms := tsdbutil.GenerateTestHistograms(2) it := NewBufferIterator(NewListSeriesIterator(samples{ - fhSample{t: 1, fh: histograms[0].ToFloat()}, + fhSample{t: 1, fh: histograms[0].ToFloat(nil)}, hSample{t: 2, h: histograms[1]}, }), 2) @@ -244,11 +244,11 @@ func TestBufferedSeriesIteratorMixedHistograms(t *testing.T) { require.Equal(t, chunkenc.ValFloatHistogram, buf.Next()) _, fh := buf.AtFloatHistogram() - require.Equal(t, histograms[0].ToFloat(), fh) + require.Equal(t, histograms[0].ToFloat(nil), fh) require.Equal(t, chunkenc.ValHistogram, buf.Next()) _, fh = buf.AtFloatHistogram() - require.Equal(t, histograms[1].ToFloat(), fh) + require.Equal(t, histograms[1].ToFloat(nil), fh) } func BenchmarkBufferedSeriesIterator(b *testing.B) { diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index 4df3c972e..d2a7d45be 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -57,7 +57,7 @@ var writeRequestFixture = &prompb.WriteRequest{ }, Samples: []prompb.Sample{{Value: 1, Timestamp: 0}}, Exemplars: []prompb.Exemplar{{Labels: []prompb.Label{{Name: "f", Value: "g"}}, Value: 1, Timestamp: 0}}, - Histograms: []prompb.Histogram{HistogramToHistogramProto(0, &testHistogram), FloatHistogramToHistogramProto(1, testHistogram.ToFloat())}, + Histograms: []prompb.Histogram{HistogramToHistogramProto(0, &testHistogram), FloatHistogramToHistogramProto(1, testHistogram.ToFloat(nil))}, }, { Labels: []prompb.Label{ @@ -69,7 +69,7 @@ var writeRequestFixture = &prompb.WriteRequest{ }, 
Samples: []prompb.Sample{{Value: 2, Timestamp: 1}}, Exemplars: []prompb.Exemplar{{Labels: []prompb.Label{{Name: "h", Value: "i"}}, Value: 2, Timestamp: 1}}, - Histograms: []prompb.Histogram{HistogramToHistogramProto(2, &testHistogram), FloatHistogramToHistogramProto(3, testHistogram.ToFloat())}, + Histograms: []prompb.Histogram{HistogramToHistogramProto(2, &testHistogram), FloatHistogramToHistogramProto(3, testHistogram.ToFloat(nil))}, }, }, } diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 16691a174..c878c750b 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -619,7 +619,7 @@ func createHistograms(numSamples, numSeries int, floatHistogram bool) ([]record. fh := record.RefFloatHistogramSample{ Ref: chunks.HeadSeriesRef(i), T: int64(j), - FH: hist.ToFloat(), + FH: hist.ToFloat(nil), } floatHistograms = append(floatHistograms, fh) } else { diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index b00fe891a..839009b2a 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -134,7 +134,7 @@ func TestOutOfOrderExemplar(t *testing.T) { func TestOutOfOrderHistogram(t *testing.T) { buf, _, err := buildWriteRequest([]prompb.TimeSeries{{ Labels: []prompb.Label{{Name: "__name__", Value: "test_metric"}}, - Histograms: []prompb.Histogram{HistogramToHistogramProto(0, &testHistogram), FloatHistogramToHistogramProto(1, testHistogram.ToFloat())}, + Histograms: []prompb.Histogram{HistogramToHistogramProto(0, &testHistogram), FloatHistogramToHistogramProto(1, testHistogram.ToFloat(nil))}, }}, nil, nil, nil) require.NoError(t, err) diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 778fcf708..7858e6b0c 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -656,7 +656,7 @@ func genHistogramSeries(totalSeries, labelCount int, mint, maxt, step int64, flo h.CounterResetHint = histogram.NotCounterReset } if 
floatHistogram { - return sample{t: ts, fh: h.ToFloat()} + return sample{t: ts, fh: h.ToFloat(nil)} } return sample{t: ts, h: h} }) @@ -692,7 +692,7 @@ func genHistogramAndFloatSeries(totalSeries, labelCount int, mint, maxt, step in h.CounterResetHint = histogram.NotCounterReset } if floatHistogram { - s = sample{t: ts, fh: h.ToFloat()} + s = sample{t: ts, fh: h.ToFloat(nil)} } else { s = sample{t: ts, h: h} } diff --git a/tsdb/chunkenc/float_histogram_test.go b/tsdb/chunkenc/float_histogram_test.go index 05b1fa0a1..6f5a95fb1 100644 --- a/tsdb/chunkenc/float_histogram_test.go +++ b/tsdb/chunkenc/float_histogram_test.go @@ -94,10 +94,10 @@ func TestFloatHistogramChunkSameBuckets(t *testing.T) { }, NegativeBuckets: []int64{2, 1, -1, -1}, // counts: 2, 3, 2, 1 (total 8) } - chk, _, app, err := app.AppendFloatHistogram(nil, ts, h.ToFloat(), false) + chk, _, app, err := app.AppendFloatHistogram(nil, ts, h.ToFloat(nil), false) require.NoError(t, err) require.Nil(t, chk) - exp = append(exp, floatResult{t: ts, h: h.ToFloat()}) + exp = append(exp, floatResult{t: ts, h: h.ToFloat(nil)}) require.Equal(t, 1, c.NumSamples()) // Add an updated histogram. 
@@ -108,10 +108,10 @@ func TestFloatHistogramChunkSameBuckets(t *testing.T) { h.Sum = 24.4 h.PositiveBuckets = []int64{5, -2, 1, -2} // counts: 5, 3, 4, 2 (total 14) h.NegativeBuckets = []int64{4, -1, 1, -1} // counts: 4, 3, 4, 4 (total 15) - chk, _, _, err = app.AppendFloatHistogram(nil, ts, h.ToFloat(), false) + chk, _, _, err = app.AppendFloatHistogram(nil, ts, h.ToFloat(nil), false) require.NoError(t, err) require.Nil(t, chk) - expH := h.ToFloat() + expH := h.ToFloat(nil) expH.CounterResetHint = histogram.NotCounterReset exp = append(exp, floatResult{t: ts, h: expH}) require.Equal(t, 2, c.NumSamples()) @@ -127,10 +127,10 @@ func TestFloatHistogramChunkSameBuckets(t *testing.T) { h.Sum = 24.4 h.PositiveBuckets = []int64{6, 1, -3, 6} // counts: 6, 7, 4, 10 (total 27) h.NegativeBuckets = []int64{5, 1, -2, 3} // counts: 5, 6, 4, 7 (total 22) - chk, _, _, err = app.AppendFloatHistogram(nil, ts, h.ToFloat(), false) + chk, _, _, err = app.AppendFloatHistogram(nil, ts, h.ToFloat(nil), false) require.NoError(t, err) require.Nil(t, chk) - expH = h.ToFloat() + expH = h.ToFloat(nil) expH.CounterResetHint = histogram.NotCounterReset exp = append(exp, floatResult{t: ts, h: expH}) require.Equal(t, 3, c.NumSamples()) @@ -217,7 +217,7 @@ func TestFloatHistogramChunkBucketChanges(t *testing.T) { NegativeBuckets: []int64{1}, } - chk, _, app, err := app.AppendFloatHistogram(nil, ts1, h1.ToFloat(), false) + chk, _, app, err := app.AppendFloatHistogram(nil, ts1, h1.ToFloat(nil), false) require.NoError(t, err) require.Nil(t, chk) require.Equal(t, 1, c.NumSamples()) @@ -245,13 +245,13 @@ func TestFloatHistogramChunkBucketChanges(t *testing.T) { h2.NegativeBuckets = []int64{2, -1} // 2 1 (total 3) // This is how span changes will be handled. 
hApp, _ := app.(*FloatHistogramAppender) - posInterjections, negInterjections, ok, cr := hApp.appendable(h2.ToFloat()) + posInterjections, negInterjections, ok, cr := hApp.appendable(h2.ToFloat(nil)) require.NotEmpty(t, posInterjections) require.NotEmpty(t, negInterjections) require.True(t, ok) // Only new buckets came in. require.False(t, cr) c, app = hApp.recode(posInterjections, negInterjections, h2.PositiveSpans, h2.NegativeSpans) - chk, _, _, err = app.AppendFloatHistogram(nil, ts2, h2.ToFloat(), false) + chk, _, _, err = app.AppendFloatHistogram(nil, ts2, h2.ToFloat(nil), false) require.NoError(t, err) require.Nil(t, chk) require.Equal(t, 2, c.NumSamples()) @@ -263,10 +263,10 @@ func TestFloatHistogramChunkBucketChanges(t *testing.T) { h1.PositiveBuckets = []int64{6, -3, -3, 3, -3, 0, 2, 2, 1, -5, 1} h1.NegativeSpans = h2.NegativeSpans h1.NegativeBuckets = []int64{0, 1} - expH2 := h2.ToFloat() + expH2 := h2.ToFloat(nil) expH2.CounterResetHint = histogram.NotCounterReset exp := []floatResult{ - {t: ts1, h: h1.ToFloat()}, + {t: ts1, h: h1.ToFloat(nil)}, {t: ts2, h: expH2}, } it := c.Iterator(nil) diff --git a/tsdb/chunkenc/histogram_test.go b/tsdb/chunkenc/histogram_test.go index 768ef2e6e..53aee89db 100644 --- a/tsdb/chunkenc/histogram_test.go +++ b/tsdb/chunkenc/histogram_test.go @@ -98,7 +98,7 @@ func TestHistogramChunkSameBuckets(t *testing.T) { chk, _, app, err := app.AppendHistogram(nil, ts, h, false) require.NoError(t, err) require.Nil(t, chk) - exp = append(exp, result{t: ts, h: h, fh: h.ToFloat()}) + exp = append(exp, result{t: ts, h: h, fh: h.ToFloat(nil)}) require.Equal(t, 1, c.NumSamples()) // Add an updated histogram. 
@@ -114,7 +114,7 @@ func TestHistogramChunkSameBuckets(t *testing.T) { require.Nil(t, chk) hExp := h.Copy() hExp.CounterResetHint = histogram.NotCounterReset - exp = append(exp, result{t: ts, h: hExp, fh: hExp.ToFloat()}) + exp = append(exp, result{t: ts, h: hExp, fh: hExp.ToFloat(nil)}) require.Equal(t, 2, c.NumSamples()) // Add update with new appender. @@ -133,7 +133,7 @@ func TestHistogramChunkSameBuckets(t *testing.T) { require.Nil(t, chk) hExp = h.Copy() hExp.CounterResetHint = histogram.NotCounterReset - exp = append(exp, result{t: ts, h: hExp, fh: hExp.ToFloat()}) + exp = append(exp, result{t: ts, h: hExp, fh: hExp.ToFloat(nil)}) require.Equal(t, 3, c.NumSamples()) // 1. Expand iterator in simple case. @@ -278,8 +278,8 @@ func TestHistogramChunkBucketChanges(t *testing.T) { hExp := h2.Copy() hExp.CounterResetHint = histogram.NotCounterReset exp := []result{ - {t: ts1, h: h1, fh: h1.ToFloat()}, - {t: ts2, h: hExp, fh: hExp.ToFloat()}, + {t: ts1, h: h1, fh: h1.ToFloat(nil)}, + {t: ts2, h: hExp, fh: hExp.ToFloat(nil)}, } it := c.Iterator(nil) var act []result diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 3eb997712..75d564139 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -1402,8 +1402,8 @@ func TestHeadCompactionWithHistograms(t *testing.T) { for tsMinute := from; tsMinute <= to; tsMinute++ { var err error if floatTest { - _, err = app.AppendHistogram(0, lbls, minute(tsMinute), nil, h.ToFloat()) - efh := h.ToFloat() + _, err = app.AppendHistogram(0, lbls, minute(tsMinute), nil, h.ToFloat(nil)) + efh := h.ToFloat(nil) if tsMinute == from { efh.CounterResetHint = histogram.UnknownCounterReset } else { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index cc219a20b..4dcdef858 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -519,7 +519,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) { }, PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, } - fh := h.ToFloat() + fh := h.ToFloat(nil) app = db.Appender(ctx) _, err = 
app.AppendHistogram(0, labels.FromStrings("a", "c"), 0, h.Copy(), nil) @@ -6392,8 +6392,8 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { var err error app := db.Appender(ctx) if floatHistogram { - _, err = app.AppendHistogram(0, lbls, minute(tsMinute), nil, h.ToFloat()) - efh := h.ToFloat() + _, err = app.AppendHistogram(0, lbls, minute(tsMinute), nil, h.ToFloat(nil)) + efh := h.ToFloat(nil) efh.CounterResetHint = expCRH *exp = append(*exp, sample{t: minute(tsMinute), fh: efh}) } else { @@ -6814,20 +6814,20 @@ func TestNativeHistogramFlag(t *testing.T) { // Disabled by default. _, err = app.AppendHistogram(0, l, 100, h, nil) require.Equal(t, storage.ErrNativeHistogramsDisabled, err) - _, err = app.AppendHistogram(0, l, 105, nil, h.ToFloat()) + _, err = app.AppendHistogram(0, l, 105, nil, h.ToFloat(nil)) require.Equal(t, storage.ErrNativeHistogramsDisabled, err) // Enable and append. db.EnableNativeHistograms() _, err = app.AppendHistogram(0, l, 200, h, nil) require.NoError(t, err) - _, err = app.AppendHistogram(0, l, 205, nil, h.ToFloat()) + _, err = app.AppendHistogram(0, l, 205, nil, h.ToFloat(nil)) require.NoError(t, err) db.DisableNativeHistograms() _, err = app.AppendHistogram(0, l, 300, h, nil) require.Equal(t, storage.ErrNativeHistogramsDisabled, err) - _, err = app.AppendHistogram(0, l, 305, nil, h.ToFloat()) + _, err = app.AppendHistogram(0, l, 305, nil, h.ToFloat(nil)) require.Equal(t, storage.ErrNativeHistogramsDisabled, err) require.NoError(t, app.Commit()) @@ -6836,7 +6836,7 @@ func TestNativeHistogramFlag(t *testing.T) { require.NoError(t, err) act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) require.Equal(t, map[string][]chunks.Sample{ - l.String(): {sample{t: 200, h: h}, sample{t: 205, fh: h.ToFloat()}}, + l.String(): {sample{t: 200, h: h}, sample{t: 205, fh: h.ToFloat(nil)}}, }, act) } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 65ed72834..d444e1496 100644 --- 
a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -4082,8 +4082,8 @@ func testHistogramStaleSampleHelper(t *testing.T, floatHistogram bool) { for _, h := range tsdbutil.GenerateTestHistograms(numHistograms) { var err error if floatHistogram { - _, err = app.AppendHistogram(0, l, 100*int64(len(expHistograms)), nil, h.ToFloat()) - expHistograms = append(expHistograms, timedHistogram{t: 100 * int64(len(expHistograms)), fh: h.ToFloat()}) + _, err = app.AppendHistogram(0, l, 100*int64(len(expHistograms)), nil, h.ToFloat(nil)) + expHistograms = append(expHistograms, timedHistogram{t: 100 * int64(len(expHistograms)), fh: h.ToFloat(nil)}) } else { _, err = app.AppendHistogram(0, l, 100*int64(len(expHistograms)), h, nil) expHistograms = append(expHistograms, timedHistogram{t: 100 * int64(len(expHistograms)), h: h}) @@ -4113,8 +4113,8 @@ func testHistogramStaleSampleHelper(t *testing.T, floatHistogram bool) { for _, h := range tsdbutil.GenerateTestHistograms(2 * numHistograms)[numHistograms:] { var err error if floatHistogram { - _, err = app.AppendHistogram(0, l, 100*int64(len(expHistograms)), nil, h.ToFloat()) - expHistograms = append(expHistograms, timedHistogram{t: 100 * int64(len(expHistograms)), fh: h.ToFloat()}) + _, err = app.AppendHistogram(0, l, 100*int64(len(expHistograms)), nil, h.ToFloat(nil)) + expHistograms = append(expHistograms, timedHistogram{t: 100 * int64(len(expHistograms)), fh: h.ToFloat(nil)}) } else { _, err = app.AppendHistogram(0, l, 100*int64(len(expHistograms)), h, nil) expHistograms = append(expHistograms, timedHistogram{t: 100 * int64(len(expHistograms)), h: h}) @@ -4160,7 +4160,7 @@ func TestHistogramCounterResetHeader(t *testing.T) { app := head.Appender(context.Background()) var err error if floatHisto { - _, err = app.AppendHistogram(0, l, ts, nil, h.ToFloat()) + _, err = app.AppendHistogram(0, l, ts, nil, h.ToFloat(nil)) } else { _, err = app.AppendHistogram(0, l, ts, h.Copy(), nil) } diff --git a/tsdb/record/record_test.go 
b/tsdb/record/record_test.go index 544fa5af2..57599ef6d 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -158,7 +158,7 @@ func TestRecord_EncodeDecode(t *testing.T) { floatHistograms[i] = RefFloatHistogramSample{ Ref: h.Ref, T: h.T, - FH: h.H.ToFloat(), + FH: h.H.ToFloat(nil), } } decFloatHistograms, err := dec.FloatHistogramSamples(enc.FloatHistogramSamples(floatHistograms, nil), nil) diff --git a/tsdb/wlog/watcher_test.go b/tsdb/wlog/watcher_test.go index fc665b57d..b30dce91a 100644 --- a/tsdb/wlog/watcher_test.go +++ b/tsdb/wlog/watcher_test.go @@ -199,7 +199,7 @@ func TestTailSamples(t *testing.T) { floatHistogram := enc.FloatHistogramSamples([]record.RefFloatHistogramSample{{ Ref: chunks.HeadSeriesRef(inner), T: now.UnixNano() + 1, - FH: hist.ToFloat(), + FH: hist.ToFloat(nil), }}, nil) require.NoError(t, w.Log(floatHistogram)) } diff --git a/web/federate.go b/web/federate.go index 2b79d0053..2e7bac21d 100644 --- a/web/federate.go +++ b/web/federate.go @@ -138,7 +138,7 @@ Loop: case chunkenc.ValFloat: f = sample.F() case chunkenc.ValHistogram: - fh = sample.H().ToFloat() + fh = sample.H().ToFloat(nil) case chunkenc.ValFloatHistogram: fh = sample.FH() default: diff --git a/web/federate_test.go b/web/federate_test.go index 80539861d..94783a739 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -354,7 +354,7 @@ func TestFederationWithNativeHistograms(t *testing.T) { _, err = app.AppendHistogram(0, l, 100*60*1000, histWithoutZeroBucket.Copy(), nil) expVec = append(expVec, promql.Sample{ T: 100 * 60 * 1000, - H: histWithoutZeroBucket.ToFloat(), + H: histWithoutZeroBucket.ToFloat(nil), Metric: expL, }) default: @@ -363,7 +363,7 @@ func TestFederationWithNativeHistograms(t *testing.T) { _, err = app.AppendHistogram(0, l, 100*60*1000, hist.Copy(), nil) expVec = append(expVec, promql.Sample{ T: 100 * 60 * 1000, - H: hist.ToFloat(), + H: hist.ToFloat(nil), Metric: expL, }) } From 9008271df57d4392d3e424feba80b394267428ef Mon Sep 17 
00:00:00 2001 From: Filip Petkovski Date: Fri, 8 Dec 2023 10:12:50 +0100 Subject: [PATCH 14/52] Use cap to determine slice capacity Signed-off-by: Filip Petkovski --- model/histogram/histogram.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 1fe342482..f4d292b34 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -321,7 +321,7 @@ func (h *Histogram) ToFloat(fh *FloatHistogram) *FloatHistogram { } func resize[T any](items []T, n int) []T { - if len(items) < n { + if cap(items) < n { return make([]T, n) } return items[:n] From acbaee8c9d7d780b6bf67128e435d049d0ea685c Mon Sep 17 00:00:00 2001 From: SuperQ Date: Fri, 8 Dec 2023 11:01:38 +0100 Subject: [PATCH 15/52] Sync golangci-lint version Update `scripts/golangci-lint.yml` golangci-lint version to match main workflow. * Add note to keep things in sync. Signed-off-by: SuperQ --- .github/workflows/ci.yml | 1 + scripts/golangci-lint.yml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 22d3f8ad5..8ba154e25 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,6 +151,7 @@ jobs: uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0 with: args: --verbose + # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. 
version: v1.55.2 fuzzing: uses: ./.github/workflows/fuzzing.yml diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index ffa6b3090..805c59fb7 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -29,4 +29,4 @@ jobs: - name: Lint uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0 with: - version: v1.54.2 + version: v1.55.2 From ee700151a3bc901bdf52b184dee60f477f1d6ea8 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 3 Jan 2023 19:11:00 +0000 Subject: [PATCH 16/52] tsdb/index: add benchmark for Postings.Merge Signed-off-by: Bryan Boreham --- tsdb/index/postings_test.go | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index c20b4506f..e8df6dbd2 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -380,6 +380,38 @@ func BenchmarkIntersect(t *testing.B) { }) } +func BenchmarkMerge(t *testing.B) { + var lps []*ListPostings + var refs [][]storage.SeriesRef + + // Create 100000 matchers(k=100000), making sure all memory allocation is done before starting the loop. + for i := 0; i < 100000; i++ { + var temp []storage.SeriesRef + for j := 1; j < 100; j++ { + temp = append(temp, storage.SeriesRef(i+j*100000)) + } + lps = append(lps, newListPostings(temp...)) + refs = append(refs, temp) + } + + its := make([]Postings, len(refs)) + for _, nSeries := range []int{1, 10, 100, 1000, 10000, 100000} { + t.Run(fmt.Sprint(nSeries), func(bench *testing.B) { + ctx := context.Background() + for i := 0; i < bench.N; i++ { + // Reset the ListPostings to their original values each time round the loop. 
+ for j := range refs[:nSeries] { + lps[j].list = refs[j] + its[j] = lps[j] + } + if err := consumePostings(Merge(ctx, its[:nSeries]...)); err != nil { + bench.Fatal(err) + } + } + }) + } +} + func TestMultiMerge(t *testing.T) { i1 := newListPostings(1, 2, 3, 4, 5, 6, 1000, 1001) i2 := newListPostings(2, 4, 5, 6, 7, 8, 999, 1001) @@ -481,7 +513,7 @@ func TestMergedPostings(t *testing.T) { m := Merge(ctx, c.in...) if c.res == EmptyPostings() { - require.Equal(t, EmptyPostings(), m) + require.False(t, m.Next()) return } From ab3a47b48905fb2dd91ea011480521b5f3671f95 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 4 Sep 2023 15:23:30 +0100 Subject: [PATCH 17/52] postings: use Loser Tree for merge It's faster. Note change to test - instead of requiring that the data structure is identical to `EmptyPostings()`, check that calling `Next()` returns false, which implies it was empty. Also the check for context cancellation during initialization was removed. Initialization should be a small portion of the work done during merge, so it's not worth plumbing a context argument through. 
Signed-off-by: Bryan Boreham --- go.mod | 1 + go.sum | 2 + tsdb/index/postings.go | 125 ++++++++++------------------------------- 3 files changed, 32 insertions(+), 96 deletions(-) diff --git a/go.mod b/go.mod index 95cc7b8af..f022f09e9 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/alecthomas/kingpin/v2 v2.4.0 github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 github.com/aws/aws-sdk-go v1.48.14 + github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 github.com/cespare/xxhash/v2 v2.2.0 github.com/dennwc/varint v1.0.0 github.com/digitalocean/godo v1.106.0 diff --git a/go.sum b/go.sum index d29694fb9..ae367bb90 100644 --- a/go.sum +++ b/go.sum @@ -94,6 +94,8 @@ github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2z github.com/aws/aws-sdk-go v1.48.14 h1:nVLrp+F84SG+xGiFMfe1TE6ZV6smF+42tuuNgYGV30s= github.com/aws/aws-sdk-go v1.48.14/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= +github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= +github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index c83957427..222a8b0d6 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -18,11 +18,13 @@ import ( "context" "encoding/binary" "fmt" + "math" "runtime" "sort" "strings" "sync" + "github.com/bboreham/go-loser" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/labels" @@ -525,7 +527,7 @@ func (it *intersectPostings) Err() error 
{ } // Merge returns a new iterator over the union of the input iterators. -func Merge(ctx context.Context, its ...Postings) Postings { +func Merge(_ context.Context, its ...Postings) Postings { if len(its) == 0 { return EmptyPostings() } @@ -533,122 +535,48 @@ func Merge(ctx context.Context, its ...Postings) Postings { return its[0] } - p, ok := newMergedPostings(ctx, its) + p, ok := newMergedPostings(its) if !ok { return EmptyPostings() } return p } -type postingsHeap []Postings - -func (h postingsHeap) Len() int { return len(h) } -func (h postingsHeap) Less(i, j int) bool { return h[i].At() < h[j].At() } -func (h *postingsHeap) Swap(i, j int) { (*h)[i], (*h)[j] = (*h)[j], (*h)[i] } - -func (h *postingsHeap) Push(x interface{}) { - *h = append(*h, x.(Postings)) -} - -func (h *postingsHeap) Pop() interface{} { - old := *h - n := len(old) - x := old[n-1] - *h = old[0 : n-1] - return x -} - type mergedPostings struct { - h postingsHeap - initialized bool - cur storage.SeriesRef - err error + p []Postings + h *loser.Tree[storage.SeriesRef, Postings] + cur storage.SeriesRef } -func newMergedPostings(ctx context.Context, p []Postings) (m *mergedPostings, nonEmpty bool) { - ph := make(postingsHeap, 0, len(p)) - - for _, it := range p { - // NOTE: mergedPostings struct requires the user to issue an initial Next. - switch { - case ctx.Err() != nil: - return &mergedPostings{err: ctx.Err()}, true - case it.Next(): - ph = append(ph, it) - case it.Err() != nil: - return &mergedPostings{err: it.Err()}, true - } - } - - if len(ph) == 0 { - return nil, false - } - return &mergedPostings{h: ph}, true +func newMergedPostings(p []Postings) (m *mergedPostings, nonEmpty bool) { + const maxVal = storage.SeriesRef(math.MaxUint64) // This value must be higher than all real values used in the tree. 
+ lt := loser.New(p, maxVal) + return &mergedPostings{p: p, h: lt}, true } func (it *mergedPostings) Next() bool { - if it.h.Len() == 0 || it.err != nil { - return false - } - - // The user must issue an initial Next. - if !it.initialized { - heap.Init(&it.h) - it.cur = it.h[0].At() - it.initialized = true - return true - } - for { - cur := it.h[0] - if !cur.Next() { - heap.Pop(&it.h) - if cur.Err() != nil { - it.err = cur.Err() - return false - } - if it.h.Len() == 0 { - return false - } - } else { - // Value of top of heap has changed, re-heapify. - heap.Fix(&it.h, 0) + if !it.h.Next() { + return false } - - if it.h[0].At() != it.cur { - it.cur = it.h[0].At() + // Remove duplicate entries. + newItem := it.h.At() + if newItem != it.cur { + it.cur = newItem return true } } } func (it *mergedPostings) Seek(id storage.SeriesRef) bool { - if it.h.Len() == 0 || it.err != nil { + for !it.h.IsEmpty() && it.h.At() < id { + finished := !it.h.Winner().Seek(id) + it.h.Fix(finished) + } + if it.h.IsEmpty() { return false } - if !it.initialized { - if !it.Next() { - return false - } - } - for it.cur < id { - cur := it.h[0] - if !cur.Seek(id) { - heap.Pop(&it.h) - if cur.Err() != nil { - it.err = cur.Err() - return false - } - if it.h.Len() == 0 { - return false - } - } else { - // Value of top of heap has changed, re-heapify. 
- heap.Fix(&it.h, 0) - } - - it.cur = it.h[0].At() - } + it.cur = it.h.At() return true } @@ -657,7 +585,12 @@ func (it mergedPostings) At() storage.SeriesRef { } func (it mergedPostings) Err() error { - return it.err + for _, p := range it.p { + if err := p.Err(); err != nil { + return err + } + } + return nil } // Without returns a new postings list that contains all elements from the full list that From e2a9f8ac0f2d2b0c1565241ca46b4d3b6dc22007 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Sun, 10 Dec 2023 21:07:19 +0100 Subject: [PATCH 18/52] Reuse float histogram objects This commit reduces the memory needed to query native histogram objects by reusing existing HPoint instances. Signed-off-by: Filip Petkovski --- promql/engine.go | 33 ++++++++++++++++++++++----------- storage/buffer.go | 8 ++++---- storage/buffer_test.go | 4 ++-- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 4fa2a513b..8b4987921 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2078,21 +2078,27 @@ loop: case chunkenc.ValNone: break loop case chunkenc.ValFloatHistogram, chunkenc.ValHistogram: - t, h := buf.AtFloatHistogram() - if value.IsStaleNaN(h.Sum) { - continue loop - } + t := buf.AtT() // Values in the buffer are guaranteed to be smaller than maxt. 
if t >= mintHistograms { - if ev.currentSamples >= ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - point := HPoint{T: t, H: h} if histograms == nil { histograms = getHPointSlice(16) } - histograms = append(histograms, point) - ev.currentSamples += point.size() + n := len(histograms) + if n < cap(histograms) { + histograms = histograms[:n+1] + } else { + histograms = append(histograms, HPoint{}) + } + histograms[n].T, histograms[n].H = buf.AtFloatHistogram(histograms[n].H) + if value.IsStaleNaN(histograms[n].H.Sum) { + histograms = histograms[:n] + continue loop + } + if ev.currentSamples >= ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + ev.currentSamples += histograms[n].size() } case chunkenc.ValFloat: t, f := buf.At() @@ -2108,7 +2114,12 @@ loop: if floats == nil { floats = getFPointSlice(16) } - floats = append(floats, FPoint{T: t, F: f}) + if n := len(floats); n < cap(floats) { + floats = floats[:n+1] + floats[n].T, floats[n].F = t, f + } else { + floats = append(floats, FPoint{T: t, F: f}) + } } } } diff --git a/storage/buffer.go b/storage/buffer.go index f686796ca..49dfbd299 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -74,7 +74,7 @@ func (b *BufferedSeriesIterator) PeekBack(n int) (sample chunks.Sample, ok bool) // Buffer returns an iterator over the buffered data. Invalidates previously // returned iterators. -func (b *BufferedSeriesIterator) Buffer() chunkenc.Iterator { +func (b *BufferedSeriesIterator) Buffer() *sampleRingIterator { return b.buf.iterator() } @@ -304,7 +304,7 @@ func (r *sampleRing) reset() { } // Returns the current iterator. Invalidates previously returned iterators. 
-func (r *sampleRing) iterator() chunkenc.Iterator { +func (r *sampleRing) iterator() *sampleRingIterator { r.it.r = r r.it.i = -1 return &r.it @@ -374,9 +374,9 @@ func (it *sampleRingIterator) AtHistogram() (int64, *histogram.Histogram) { return it.t, it.h } -func (it *sampleRingIterator) AtFloatHistogram() (int64, *histogram.FloatHistogram) { +func (it *sampleRingIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { if it.fh == nil { - return it.t, it.h.ToFloat(nil) + return it.t, it.h.ToFloat(fh) } return it.t, it.fh } diff --git a/storage/buffer_test.go b/storage/buffer_test.go index 84f94a008..12e6ff0f0 100644 --- a/storage/buffer_test.go +++ b/storage/buffer_test.go @@ -243,11 +243,11 @@ func TestBufferedSeriesIteratorMixedHistograms(t *testing.T) { buf := it.Buffer() require.Equal(t, chunkenc.ValFloatHistogram, buf.Next()) - _, fh := buf.AtFloatHistogram() + _, fh := buf.AtFloatHistogram(nil) require.Equal(t, histograms[0].ToFloat(nil), fh) require.Equal(t, chunkenc.ValHistogram, buf.Next()) - _, fh = buf.AtFloatHistogram() + _, fh = buf.AtFloatHistogram(nil) require.Equal(t, histograms[1].ToFloat(nil), fh) } From 5082655392fe2eb036f24da51b54ca050496368a Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Mon, 11 Dec 2023 05:43:42 -0300 Subject: [PATCH 19/52] Append Created Timestamps (#12733) * Append created timestamps. Signed-off-by: Arthur Silva Sens * Log when created timestamps are ignored Signed-off-by: Arthur Silva Sens * Proposed changes to Append CT PR. Changes: * Changed textparse Parser interface for consistency and robustness. * Changed CT interface to be more explicit and handle validation. * Simplified test, change scrapeManager to allow testability. * Added TODOs. Signed-off-by: bwplotka * Updates. Signed-off-by: bwplotka * Addressed comments. 
Signed-off-by: bwplotka * Refactor head_appender test Signed-off-by: Arthur Silva Sens * Fix linter issues Signed-off-by: Arthur Silva Sens * Use model.Sample in head appender test Signed-off-by: Arthur Silva Sens --------- Signed-off-by: Arthur Silva Sens Signed-off-by: bwplotka Co-authored-by: bwplotka --- cmd/prometheus/main.go | 14 ++- config/config.go | 9 +- model/textparse/interface.go | 10 +- model/textparse/openmetricsparse.go | 9 +- model/textparse/promparse.go | 9 +- model/textparse/protobufparse.go | 22 ++-- model/textparse/protobufparse_test.go | 34 +++--- scrape/helpers_test.go | 57 ++++++++++ scrape/manager.go | 6 + scrape/manager_test.go | 153 ++++++++++++++++++++++++++ scrape/scrape.go | 39 +++++-- scrape/scrape_test.go | 8 +- storage/fanout.go | 14 +++ storage/interface.go | 26 +++++ storage/remote/write.go | 5 + storage/remote/write_handler_test.go | 5 + tsdb/agent/db.go | 5 + tsdb/head.go | 4 + tsdb/head_append.go | 95 +++++++++++++--- tsdb/head_test.go | 91 +++++++++++++++ util/runutil/runutil.go | 37 +++++++ 21 files changed, 578 insertions(+), 74 deletions(-) create mode 100644 util/runutil/runutil.go diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index dfafe66c6..106f9d05c 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -206,9 +206,15 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "native-histograms": c.tsdb.EnableNativeHistograms = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. 
- config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols - config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols + config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + case "created-timestamp-zero-ingestion": + c.scrape.EnableCreatedTimestampZeroIngestion = true + // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. + config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. 
Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) case "": continue case "promql-at-modifier", "promql-negative-offset": @@ -1449,6 +1455,10 @@ func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, return 0, tsdb.ErrNotReady } +func (n notReadyAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + return 0, tsdb.ErrNotReady +} + func (n notReadyAppender) Commit() error { return tsdb.ErrNotReady } func (n notReadyAppender) Rollback() error { return tsdb.ErrNotReady } diff --git a/config/config.go b/config/config.go index b832ac9a1..ddcca84dc 100644 --- a/config/config.go +++ b/config/config.go @@ -454,12 +454,19 @@ var ( OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0", } + // DefaultScrapeProtocols is the set of scrape protocols that will be proposed + // to scrape target, ordered by priority. DefaultScrapeProtocols = []ScrapeProtocol{ OpenMetricsText1_0_0, OpenMetricsText0_0_1, PrometheusText0_0_4, } - DefaultNativeHistogramScrapeProtocols = []ScrapeProtocol{ + + // DefaultProtoFirstScrapeProtocols is like DefaultScrapeProtocols, but it + // favors protobuf Prometheus exposition format. + // Used by default for certain feature-flags like + // "native-histograms" and "created-timestamp-zero-ingestion". 
+ DefaultProtoFirstScrapeProtocols = []ScrapeProtocol{ PrometheusProto, OpenMetricsText1_0_0, OpenMetricsText0_0_1, diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 2f5fdbc3b..df4259c85 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -16,8 +16,6 @@ package textparse import ( "mime" - "github.com/gogo/protobuf/types" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -66,10 +64,10 @@ type Parser interface { // retrieved (including the case where no exemplars exist at all). Exemplar(l *exemplar.Exemplar) bool - // CreatedTimestamp writes the created timestamp of the current sample - // into the passed timestamp. It returns false if no created timestamp - // exists or if the metric type does not support created timestamps. - CreatedTimestamp(ct *types.Timestamp) bool + // CreatedTimestamp returns the created timestamp (in milliseconds) for the + // current sample. It returns nil if it is unknown e.g. if it wasn't set, + // if the scrape protocol or metric type does not support created timestamps. + CreatedTimestamp() *int64 // Next advances the parser to the next sample. It returns false if no // more samples were read or an error occurred. diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index bb5075544..f0c383723 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -24,8 +24,6 @@ import ( "strings" "unicode/utf8" - "github.com/gogo/protobuf/types" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -213,9 +211,10 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool { return true } -// CreatedTimestamp returns false because OpenMetricsParser does not support created timestamps (yet). 
-func (p *OpenMetricsParser) CreatedTimestamp(_ *types.Timestamp) bool { - return false +// CreatedTimestamp returns nil as it's not implemented yet. +// TODO(bwplotka): https://github.com/prometheus/prometheus/issues/12980 +func (p *OpenMetricsParser) CreatedTimestamp() *int64 { + return nil } // nextToken returns the next token from the openMetricsLexer. diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index b3fa2d8a6..935801fb9 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -26,8 +26,6 @@ import ( "unicode/utf8" "unsafe" - "github.com/gogo/protobuf/types" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -247,9 +245,10 @@ func (p *PromParser) Exemplar(*exemplar.Exemplar) bool { return false } -// CreatedTimestamp returns false because PromParser does not support created timestamps. -func (p *PromParser) CreatedTimestamp(_ *types.Timestamp) bool { - return false +// CreatedTimestamp returns nil as it's not implemented yet. +// TODO(bwplotka): https://github.com/prometheus/prometheus/issues/12980 +func (p *PromParser) CreatedTimestamp() *int64 { + return nil } // nextToken returns the next token from the promlexer. It skips over tabs diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index 23afb5c59..baede7e1d 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -360,22 +360,26 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool { return true } -func (p *ProtobufParser) CreatedTimestamp(ct *types.Timestamp) bool { - var foundCT *types.Timestamp +// CreatedTimestamp returns CT or nil if CT is not present or +// invalid (as timestamp e.g. negative value) on counters, summaries or histograms. 
+func (p *ProtobufParser) CreatedTimestamp() *int64 { + var ct *types.Timestamp switch p.mf.GetType() { case dto.MetricType_COUNTER: - foundCT = p.mf.GetMetric()[p.metricPos].GetCounter().GetCreatedTimestamp() + ct = p.mf.GetMetric()[p.metricPos].GetCounter().GetCreatedTimestamp() case dto.MetricType_SUMMARY: - foundCT = p.mf.GetMetric()[p.metricPos].GetSummary().GetCreatedTimestamp() + ct = p.mf.GetMetric()[p.metricPos].GetSummary().GetCreatedTimestamp() case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: - foundCT = p.mf.GetMetric()[p.metricPos].GetHistogram().GetCreatedTimestamp() + ct = p.mf.GetMetric()[p.metricPos].GetHistogram().GetCreatedTimestamp() default: } - if foundCT == nil { - return false + ctAsTime, err := types.TimestampFromProto(ct) + if err != nil { + // Errors means ct == nil or invalid timestamp, which we silently ignore. + return nil } - *ct = *foundCT - return true + ctMilis := ctAsTime.UnixMilli() + return &ctMilis } // Next advances the parser to the next "sample" (emulating the behavior of a diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index e062e64dd..c5b672dbc 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -21,7 +21,6 @@ import ( "testing" "github.com/gogo/protobuf/proto" - "github.com/gogo/protobuf/types" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" @@ -630,7 +629,7 @@ func TestProtobufParse(t *testing.T) { shs *histogram.Histogram fhs *histogram.FloatHistogram e []exemplar.Exemplar - ct *types.Timestamp + ct int64 } inputBuf := createTestProtoBuf(t) @@ -1069,7 +1068,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_counter_with_createdtimestamp", v: 42, - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), @@ -1085,7 +1084,7 @@ func TestProtobufParse(t *testing.T) { { m: 
"test_summary_with_createdtimestamp_count", v: 42, - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), @@ -1093,7 +1092,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), @@ -1108,7 +1107,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_createdtimestamp", - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -1128,7 +1127,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gaugehistogram_with_createdtimestamp", - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -1887,7 +1886,7 @@ func TestProtobufParse(t *testing.T) { { // 83 m: "test_counter_with_createdtimestamp", v: 42, - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), @@ -1903,7 +1902,7 @@ func TestProtobufParse(t *testing.T) { { // 86 m: "test_summary_with_createdtimestamp_count", v: 42, - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), @@ -1911,7 +1910,7 @@ func TestProtobufParse(t *testing.T) { { // 87 m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), @@ -1926,7 +1925,7 @@ func TestProtobufParse(t *testing.T) { }, { // 90 m: "test_histogram_with_createdtimestamp", - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, shs: &histogram.Histogram{ 
CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -1946,7 +1945,7 @@ func TestProtobufParse(t *testing.T) { }, { // 93 m: "test_gaugehistogram_with_createdtimestamp", - ct: &types.Timestamp{Seconds: 1, Nanos: 1}, + ct: 1000, shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -1981,10 +1980,9 @@ func TestProtobufParse(t *testing.T) { m, ts, v := p.Series() var e exemplar.Exemplar - var ct types.Timestamp p.Metric(&res) eFound := p.Exemplar(&e) - ctFound := p.CreatedTimestamp(&ct) + ct := p.CreatedTimestamp() require.Equal(t, exp[i].m, string(m), "i: %d", i) if ts != nil { require.Equal(t, exp[i].t, *ts, "i: %d", i) @@ -2000,11 +1998,11 @@ func TestProtobufParse(t *testing.T) { require.Equal(t, exp[i].e[0], e, "i: %d", i) require.False(t, p.Exemplar(&e), "too many exemplars returned, i: %d", i) } - if exp[i].ct != nil { - require.True(t, ctFound, "i: %d", i) - require.Equal(t, exp[i].ct.String(), ct.String(), "i: %d", i) + if exp[i].ct != 0 { + require.NotNilf(t, ct, "i: %d", i) + require.Equal(t, exp[i].ct, *ct, "i: %d", i) } else { - require.False(t, ctFound, "i: %d", i) + require.Nilf(t, ct, "i: %d", i) } case EntryHistogram: diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index c580a5051..43ee0fcec 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -14,10 +14,18 @@ package scrape import ( + "bytes" "context" + "encoding/binary" "fmt" "math/rand" "strings" + "sync" + "testing" + + "github.com/gogo/protobuf/proto" + dto "github.com/prometheus/client_model/go" + "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" @@ -50,6 +58,10 @@ func (a nopAppender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.M return 0, nil } +func (a nopAppender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { + return 0, nil +} + 
func (a nopAppender) Commit() error { return nil } func (a nopAppender) Rollback() error { return nil } @@ -65,9 +77,19 @@ type histogramSample struct { fh *histogram.FloatHistogram } +type collectResultAppendable struct { + *collectResultAppender +} + +func (a *collectResultAppendable) Appender(_ context.Context) storage.Appender { + return a +} + // collectResultAppender records all samples that were added through the appender. // It can be used as its zero value or be backed by another appender it writes samples through. type collectResultAppender struct { + mtx sync.Mutex + next storage.Appender resultFloats []floatSample pendingFloats []floatSample @@ -82,6 +104,8 @@ type collectResultAppender struct { } func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() a.pendingFloats = append(a.pendingFloats, floatSample{ metric: lset, t: t, @@ -103,6 +127,8 @@ func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels } func (a *collectResultAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() a.pendingExemplars = append(a.pendingExemplars, e) if a.next == nil { return 0, nil @@ -112,6 +138,8 @@ func (a *collectResultAppender) AppendExemplar(ref storage.SeriesRef, l labels.L } func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) if a.next == nil { return 0, nil @@ -121,6 +149,8 @@ func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels. 
} func (a *collectResultAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() a.pendingMetadata = append(a.pendingMetadata, m) if ref == 0 { ref = storage.SeriesRef(rand.Uint64()) @@ -132,7 +162,13 @@ func (a *collectResultAppender) UpdateMetadata(ref storage.SeriesRef, l labels.L return a.next.UpdateMetadata(ref, l, m) } +func (a *collectResultAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + return a.Append(ref, l, ct, 0.0) +} + func (a *collectResultAppender) Commit() error { + a.mtx.Lock() + defer a.mtx.Unlock() a.resultFloats = append(a.resultFloats, a.pendingFloats...) a.resultExemplars = append(a.resultExemplars, a.pendingExemplars...) a.resultHistograms = append(a.resultHistograms, a.pendingHistograms...) @@ -148,6 +184,8 @@ func (a *collectResultAppender) Commit() error { } func (a *collectResultAppender) Rollback() error { + a.mtx.Lock() + defer a.mtx.Unlock() a.rolledbackFloats = a.pendingFloats a.rolledbackHistograms = a.pendingHistograms a.pendingFloats = nil @@ -171,3 +209,22 @@ func (a *collectResultAppender) String() string { } return sb.String() } + +// protoMarshalDelimited marshals a MetricFamily into a delimited +// Prometheus proto exposition format bytes (known as 'encoding=delimited`) +// +// See also https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers +func protoMarshalDelimited(t *testing.T, mf *dto.MetricFamily) []byte { + t.Helper() + + protoBuf, err := proto.Marshal(mf) + require.NoError(t, err) + + varintBuf := make([]byte, binary.MaxVarintLen32) + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + + buf := &bytes.Buffer{} + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + return buf.Bytes() +} diff --git a/scrape/manager.go b/scrape/manager.go index 3b70e48a1..faa46f54d 100644 --- a/scrape/manager.go +++ 
b/scrape/manager.go @@ -78,9 +78,15 @@ type Options struct { EnableMetadataStorage bool // Option to increase the interval used by scrape manager to throttle target groups updates. DiscoveryReloadInterval model.Duration + // Option to enable the ingestion of the created timestamp as a synthetic zero sample. + // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md + EnableCreatedTimestampZeroIngestion bool // Optional HTTP client options to use when scraping. HTTPClientOptions []config_util.HTTPClientOption + + // private option for testability. + skipOffsetting bool } // Manager maintains a set of scrape pools and manages start/stop cycles diff --git a/scrape/manager_test.go b/scrape/manager_test.go index a689c469d..524424269 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -15,14 +15,23 @@ package scrape import ( "context" + "fmt" "net/http" + "net/http/httptest" + "net/url" + "os" "strconv" + "sync" "testing" "time" + "github.com/go-kit/log" + "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" @@ -30,6 +39,7 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/util/runutil" ) func TestPopulateLabels(t *testing.T) { @@ -714,3 +724,146 @@ scrape_configs: reload(scrapeManager, cfg2) require.ElementsMatch(t, []string{"job1", "job3"}, scrapeManager.ScrapePools()) } + +// TestManagerCTZeroIngestion tests scrape manager for CT cases. 
+func TestManagerCTZeroIngestion(t *testing.T) { + const mName = "expected_counter" + + for _, tc := range []struct { + name string + counterSample *dto.Counter + enableCTZeroIngestion bool + + expectedValues []float64 + }{ + { + name: "disabled with CT on counter", + counterSample: &dto.Counter{ + Value: proto.Float64(1.0), + // Timestamp does not matter as long as it exists in this test. + CreatedTimestamp: timestamppb.Now(), + }, + expectedValues: []float64{1.0}, + }, + { + name: "enabled with CT on counter", + counterSample: &dto.Counter{ + Value: proto.Float64(1.0), + // Timestamp does not matter as long as it exists in this test. + CreatedTimestamp: timestamppb.Now(), + }, + enableCTZeroIngestion: true, + expectedValues: []float64{0.0, 1.0}, + }, + { + name: "enabled without CT on counter", + counterSample: &dto.Counter{ + Value: proto.Float64(1.0), + }, + enableCTZeroIngestion: true, + expectedValues: []float64{1.0}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + app := &collectResultAppender{} + scrapeManager, err := NewManager( + &Options{ + EnableCreatedTimestampZeroIngestion: tc.enableCTZeroIngestion, + skipOffsetting: true, + }, + log.NewLogfmtLogger(os.Stderr), + &collectResultAppendable{app}, + prometheus.NewRegistry(), + ) + require.NoError(t, err) + + require.NoError(t, scrapeManager.ApplyConfig(&config.Config{ + GlobalConfig: config.GlobalConfig{ + // Disable regular scrapes. + ScrapeInterval: model.Duration(9999 * time.Minute), + ScrapeTimeout: model.Duration(5 * time.Second), + // Ensure the proto is chosen. We need proto as it's the only protocol + // with the CT parsing support. + ScrapeProtocols: []config.ScrapeProtocol{config.PrometheusProto}, + }, + ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test"}}, + })) + + once := sync.Once{} + // Start fake HTTP target to that allow one scrape only. 
+ server := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fail := true + once.Do(func() { + fail = false + w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) + + ctrType := dto.MetricType_COUNTER + w.Write(protoMarshalDelimited(t, &dto.MetricFamily{ + Name: proto.String(mName), + Type: &ctrType, + Metric: []*dto.Metric{{Counter: tc.counterSample}}, + })) + }) + + if fail { + w.WriteHeader(http.StatusInternalServerError) + } + }), + ) + defer server.Close() + + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + + // Add fake target directly into tsets + reload. Normally users would use + // Manager.Run and wait for minimum 5s refresh interval. + scrapeManager.updateTsets(map[string][]*targetgroup.Group{ + "test": {{ + Targets: []model.LabelSet{{ + model.SchemeLabel: model.LabelValue(serverURL.Scheme), + model.AddressLabel: model.LabelValue(serverURL.Host), + }}, + }}, + }) + scrapeManager.reload() + + // Wait for one scrape. 
+ ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error { + if countFloatSamples(app, mName) != len(tc.expectedValues) { + return fmt.Errorf("expected %v samples", tc.expectedValues) + } + return nil + }), "after 1 minute") + scrapeManager.Stop() + + require.Equal(t, tc.expectedValues, getResultFloats(app, mName)) + }) + } +} + +func countFloatSamples(a *collectResultAppender, expectedMetricName string) (count int) { + a.mtx.Lock() + defer a.mtx.Unlock() + + for _, f := range a.resultFloats { + if f.metric.Get(model.MetricNameLabel) == expectedMetricName { + count++ + } + } + return count +} + +func getResultFloats(app *collectResultAppender, expectedMetricName string) (result []float64) { + app.mtx.Lock() + defer app.mtx.Unlock() + + for _, f := range app.resultFloats { + if f.metric.Get(model.MetricNameLabel) == expectedMetricName { + result = append(result, f.f) + } + } + return result +} diff --git a/scrape/scrape.go b/scrape/scrape.go index 9a0ba1d00..be27a5d48 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -106,9 +106,10 @@ type scrapeLoopOptions struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool - mrc []*relabel.Config - cache *scrapeCache - enableCompression bool + + mrc []*relabel.Config + cache *scrapeCache + enableCompression bool } const maxAheadTime = 10 * time.Minute @@ -168,11 +169,13 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.interval, opts.timeout, opts.scrapeClassicHistograms, + options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, options.EnableMetadataStorage, opts.target, options.PassMetadataInContext, metrics, + options.skipOffsetting, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -787,6 +790,7 @@ type scrapeLoop struct { interval time.Duration timeout 
time.Duration scrapeClassicHistograms bool + enableCTZeroIngestion bool appender func(ctx context.Context) storage.Appender sampleMutator labelsMutator @@ -804,6 +808,8 @@ type scrapeLoop struct { appendMetadataToWAL bool metrics *scrapeMetrics + + skipOffsetting bool // For testability. } // scrapeCache tracks mappings of exposed metric strings to label sets and @@ -1076,11 +1082,13 @@ func newScrapeLoop(ctx context.Context, interval time.Duration, timeout time.Duration, scrapeClassicHistograms bool, + enableCTZeroIngestion bool, reportExtraMetrics bool, appendMetadataToWAL bool, target *Target, passMetadataInContext bool, metrics *scrapeMetrics, + skipOffsetting bool, ) *scrapeLoop { if l == nil { l = log.NewNopLogger() @@ -1124,9 +1132,11 @@ func newScrapeLoop(ctx context.Context, interval: interval, timeout: timeout, scrapeClassicHistograms: scrapeClassicHistograms, + enableCTZeroIngestion: enableCTZeroIngestion, reportExtraMetrics: reportExtraMetrics, appendMetadataToWAL: appendMetadataToWAL, metrics: metrics, + skipOffsetting: skipOffsetting, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1134,12 +1144,14 @@ func newScrapeLoop(ctx context.Context, } func (sl *scrapeLoop) run(errc chan<- error) { - select { - case <-time.After(sl.scraper.offset(sl.interval, sl.offsetSeed)): - // Continue after a scraping offset. - case <-sl.ctx.Done(): - close(sl.stopped) - return + if !sl.skipOffsetting { + select { + case <-time.After(sl.scraper.offset(sl.interval, sl.offsetSeed)): + // Continue after a scraping offset. + case <-sl.ctx.Done(): + close(sl.stopped) + return + } } var last time.Time @@ -1557,6 +1569,15 @@ loop: updateMetadata(lset, true) } + if ctMs := p.CreatedTimestamp(); sl.enableCTZeroIngestion && ctMs != nil { + ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. + // CT is an experimental feature. 
For now, we don't need to fail the + // scrape on errors updating the created timestamp, log debug. + level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + } + } + if isHistogram { if h != nil { ref, err = app.AppendHistogram(ref, lset, t, h, nil) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 8dee1f2c7..90578f2e9 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -660,9 +660,11 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, nil, false, newTestScrapeMetrics(t), + false, ) } @@ -801,9 +803,11 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, + false, ) // The loop must terminate during the initial offset if the context @@ -945,9 +949,11 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, + false, ) defer cancel() @@ -2377,7 +2383,7 @@ func TestTargetScraperScrapeOK(t *testing.T) { runTest(acceptHeader(config.DefaultScrapeProtocols)) protobufParsing = true - runTest(acceptHeader(config.DefaultNativeHistogramScrapeProtocols)) + runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols)) } func TestTargetScrapeScrapeCancel(t *testing.T) { diff --git a/storage/fanout.go b/storage/fanout.go index 33257046f..a9a3f904b 100644 --- a/storage/fanout.go +++ b/storage/fanout.go @@ -202,6 +202,20 @@ func (f *fanoutAppender) UpdateMetadata(ref SeriesRef, l labels.Labels, m metada return ref, nil } +func (f *fanoutAppender) AppendCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64) (SeriesRef, error) { + ref, err := f.primary.AppendCTZeroSample(ref, l, t, ct) + if err != nil { + return ref, err + } + + for _, appender := range f.secondaries { + if _, err := appender.AppendCTZeroSample(ref, l, t, ct); err != nil { + return 0, err + } + } + return ref, nil +} + func (f *fanoutAppender) Commit() (err 
error) { err = f.primary.Commit() diff --git a/storage/interface.go b/storage/interface.go index 2b1b6a63e..675e44c0e 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -43,6 +43,13 @@ var ( ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") + + // ErrOutOfOrderCT indicates failed append of CT to the storage + // due to CT being older the then newer sample. + // NOTE(bwplotka): This can be both an instrumentation failure or commonly expected + // behaviour, and we currently don't have a way to determine this. As a result + // it's recommended to ignore this error for now. + ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") ) // SeriesRef is a generic series reference. In prometheus it is either a @@ -237,6 +244,7 @@ type Appender interface { ExemplarAppender HistogramAppender MetadataUpdater + CreatedTimestampAppender } // GetRef is an extra interface on Appenders used by downstream projects @@ -294,6 +302,24 @@ type MetadataUpdater interface { UpdateMetadata(ref SeriesRef, l labels.Labels, m metadata.Metadata) (SeriesRef, error) } +// CreatedTimestampAppender provides an interface for appending CT to storage. +type CreatedTimestampAppender interface { + // AppendCTZeroSample adds synthetic zero sample for the given ct timestamp, + // which will be associated with given series, labels and the incoming + // sample's t (timestamp). AppendCTZeroSample returns error if zero sample can't be + // appended, for example when ct is too old, or when it would collide with + // incoming sample (sample has priority). + // + // AppendCTZeroSample has to be called before the corresponding sample Append. 
+ // A series reference number is returned which can be used to modify the + // CT for the given series in the same or later transactions. + // Returned reference numbers are ephemeral and may be rejected in calls + // to AppendCTZeroSample() at any point. + // + // If the reference is 0 it must not be used for caching. + AppendCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64) (SeriesRef, error) +} + // SeriesSet contains a set of series. type SeriesSet interface { Next() bool diff --git a/storage/remote/write.go b/storage/remote/write.go index 237f8caa9..66455cb4d 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -303,6 +303,11 @@ func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, return 0, nil } +func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { + // AppendCTZeroSample is no-op for remote-write for now. + return 0, nil +} + // Commit implements storage.Appender. func (t *timestampTracker) Commit() error { t.writeStorage.samplesIn.incr(t.samples + t.exemplars + t.histograms) diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index 839009b2a..fd5b34ecd 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -339,3 +339,8 @@ func (m *mockAppendable) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ // UpdateMetadata is no-op for remote write (where mockAppendable is being used to test) for now. return 0, nil } + +func (m *mockAppendable) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { + // AppendCTZeroSample is no-op for remote-write for now. 
+ return 0, nil +} diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 6afef1389..557fb7854 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -962,6 +962,11 @@ func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Met return 0, nil } +func (a *appender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { + // TODO(bwplotka): Wire metadata in the Agent's appender. + return 0, nil +} + // Commit submits the collected samples and purges the batch. func (a *appender) Commit() error { if err := a.log(); err != nil { diff --git a/tsdb/head.go b/tsdb/head.go index 3ff2bee71..848357359 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -149,6 +149,10 @@ type HeadOptions struct { // EnableNativeHistograms enables the ingestion of native histograms. EnableNativeHistograms atomic.Bool + // EnableCreatedTimestampZeroIngestion enables the ingestion of the created timestamp as a synthetic zero sample. + // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md + EnableCreatedTimestampZeroIngestion bool + ChunkRange int64 // ChunkDirRoot is the parent directory of the chunks directory. ChunkDirRoot string diff --git a/tsdb/head_append.go b/tsdb/head_append.go index be53a4f3f..afb461afe 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -87,6 +87,17 @@ func (a *initAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m return a.app.UpdateMetadata(ref, l, m) } +func (a *initAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if a.app != nil { + return a.app.AppendCTZeroSample(ref, lset, t, ct) + } + + a.head.initTime(t) + a.app = a.head.appender() + + return a.app.AppendCTZeroSample(ref, lset, t, ct) +} + // initTime initializes a head with the first timestamp. This only needs to be called // for a completely fresh head with an empty WAL. 
func (h *Head) initTime(t int64) { @@ -319,28 +330,11 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { - // Ensure no empty labels have gotten through. - lset = lset.WithoutEmpty() - if lset.IsEmpty() { - return 0, errors.Wrap(ErrInvalidSample, "empty labelset") - } - - if l, dup := lset.HasDuplicateLabelNames(); dup { - return 0, errors.Wrap(ErrInvalidSample, fmt.Sprintf(`label name "%s" is not unique`, l)) - } - - var created bool var err error - s, created, err = a.head.getOrCreate(lset.Hash(), lset) + s, err = a.getOrCreate(lset) if err != nil { return 0, err } - if created { - a.series = append(a.series, record.RefSeries{ - Ref: s.ref, - Labels: lset, - }) - } } if value.IsStaleNaN(v) { @@ -389,6 +383,71 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 return storage.SeriesRef(s.ref), nil } +// AppendCTZeroSample appends synthetic zero sample for ct timestamp. It returns +// error when sample can't be appended. See +// storage.CreatedTimestampAppender.AppendCTZeroSample for further documentation. +func (a *headAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if ct >= t { + return 0, fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") + } + + s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) + if s == nil { + var err error + s, err = a.getOrCreate(lset) + if err != nil { + return 0, err + } + } + + // Check if CT wouldn't be OOO vs samples we already might have for this series. + // NOTE(bwplotka): This will be often hit as it's expected for long living + // counters to share the same CT. 
+ s.Lock() + isOOO, _, err := s.appendable(ct, 0, a.headMaxt, a.minValidTime, a.oooTimeWindow) + if err == nil { + s.pendingCommit = true + } + s.Unlock() + if err != nil { + return 0, err + } + if isOOO { + return storage.SeriesRef(s.ref), storage.ErrOutOfOrderCT + } + + if ct > a.maxt { + a.maxt = ct + } + a.samples = append(a.samples, record.RefSample{Ref: s.ref, T: ct, V: 0.0}) + a.sampleSeries = append(a.sampleSeries, s) + return storage.SeriesRef(s.ref), nil +} + +func (a *headAppender) getOrCreate(lset labels.Labels) (*memSeries, error) { + // Ensure no empty labels have gotten through. + lset = lset.WithoutEmpty() + if lset.IsEmpty() { + return nil, errors.Wrap(ErrInvalidSample, "empty labelset") + } + if l, dup := lset.HasDuplicateLabelNames(); dup { + return nil, errors.Wrap(ErrInvalidSample, fmt.Sprintf(`label name "%s" is not unique`, l)) + } + var created bool + var err error + s, created, err := a.head.getOrCreate(lset.Hash(), lset) + if err != nil { + return nil, err + } + if created { + a.series = append(a.series, record.RefSeries{ + Ref: s.ref, + Labels: lset, + }) + } + return s, nil +} + // appendable checks whether the given sample is valid for appending to the series. (if we return false and no error) // The sample belongs to the out of order chunk if we return true and no error. // An error signifies the sample cannot be handled. diff --git a/tsdb/head_test.go b/tsdb/head_test.go index d444e1496..535647d3a 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -33,6 +33,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/atomic" "golang.org/x/sync/errgroup" @@ -5641,3 +5642,93 @@ func TestPostingsCardinalityStats(t *testing.T) { // Using cache. 
require.Equal(t, statsForSomeLabel1, head.PostingsCardinalityStats("n", 1)) } + +func TestHeadAppender_AppendCTZeroSample(t *testing.T) { + type appendableSamples struct { + ts int64 + val float64 + ct int64 + } + for _, tc := range []struct { + name string + appendableSamples []appendableSamples + expectedSamples []model.Sample + }{ + { + name: "In order ct+normal sample", + appendableSamples: []appendableSamples{ + {ts: 100, val: 10, ct: 1}, + }, + expectedSamples: []model.Sample{ + {Timestamp: 1, Value: 0}, + {Timestamp: 100, Value: 10}, + }, + }, + { + name: "Consecutive appends with same ct ignore ct", + appendableSamples: []appendableSamples{ + {ts: 100, val: 10, ct: 1}, + {ts: 101, val: 10, ct: 1}, + }, + expectedSamples: []model.Sample{ + {Timestamp: 1, Value: 0}, + {Timestamp: 100, Value: 10}, + {Timestamp: 101, Value: 10}, + }, + }, + { + name: "Consecutive appends with newer ct do not ignore ct", + appendableSamples: []appendableSamples{ + {ts: 100, val: 10, ct: 1}, + {ts: 102, val: 10, ct: 101}, + }, + expectedSamples: []model.Sample{ + {Timestamp: 1, Value: 0}, + {Timestamp: 100, Value: 10}, + {Timestamp: 101, Value: 0}, + {Timestamp: 102, Value: 10}, + }, + }, + { + name: "CT equals to previous sample timestamp is ignored", + appendableSamples: []appendableSamples{ + {ts: 100, val: 10, ct: 1}, + {ts: 101, val: 10, ct: 100}, + }, + expectedSamples: []model.Sample{ + {Timestamp: 1, Value: 0}, + {Timestamp: 100, Value: 10}, + {Timestamp: 101, Value: 10}, + }, + }, + } { + h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) + defer func() { + require.NoError(t, h.Close()) + }() + a := h.Appender(context.Background()) + lbls := labels.FromStrings("foo", "bar") + for _, sample := range tc.appendableSamples { + _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) + require.NoError(t, err) + _, err = a.Append(0, lbls, sample.ts, sample.val) + require.NoError(t, err) + } + require.NoError(t, a.Commit()) + + q, err := 
NewBlockQuerier(h, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + require.True(t, ss.Next()) + s := ss.At() + require.False(t, ss.Next()) + it := s.Iterator(nil) + for _, sample := range tc.expectedSamples { + require.Equal(t, chunkenc.ValFloat, it.Next()) + timestamp, value := it.At() + require.Equal(t, sample.Timestamp, model.Time(timestamp)) + require.Equal(t, sample.Value, model.SampleValue(value)) + } + require.Equal(t, chunkenc.ValNone, it.Next()) + } +} diff --git a/util/runutil/runutil.go b/util/runutil/runutil.go new file mode 100644 index 000000000..5a77c332b --- /dev/null +++ b/util/runutil/runutil.go @@ -0,0 +1,37 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copied from https://github.com/efficientgo/core/blob/a21078e2c723b69e05f95c65dbc5058712b4edd8/runutil/runutil.go#L39 +// and adjusted. + +package runutil + +import "time" + +// Retry executes f once per interval until a signal is received on stopc or no error is returned from f.
+func Retry(interval time.Duration, stopc <-chan struct{}, f func() error) error { + tick := time.NewTicker(interval) + defer tick.Stop() + + var err error + for { + if err = f(); err == nil { + return nil + } + select { + case <-stopc: + return err + case <-tick.C: + } + } +} From 48df9fc0203c569241d1d0c38d12a4beab65883b Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 11 Dec 2023 11:12:11 +0100 Subject: [PATCH 20/52] Export SampleRingIterator Signed-off-by: Filip Petkovski --- storage/buffer.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/storage/buffer.go b/storage/buffer.go index 49dfbd299..75a935f9d 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -74,7 +74,7 @@ func (b *BufferedSeriesIterator) PeekBack(n int) (sample chunks.Sample, ok bool) // Buffer returns an iterator over the buffered data. Invalidates previously // returned iterators. -func (b *BufferedSeriesIterator) Buffer() *sampleRingIterator { +func (b *BufferedSeriesIterator) Buffer() *SampleRingIterator { return b.buf.iterator() } @@ -252,7 +252,7 @@ type sampleRing struct { f int // Position of first element in ring buffer. l int // Number of elements in buffer. - it sampleRingIterator + it SampleRingIterator } type bufType int @@ -304,13 +304,13 @@ func (r *sampleRing) reset() { } // Returns the current iterator. Invalidates previously returned iterators. 
-func (r *sampleRing) iterator() *sampleRingIterator { +func (r *sampleRing) iterator() *SampleRingIterator { r.it.r = r r.it.i = -1 return &r.it } -type sampleRingIterator struct { +type SampleRingIterator struct { r *sampleRing i int t int64 @@ -319,7 +319,7 @@ type sampleRingIterator struct { fh *histogram.FloatHistogram } -func (it *sampleRingIterator) Next() chunkenc.ValueType { +func (it *SampleRingIterator) Next() chunkenc.ValueType { it.i++ if it.i >= it.r.l { return chunkenc.ValNone @@ -358,30 +358,30 @@ func (it *sampleRingIterator) Next() chunkenc.ValueType { } } -func (it *sampleRingIterator) Seek(int64) chunkenc.ValueType { +func (it *SampleRingIterator) Seek(int64) chunkenc.ValueType { return chunkenc.ValNone } -func (it *sampleRingIterator) Err() error { +func (it *SampleRingIterator) Err() error { return nil } -func (it *sampleRingIterator) At() (int64, float64) { +func (it *SampleRingIterator) At() (int64, float64) { return it.t, it.f } -func (it *sampleRingIterator) AtHistogram() (int64, *histogram.Histogram) { +func (it *SampleRingIterator) AtHistogram() (int64, *histogram.Histogram) { return it.t, it.h } -func (it *sampleRingIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { +func (it *SampleRingIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { if it.fh == nil { return it.t, it.h.ToFloat(fh) } return it.t, it.fh } -func (it *sampleRingIterator) AtT() int64 { +func (it *SampleRingIterator) AtT() int64 { return it.t } From 6de80d7fb03f7cd5ed44b85559886e73d89e720d Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Mon, 23 Oct 2023 14:55:36 +0100 Subject: [PATCH 21/52] Allow non-default registry to be used for metrics of SD components Signed-off-by: Paulin Todev --- cmd/prometheus/main.go | 28 +++- cmd/promtool/main.go | 3 +- cmd/promtool/sd.go | 5 +- discovery/README.md | 5 + discovery/aws/ec2.go | 16 +- discovery/aws/lightsail.go | 16 +- discovery/azure/azure.go 
| 63 ++++---- discovery/consul/consul.go | 141 ++++++++++-------- discovery/consul/consul_test.go | 11 +- discovery/digitalocean/digitalocean.go | 16 +- discovery/digitalocean/digitalocean_test.go | 3 +- discovery/discovery.go | 10 ++ discovery/dns/dns.go | 72 ++++----- discovery/dns/dns_test.go | 4 +- discovery/eureka/eureka.go | 16 +- discovery/eureka/eureka_test.go | 3 +- discovery/file/file.go | 75 ++++++---- discovery/file/file_test.go | 9 +- discovery/gce/gce.go | 16 +- discovery/hetzner/hetzner.go | 16 +- discovery/http/http.go | 41 ++--- discovery/http/http_test.go | 23 ++- discovery/ionos/ionos.go | 17 ++- discovery/kubernetes/endpoints.go | 21 +-- discovery/kubernetes/endpointslice.go | 21 +-- discovery/kubernetes/ingress.go | 15 +- discovery/kubernetes/kubernetes.go | 69 +++++---- discovery/kubernetes/kubernetes_test.go | 18 ++- discovery/kubernetes/node.go | 15 +- discovery/kubernetes/pod.go | 15 +- discovery/kubernetes/service.go | 15 +- discovery/legacymanager/manager.go | 68 +++------ discovery/legacymanager/manager_test.go | 27 ++-- discovery/linode/linode.go | 47 +++--- discovery/linode/linode_test.go | 3 +- discovery/manager.go | 66 ++------ discovery/manager_test.go | 39 +++-- discovery/marathon/marathon.go | 16 +- discovery/marathon/marathon_test.go | 5 +- discovery/metrics.go | 103 +++++++++++++ ...lient_metrics.go => metrics_k8s_client.go} | 48 +++--- discovery/moby/docker.go | 16 +- discovery/moby/docker_test.go | 3 +- discovery/moby/dockerswarm.go | 16 +- discovery/moby/nodes_test.go | 3 +- discovery/moby/services_test.go | 5 +- discovery/moby/tasks_test.go | 3 +- discovery/nomad/nomad.go | 53 +++---- discovery/nomad/nomad_test.go | 5 +- discovery/openstack/openstack.go | 16 +- discovery/ovhcloud/ovhcloud.go | 16 +- discovery/ovhcloud/ovhcloud_test.go | 4 +- discovery/puppetdb/puppetdb.go | 16 +- discovery/puppetdb/puppetdb_test.go | 11 +- discovery/refresh/refresh.go | 85 +++++++---- discovery/refresh/refresh_test.go | 11 +- 
discovery/scaleway/scaleway.go | 16 +- discovery/triton/triton.go | 16 +- discovery/triton/triton_test.go | 3 +- discovery/util.go | 72 +++++++++ discovery/uyuni/uyuni.go | 16 +- discovery/uyuni/uyuni_test.go | 6 +- discovery/vultr/vultr.go | 16 +- discovery/vultr/vultr_test.go | 3 +- discovery/xds/kuma.go | 81 +++++----- discovery/xds/kuma_test.go | 3 +- discovery/xds/xds.go | 13 +- .../examples/custom-sd/adapter-usage/main.go | 10 +- .../examples/custom-sd/adapter/adapter.go | 5 +- .../custom-sd/adapter/adapter_test.go | 6 +- 70 files changed, 1056 insertions(+), 693 deletions(-) create mode 100644 discovery/metrics.go rename discovery/{kubernetes/client_metrics.go => metrics_k8s_client.go} (85%) create mode 100644 discovery/util.go diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 106f9d05c..24fc0f8e4 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -620,14 +620,30 @@ func main() { discoveryManagerNotify discoveryManager ) + // Register the metrics used by both "scrape" and "notify" discovery managers. + // The same metrics are used for both discovery managers. Hence the registration + // needs to be done here, outside the NewManager() calls, to avoid duplicate + // metric registrations. 
+ discoveryMetrics, err := discovery.NewMetrics(prometheus.DefaultRegisterer) + if err != nil { + level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) + os.Exit(1) + } if cfg.enableNewSDManager { - discovery.RegisterMetrics() - discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape")) - discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify")) + discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discoveryMetrics, discovery.Name("scrape")) + discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discoveryMetrics, discovery.Name("notify")) } else { - legacymanager.RegisterMetrics() - discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), legacymanager.Name("scrape")) - discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify")) + discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discoveryMetrics, legacymanager.Name("scrape")) + discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discoveryMetrics, legacymanager.Name("notify")) + } + + if discoveryManagerScrape == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager scrape") + os.Exit(1) + } + if discoveryManagerNotify == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager notify") + os.Exit(1) } scrapeManager, err := scrape.NewManager( diff --git 
a/cmd/promtool/main.go b/cmd/promtool/main.go index f0b2719c9..508b681b8 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -36,6 +36,7 @@ import ( "github.com/google/pprof/profile" "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/testutil/promlint" config_util "github.com/prometheus/common/config" @@ -317,7 +318,7 @@ func main() { switch parsedCmd { case sdCheckCmd.FullCommand(): - os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort)) + os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index 7c5ae7036..155152e1a 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -22,6 +22,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" @@ -37,7 +38,7 @@ type sdCheckResult struct { } // CheckSD performs service discovery for the given job name and reports the results. 
-func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool) int { +func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int { logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) @@ -77,7 +78,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefault defer cancel() for _, cfg := range scrapeConfig.ServiceDiscoveryConfigs { - d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger}) + d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger, Registerer: registerer}) if err != nil { fmt.Fprintln(os.Stderr, "Could not create new discoverer", err) return failureExitCode diff --git a/discovery/README.md b/discovery/README.md index 19b579b39..4c0660862 100644 --- a/discovery/README.md +++ b/discovery/README.md @@ -234,6 +234,11 @@ type Config interface { type DiscovererOptions struct { Logger log.Logger + + // A registerer for the Discoverer's metrics. + Registerer prometheus.Registerer + + HTTPClientOptions []config.HTTPClientOption } ``` diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index 64c8fdce6..40e6e7cb7 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -30,6 +30,7 @@ import ( "github.com/aws/aws-sdk-go/service/ec2" "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -101,7 +102,7 @@ func (*EC2SDConfig) Name() string { return "ec2" } // NewDiscoverer returns a Discoverer for the EC2 Config. 
func (c *EC2SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewEC2Discovery(c, opts.Logger), nil + return NewEC2Discovery(c, opts.Logger, opts.Registerer), nil } // UnmarshalYAML implements the yaml.Unmarshaler interface for the EC2 Config. @@ -147,7 +148,7 @@ type EC2Discovery struct { } // NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets. -func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery { +func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, reg prometheus.Registerer) *EC2Discovery { if logger == nil { logger = log.NewNopLogger() } @@ -156,10 +157,13 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery { cfg: conf, } d.Discovery = refresh.NewDiscovery( - logger, - "ec2", - time.Duration(d.cfg.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "ec2", + Interval: time.Duration(d.cfg.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d } diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index c0198d6a7..5382ea015 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -29,6 +29,7 @@ import ( "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/lightsail" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -84,7 +85,7 @@ func (*LightsailSDConfig) Name() string { return "lightsail" } // NewDiscoverer returns a Discoverer for the Lightsail Config. func (c *LightsailSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewLightsailDiscovery(c, opts.Logger), nil + return NewLightsailDiscovery(c, opts.Logger, opts.Registerer), nil } // UnmarshalYAML implements the yaml.Unmarshaler interface for the Lightsail Config. 
@@ -121,7 +122,7 @@ type LightsailDiscovery struct { } // NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets. -func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *LightsailDiscovery { +func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, reg prometheus.Registerer) *LightsailDiscovery { if logger == nil { logger = log.NewNopLogger() } @@ -129,10 +130,13 @@ func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *Lightsai cfg: conf, } d.Discovery = refresh.NewDiscovery( - logger, - "lightsail", - time.Duration(d.cfg.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "lightsail", + Interval: time.Duration(d.cfg.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d } diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index ef953b802..3b67a8102 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -79,17 +79,6 @@ var ( AuthenticationMethod: authMethodOAuth, HTTPClientConfig: config_util.DefaultHTTPClientConfig, } - - failuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_azure_failures_total", - Help: "Number of Azure service discovery refresh failures.", - }) - cacheHitCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_azure_cache_hit_total", - Help: "Number of cache hit during refresh.", - }) ) var environments = map[string]cloud.Configuration{ @@ -114,8 +103,6 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) { func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(failuresCount) - prometheus.MustRegister(cacheHitCount) } // SDConfig is the configuration for Azure based service discovery. @@ -138,7 +125,7 @@ func (*SDConfig) Name() string { return "azure" } // NewDiscoverer returns a Discoverer for the Config. 
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger), nil + return NewDiscovery(c, opts.Logger, opts.Registerer) } func validateAuthParam(param, name string) error { @@ -181,14 +168,16 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type Discovery struct { *refresh.Discovery - logger log.Logger - cfg *SDConfig - port int - cache *cache.Cache[string, *armnetwork.Interface] + logger log.Logger + cfg *SDConfig + port int + cache *cache.Cache[string, *armnetwork.Interface] + failuresCount prometheus.Counter + cacheHitCount prometheus.Counter } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. -func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery { +func NewDiscovery(cfg *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } @@ -198,16 +187,30 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery { port: cfg.Port, logger: logger, cache: l, + failuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_azure_failures_total", + Help: "Number of Azure service discovery refresh failures.", + }), + cacheHitCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_azure_cache_hit_total", + Help: "Number of cache hit during refresh.", + }), } d.Discovery = refresh.NewDiscovery( - logger, - "azure", - time.Duration(cfg.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "azure", + Interval: time.Duration(cfg.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + Metrics: []prometheus.Collector{d.failuresCount, d.cacheHitCount}, + }, ) - return d + return d, nil } // azureClient represents multiple Azure Resource Manager providers. 
@@ -329,14 +332,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { client, err := createAzureClient(*d.cfg) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("could not create Azure client: %w", err) } client.logger = d.logger machines, err := client.getVMs(ctx, d.cfg.ResourceGroup) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("could not get virtual machines: %w", err) } @@ -345,14 +348,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { // Load the vms managed by scale sets. scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("could not get virtual machine scale sets: %w", err) } for _, scaleSet := range scaleSets { scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("could not get virtual machine scale set vms: %w", err) } machines = append(machines, scaleSetVms...) 
@@ -403,7 +406,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { var networkInterface *armnetwork.Interface if v, ok := d.getFromCache(nicID); ok { networkInterface = v - cacheHitCount.Add(1) + d.cacheHitCount.Add(1) } else { networkInterface, err = client.getNetworkInterfaceByID(ctx, nicID) if err != nil { @@ -462,7 +465,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { var tg targetgroup.Group for tgt := range ch { if tgt.err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("unable to complete Azure service discovery: %w", tgt.err) } if tgt.labelSet != nil { diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index b4cb15229..50f171a78 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -71,41 +71,18 @@ const ( namespace = "prometheus" ) -var ( - rpcFailuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "sd_consul_rpc_failures_total", - Help: "The number of Consul RPC call failures.", - }) - rpcDuration = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "sd_consul_rpc_duration_seconds", - Help: "The duration of a Consul RPC call in seconds.", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }, - []string{"endpoint", "call"}, - ) - - // Initialize metric vectors. - servicesRPCDuration = rpcDuration.WithLabelValues("catalog", "services") - serviceRPCDuration = rpcDuration.WithLabelValues("catalog", "service") - - // DefaultSDConfig is the default Consul SD configuration. - DefaultSDConfig = SDConfig{ - TagSeparator: ",", - Scheme: "http", - Server: "localhost:8500", - AllowStale: true, - RefreshInterval: model.Duration(30 * time.Second), - HTTPClientConfig: config.DefaultHTTPClientConfig, - } -) +// DefaultSDConfig is the default Consul SD configuration. 
+var DefaultSDConfig = SDConfig{ + TagSeparator: ",", + Scheme: "http", + Server: "localhost:8500", + AllowStale: true, + RefreshInterval: model.Duration(30 * time.Second), + HTTPClientConfig: config.DefaultHTTPClientConfig, +} func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(rpcFailuresCount, rpcDuration) } // SDConfig is the configuration for Consul service discovery. @@ -147,7 +124,7 @@ func (*SDConfig) Name() string { return "consul" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -184,22 +161,27 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // Discovery retrieves target information from a Consul server // and updates them via watches. type Discovery struct { - client *consul.Client - clientDatacenter string - clientNamespace string - clientPartition string - tagSeparator string - watchedServices []string // Set of services which will be discovered. - watchedTags []string // Tags used to filter instances of a service. - watchedNodeMeta map[string]string - allowStale bool - refreshInterval time.Duration - finalizer func() - logger log.Logger + client *consul.Client + clientDatacenter string + clientNamespace string + clientPartition string + tagSeparator string + watchedServices []string // Set of services which will be discovered. + watchedTags []string // Tags used to filter instances of a service. 
+ watchedNodeMeta map[string]string + allowStale bool + refreshInterval time.Duration + finalizer func() + logger log.Logger + rpcFailuresCount prometheus.Counter + rpcDuration *prometheus.SummaryVec + servicesRPCDuration prometheus.Observer + serviceRPCDuration prometheus.Observer + metricRegisterer discovery.MetricRegisterer } // NewDiscovery returns a new Discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } @@ -237,7 +219,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { clientPartition: conf.Partition, finalizer: wrapper.CloseIdleConnections, logger: logger, + rpcFailuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_consul_rpc_failures_total", + Help: "The number of Consul RPC call failures.", + }), + rpcDuration: prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_consul_rpc_duration_seconds", + Help: "The duration of a Consul RPC call in seconds.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, + []string{"endpoint", "call"}, + ), } + + cd.metricRegisterer = discovery.NewMetricRegisterer( + reg, + []prometheus.Collector{ + cd.rpcFailuresCount, + cd.rpcDuration, + }, + ) + + // Initialize metric vectors. 
+ cd.servicesRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "services") + cd.serviceRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "service") + return cd, nil } @@ -293,7 +303,7 @@ func (d *Discovery) getDatacenter() error { info, err := d.client.Agent().Self() if err != nil { level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) - rpcFailuresCount.Inc() + d.rpcFailuresCount.Inc() return err } @@ -334,6 +344,13 @@ func (d *Discovery) initialize(ctx context.Context) { // Run implements the Discoverer interface. func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { + err := d.metricRegisterer.RegisterMetrics() + if err != nil { + level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error()) + return + } + defer d.metricRegisterer.UnregisterMetrics() + if d.finalizer != nil { defer d.finalizer() } @@ -382,7 +399,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. t0 := time.Now() srvs, meta, err := catalog.Services(opts.WithContext(ctx)) elapsed := time.Since(t0) - servicesRPCDuration.Observe(elapsed.Seconds()) + d.servicesRPCDuration.Observe(elapsed.Seconds()) // Check the context before in order to exit early. select { @@ -393,7 +410,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. if err != nil { level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) - rpcFailuresCount.Inc() + d.rpcFailuresCount.Inc() time.Sleep(retryInterval) return } @@ -449,13 +466,15 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. // consulService contains data belonging to the same service. 
type consulService struct { - name string - tags []string - labels model.LabelSet - discovery *Discovery - client *consul.Client - tagSeparator string - logger log.Logger + name string + tags []string + labels model.LabelSet + discovery *Discovery + client *consul.Client + tagSeparator string + logger log.Logger + rpcFailuresCount prometheus.Counter + serviceRPCDuration prometheus.Observer } // Start watching a service. @@ -469,8 +488,10 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G serviceLabel: model.LabelValue(name), datacenterLabel: model.LabelValue(d.clientDatacenter), }, - tagSeparator: d.tagSeparator, - logger: d.logger, + tagSeparator: d.tagSeparator, + logger: d.logger, + rpcFailuresCount: d.rpcFailuresCount, + serviceRPCDuration: d.serviceRPCDuration, } go func() { @@ -508,7 +529,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr t0 := time.Now() serviceNodes, meta, err := health.ServiceMultipleTags(srv.name, srv.tags, false, opts.WithContext(ctx)) elapsed := time.Since(t0) - serviceRPCDuration.Observe(elapsed.Seconds()) + srv.serviceRPCDuration.Observe(elapsed.Seconds()) // Check the context before in order to exit early. 
select { @@ -520,7 +541,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr if err != nil { level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) - rpcFailuresCount.Inc() + srv.rpcFailuresCount.Inc() time.Sleep(retryInterval) return } diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go index 19f7d3c4a..97cb8fbc9 100644 --- a/discovery/consul/consul_test.go +++ b/discovery/consul/consul_test.go @@ -22,6 +22,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -39,7 +40,7 @@ func TestConfiguredService(t *testing.T) { conf := &SDConfig{ Services: []string{"configuredServiceName"}, } - consulDiscovery, err := NewDiscovery(conf, nil) + consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry()) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } @@ -56,7 +57,7 @@ func TestConfiguredServiceWithTag(t *testing.T) { Services: []string{"configuredServiceName"}, ServiceTags: []string{"http"}, } - consulDiscovery, err := NewDiscovery(conf, nil) + consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry()) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } @@ -151,7 +152,7 @@ func TestConfiguredServiceWithTags(t *testing.T) { } for _, tc := range cases { - consulDiscovery, err := NewDiscovery(tc.conf, nil) + consulDiscovery, err := NewDiscovery(tc.conf, nil, prometheus.NewRegistry()) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } @@ -165,7 +166,7 @@ func TestConfiguredServiceWithTags(t *testing.T) { func TestNonConfiguredService(t *testing.T) { conf := &SDConfig{} - consulDiscovery, err := NewDiscovery(conf, nil) + consulDiscovery, err := NewDiscovery(conf, 
nil, prometheus.NewRegistry()) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) } @@ -262,7 +263,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { func newDiscovery(t *testing.T, config *SDConfig) *Discovery { logger := log.NewNopLogger() - d, err := NewDiscovery(config, logger) + d, err := NewDiscovery(config, logger, prometheus.NewRegistry()) require.NoError(t, err) return d } diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index e207388b3..970258de0 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -24,6 +24,7 @@ import ( "github.com/digitalocean/godo" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -75,7 +76,7 @@ func (*SDConfig) Name() string { return "digitalocean" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -103,7 +104,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { d := &Discovery{ port: conf.Port, } @@ -125,10 +126,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { } d.Discovery = refresh.NewDiscovery( - logger, - "digitalocean", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "digitalocean", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/digitalocean/digitalocean_test.go b/discovery/digitalocean/digitalocean_test.go index df2514ecb..a959b312c 100644 --- a/discovery/digitalocean/digitalocean_test.go +++ b/discovery/digitalocean/digitalocean_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" ) @@ -46,7 +47,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) { cfg := DefaultSDConfig cfg.HTTPClientConfig.BearerToken = tokenID - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/discovery.go b/discovery/discovery.go index 9dc010a09..acc4c1efe 100644 --- a/discovery/discovery.go +++ b/discovery/discovery.go @@ -18,6 +18,7 @@ import ( "reflect" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -42,6 +43,15 @@ type Discoverer interface { type DiscovererOptions struct { Logger log.Logger + // A registerer for the Discoverer's metrics. + // Some Discoverers may ignore this registerer and use the global one instead. 
+ // For now this will work, because the Prometheus `main` function uses the global registry. + // However, in the future the Prometheus `main` function will be updated to not use the global registry. + // Hence, if a discoverer wants its metrics to be visible via the Prometheus executable's + // `/metrics` endpoint, it should use this explicit registerer. + // TODO(ptodev): Update this comment once the Prometheus `main` function does not use the global registry. + Registerer prometheus.Registerer + // Extra HTTP client options to expose to Discoverers. This field may be // ignored; Discoverer implementations must opt-in to reading it. HTTPClientOptions []config.HTTPClientOption diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index 4838a8954..9b6bd6741 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -49,30 +49,14 @@ const ( namespace = "prometheus" ) -var ( - dnsSDLookupsCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "sd_dns_lookups_total", - Help: "The number of DNS-SD lookups.", - }) - dnsSDLookupFailuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "sd_dns_lookup_failures_total", - Help: "The number of DNS-SD lookup failures.", - }) - - // DefaultSDConfig is the default DNS SD configuration. - DefaultSDConfig = SDConfig{ - RefreshInterval: model.Duration(30 * time.Second), - Type: "SRV", - } -) +// DefaultSDConfig is the default DNS SD configuration. +var DefaultSDConfig = SDConfig{ + RefreshInterval: model.Duration(30 * time.Second), + Type: "SRV", +} func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(dnsSDLookupFailuresCount, dnsSDLookupsCount) } // SDConfig is the configuration for DNS based service discovery. @@ -88,7 +72,7 @@ func (*SDConfig) Name() string { return "dns" } // NewDiscoverer returns a Discoverer for the Config. 
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(*c, opts.Logger), nil + return NewDiscovery(*c, opts.Logger, opts.Registerer) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -118,16 +102,18 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // the Discoverer interface. type Discovery struct { *refresh.Discovery - names []string - port int - qtype uint16 - logger log.Logger + names []string + port int + qtype uint16 + logger log.Logger + dnsSDLookupsCount prometheus.Counter + dnsSDLookupFailuresCount prometheus.Counter lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery { +func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } @@ -151,14 +137,32 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery { port: conf.Port, logger: logger, lookupFn: lookupWithSearchPath, + dnsSDLookupsCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_dns_lookups_total", + Help: "The number of DNS-SD lookups.", + }), + dnsSDLookupFailuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_dns_lookup_failures_total", + Help: "The number of DNS-SD lookup failures.", + }), } + d.Discovery = refresh.NewDiscovery( - logger, - "dns", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "dns", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: prometheus.NewRegistry(), + Metrics: []prometheus.Collector{d.dnsSDLookupsCount, d.dnsSDLookupFailuresCount}, + }, ) - return d + + return d, nil } func (d *Discovery) refresh(ctx context.Context) 
([]*targetgroup.Group, error) { @@ -191,9 +195,9 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error { response, err := d.lookupFn(name, d.qtype, d.logger) - dnsSDLookupsCount.Inc() + d.dnsSDLookupsCount.Inc() if err != nil { - dnsSDLookupFailuresCount.Inc() + d.dnsSDLookupFailuresCount.Inc() return err } diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go index 52ca72c79..b8dd2efaa 100644 --- a/discovery/dns/dns_test.go +++ b/discovery/dns/dns_test.go @@ -22,6 +22,7 @@ import ( "github.com/go-kit/log" "github.com/miekg/dns" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/goleak" @@ -252,7 +253,8 @@ func TestDNS(t *testing.T) { tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() - sd := NewDiscovery(tc.config, nil) + sd, err := NewDiscovery(tc.config, nil, prometheus.NewRegistry()) + require.NoError(t, err) sd.lookupFn = tc.lookup tgs, err := sd.refresh(context.Background()) diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go index 5d9d8d552..d3e4084e5 100644 --- a/discovery/eureka/eureka.go +++ b/discovery/eureka/eureka.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -80,7 +81,7 @@ func (*SDConfig) Name() string { return "eureka" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -117,7 +118,7 @@ type Discovery struct { } // NewDiscovery creates a new Eureka discovery for the given role. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd") if err != nil { return nil, err @@ -128,10 +129,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { server: conf.Server, } d.Discovery = refresh.NewDiscovery( - logger, - "eureka", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "eureka", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/eureka/eureka_test.go b/discovery/eureka/eureka_test.go index cb75e1428..1fe3c710e 100644 --- a/discovery/eureka/eureka_test.go +++ b/discovery/eureka/eureka_test.go @@ -20,6 +20,7 @@ import ( "net/http/httptest" "testing" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -35,7 +36,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err Server: ts.URL, } - md, err := NewDiscovery(&conf, nil) + md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry()) if err != nil { return nil, err } diff --git a/discovery/file/file.go b/discovery/file/file.go index 60b63350f..ef6ed1f5e 100644 --- a/discovery/file/file.go +++ b/discovery/file/file.go @@ -39,24 +39,6 @@ import ( ) var ( - fileSDReadErrorsCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_file_read_errors_total", - Help: "The number of File-SD read errors.", - }) - fileSDScanDuration = prometheus.NewSummary( - prometheus.SummaryOpts{ - Name: "prometheus_sd_file_scan_duration_seconds", - Help: "The duration of the File-SD scan in seconds.", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }) - fileSDTimeStamp = NewTimestampCollector() - fileWatcherErrorsCount 
= prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_file_watcher_errors_total", - Help: "The number of File-SD errors caused by filesystem watch failures.", - }) - patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) // DefaultSDConfig is the default file SD configuration. @@ -67,7 +49,6 @@ var ( func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(fileSDReadErrorsCount, fileSDScanDuration, fileSDTimeStamp, fileWatcherErrorsCount) } // SDConfig is the configuration for file based discovery. @@ -81,7 +62,7 @@ func (*SDConfig) Name() string { return "file" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger), nil + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -187,10 +168,17 @@ type Discovery struct { // This is used to detect deleted target groups. lastRefresh map[string]int logger log.Logger + + fileSDReadErrorsCount prometheus.Counter + fileSDScanDuration prometheus.Summary + fileWatcherErrorsCount prometheus.Counter + fileSDTimeStamp *TimestampCollector + + metricRegisterer discovery.MetricRegisterer } // NewDiscovery returns a new file discovery for the given paths. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } @@ -200,9 +188,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery { interval: time.Duration(conf.RefreshInterval), timestamps: make(map[string]float64), logger: logger, + fileSDReadErrorsCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_file_read_errors_total", + Help: "The number of File-SD read errors.", + }), + fileSDScanDuration: prometheus.NewSummary( + prometheus.SummaryOpts{ + Name: "prometheus_sd_file_scan_duration_seconds", + Help: "The duration of the File-SD scan in seconds.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }), + fileWatcherErrorsCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_file_watcher_errors_total", + Help: "The number of File-SD errors caused by filesystem watch failures.", + }), + fileSDTimeStamp: NewTimestampCollector(), } - fileSDTimeStamp.addDiscoverer(disc) - return disc + + disc.fileSDTimeStamp.addDiscoverer(disc) + + disc.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{ + disc.fileSDReadErrorsCount, + disc.fileSDScanDuration, + disc.fileWatcherErrorsCount, + disc.fileSDTimeStamp, + }) + + return disc, nil } // listFiles returns a list of all files that match the configured patterns. @@ -239,10 +253,17 @@ func (d *Discovery) watchFiles() { // Run implements the Discoverer interface. 
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { + err := d.metricRegisterer.RegisterMetrics() + if err != nil { + level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error()) + return + } + defer d.metricRegisterer.UnregisterMetrics() + watcher, err := fsnotify.NewWatcher() if err != nil { level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err) - fileWatcherErrorsCount.Inc() + d.fileWatcherErrorsCount.Inc() return } d.watcher = watcher @@ -306,7 +327,7 @@ func (d *Discovery) stop() { done := make(chan struct{}) defer close(done) - fileSDTimeStamp.removeDiscoverer(d) + d.fileSDTimeStamp.removeDiscoverer(d) // Closing the watcher will deadlock unless all events and errors are drained. go func() { @@ -332,13 +353,13 @@ func (d *Discovery) stop() { func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) { t0 := time.Now() defer func() { - fileSDScanDuration.Observe(time.Since(t0).Seconds()) + d.fileSDScanDuration.Observe(time.Since(t0).Seconds()) }() ref := map[string]int{} for _, p := range d.listFiles() { tgroups, err := d.readFile(p) if err != nil { - fileSDReadErrorsCount.Inc() + d.fileSDReadErrorsCount.Inc() level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err) // Prevent deletion down below. 
diff --git a/discovery/file/file_test.go b/discovery/file/file_test.go index 76e1cebed..c138fc8a9 100644 --- a/discovery/file/file_test.go +++ b/discovery/file/file_test.go @@ -24,6 +24,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/goleak" @@ -143,7 +144,7 @@ func (t *testRunner) run(files ...string) { ctx, cancel := context.WithCancel(context.Background()) t.cancelSD = cancel go func() { - NewDiscovery( + d, err := NewDiscovery( &SDConfig{ Files: files, // Setting a high refresh interval to make sure that the tests only @@ -151,7 +152,11 @@ func (t *testRunner) run(files ...string) { RefreshInterval: model.Duration(1 * time.Hour), }, nil, - ).Run(ctx, t.ch) + prometheus.NewRegistry(), + ) + require.NoError(t, err) + + d.Run(ctx, t.ch) }() } diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go index fa05fbbf3..21a95ee39 100644 --- a/discovery/gce/gce.go +++ b/discovery/gce/gce.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "golang.org/x/oauth2/google" "google.golang.org/api/compute/v1" @@ -86,7 +87,7 @@ func (*SDConfig) Name() string { return "gce" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(*c, opts.Logger) + return NewDiscovery(*c, opts.Logger, opts.Registerer) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -121,7 +122,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { d := &Discovery{ project: conf.Project, zone: conf.Zone, @@ -141,10 +142,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { d.isvc = compute.NewInstancesService(d.svc) d.Discovery = refresh.NewDiscovery( - logger, - "gce", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "gce", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go index c3f7ec39c..9d3e6aa65 100644 --- a/discovery/hetzner/hetzner.go +++ b/discovery/hetzner/hetzner.go @@ -21,6 +21,7 @@ import ( "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -67,7 +68,7 @@ func (*SDConfig) Name() string { return "hetzner" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } type refresher interface { @@ -127,17 +128,20 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) { r, err := newRefresher(conf, logger) if err != nil { return nil, err } return refresh.NewDiscovery( - logger, - "hetzner", - time.Duration(conf.RefreshInterval), - r.refresh, + refresh.Options{ + Logger: logger, + Mech: "hetzner", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: r.refresh, + Registry: reg, + }, ), nil } diff --git a/discovery/http/http.go b/discovery/http/http.go index 2980d7efd..c12fdb26d 100644 --- a/discovery/http/http.go +++ b/discovery/http/http.go @@ -45,17 +45,10 @@ var ( } userAgent = fmt.Sprintf("Prometheus/%s", version.Version) matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`) - - failuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_http_failures_total", - Help: "Number of HTTP service discovery refresh failures.", - }) ) func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(failuresCount) } // SDConfig is the configuration for HTTP based discovery. @@ -70,7 +63,7 @@ func (*SDConfig) Name() string { return "http" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions) + return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -112,10 +105,11 @@ type Discovery struct { client *http.Client refreshInterval time.Duration tgLastLength int + failuresCount prometheus.Counter } // NewDiscovery returns a new HTTP discovery for the given config. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, reg prometheus.Registerer) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } @@ -130,13 +124,22 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPCli url: conf.URL, client: client, refreshInterval: time.Duration(conf.RefreshInterval), // Stored to be sent as headers. + failuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_http_failures_total", + Help: "Number of HTTP service discovery refresh failures.", + }), } d.Discovery = refresh.NewDiscovery( - logger, - "http", - time.Duration(conf.RefreshInterval), - d.Refresh, + refresh.Options{ + Logger: logger, + Mech: "http", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.Refresh, + Registry: reg, + Metrics: []prometheus.Collector{d.failuresCount}, + }, ) return d, nil } @@ -152,7 +155,7 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) { resp, err := d.client.Do(req.WithContext(ctx)) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, err } defer func() { @@ -161,31 +164,31 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) { }() if resp.StatusCode != http.StatusOK { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("server returned HTTP status %s", resp.Status) } if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("unsupported content type %q", resp.Header.Get("Content-Type")) } b, err := io.ReadAll(resp.Body) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, err } var targetGroups []*targetgroup.Group if err := json.Unmarshal(b, &targetGroups); err != nil { - failuresCount.Inc() + 
d.failuresCount.Inc() return nil, err } for i, tg := range targetGroups { if tg == nil { - failuresCount.Inc() + d.failuresCount.Inc() err = errors.New("nil target group item found") return nil, err } diff --git a/discovery/http/http_test.go b/discovery/http/http_test.go index 9bbda95b7..164719e90 100644 --- a/discovery/http/http_test.go +++ b/discovery/http/http_test.go @@ -41,7 +41,7 @@ func TestHTTPValidRefresh(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() @@ -63,7 +63,7 @@ func TestHTTPValidRefresh(t *testing.T) { }, } require.Equal(t, expectedTargets, tgs) - require.Equal(t, 0.0, getFailureCount()) + require.Equal(t, 0.0, getFailureCount(d.failuresCount)) } func TestHTTPInvalidCode(t *testing.T) { @@ -79,13 +79,13 @@ func TestHTTPInvalidCode(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() _, err = d.Refresh(ctx) require.EqualError(t, err, "server returned HTTP status 400 Bad Request") - require.Equal(t, 1.0, getFailureCount()) + require.Equal(t, 1.0, getFailureCount(d.failuresCount)) } func TestHTTPInvalidFormat(t *testing.T) { @@ -101,18 +101,16 @@ func TestHTTPInvalidFormat(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() _, err = d.Refresh(ctx) require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`) - require.Equal(t, 1.0, getFailureCount()) + require.Equal(t, 1.0, getFailureCount(d.failuresCount)) } -var 
lastFailureCount float64 - -func getFailureCount() float64 { +func getFailureCount(failuresCount prometheus.Counter) float64 { failureChan := make(chan prometheus.Metric) go func() { @@ -129,10 +127,7 @@ func getFailureCount() float64 { metric.Write(&counter) } - // account for failures in prior tests - count := *counter.Counter.Value - lastFailureCount - lastFailureCount = *counter.Counter.Value - return count + return *counter.Counter.Value } func TestContentTypeRegex(t *testing.T) { @@ -417,7 +412,7 @@ func TestSourceDisappeared(t *testing.T) { URL: ts.URL, RefreshInterval: model.Duration(1 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry()) require.NoError(t, err) for _, test := range cases { ctx := context.Background() diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go index 3afed8d79..36623745a 100644 --- a/discovery/ionos/ionos.go +++ b/discovery/ionos/ionos.go @@ -23,6 +23,8 @@ import ( "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" + + "github.com/prometheus/client_golang/prometheus" ) const ( @@ -41,7 +43,7 @@ func init() { type Discovery struct{} // NewDiscovery returns a new refresh.Discovery for IONOS Cloud. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) { if conf.ionosEndpoint == "" { conf.ionosEndpoint = "https://api.ionos.com" } @@ -52,10 +54,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) } return refresh.NewDiscovery( - logger, - "ionos", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "ionos", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ), nil } @@ -86,7 +91,7 @@ func (c SDConfig) Name() string { // NewDiscoverer returns a new discovery.Discoverer for IONOS Cloud. func (c SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(&c, options.Logger) + return NewDiscovery(&c, options.Logger, options.Registerer) } // UnmarshalYAML implements the yaml.Unmarshaler interface. diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 708e229a2..801a45f7c 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -22,6 +22,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" @@ -30,12 +31,6 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" ) -var ( - epAddCount = eventCount.WithLabelValues("endpoints", "add") - epUpdateCount = eventCount.WithLabelValues("endpoints", "update") - epDeleteCount = eventCount.WithLabelValues("endpoints", "delete") -) - // Endpoints discovers new endpoint targets. type Endpoints struct { logger log.Logger @@ -54,7 +49,7 @@ type Endpoints struct { } // NewEndpoints returns a new endpoints discovery. 
-func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *Endpoints { +func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { if l == nil { l = log.NewNopLogger() } @@ -73,15 +68,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca _, err := e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - epAddCount.Inc() + eventCount.WithLabelValues("endpoints", "add").Inc() e.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - epUpdateCount.Inc() + eventCount.WithLabelValues("endpoints", "update").Inc() e.enqueue(o) }, DeleteFunc: func(o interface{}) { - epDeleteCount.Inc() + eventCount.WithLabelValues("endpoints", "delete").Inc() e.enqueue(o) }, }) @@ -112,15 +107,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca // TODO(fabxc): potentially remove add and delete event handlers. Those should // be triggered via the endpoint handlers already. 
AddFunc: func(o interface{}) { - svcAddCount.Inc() + eventCount.WithLabelValues("service", "add").Inc() serviceUpdate(o) }, UpdateFunc: func(_, o interface{}) { - svcUpdateCount.Inc() + eventCount.WithLabelValues("service", "update").Inc() serviceUpdate(o) }, DeleteFunc: func(o interface{}) { - svcDeleteCount.Inc() + eventCount.WithLabelValues("service", "delete").Inc() serviceUpdate(o) }, }) diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index a16862380..e2ac1de42 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -22,6 +22,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" @@ -33,12 +34,6 @@ import ( "github.com/prometheus/prometheus/util/strutil" ) -var ( - epslAddCount = eventCount.WithLabelValues("endpointslice", "add") - epslUpdateCount = eventCount.WithLabelValues("endpointslice", "update") - epslDeleteCount = eventCount.WithLabelValues("endpointslice", "delete") -) - // EndpointSlice discovers new endpoint targets. type EndpointSlice struct { logger log.Logger @@ -57,7 +52,7 @@ type EndpointSlice struct { } // NewEndpointSlice returns a new endpointslice discovery. 
-func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *EndpointSlice { +func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { if l == nil { l = log.NewNopLogger() } @@ -76,15 +71,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod _, err := e.endpointSliceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - epslAddCount.Inc() + eventCount.WithLabelValues("endpointslice", "add").Inc() e.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - epslUpdateCount.Inc() + eventCount.WithLabelValues("endpointslice", "update").Inc() e.enqueue(o) }, DeleteFunc: func(o interface{}) { - epslDeleteCount.Inc() + eventCount.WithLabelValues("endpointslice", "delete").Inc() e.enqueue(o) }, }) @@ -115,15 +110,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod } _, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - svcAddCount.Inc() + eventCount.WithLabelValues("service", "add").Inc() serviceUpdate(o) }, UpdateFunc: func(_, o interface{}) { - svcUpdateCount.Inc() + eventCount.WithLabelValues("service", "update").Inc() serviceUpdate(o) }, DeleteFunc: func(o interface{}) { - svcDeleteCount.Inc() + eventCount.WithLabelValues("service", "delete").Inc() serviceUpdate(o) }, }) diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index fee4cc720..91b1fb0ee 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -21,6 +21,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" v1 "k8s.io/api/networking/v1" "k8s.io/api/networking/v1beta1" @@ -30,12 +31,6 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" ) -var ( - 
ingressAddCount = eventCount.WithLabelValues("ingress", "add") - ingressUpdateCount = eventCount.WithLabelValues("ingress", "update") - ingressDeleteCount = eventCount.WithLabelValues("ingress", "delete") -) - // Ingress implements discovery of Kubernetes ingress. type Ingress struct { logger log.Logger @@ -45,19 +40,19 @@ type Ingress struct { } // NewIngress returns a new ingress discovery. -func NewIngress(l log.Logger, inf cache.SharedInformer) *Ingress { +func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { s := &Ingress{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")} _, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - ingressAddCount.Inc() + eventCount.WithLabelValues("ingress", "add").Inc() s.enqueue(o) }, DeleteFunc: func(o interface{}) { - ingressDeleteCount.Inc() + eventCount.WithLabelValues("ingress", "delete").Inc() s.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - ingressUpdateCount.Inc() + eventCount.WithLabelValues("ingress", "update").Inc() s.enqueue(o) }, }) diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index 7bd96652f..4deaf3f68 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -58,24 +58,14 @@ import ( const ( // metaLabelPrefix is the meta prefix used for all meta labels. // in this discovery. - metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_" - namespaceLabel = metaLabelPrefix + "namespace" - metricsNamespace = "prometheus_sd_kubernetes" - presentValue = model.LabelValue("true") + metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_" + namespaceLabel = metaLabelPrefix + "namespace" + presentValue = model.LabelValue("true") ) var ( // Http header. userAgent = fmt.Sprintf("Prometheus/%s", version.Version) - // Custom events metric. 
- eventCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: metricsNamespace, - Name: "events_total", - Help: "The number of Kubernetes events handled.", - }, - []string{"role", "event"}, - ) // DefaultSDConfig is the default Kubernetes SD configuration. DefaultSDConfig = SDConfig{ HTTPClientConfig: config.DefaultHTTPClientConfig, @@ -84,15 +74,6 @@ var ( func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(eventCount) - // Initialize metric vectors. - for _, role := range []string{"endpointslice", "endpoints", "node", "pod", "service", "ingress"} { - for _, evt := range []string{"add", "delete", "update"} { - eventCount.WithLabelValues(role, evt) - } - } - (&clientGoRequestMetricAdapter{}).Register(prometheus.DefaultRegisterer) - (&clientGoWorkqueueMetricsProvider{}).Register(prometheus.DefaultRegisterer) } // Role is role of the service in Kubernetes. @@ -137,7 +118,7 @@ func (*SDConfig) Name() string { return "kubernetes" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return New(opts.Logger, c) + return New(opts.Logger, opts.Registerer, c) } // SetDirectory joins any relative file paths with dir. @@ -274,6 +255,8 @@ type Discovery struct { selectors roleSelector ownNamespace string attachMetadata AttachMetadataConfig + eventCount *prometheus.CounterVec + metricRegisterer discovery.MetricRegisterer } func (d *Discovery) getNamespaces() []string { @@ -292,7 +275,7 @@ func (d *Discovery) getNamespaces() []string { } // New creates a new Kubernetes discovery for the given role. 
-func New(l log.Logger, conf *SDConfig) (*Discovery, error) { +func New(l log.Logger, reg prometheus.Registerer, conf *SDConfig) (*Discovery, error) { if l == nil { l = log.NewNopLogger() } @@ -346,7 +329,7 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) { return nil, err } - return &Discovery{ + d := &Discovery{ client: c, logger: l, role: conf.Role, @@ -355,7 +338,26 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) { selectors: mapSelector(conf.Selectors), ownNamespace: ownNamespace, attachMetadata: conf.AttachMetadata, - }, nil + eventCount: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: discovery.KubernetesMetricsNamespace, + Name: "events_total", + Help: "The number of Kubernetes events handled.", + }, + []string{"role", "event"}, + ), + } + + d.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{d.eventCount}) + + // Initialize metric vectors. + for _, role := range []string{"endpointslice", "endpoints", "node", "pod", "service", "ingress"} { + for _, evt := range []string{"add", "delete", "update"} { + d.eventCount.WithLabelValues(role, evt) + } + } + + return d, nil } func mapSelector(rawSelector []SelectorConfig) roleSelector { @@ -391,6 +393,14 @@ const resyncDisabled = 0 // Run implements the discoverer interface. 
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { d.Lock() + + err := d.metricRegisterer.RegisterMetrics() + if err != nil { + level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error()) + return + } + defer d.metricRegisterer.UnregisterMetrics() + namespaces := d.getNamespaces() switch d.role { @@ -482,6 +492,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), nodeInf, + d.eventCount, ) d.discoverers = append(d.discoverers, eps) go eps.endpointSliceInf.Run(ctx.Done()) @@ -541,6 +552,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), nodeInf, + d.eventCount, ) d.discoverers = append(d.discoverers, eps) go eps.endpointsInf.Run(ctx.Done()) @@ -572,6 +584,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { log.With(d.logger, "role", "pod"), d.newPodsByNodeInformer(plw), nodeInformer, + d.eventCount, ) d.discoverers = append(d.discoverers, pod) go pod.podInf.Run(ctx.Done()) @@ -594,6 +607,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { svc := NewService( log.With(d.logger, "role", "service"), cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), + d.eventCount, ) d.discoverers = append(d.discoverers, svc) go svc.informer.Run(ctx.Done()) @@ -651,13 +665,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { ingress := NewIngress( log.With(d.logger, "role", "ingress"), informer, + d.eventCount, ) d.discoverers = append(d.discoverers, ingress) go ingress.informer.Run(ctx.Done()) } case RoleNode: nodeInformer := d.newNodeInformer(ctx) - node := NewNode(log.With(d.logger, "role", "node"), nodeInformer) + node 
:= NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.eventCount) d.discoverers = append(d.discoverers, node) go node.informer.Run(ctx.Done()) default: diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go index d0ed4c6ca..71c937e94 100644 --- a/discovery/kubernetes/kubernetes_test.go +++ b/discovery/kubernetes/kubernetes_test.go @@ -29,6 +29,8 @@ import ( "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/cache" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/testutil" @@ -49,13 +51,25 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery) fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: k8sVer} - return &Discovery{ + d := &Discovery{ client: clientset, logger: log.NewNopLogger(), role: role, namespaceDiscovery: &nsDiscovery, ownNamespace: "own-ns", - }, clientset + eventCount: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: discovery.KubernetesMetricsNamespace, + Name: "events_total", + Help: "The number of Kubernetes events handled.", + }, + []string{"role", "event"}, + ), + } + + d.metricRegisterer = discovery.NewMetricRegisterer(prometheus.NewRegistry(), []prometheus.Collector{d.eventCount}) + + return d, clientset } // makeDiscoveryWithMetadata creates a kubernetes.Discovery instance with the specified metadata config. 
diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index b188a3ceb..f1e37e6fa 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -22,6 +22,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" @@ -35,12 +36,6 @@ const ( NodeLegacyHostIP = "LegacyHostIP" ) -var ( - nodeAddCount = eventCount.WithLabelValues("node", "add") - nodeUpdateCount = eventCount.WithLabelValues("node", "update") - nodeDeleteCount = eventCount.WithLabelValues("node", "delete") -) - // Node discovers Kubernetes nodes. type Node struct { logger log.Logger @@ -50,22 +45,22 @@ type Node struct { } // NewNode returns a new node discovery. -func NewNode(l log.Logger, inf cache.SharedInformer) *Node { +func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { if l == nil { l = log.NewNopLogger() } n := &Node{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("node")} _, err := n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - nodeAddCount.Inc() + eventCount.WithLabelValues("node", "add").Inc() n.enqueue(o) }, DeleteFunc: func(o interface{}) { - nodeDeleteCount.Inc() + eventCount.WithLabelValues("node", "delete").Inc() n.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - nodeUpdateCount.Inc() + eventCount.WithLabelValues("node", "update").Inc() n.enqueue(o) }, }) diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 88da7bba6..cc809b29c 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -23,6 +23,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -34,12 +35,6 @@ 
import ( const nodeIndex = "node" -var ( - podAddCount = eventCount.WithLabelValues("pod", "add") - podUpdateCount = eventCount.WithLabelValues("pod", "update") - podDeleteCount = eventCount.WithLabelValues("pod", "delete") -) - // Pod discovers new pod targets. type Pod struct { podInf cache.SharedIndexInformer @@ -51,7 +46,7 @@ type Pod struct { } // NewPod creates a new pod discovery. -func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer) *Pod { +func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { if l == nil { l = log.NewNopLogger() } @@ -66,15 +61,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo } _, err := p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - podAddCount.Inc() + eventCount.WithLabelValues("pod", "add").Inc() p.enqueue(o) }, DeleteFunc: func(o interface{}) { - podDeleteCount.Inc() + eventCount.WithLabelValues("pod", "delete").Inc() p.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - podUpdateCount.Inc() + eventCount.WithLabelValues("pod", "update").Inc() p.enqueue(o) }, }) diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go index 9fcc6644c..a680ebee8 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -22,6 +22,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" @@ -30,12 +31,6 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" ) -var ( - svcAddCount = eventCount.WithLabelValues("service", "add") - svcUpdateCount = eventCount.WithLabelValues("service", "update") - svcDeleteCount = eventCount.WithLabelValues("service", "delete") -) - // Service implements discovery of Kubernetes services. 
type Service struct { logger log.Logger @@ -45,22 +40,22 @@ type Service struct { } // NewService returns a new service discovery. -func NewService(l log.Logger, inf cache.SharedInformer) *Service { +func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { if l == nil { l = log.NewNopLogger() } s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")} _, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - svcAddCount.Inc() + eventCount.WithLabelValues("service", "add").Inc() s.enqueue(o) }, DeleteFunc: func(o interface{}) { - svcDeleteCount.Inc() + eventCount.WithLabelValues("service", "delete").Inc() s.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - svcUpdateCount.Inc() + eventCount.WithLabelValues("service", "update").Inc() s.enqueue(o) }, }) diff --git a/discovery/legacymanager/manager.go b/discovery/legacymanager/manager.go index 74c544e72..101012daf 100644 --- a/discovery/legacymanager/manager.go +++ b/discovery/legacymanager/manager.go @@ -28,48 +28,6 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" ) -var ( - failedConfigs = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "prometheus_sd_failed_configs", - Help: "Current number of service discovery configurations that failed to load.", - }, - []string{"name"}, - ) - discoveredTargets = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "prometheus_sd_discovered_targets", - Help: "Current number of discovered targets.", - }, - []string{"name", "config"}, - ) - receivedUpdates = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_received_updates_total", - Help: "Total number of update events received from the SD providers.", - }, - []string{"name"}, - ) - delayedUpdates = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_updates_delayed_total", - Help: "Total number of update events that 
couldn't be sent immediately.", - }, - []string{"name"}, - ) - sentUpdates = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_updates_total", - Help: "Total number of update events sent to the SD consumers.", - }, - []string{"name"}, - ) -) - -func RegisterMetrics() { - prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates) -} - type poolKey struct { setName string provider string @@ -84,7 +42,7 @@ type provider struct { } // NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, metrics *discovery.Metrics, options ...func(*Manager)) *Manager { if logger == nil { logger = log.NewNopLogger() } @@ -96,6 +54,8 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager ctx: ctx, updatert: 5 * time.Second, triggerSend: make(chan struct{}, 1), + registerer: registerer, + metrics: metrics, } for _, option := range options { option(mgr) @@ -135,6 +95,11 @@ type Manager struct { // The triggerSend channel signals to the manager that new updates have been received from providers. triggerSend chan struct{} + + // A registerer for all service discovery metrics. + registerer prometheus.Registerer + + metrics *discovery.Metrics } // Run starts the background processing. 
@@ -157,7 +122,7 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error { for pk := range m.targets { if _, ok := cfg[pk.setName]; !ok { - discoveredTargets.DeleteLabelValues(m.name, pk.setName) + m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName) } } m.cancelDiscoverers() @@ -168,9 +133,9 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error { failedCount := 0 for name, scfg := range cfg { failedCount += m.registerProviders(scfg, name) - discoveredTargets.WithLabelValues(m.name, name).Set(0) + m.metrics.DiscoveredTargets.WithLabelValues(m.name, name).Set(0) } - failedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) + m.metrics.FailedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) for _, prov := range m.providers { m.startProvider(m.ctx, prov) @@ -207,7 +172,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ case <-ctx.Done(): return case tgs, ok := <-updates: - receivedUpdates.WithLabelValues(m.name).Inc() + m.metrics.ReceivedUpdates.WithLabelValues(m.name).Inc() if !ok { level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) return @@ -236,11 +201,11 @@ func (m *Manager) sender() { case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker. 
select { case <-m.triggerSend: - sentUpdates.WithLabelValues(m.name).Inc() + m.metrics.SentUpdates.WithLabelValues(m.name).Inc() select { case m.syncCh <- m.allGroups(): default: - delayedUpdates.WithLabelValues(m.name).Inc() + m.metrics.DelayedUpdates.WithLabelValues(m.name).Inc() level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: @@ -288,7 +253,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group { } } for setName, v := range n { - discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v)) + m.metrics.DiscoveredTargets.WithLabelValues(m.name, setName).Set(float64(v)) } return tSets } @@ -309,7 +274,8 @@ func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int } typ := cfg.Name() d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{ - Logger: log.With(m.logger, "discovery", typ, "config", setName), + Logger: log.With(m.logger, "discovery", typ, "config", setName), + Registerer: m.registerer, }) if err != nil { level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go index 13b84e6e3..dccb687c2 100644 --- a/discovery/legacymanager/manager_test.go +++ b/discovery/legacymanager/manager_test.go @@ -22,6 +22,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -35,6 +36,12 @@ func TestMain(m *testing.M) { testutil.TolerantVerifyLeak(m) } +func newTestDiscoveryMetrics(t *testing.T) *discovery.Metrics { + metrics, err := discovery.NewMetrics(prometheus.NewRegistry()) + require.NoError(t, err) + return metrics +} + // TestTargetUpdatesOrder checks that the target updates are received in the 
expected order. func TestTargetUpdatesOrder(t *testing.T) { // The order by which the updates are send is determined by the interval passed to the mock discovery adapter @@ -664,7 +671,7 @@ func TestTargetUpdatesOrder(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond var totalUpdatesCount int @@ -746,7 +753,7 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -774,7 +781,7 @@ func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { func TestDiscovererConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -798,7 +805,7 @@ func TestDiscovererConfigs(t *testing.T) { func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -837,7 +844,7 @@ func 
TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, nil) + discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -868,7 +875,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -893,7 +900,7 @@ func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Disco func TestGaugeFailedConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -907,7 +914,7 @@ func TestGaugeFailedConfigs(t *testing.T) { discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() - failedCount := client_testutil.ToFloat64(failedConfigs) + failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) if failedCount != 3 { t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) } @@ -918,7 +925,7 @@ func TestGaugeFailedConfigs(t *testing.T) { discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() - failedCount = client_testutil.ToFloat64(failedConfigs) + failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) if failedCount != 0 { t.Fatalf("Expected to get no failed config, got: %v", failedCount) } @@ -1049,7 
+1056,7 @@ func TestCoordinationWithReceiver(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - mgr := NewManager(ctx, nil) + mgr := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) mgr.updatert = updateDelay go mgr.Run() diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index a5e047b94..38a5cdad4 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -67,24 +67,15 @@ const ( ) // DefaultSDConfig is the default Linode SD configuration. -var ( - DefaultSDConfig = SDConfig{ - TagSeparator: ",", - Port: 80, - RefreshInterval: model.Duration(60 * time.Second), - HTTPClientConfig: config.DefaultHTTPClientConfig, - } - - failuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_linode_failures_total", - Help: "Number of Linode service discovery refresh failures.", - }) -) +var DefaultSDConfig = SDConfig{ + TagSeparator: ",", + Port: 80, + RefreshInterval: model.Duration(60 * time.Second), + HTTPClientConfig: config.DefaultHTTPClientConfig, +} func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(failuresCount) } // SDConfig is the configuration for Linode based service discovery. @@ -101,7 +92,7 @@ func (*SDConfig) Name() string { return "linode" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -131,16 +122,22 @@ type Discovery struct { pollCount int lastResults []*targetgroup.Group eventPollingEnabled bool + failuresCount prometheus.Counter } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { d := &Discovery{ port: conf.Port, tagSeparator: conf.TagSeparator, pollCount: 0, lastRefreshTimestamp: time.Now().UTC(), eventPollingEnabled: true, + failuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_linode_failures_total", + Help: "Number of Linode service discovery refresh failures.", + }), } rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "linode_sd") @@ -158,10 +155,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { d.client = &client d.Discovery = refresh.NewDiscovery( - logger, - "linode", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "linode", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + Metrics: []prometheus.Collector{d.failuresCount}, + }, ) return d, nil } @@ -222,14 +223,14 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro // Gather all linode instances. instances, err := d.client.ListInstances(ctx, &linodego.ListOptions{PageSize: 500}) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, err } // Gather detailed IP address info for all IPs on all linode instances. 
detailedIPs, err := d.client.ListIPAddresses(ctx, &linodego.ListOptions{PageSize: 500}) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, err } diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go index db4ee9bf8..536b12090 100644 --- a/discovery/linode/linode_test.go +++ b/discovery/linode/linode_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -52,7 +53,7 @@ func TestLinodeSDRefresh(t *testing.T) { Credentials: tokenID, Type: "Bearer", } - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/manager.go b/discovery/manager.go index 86439d2c9..6afa1b622 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -28,48 +28,6 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" ) -var ( - failedConfigs = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "prometheus_sd_failed_configs", - Help: "Current number of service discovery configurations that failed to load.", - }, - []string{"name"}, - ) - discoveredTargets = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "prometheus_sd_discovered_targets", - Help: "Current number of discovered targets.", - }, - []string{"name", "config"}, - ) - receivedUpdates = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_received_updates_total", - Help: "Total number of update events received from the SD providers.", - }, - []string{"name"}, - ) - delayedUpdates = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_updates_delayed_total", - Help: "Total number of update events that couldn't be sent immediately.", - }, - 
[]string{"name"}, - ) - sentUpdates = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_updates_total", - Help: "Total number of update events sent to the SD consumers.", - }, - []string{"name"}, - ) -) - -func RegisterMetrics() { - prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates) -} - type poolKey struct { setName string provider string @@ -107,7 +65,7 @@ func (p *Provider) Config() interface{} { } // NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, metrics *Metrics, options ...func(*Manager)) *Manager { if logger == nil { logger = log.NewNopLogger() } @@ -118,6 +76,8 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager ctx: ctx, updatert: 5 * time.Second, triggerSend: make(chan struct{}, 1), + registerer: registerer, + metrics: metrics, } for _, option := range options { option(mgr) @@ -170,6 +130,11 @@ type Manager struct { // lastProvider counts providers registered during Manager's lifetime. lastProvider uint + + // A registerer for all service discovery metrics. + registerer prometheus.Registerer + + metrics *Metrics } // Providers returns the currently configured SD providers. @@ -200,7 +165,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { for name, scfg := range cfg { failedCount += m.registerProviders(scfg, name) } - failedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) + m.metrics.FailedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) var ( wg sync.WaitGroup @@ -230,13 +195,13 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { // Remove obsolete subs' targets. 
if _, ok := prov.newSubs[s]; !ok { delete(m.targets, poolKey{s, prov.name}) - discoveredTargets.DeleteLabelValues(m.name, s) + m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, s) } } // Set metrics and targets for new subs. for s := range prov.newSubs { if _, ok := prov.subs[s]; !ok { - discoveredTargets.WithLabelValues(m.name, s).Set(0) + m.metrics.DiscoveredTargets.WithLabelValues(m.name, s).Set(0) } if l := len(refTargets); l > 0 { m.targets[poolKey{s, prov.name}] = make(map[string]*targetgroup.Group, l) @@ -316,7 +281,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ case <-ctx.Done(): return case tgs, ok := <-updates: - receivedUpdates.WithLabelValues(m.name).Inc() + m.metrics.ReceivedUpdates.WithLabelValues(m.name).Inc() if !ok { level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) // Wait for provider cancellation to ensure targets are cleaned up when expected. @@ -349,11 +314,11 @@ func (m *Manager) sender() { case <-ticker.C: // Some discoverers send updates too often, so we throttle these with the ticker. 
select { case <-m.triggerSend: - sentUpdates.WithLabelValues(m.name).Inc() + m.metrics.SentUpdates.WithLabelValues(m.name).Inc() select { case m.syncCh <- m.allGroups(): default: - delayedUpdates.WithLabelValues(m.name).Inc() + m.metrics.DelayedUpdates.WithLabelValues(m.name).Inc() level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: @@ -405,7 +370,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group { } } for setName, v := range n { - discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v)) + m.metrics.DiscoveredTargets.WithLabelValues(m.name, setName).Set(float64(v)) } return tSets } @@ -428,6 +393,7 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int { d, err := cfg.NewDiscoverer(DiscovererOptions{ Logger: log.With(m.logger, "discovery", typ, "config", setName), HTTPClientOptions: m.httpOpts, + Registerer: m.registerer, }) if err != nil { level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 796b01458..3cb277343 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -35,6 +36,12 @@ func TestMain(m *testing.M) { testutil.TolerantVerifyLeak(m) } +func newTestDiscoveryMetrics(t *testing.T) *Metrics { + metrics, err := NewMetrics(prometheus.NewRegistry()) + require.NoError(t, err) + return metrics +} + // TestTargetUpdatesOrder checks that the target updates are received in the expected order. 
func TestTargetUpdatesOrder(t *testing.T) { // The order by which the updates are send is determined by the interval passed to the mock discovery adapter @@ -664,7 +671,7 @@ func TestTargetUpdatesOrder(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond var totalUpdatesCount int @@ -778,7 +785,7 @@ func pk(provider, setName string, n int) poolKey { func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -810,7 +817,7 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -845,7 +852,7 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -883,7 +890,7 @@ 
func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -944,7 +951,7 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -983,7 +990,7 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { func TestDiscovererConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1015,7 +1022,7 @@ func TestDiscovererConfigs(t *testing.T) { func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1062,7 +1069,7 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { ctx, cancel := 
context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, nil) + discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1098,7 +1105,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1144,7 +1151,7 @@ func (s lockStaticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup. func TestGaugeFailedConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1158,7 +1165,7 @@ func TestGaugeFailedConfigs(t *testing.T) { discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() - failedCount := client_testutil.ToFloat64(failedConfigs) + failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) if failedCount != 3 { t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) } @@ -1169,7 +1176,7 @@ func TestGaugeFailedConfigs(t *testing.T) { discoveryManager.ApplyConfig(c) <-discoveryManager.SyncCh() - failedCount = client_testutil.ToFloat64(failedConfigs) + failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) if failedCount != 0 { t.Fatalf("Expected to get no failed config, got: %v", failedCount) } @@ -1300,7 +1307,7 @@ func TestCoordinationWithReceiver(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 
5*time.Second) defer cancel() - mgr := NewManager(ctx, nil) + mgr := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) mgr.updatert = updateDelay go mgr.Run() @@ -1392,10 +1399,10 @@ func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) { // TestTargetSetTargetGroupsUpdateDuringApplyConfig is used to detect races when // ApplyConfig happens at the same time as targets update. -func TestTargetSetTargetGroupsUpdateDuringApplyConfig(*testing.T) { +func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger()) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 27947fa8a..a6a6252fd 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -28,6 +28,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -83,7 +84,7 @@ func (*SDConfig) Name() string { return "marathon" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(*c, opts.Logger) + return NewDiscovery(*c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -132,7 +133,7 @@ type Discovery struct { } // NewDiscovery returns a new Marathon Discovery. 
-func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd") if err != nil { return nil, err @@ -154,10 +155,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { appsClient: fetchApps, } d.Discovery = refresh.NewDiscovery( - logger, - "marathon", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "marathon", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/marathon/marathon_test.go b/discovery/marathon/marathon_test.go index 258e3c8dd..a1ddce930 100644 --- a/discovery/marathon/marathon_test.go +++ b/discovery/marathon/marathon_test.go @@ -21,6 +21,7 @@ import ( "net/http/httptest" "testing" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -36,7 +37,7 @@ func testConfig() SDConfig { } func testUpdateServices(client appListClient) ([]*targetgroup.Group, error) { - md, err := NewDiscovery(testConfig(), nil) + md, err := NewDiscovery(testConfig(), nil, prometheus.NewRegistry()) if err != nil { return nil, err } @@ -129,7 +130,7 @@ func TestMarathonSDSendGroup(t *testing.T) { } func TestMarathonSDRemoveApp(t *testing.T) { - md, err := NewDiscovery(testConfig(), nil) + md, err := NewDiscovery(testConfig(), nil, prometheus.NewRegistry()) if err != nil { t.Fatalf("%s", err) } diff --git a/discovery/metrics.go b/discovery/metrics.go new file mode 100644 index 000000000..04f54b542 --- /dev/null +++ b/discovery/metrics.go @@ -0,0 +1,103 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package discovery + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + clientGoRequestMetrics = &clientGoRequestMetricAdapter{} + clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{} +) + +func init() { + clientGoRequestMetrics.RegisterWithK8sGoClient() + clientGoWorkloadMetrics.RegisterWithK8sGoClient() +} + +// Metrics to be used with a discovery manager. +type Metrics struct { + FailedConfigs *prometheus.GaugeVec + DiscoveredTargets *prometheus.GaugeVec + ReceivedUpdates *prometheus.CounterVec + DelayedUpdates *prometheus.CounterVec + SentUpdates *prometheus.CounterVec +} + +func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) { + m := &Metrics{} + + m.FailedConfigs = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_sd_failed_configs", + Help: "Current number of service discovery configurations that failed to load.", + }, + []string{"name"}, + ) + + m.DiscoveredTargets = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_sd_discovered_targets", + Help: "Current number of discovered targets.", + }, + []string{"name", "config"}, + ) + + m.ReceivedUpdates = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "prometheus_sd_received_updates_total", + Help: "Total number of update events received from the SD providers.", + }, + []string{"name"}, + ) + + m.DelayedUpdates = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "prometheus_sd_updates_delayed_total", + Help: "Total number of update events that couldn't be sent immediately.", + 
}, + []string{"name"}, + ) + + m.SentUpdates = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "prometheus_sd_updates_total", + Help: "Total number of update events sent to the SD consumers.", + }, + []string{"name"}, + ) + + metrics := append( + []prometheus.Collector{ + m.FailedConfigs, + m.DiscoveredTargets, + m.ReceivedUpdates, + m.DelayedUpdates, + m.SentUpdates, + }, + clientGoMetrics()..., + ) + + for _, collector := range metrics { + err := registerer.Register(collector) + if err != nil { + return nil, fmt.Errorf("failed to register discovery manager metrics: %w", err) + } + } + + return m, nil +} diff --git a/discovery/kubernetes/client_metrics.go b/discovery/metrics_k8s_client.go similarity index 85% rename from discovery/kubernetes/client_metrics.go rename to discovery/metrics_k8s_client.go index 7b097b14a..4f161bc3e 100644 --- a/discovery/kubernetes/client_metrics.go +++ b/discovery/metrics_k8s_client.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package kubernetes +package discovery import ( "context" @@ -23,13 +23,22 @@ import ( "k8s.io/client-go/util/workqueue" ) -const workqueueMetricsNamespace = metricsNamespace + "_workqueue" +// This file registers metrics used by the Kubernetes Go client (k8s.io/client-go). +// Unfortunately, k8s.io/client-go metrics are global. +// If we instantiate multiple k8s SD instances, their k8s/client-go metrics will overlap. +// To prevent us from displaying misleading metrics, we register k8s.io/client-go metrics +// outside of the Kubernetes SD. + +const ( + KubernetesMetricsNamespace = "prometheus_sd_kubernetes" + workqueueMetricsNamespace = KubernetesMetricsNamespace + "_workqueue" +) var ( // Metrics for client-go's HTTP requests. 
clientGoRequestResultMetricVec = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: metricsNamespace, + Namespace: KubernetesMetricsNamespace, Name: "http_request_total", Help: "Total number of HTTP requests to the Kubernetes API by status code.", }, @@ -37,7 +46,7 @@ var ( ) clientGoRequestLatencyMetricVec = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Namespace: metricsNamespace, + Namespace: KubernetesMetricsNamespace, Name: "http_request_duration_seconds", Help: "Summary of latencies for HTTP requests to the Kubernetes API by endpoint.", Objectives: map[float64]float64{}, @@ -109,17 +118,28 @@ func (noopMetric) Set(float64) {} // Definition of client-go metrics adapters for HTTP requests observation. type clientGoRequestMetricAdapter struct{} -func (f *clientGoRequestMetricAdapter) Register(registerer prometheus.Registerer) { +// Returns all of the Prometheus metrics derived from k8s.io/client-go. +// This may be used to register and unregister the metrics. +func clientGoMetrics() []prometheus.Collector { + return []prometheus.Collector{ + clientGoRequestResultMetricVec, + clientGoRequestLatencyMetricVec, + clientGoWorkqueueDepthMetricVec, + clientGoWorkqueueAddsMetricVec, + clientGoWorkqueueLatencyMetricVec, + clientGoWorkqueueUnfinishedWorkSecondsMetricVec, + clientGoWorkqueueLongestRunningProcessorMetricVec, + clientGoWorkqueueWorkDurationMetricVec, + } +} + +func (f *clientGoRequestMetricAdapter) RegisterWithK8sGoClient() { metrics.Register( metrics.RegisterOpts{ RequestLatency: f, RequestResult: f, }, ) - registerer.MustRegister( - clientGoRequestResultMetricVec, - clientGoRequestLatencyMetricVec, - ) } func (clientGoRequestMetricAdapter) Increment(_ context.Context, code, _, _ string) { @@ -133,16 +153,8 @@ func (clientGoRequestMetricAdapter) Observe(_ context.Context, _ string, u url.U // Definition of client-go workqueue metrics provider definition.
type clientGoWorkqueueMetricsProvider struct{} -func (f *clientGoWorkqueueMetricsProvider) Register(registerer prometheus.Registerer) { +func (f *clientGoWorkqueueMetricsProvider) RegisterWithK8sGoClient() { workqueue.SetProvider(f) - registerer.MustRegister( - clientGoWorkqueueDepthMetricVec, - clientGoWorkqueueAddsMetricVec, - clientGoWorkqueueLatencyMetricVec, - clientGoWorkqueueWorkDurationMetricVec, - clientGoWorkqueueUnfinishedWorkSecondsMetricVec, - clientGoWorkqueueLongestRunningProcessorMetricVec, - ) } func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric { diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 162833ece..a13bb8704 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -26,6 +26,7 @@ import ( "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -80,7 +81,7 @@ func (*DockerSDConfig) Name() string { return "docker" } // NewDiscoverer returns a Discoverer for the Config. func (c *DockerSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDockerDiscovery(c, opts.Logger) + return NewDockerDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -114,7 +115,7 @@ type DockerDiscovery struct { } // NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets. 
-func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger) (*DockerDiscovery, error) { +func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, reg prometheus.Registerer) (*DockerDiscovery, error) { var err error d := &DockerDiscovery{ @@ -165,10 +166,13 @@ func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger) (*DockerDiscove } d.Discovery = refresh.NewDiscovery( - logger, - "docker", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "docker", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go index f80c53b61..1a87ad2a1 100644 --- a/discovery/moby/docker_test.go +++ b/discovery/moby/docker_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -37,7 +38,7 @@ host: %s var cfg DockerSDConfig require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg)) - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go index 371f9d5ed..bd87fea5a 100644 --- a/discovery/moby/dockerswarm.go +++ b/discovery/moby/dockerswarm.go @@ -23,6 +23,7 @@ import ( "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -74,7 +75,7 @@ func (*DockerSwarmSDConfig) Name() string { return "dockerswarm" } // NewDiscoverer returns a Discoverer for the Config. 
func (c *DockerSwarmSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -117,7 +118,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { var err error d := &Discovery{ @@ -168,10 +169,13 @@ func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger) (*Discovery, err } d.Discovery = refresh.NewDiscovery( - logger, - "dockerswarm", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "dockerswarm", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/moby/nodes_test.go b/discovery/moby/nodes_test.go index 2bc383374..512ff7049 100644 --- a/discovery/moby/nodes_test.go +++ b/discovery/moby/nodes_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -38,7 +39,7 @@ host: %s var cfg DockerSwarmSDConfig require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg)) - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/services_test.go b/discovery/moby/services_test.go index 81c8d31f1..816586dd7 100644 --- a/discovery/moby/services_test.go +++ b/discovery/moby/services_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" 
"github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -38,7 +39,7 @@ host: %s var cfg DockerSwarmSDConfig require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg)) - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() @@ -332,7 +333,7 @@ filters: var cfg DockerSwarmSDConfig require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg)) - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/tasks_test.go b/discovery/moby/tasks_test.go index eed5f2924..764fda343 100644 --- a/discovery/moby/tasks_test.go +++ b/discovery/moby/tasks_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -38,7 +39,7 @@ host: %s var cfg DockerSwarmSDConfig require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg)) - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go index 7013f0737..3fdcf714e 100644 --- a/discovery/nomad/nomad.go +++ b/discovery/nomad/nomad.go @@ -49,27 +49,18 @@ const ( ) // DefaultSDConfig is the default nomad SD configuration. 
-var ( - DefaultSDConfig = SDConfig{ - AllowStale: true, - HTTPClientConfig: config.DefaultHTTPClientConfig, - Namespace: "default", - RefreshInterval: model.Duration(60 * time.Second), - Region: "global", - Server: "http://localhost:4646", - TagSeparator: ",", - } - - failuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prometheus_sd_nomad_failures_total", - Help: "Number of nomad service discovery refresh failures.", - }) -) +var DefaultSDConfig = SDConfig{ + AllowStale: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, + Namespace: "default", + RefreshInterval: model.Duration(60 * time.Second), + Region: "global", + Server: "http://localhost:4646", + TagSeparator: ",", +} func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(failuresCount) } // SDConfig is the configuration for nomad based service discovery. @@ -88,7 +79,7 @@ func (*SDConfig) Name() string { return "nomad" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -121,10 +112,11 @@ type Discovery struct { region string server string tagSeparator string + failuresCount prometheus.Counter } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { d := &Discovery{ allowStale: conf.AllowStale, namespace: conf.Namespace, @@ -132,6 +124,11 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { region: conf.Region, server: conf.Server, tagSeparator: conf.TagSeparator, + failuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_nomad_failures_total", + Help: "Number of nomad service discovery refresh failures.", + }), } HTTPClient, err := config.NewClientFromConfig(conf.HTTPClientConfig, "nomad_sd") @@ -153,10 +150,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { d.client = client d.Discovery = refresh.NewDiscovery( - logger, - "nomad", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "nomad", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + Metrics: []prometheus.Collector{d.failuresCount}, + }, ) return d, nil } @@ -167,7 +168,7 @@ func (d *Discovery) refresh(context.Context) ([]*targetgroup.Group, error) { } stubs, _, err := d.client.Services().List(opts) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, err } @@ -179,7 +180,7 @@ func (d *Discovery) refresh(context.Context) ([]*targetgroup.Group, error) { for _, service := range stub.Services { instances, _, err := d.client.Services().Get(service.ServiceName, opts) if err != nil { - failuresCount.Inc() + d.failuresCount.Inc() return nil, fmt.Errorf("failed to fetch services: %w", err) } diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go index f9490f476..ca67a877e 100644 --- a/discovery/nomad/nomad_test.go +++ b/discovery/nomad/nomad_test.go @@ -22,6 +22,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" 
"github.com/prometheus/common/model" "github.com/stretchr/testify/require" ) @@ -127,7 +128,7 @@ func TestConfiguredService(t *testing.T) { conf := &SDConfig{ Server: "http://localhost:4646", } - _, err := NewDiscovery(conf, nil) + _, err := NewDiscovery(conf, nil, prometheus.NewRegistry()) require.NoError(t, err) } @@ -141,7 +142,7 @@ func TestNomadSDRefresh(t *testing.T) { cfg := DefaultSDConfig cfg.Server = endpoint.String() - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) tgs, err := d.refresh(context.Background()) diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go index 92c83a4cf..9544a7c0f 100644 --- a/discovery/openstack/openstack.go +++ b/discovery/openstack/openstack.go @@ -24,6 +24,7 @@ import ( "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/mwitkow/go-conntrack" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -70,7 +71,7 @@ func (*SDConfig) Name() string { return "openstack" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -134,16 +135,19 @@ type refresher interface { } // NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, l log.Logger) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, l log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) { r, err := newRefresher(conf, l) if err != nil { return nil, err } return refresh.NewDiscovery( - l, - "openstack", - time.Duration(conf.RefreshInterval), - r.refresh, + refresh.Options{ + Logger: l, + Mech: "openstack", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: r.refresh, + Registry: reg, + }, ), nil } diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go index 535ade4df..eca284a85 100644 --- a/discovery/ovhcloud/ovhcloud.go +++ b/discovery/ovhcloud/ovhcloud.go @@ -22,6 +22,7 @@ import ( "github.com/go-kit/log" "github.com/ovh/go-ovh/ovh" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -93,7 +94,7 @@ func createClient(config *SDConfig) (*ovh.Client, error) { // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, options.Logger) + return NewDiscovery(c, options.Logger, options.Registerer) } func init() { @@ -140,16 +141,19 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { } // NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) { r, err := newRefresher(conf, logger) if err != nil { return nil, err } return refresh.NewDiscovery( - logger, - "ovhcloud", - time.Duration(conf.RefreshInterval), - r.refresh, + refresh.Options{ + Logger: logger, + Mech: "ovhcloud", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: r.refresh, + Registry: reg, + }, ), nil } diff --git a/discovery/ovhcloud/ovhcloud_test.go b/discovery/ovhcloud/ovhcloud_test.go index efcd95bb0..9bd9ea954 100644 --- a/discovery/ovhcloud/ovhcloud_test.go +++ b/discovery/ovhcloud/ovhcloud_test.go @@ -18,6 +18,7 @@ import ( "fmt" "testing" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -122,7 +123,8 @@ func TestDiscoverer(t *testing.T) { conf, _ := getMockConf("vps") logger := testutil.NewLogger(t) _, err := conf.NewDiscoverer(discovery.DiscovererOptions{ - Logger: logger, + Logger: logger, + Registerer: prometheus.NewRegistry(), }) require.NoError(t, err) diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 9484a0aa6..616f2c61e 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -29,6 +29,7 @@ import ( "github.com/go-kit/log" "github.com/grafana/regexp" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -83,7 +84,7 @@ func (*SDConfig) Name() string { return "puppetdb" } // NewDiscoverer returns a Discoverer for the Config. 
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -130,7 +131,7 @@ type Discovery struct { } // NewDiscovery returns a new PuppetDB discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { if logger == nil { logger = log.NewNopLogger() } @@ -156,10 +157,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { } d.Discovery = refresh.NewDiscovery( - logger, - "http", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "http", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/puppetdb/puppetdb_test.go b/discovery/puppetdb/puppetdb_test.go index 236efec16..edd9b9d04 100644 --- a/discovery/puppetdb/puppetdb_test.go +++ b/discovery/puppetdb/puppetdb_test.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -62,7 +63,7 @@ func TestPuppetSlashInURL(t *testing.T) { Port: 80, RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) require.Equal(t, apiURL, d.url) } @@ -79,7 +80,7 @@ func TestPuppetDBRefresh(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() @@ 
-120,7 +121,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() @@ -172,7 +173,7 @@ func TestPuppetDBInvalidCode(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() @@ -193,7 +194,7 @@ func TestPuppetDBInvalidFormat(t *testing.T) { RefreshInterval: model.Duration(30 * time.Second), } - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go index 919567a53..0b0e5a921 100644 --- a/discovery/refresh/refresh.go +++ b/discovery/refresh/refresh.go @@ -22,29 +22,17 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) -var ( - failuresCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "prometheus_sd_refresh_failures_total", - Help: "Number of refresh failures for the given SD mechanism.", - }, - []string{"mechanism"}, - ) - duration = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Name: "prometheus_sd_refresh_duration_seconds", - Help: "The duration of a refresh in seconds for the given SD mechanism.", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }, - []string{"mechanism"}, - ) -) - -func init() { - prometheus.MustRegister(duration, failuresCount) +type Options struct { + Logger log.Logger + Mech string + Interval time.Duration + RefreshF func(ctx 
context.Context) ([]*targetgroup.Group, error) + Registry prometheus.Registerer + Metrics []prometheus.Collector } // Discovery implements the Discoverer interface. @@ -54,25 +42,62 @@ type Discovery struct { refreshf func(ctx context.Context) ([]*targetgroup.Group, error) failures prometheus.Counter - duration prometheus.Observer + duration prometheus.Summary + + metricRegisterer discovery.MetricRegisterer } // NewDiscovery returns a Discoverer function that calls a refresh() function at every interval. -func NewDiscovery(l log.Logger, mech string, interval time.Duration, refreshf func(ctx context.Context) ([]*targetgroup.Group, error)) *Discovery { - if l == nil { - l = log.NewNopLogger() +func NewDiscovery(opts Options) *Discovery { + var logger log.Logger + if opts.Logger == nil { + logger = log.NewNopLogger() + } else { + logger = opts.Logger } - return &Discovery{ - logger: l, - interval: interval, - refreshf: refreshf, - failures: failuresCount.WithLabelValues(mech), - duration: duration.WithLabelValues(mech), + + d := Discovery{ + logger: logger, + interval: opts.Interval, + refreshf: opts.RefreshF, + failures: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_refresh_failures_total", + Help: "Number of refresh failures for the given SD mechanism.", + ConstLabels: prometheus.Labels{ + "mechanism": opts.Mech, + }, + }), + duration: prometheus.NewSummary( + prometheus.SummaryOpts{ + Name: "prometheus_sd_refresh_duration_seconds", + Help: "The duration of a refresh in seconds for the given SD mechanism.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + ConstLabels: prometheus.Labels{ + "mechanism": opts.Mech, + }, + }), } + + metrics := []prometheus.Collector{d.failures, d.duration} + if opts.Metrics != nil { + metrics = append(metrics, opts.Metrics...) + } + + d.metricRegisterer = discovery.NewMetricRegisterer(opts.Registry, metrics) + + return &d } // Run implements the Discoverer interface. 
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { + err := d.metricRegisterer.RegisterMetrics() + if err != nil { + level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error()) + return + } + defer d.metricRegisterer.UnregisterMetrics() + // Get an initial set right away. tgs, err := d.refresh(ctx) if err != nil { diff --git a/discovery/refresh/refresh_test.go b/discovery/refresh/refresh_test.go index 6decef19f..12e7ab3be 100644 --- a/discovery/refresh/refresh_test.go +++ b/discovery/refresh/refresh_test.go @@ -19,6 +19,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/goleak" @@ -65,7 +66,15 @@ func TestRefresh(t *testing.T) { return nil, fmt.Errorf("some error") } interval := time.Millisecond - d := NewDiscovery(nil, "test", interval, refresh) + d := NewDiscovery( + Options{ + Logger: nil, + Mech: "test", + Interval: interval, + RefreshF: refresh, + Registry: prometheus.NewRegistry(), + }, + ) ch := make(chan []*targetgroup.Group) ctx, cancel := context.WithCancel(context.Background()) diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go index 90091b317..86527b34e 100644 --- a/discovery/scaleway/scaleway.go +++ b/discovery/scaleway/scaleway.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/scaleway/scaleway-sdk-go/scw" @@ -160,7 +161,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { } func (c SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(&c, options.Logger) + return NewDiscovery(&c, options.Logger, options.Registerer) } // SetDirectory joins any relative file paths with dir. 
@@ -177,17 +178,20 @@ func init() { // the Discoverer interface. type Discovery struct{} -func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) { r, err := newRefresher(conf) if err != nil { return nil, err } return refresh.NewDiscovery( - logger, - "scaleway", - time.Duration(conf.RefreshInterval), - r.refresh, + refresh.Options{ + Logger: logger, + Mech: "scaleway", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: r.refresh, + Registry: reg, + }, ), nil } diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go index c83f3b34a..4839827ad 100644 --- a/discovery/triton/triton.go +++ b/discovery/triton/triton.go @@ -26,6 +26,7 @@ import ( "github.com/go-kit/log" "github.com/mwitkow/go-conntrack" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -74,7 +75,7 @@ func (*SDConfig) Name() string { return "triton" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return New(opts.Logger, c) + return New(opts.Logger, c, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -138,7 +139,7 @@ type Discovery struct { } // New returns a new Discovery which periodically refreshes its targets. 
-func New(logger log.Logger, conf *SDConfig) (*Discovery, error) { +func New(logger log.Logger, conf *SDConfig, reg prometheus.Registerer) (*Discovery, error) { tls, err := config.NewTLSConfig(&conf.TLSConfig) if err != nil { return nil, err @@ -159,10 +160,13 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) { sdConfig: conf, } d.Discovery = refresh.NewDiscovery( - logger, - "triton", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "triton", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/triton/triton_test.go b/discovery/triton/triton_test.go index 0ed9daa68..fa51a2e47 100644 --- a/discovery/triton/triton_test.go +++ b/discovery/triton/triton_test.go @@ -24,6 +24,7 @@ import ( "strings" "testing" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -79,7 +80,7 @@ var ( ) func newTritonDiscovery(c SDConfig) (*Discovery, error) { - return New(nil, &c) + return New(nil, &c, prometheus.NewRegistry()) } func TestTritonSDNew(t *testing.T) { diff --git a/discovery/util.go b/discovery/util.go new file mode 100644 index 000000000..83cc640dd --- /dev/null +++ b/discovery/util.go @@ -0,0 +1,72 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package discovery + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" +) + +// A utility to be used by implementations of discovery.Discoverer +// which need to manage the lifetime of their metrics. +type MetricRegisterer interface { + RegisterMetrics() error + UnregisterMetrics() +} + +// metricRegistererImpl is an implementation of MetricRegisterer. +type metricRegistererImpl struct { + reg prometheus.Registerer + metrics []prometheus.Collector +} + +var _ MetricRegisterer = &metricRegistererImpl{} + +// Creates an instance of a MetricRegisterer. +// Typically called inside the implementation of the NewDiscoverer() method. +func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer { + return &metricRegistererImpl{ + reg: reg, + metrics: metrics, + } +} + +// RegisterMetrics registers the metrics with a Prometheus registerer. +// If any metric fails to register, it will unregister all metrics that +// were registered so far, and return an error. +// Typically called at the start of the SD's Run() method. +func (rh *metricRegistererImpl) RegisterMetrics() error { + for _, collector := range rh.metrics { + err := rh.reg.Register(collector) + if err != nil { + // Unregister all metrics that were registered so far. + // This is so that if RegisterMetrics() gets called again, + // there will not be an error due to a duplicate registration. + rh.UnregisterMetrics() + + return fmt.Errorf("failed to register metric: %w", err) + } + } + return nil +} + +// UnregisterMetrics unregisters the metrics from the same Prometheus +// registerer which was used to register them. +// Typically called at the end of the SD's Run() method by a defer statement. 
+func (rh *metricRegistererImpl) UnregisterMetrics() { + for _, collector := range rh.metrics { + rh.reg.Unregister(collector) + } +} diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index bc33d28cb..744f3f96c 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -25,6 +25,7 @@ import ( "github.com/go-kit/log" "github.com/kolo/xmlrpc" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -115,7 +116,7 @@ func (*SDConfig) Name() string { return "uyuni" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -203,7 +204,7 @@ func getEndpointInfoForSystems( } // NewDiscovery returns a uyuni discovery for the given configuration. -func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { apiURL, err := url.Parse(conf.Server) if err != nil { return nil, err @@ -227,10 +228,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { } d.Discovery = refresh.NewDiscovery( - logger, - "uyuni", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "uyuni", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/uyuni/uyuni_test.go b/discovery/uyuni/uyuni_test.go index 9c910a3a3..fd03c88f1 100644 --- a/discovery/uyuni/uyuni_test.go +++ b/discovery/uyuni/uyuni_test.go @@ -23,6 +23,8 @@ import ( "github.com/stretchr/testify/require" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -35,7 +37,7 @@ func 
testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err Server: ts.URL, } - md, err := NewDiscovery(&conf, nil) + md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry()) if err != nil { return nil, err } @@ -108,7 +110,7 @@ func TestUyuniSDSkipLogin(t *testing.T) { Server: ts.URL, } - md, err := NewDiscovery(&conf, nil) + md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry()) if err != nil { t.Error(err) } diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go index 42881d3c1..129800048 100644 --- a/discovery/vultr/vultr.go +++ b/discovery/vultr/vultr.go @@ -23,6 +23,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -78,7 +79,7 @@ func (*SDConfig) Name() string { return "vultr" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Registerer) } // SetDirectory joins any relative file paths with dir. @@ -106,7 +107,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) { d := &Discovery{ port: conf.Port, } @@ -128,10 +129,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { } d.Discovery = refresh.NewDiscovery( - logger, - "vultr", - time.Duration(conf.RefreshInterval), - d.refresh, + refresh.Options{ + Logger: logger, + Mech: "vultr", + Interval: time.Duration(conf.RefreshInterval), + RefreshF: d.refresh, + Registry: reg, + }, ) return d, nil } diff --git a/discovery/vultr/vultr_test.go b/discovery/vultr/vultr_test.go index 0977238e0..c50b11d2d 100644 --- a/discovery/vultr/vultr_test.go +++ b/discovery/vultr/vultr_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" ) @@ -46,7 +47,7 @@ func TestVultrSDRefresh(t *testing.T) { cfg := DefaultSDConfig cfg.HTTPClientConfig.BearerToken = APIKey - d, err := NewDiscovery(&cfg, log.NewNopLogger()) + d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) endpoint, err := url.Parse(sdMock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index bc88ba554..5ac4a42a8 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -30,35 +30,12 @@ import ( "github.com/prometheus/prometheus/util/strutil" ) -var ( - // DefaultKumaSDConfig is the default Kuma MADS SD configuration. 
- DefaultKumaSDConfig = KumaSDConfig{ - HTTPClientConfig: config.DefaultHTTPClientConfig, - RefreshInterval: model.Duration(15 * time.Second), - FetchTimeout: model.Duration(2 * time.Minute), - } - - kumaFetchFailuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "sd_kuma_fetch_failures_total", - Help: "The number of Kuma MADS fetch call failures.", - }) - kumaFetchSkipUpdateCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "sd_kuma_fetch_skipped_updates_total", - Help: "The number of Kuma MADS fetch calls that result in no updates to the targets.", - }) - kumaFetchDuration = prometheus.NewSummary( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "sd_kuma_fetch_duration_seconds", - Help: "The duration of a Kuma MADS fetch call.", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }, - ) -) +// DefaultKumaSDConfig is the default Kuma MADS SD configuration. +var DefaultKumaSDConfig = KumaSDConfig{ + HTTPClientConfig: config.DefaultHTTPClientConfig, + RefreshInterval: model.Duration(15 * time.Second), + FetchTimeout: model.Duration(2 * time.Minute), +} const ( // kumaMetaLabelPrefix is the meta prefix used for all kuma meta labels. @@ -120,7 +97,7 @@ func (c *KumaSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discover logger = log.NewNopLogger() } - return NewKumaHTTPDiscovery(c, logger) + return NewKumaHTTPDiscovery(c, logger, opts.Registerer) } func convertKumaV1MonitoringAssignment(assignment *MonitoringAssignment) []model.LabelSet { @@ -176,7 +153,7 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L return targets, nil } -func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Discoverer, error) { +func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, reg prometheus.Registerer) (discovery.Discoverer, error) { // Default to "prometheus" if hostname is unavailable. 
clientID, err := osutil.GetFQDN() if err != nil { @@ -203,15 +180,41 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Disc } d := &fetchDiscovery{ - client: client, - logger: logger, - refreshInterval: time.Duration(conf.RefreshInterval), - source: "kuma", - parseResources: kumaMadsV1ResourceParser, - fetchFailuresCount: kumaFetchFailuresCount, - fetchSkipUpdateCount: kumaFetchSkipUpdateCount, - fetchDuration: kumaFetchDuration, + client: client, + logger: logger, + refreshInterval: time.Duration(conf.RefreshInterval), + source: "kuma", + parseResources: kumaMadsV1ResourceParser, + fetchFailuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_kuma_fetch_failures_total", + Help: "The number of Kuma MADS fetch call failures.", + }), + fetchSkipUpdateCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_kuma_fetch_skipped_updates_total", + Help: "The number of Kuma MADS fetch calls that result in no updates to the targets.", + }), + fetchDuration: prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_kuma_fetch_duration_seconds", + Help: "The duration of a Kuma MADS fetch call.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, + ), } + d.metricRegisterer = discovery.NewMetricRegisterer( + reg, + []prometheus.Collector{ + d.fetchFailuresCount, + d.fetchSkipUpdateCount, + d.fetchDuration, + }, + ) + return d, nil } diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go index 581be9fb1..0626f82a0 100644 --- a/discovery/xds/kuma_test.go +++ b/discovery/xds/kuma_test.go @@ -21,6 +21,7 @@ import ( "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "google.golang.org/protobuf/proto" @@ -107,7 +108,7 @@ func 
getKumaMadsV1DiscoveryResponse(resources ...*MonitoringAssignment) (*v3.Dis } func newKumaTestHTTPDiscovery(c KumaSDConfig) (*fetchDiscovery, error) { - kd, err := NewKumaHTTPDiscovery(&c, nopLogger) + kd, err := NewKumaHTTPDiscovery(&c, nopLogger, prometheus.NewRegistry()) if err != nil { return nil, err } diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go index 48bdbab02..a04fe6862 100644 --- a/discovery/xds/xds.go +++ b/discovery/xds/xds.go @@ -69,9 +69,6 @@ func init() { // Register top-level SD Configs. discovery.RegisterConfig(&KumaSDConfig{}) - // Register metrics. - prometheus.MustRegister(kumaFetchDuration, kumaFetchSkipUpdateCount, kumaFetchFailuresCount) - // Register protobuf types that need to be marshalled/ unmarshalled. mustRegisterMessage(protoTypes, (&v3.DiscoveryRequest{}).ProtoReflect().Type()) mustRegisterMessage(protoTypes, (&v3.DiscoveryResponse{}).ProtoReflect().Type()) @@ -109,12 +106,20 @@ type fetchDiscovery struct { parseResources resourceParser logger log.Logger - fetchDuration prometheus.Observer + fetchDuration prometheus.Summary fetchSkipUpdateCount prometheus.Counter fetchFailuresCount prometheus.Counter + + metricRegisterer discovery.MetricRegisterer } func (d *fetchDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { + err := d.metricRegisterer.RegisterMetrics() + if err != nil { + level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error()) + return + } + defer d.metricRegisterer.UnregisterMetrics() defer d.client.Close() ticker := time.NewTicker(d.refreshInterval) diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index ae656db19..b712749df 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ b/documentation/examples/custom-sd/adapter-usage/main.go @@ -28,8 +28,10 @@ import ( "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" "github.com/go-kit/log/level" + 
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + prom_discovery "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/documentation/examples/custom-sd/adapter" "github.com/prometheus/prometheus/util/strutil" @@ -268,7 +270,13 @@ func main() { if err != nil { fmt.Println("err: ", err) } - sdAdapter := adapter.NewAdapter(ctx, *outputFile, "exampleSD", disc, logger) + + discoveryMetrics, err := prom_discovery.NewMetrics(prometheus.DefaultRegisterer) + if err != nil { + level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) + os.Exit(1) + } + sdAdapter := adapter.NewAdapter(ctx, *outputFile, "exampleSD", disc, logger, discoveryMetrics) sdAdapter.Run() <-ctx.Done() diff --git a/documentation/examples/custom-sd/adapter/adapter.go b/documentation/examples/custom-sd/adapter/adapter.go index 57c32ce49..8aedf0084 100644 --- a/documentation/examples/custom-sd/adapter/adapter.go +++ b/documentation/examples/custom-sd/adapter/adapter.go @@ -25,6 +25,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery" @@ -162,12 +163,12 @@ func (a *Adapter) Run() { } // NewAdapter creates a new instance of Adapter. 
-func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger) *Adapter { +func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger, metrics *discovery.Metrics) *Adapter { return &Adapter{ ctx: ctx, disc: d, groups: make(map[string]*customSD), - manager: discovery.NewManager(ctx, logger), + manager: discovery.NewManager(ctx, logger, prometheus.NewRegistry(), metrics), output: file, name: name, logger: logger, diff --git a/documentation/examples/custom-sd/adapter/adapter_test.go b/documentation/examples/custom-sd/adapter/adapter_test.go index eaf34c667..14cae47b4 100644 --- a/documentation/examples/custom-sd/adapter/adapter_test.go +++ b/documentation/examples/custom-sd/adapter/adapter_test.go @@ -18,9 +18,11 @@ import ( "os" "testing" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -226,6 +228,8 @@ func TestWriteOutput(t *testing.T) { require.NoError(t, err) defer os.Remove(tmpfile.Name()) tmpfile.Close() - adapter := NewAdapter(ctx, tmpfile.Name(), "test_sd", nil, nil) + metrics, err := discovery.NewMetrics(prometheus.NewRegistry()) + require.NoError(t, err) + adapter := NewAdapter(ctx, tmpfile.Name(), "test_sd", nil, nil, metrics) require.NoError(t, adapter.writeOutput()) } From 6a5306a53c437b9e5817423b8431937167b410d0 Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Wed, 15 Nov 2023 17:13:40 +0000 Subject: [PATCH 22/52] Use const labels for Discovery Manager metrics. 
Signed-off-by: Paulin Todev --- cmd/prometheus/main.go | 64 ++++++++++++------ discovery/legacymanager/manager.go | 25 ++++--- discovery/legacymanager/manager_test.go | 30 +++++---- discovery/manager.go | 25 ++++--- discovery/manager_test.go | 45 +++++++------ discovery/metrics.go | 66 +++++++++---------- discovery/metrics_k8s_client.go | 11 ++++ .../examples/custom-sd/adapter-usage/main.go | 5 +- .../examples/custom-sd/adapter/adapter.go | 4 +- .../custom-sd/adapter/adapter_test.go | 5 +- 10 files changed, 168 insertions(+), 112 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 24fc0f8e4..f30fbecd9 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -620,30 +620,56 @@ func main() { discoveryManagerNotify discoveryManager ) - // Register the metrics used by both "scrape" and "notify" discovery managers. - // The same metrics are used for both discovery managers. Hence the registration - // needs to be done here, outside the NewManager() calls, to avoid duplicate - // metric registrations. - discoveryMetrics, err := discovery.NewMetrics(prometheus.DefaultRegisterer) + // Kubernetes client metrics are used by Kubernetes SD. + // They are registered here in the main function, because SD mechanisms + // can only register metrics specific to a SD instance. + // Kubernetes client metrics are the same for the whole process - + // they are not specific to an SD instance. 
+ err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer) if err != nil { - level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) + level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err) os.Exit(1) } if cfg.enableNewSDManager { - discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discoveryMetrics, discovery.Name("scrape")) - discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discoveryMetrics, discovery.Name("notify")) - } else { - discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discoveryMetrics, legacymanager.Name("scrape")) - discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discoveryMetrics, legacymanager.Name("notify")) - } + { + discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discovery.Name("scrape")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager scrape") + os.Exit(1) + } else { + discoveryManagerScrape = discMgr + } + } - if discoveryManagerScrape == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager scrape") - os.Exit(1) - } - if discoveryManagerNotify == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager notify") - os.Exit(1) + { + discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discovery.Name("notify")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager notify") + os.Exit(1) + } 
else { + discoveryManagerNotify = discMgr + } + } + } else { + { + discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, legacymanager.Name("scrape")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager scrape") + os.Exit(1) + } else { + discoveryManagerScrape = discMgr + } + } + + { + discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, legacymanager.Name("notify")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager notify") + os.Exit(1) + } else { + discoveryManagerNotify = discMgr + } + } } scrapeManager, err := scrape.NewManager( diff --git a/discovery/legacymanager/manager.go b/discovery/legacymanager/manager.go index 101012daf..9c80f305a 100644 --- a/discovery/legacymanager/manager.go +++ b/discovery/legacymanager/manager.go @@ -42,7 +42,7 @@ type provider struct { } // NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, metrics *discovery.Metrics, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager { if logger == nil { logger = log.NewNopLogger() } @@ -55,11 +55,20 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re updatert: 5 * time.Second, triggerSend: make(chan struct{}, 1), registerer: registerer, - metrics: metrics, } for _, option := range options { option(mgr) } + + // Register the metrics. + // We have to do this after setting all options, so that the name of the Manager is set. 
+ if metrics, err := discovery.NewMetrics(registerer, mgr.name); err == nil { + mgr.metrics = metrics + } else { + level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) + return nil + } + return mgr } @@ -133,9 +142,9 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error { failedCount := 0 for name, scfg := range cfg { failedCount += m.registerProviders(scfg, name) - m.metrics.DiscoveredTargets.WithLabelValues(m.name, name).Set(0) + m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0) } - m.metrics.FailedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) + m.metrics.FailedConfigs.Set(float64(failedCount)) for _, prov := range m.providers { m.startProvider(m.ctx, prov) @@ -172,7 +181,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ case <-ctx.Done(): return case tgs, ok := <-updates: - m.metrics.ReceivedUpdates.WithLabelValues(m.name).Inc() + m.metrics.ReceivedUpdates.Inc() if !ok { level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) return @@ -201,11 +210,11 @@ func (m *Manager) sender() { case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker. 
select { case <-m.triggerSend: - m.metrics.SentUpdates.WithLabelValues(m.name).Inc() + m.metrics.SentUpdates.Inc() select { case m.syncCh <- m.allGroups(): default: - m.metrics.DelayedUpdates.WithLabelValues(m.name).Inc() + m.metrics.DelayedUpdates.Inc() level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: @@ -253,7 +262,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group { } } for setName, v := range n { - m.metrics.DiscoveredTargets.WithLabelValues(m.name, setName).Set(float64(v)) + m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v)) } return tSets } diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go index dccb687c2..7a2e8feea 100644 --- a/discovery/legacymanager/manager_test.go +++ b/discovery/legacymanager/manager_test.go @@ -36,12 +36,6 @@ func TestMain(m *testing.M) { testutil.TolerantVerifyLeak(m) } -func newTestDiscoveryMetrics(t *testing.T) *discovery.Metrics { - metrics, err := discovery.NewMetrics(prometheus.NewRegistry()) - require.NoError(t, err) - return metrics -} - // TestTargetUpdatesOrder checks that the target updates are received in the expected order. 
func TestTargetUpdatesOrder(t *testing.T) { // The order by which the updates are send is determined by the interval passed to the mock discovery adapter @@ -671,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond var totalUpdatesCount int @@ -753,7 +748,8 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -781,7 +777,8 @@ func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { func TestDiscovererConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -805,7 +802,8 @@ func TestDiscovererConfigs(t *testing.T) { func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager 
:= NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -844,7 +842,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -875,7 +874,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -900,7 +900,8 @@ func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Disco func TestGaugeFailedConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1056,7 +1057,8 @@ func TestCoordinationWithReceiver(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - mgr := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + mgr := NewManager(ctx, nil, prometheus.NewRegistry()) + require.NotNil(t, mgr) 
mgr.updatert = updateDelay go mgr.Run() diff --git a/discovery/manager.go b/discovery/manager.go index 6afa1b622..67e326c41 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -65,7 +65,7 @@ func (p *Provider) Config() interface{} { } // NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, metrics *Metrics, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager { if logger == nil { logger = log.NewNopLogger() } @@ -77,11 +77,20 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re updatert: 5 * time.Second, triggerSend: make(chan struct{}, 1), registerer: registerer, - metrics: metrics, } for _, option := range options { option(mgr) } + + // Register the metrics. + // We have to do this after setting all options, so that the name of the Manager is set. + if metrics, err := NewMetrics(registerer, mgr.name); err == nil { + mgr.metrics = metrics + } else { + level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) + return nil + } + return mgr } @@ -165,7 +174,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { for name, scfg := range cfg { failedCount += m.registerProviders(scfg, name) } - m.metrics.FailedConfigs.WithLabelValues(m.name).Set(float64(failedCount)) + m.metrics.FailedConfigs.Set(float64(failedCount)) var ( wg sync.WaitGroup @@ -201,7 +210,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { // Set metrics and targets for new subs. 
for s := range prov.newSubs { if _, ok := prov.subs[s]; !ok { - m.metrics.DiscoveredTargets.WithLabelValues(m.name, s).Set(0) + m.metrics.DiscoveredTargets.WithLabelValues(s).Set(0) } if l := len(refTargets); l > 0 { m.targets[poolKey{s, prov.name}] = make(map[string]*targetgroup.Group, l) @@ -281,7 +290,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ case <-ctx.Done(): return case tgs, ok := <-updates: - m.metrics.ReceivedUpdates.WithLabelValues(m.name).Inc() + m.metrics.ReceivedUpdates.Inc() if !ok { level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) // Wait for provider cancellation to ensure targets are cleaned up when expected. @@ -314,11 +323,11 @@ func (m *Manager) sender() { case <-ticker.C: // Some discoverers send updates too often, so we throttle these with the ticker. select { case <-m.triggerSend: - m.metrics.SentUpdates.WithLabelValues(m.name).Inc() + m.metrics.SentUpdates.Inc() select { case m.syncCh <- m.allGroups(): default: - m.metrics.DelayedUpdates.WithLabelValues(m.name).Inc() + m.metrics.DelayedUpdates.Inc() level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: @@ -370,7 +379,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group { } } for setName, v := range n { - m.metrics.DiscoveredTargets.WithLabelValues(m.name, setName).Set(float64(v)) + m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v)) } return tSets } diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 3cb277343..f22de75a4 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -36,12 +36,6 @@ func TestMain(m *testing.M) { testutil.TolerantVerifyLeak(m) } -func newTestDiscoveryMetrics(t *testing.T) *Metrics { - metrics, err := NewMetrics(prometheus.NewRegistry()) - require.NoError(t, err) - return metrics -} - // TestTargetUpdatesOrder checks that the target 
updates are received in the expected order. func TestTargetUpdatesOrder(t *testing.T) { // The order by which the updates are send is determined by the interval passed to the mock discovery adapter @@ -671,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond var totalUpdatesCount int @@ -785,7 +780,8 @@ func pk(provider, setName string, n int) poolKey { func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -817,7 +813,8 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -852,7 +849,8 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := 
NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -890,7 +888,8 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -951,7 +950,8 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -990,7 +990,8 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { func TestDiscovererConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1022,7 +1023,8 @@ func TestDiscovererConfigs(t *testing.T) { func 
TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1069,7 +1071,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1105,7 +1108,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { } ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1151,7 +1155,8 @@ func (s lockStaticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup. 
func TestGaugeFailedConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1307,7 +1312,8 @@ func TestCoordinationWithReceiver(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - mgr := NewManager(ctx, nil, prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + mgr := NewManager(ctx, nil, prometheus.NewRegistry()) + require.NotNil(t, mgr) mgr.updatert = updateDelay go mgr.Run() @@ -1402,7 +1408,8 @@ func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) { func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry(), newTestDiscoveryMetrics(t)) + discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry()) + require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() diff --git a/discovery/metrics.go b/discovery/metrics.go index 04f54b542..6a6060395 100644 --- a/discovery/metrics.go +++ b/discovery/metrics.go @@ -31,66 +31,64 @@ func init() { // Metrics to be used with a discovery manager. 
type Metrics struct { - FailedConfigs *prometheus.GaugeVec + FailedConfigs prometheus.Gauge DiscoveredTargets *prometheus.GaugeVec - ReceivedUpdates *prometheus.CounterVec - DelayedUpdates *prometheus.CounterVec - SentUpdates *prometheus.CounterVec + ReceivedUpdates prometheus.Counter + DelayedUpdates prometheus.Counter + SentUpdates prometheus.Counter } -func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) { +func NewMetrics(registerer prometheus.Registerer, sdManagerName string) (*Metrics, error) { m := &Metrics{} - m.FailedConfigs = prometheus.NewGaugeVec( + m.FailedConfigs = prometheus.NewGauge( prometheus.GaugeOpts{ - Name: "prometheus_sd_failed_configs", - Help: "Current number of service discovery configurations that failed to load.", + Name: "prometheus_sd_failed_configs", + Help: "Current number of service discovery configurations that failed to load.", + ConstLabels: prometheus.Labels{"name": sdManagerName}, }, - []string{"name"}, ) m.DiscoveredTargets = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "prometheus_sd_discovered_targets", - Help: "Current number of discovered targets.", + Name: "prometheus_sd_discovered_targets", + Help: "Current number of discovered targets.", + ConstLabels: prometheus.Labels{"name": sdManagerName}, }, - []string{"name", "config"}, + []string{"config"}, ) - m.ReceivedUpdates = prometheus.NewCounterVec( + m.ReceivedUpdates = prometheus.NewCounter( prometheus.CounterOpts{ - Name: "prometheus_sd_received_updates_total", - Help: "Total number of update events received from the SD providers.", + Name: "prometheus_sd_received_updates_total", + Help: "Total number of update events received from the SD providers.", + ConstLabels: prometheus.Labels{"name": sdManagerName}, }, - []string{"name"}, ) - m.DelayedUpdates = prometheus.NewCounterVec( + m.DelayedUpdates = prometheus.NewCounter( prometheus.CounterOpts{ - Name: "prometheus_sd_updates_delayed_total", - Help: "Total number of update events that couldn't be 
sent immediately.", + Name: "prometheus_sd_updates_delayed_total", + Help: "Total number of update events that couldn't be sent immediately.", + ConstLabels: prometheus.Labels{"name": sdManagerName}, }, - []string{"name"}, ) - m.SentUpdates = prometheus.NewCounterVec( + m.SentUpdates = prometheus.NewCounter( prometheus.CounterOpts{ - Name: "prometheus_sd_updates_total", - Help: "Total number of update events sent to the SD consumers.", + Name: "prometheus_sd_updates_total", + Help: "Total number of update events sent to the SD consumers.", + ConstLabels: prometheus.Labels{"name": sdManagerName}, }, - []string{"name"}, ) - metrics := append( - []prometheus.Collector{ - m.FailedConfigs, - m.DiscoveredTargets, - m.ReceivedUpdates, - m.DelayedUpdates, - m.SentUpdates, - }, - clientGoMetrics()..., - ) + metrics := []prometheus.Collector{ + m.FailedConfigs, + m.DiscoveredTargets, + m.ReceivedUpdates, + m.DelayedUpdates, + m.SentUpdates, + } for _, collector := range metrics { err := registerer.Register(collector) diff --git a/discovery/metrics_k8s_client.go b/discovery/metrics_k8s_client.go index 4f161bc3e..f16245684 100644 --- a/discovery/metrics_k8s_client.go +++ b/discovery/metrics_k8s_client.go @@ -15,6 +15,7 @@ package discovery import ( "context" + "fmt" "net/url" "time" @@ -133,6 +134,16 @@ func clientGoMetrics() []prometheus.Collector { } } +func RegisterK8sClientMetricsWithPrometheus(registerer prometheus.Registerer) error { + for _, collector := range clientGoMetrics() { + err := registerer.Register(collector) + if err != nil { + return fmt.Errorf("failed to register Kubernetes Go Client metrics: %w", err) + } + } + return nil +} + func (f *clientGoRequestMetricAdapter) RegisterWithK8sGoClient() { metrics.Register( metrics.RegisterOpts{ diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index b712749df..f4bba7394 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ 
b/documentation/examples/custom-sd/adapter-usage/main.go @@ -28,10 +28,8 @@ import ( "github.com/alecthomas/kingpin/v2" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - prom_discovery "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/documentation/examples/custom-sd/adapter" "github.com/prometheus/prometheus/util/strutil" @@ -271,12 +269,11 @@ func main() { fmt.Println("err: ", err) } - discoveryMetrics, err := prom_discovery.NewMetrics(prometheus.DefaultRegisterer) if err != nil { level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) os.Exit(1) } - sdAdapter := adapter.NewAdapter(ctx, *outputFile, "exampleSD", disc, logger, discoveryMetrics) + sdAdapter := adapter.NewAdapter(ctx, *outputFile, "exampleSD", disc, logger) sdAdapter.Run() <-ctx.Done() diff --git a/documentation/examples/custom-sd/adapter/adapter.go b/documentation/examples/custom-sd/adapter/adapter.go index 8aedf0084..7fbf94aa9 100644 --- a/documentation/examples/custom-sd/adapter/adapter.go +++ b/documentation/examples/custom-sd/adapter/adapter.go @@ -163,12 +163,12 @@ func (a *Adapter) Run() { } // NewAdapter creates a new instance of Adapter. 
-func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger, metrics *discovery.Metrics) *Adapter { +func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger) *Adapter { return &Adapter{ ctx: ctx, disc: d, groups: make(map[string]*customSD), - manager: discovery.NewManager(ctx, logger, prometheus.NewRegistry(), metrics), + manager: discovery.NewManager(ctx, logger, prometheus.NewRegistry()), output: file, name: name, logger: logger, diff --git a/documentation/examples/custom-sd/adapter/adapter_test.go b/documentation/examples/custom-sd/adapter/adapter_test.go index 14cae47b4..8e5920eb4 100644 --- a/documentation/examples/custom-sd/adapter/adapter_test.go +++ b/documentation/examples/custom-sd/adapter/adapter_test.go @@ -18,11 +18,9 @@ import ( "os" "testing" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -228,8 +226,7 @@ func TestWriteOutput(t *testing.T) { require.NoError(t, err) defer os.Remove(tmpfile.Name()) tmpfile.Close() - metrics, err := discovery.NewMetrics(prometheus.NewRegistry()) require.NoError(t, err) - adapter := NewAdapter(ctx, tmpfile.Name(), "test_sd", nil, nil, metrics) + adapter := NewAdapter(ctx, tmpfile.Name(), "test_sd", nil, nil) require.NoError(t, adapter.writeOutput()) } From 6279497124673eda4de899b08bbf6d66e6041892 Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Tue, 5 Dec 2023 18:01:39 +0000 Subject: [PATCH 23/52] Remove unnecessary else. This was flagged by the linter. 
Signed-off-by: Paulin Todev --- cmd/prometheus/main.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index f30fbecd9..7438476c3 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -636,9 +636,8 @@ func main() { if discMgr == nil { level.Error(logger).Log("msg", "failed to create a discovery manager scrape") os.Exit(1) - } else { - discoveryManagerScrape = discMgr } + discoveryManagerScrape = discMgr } { @@ -646,9 +645,8 @@ func main() { if discMgr == nil { level.Error(logger).Log("msg", "failed to create a discovery manager notify") os.Exit(1) - } else { - discoveryManagerNotify = discMgr } + discoveryManagerNotify = discMgr } } else { { @@ -656,9 +654,8 @@ func main() { if discMgr == nil { level.Error(logger).Log("msg", "failed to create a discovery manager scrape") os.Exit(1) - } else { - discoveryManagerScrape = discMgr } + discoveryManagerScrape = discMgr } { @@ -666,9 +663,8 @@ func main() { if discMgr == nil { level.Error(logger).Log("msg", "failed to create a discovery manager notify") os.Exit(1) - } else { - discoveryManagerNotify = discMgr } + discoveryManagerNotify = discMgr } } From 8f6cf3aabb1b9c0b385d57c922f8e2f571bba61b Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Thu, 16 Nov 2023 19:54:41 +0100 Subject: [PATCH 24/52] tsdb: use Go standard errors Signed-off-by: Matthieu MOREL --- .golangci.yml | 6 +- go.mod | 2 +- tsdb/agent/db.go | 2 +- tsdb/block.go | 29 ++++--- tsdb/blockwriter.go | 11 +-- tsdb/chunkenc/chunk_test.go | 5 +- tsdb/chunks/head_chunks.go | 9 ++- tsdb/compact.go | 50 ++++++------ tsdb/compact_test.go | 2 +- tsdb/db.go | 154 +++++++++++++++++++----------------- tsdb/db_test.go | 3 +- tsdb/exemplar.go | 3 +- tsdb/head.go | 48 +++++------ tsdb/head_append.go | 50 ++++++------ tsdb/head_bench_test.go | 2 +- tsdb/head_read.go | 15 ++-- tsdb/head_test.go | 15 ++-- tsdb/head_wal.go | 103 +++++++++++++----------- tsdb/index/index.go | 6 +- 
tsdb/mocks_test.go | 4 +- tsdb/querier.go | 40 +++++----- tsdb/querier_test.go | 4 +- tsdb/repair.go | 43 ++++++---- tsdb/wal.go | 101 ++++++++++++----------- tsdb/wlog/wlog.go | 4 + 25 files changed, 382 insertions(+), 329 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 01ba9deb0..166b2e0d4 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -36,13 +36,9 @@ issues: - path: _test.go linters: - errcheck - - path: tsdb/ + - path: "tsdb/head_wal.go" linters: - errorlint - - path: tsdb/ - text: "import 'github.com/pkg/errors' is not allowed" - linters: - - depguard - linters: - godot source: "^// ===" diff --git a/go.mod b/go.mod index 95cc7b8af..f785de723 100644 --- a/go.mod +++ b/go.mod @@ -43,7 +43,6 @@ require ( github.com/oklog/run v1.1.0 github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.4.3 - github.com/pkg/errors v0.9.1 github.com/prometheus/alertmanager v0.26.0 github.com/prometheus/client_golang v1.17.0 github.com/prometheus/client_model v0.5.0 @@ -167,6 +166,7 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/spf13/pflag v1.0.5 // indirect diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 557fb7854..d39989713 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -359,7 +359,7 @@ func (db *DB) replayWAL() error { start := time.Now() dir, startFrom, err := wlog.LastCheckpoint(db.wal.Dir()) - if err != nil && err != record.ErrNotFound { + if err != nil && !errors.Is(err, record.ErrNotFound) { return fmt.Errorf("find last checkpoint: %w", err) } diff --git a/tsdb/block.go b/tsdb/block.go index a586536b1..e2562de03 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -17,6 +17,7 @@ package tsdb import ( "context" 
"encoding/json" + "errors" "fmt" "io" "os" @@ -26,7 +27,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/oklog/ulid" - "github.com/pkg/errors" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/labels" @@ -479,14 +479,19 @@ func (r blockIndexReader) SortedLabelValues(ctx context.Context, name string, ma slices.Sort(st) } } - - return st, errors.Wrapf(err, "block: %s", r.b.Meta().ULID) + if err != nil { + return st, fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err) + } + return st, nil } func (r blockIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) { if len(matchers) == 0 { st, err := r.ir.LabelValues(ctx, name) - return st, errors.Wrapf(err, "block: %s", r.b.Meta().ULID) + if err != nil { + return st, fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err) + } + return st, nil } return labelValuesWithMatchers(ctx, r.ir, name, matchers...) @@ -503,7 +508,7 @@ func (r blockIndexReader) LabelNames(ctx context.Context, matchers ...*labels.Ma func (r blockIndexReader) Postings(ctx context.Context, name string, values ...string) (index.Postings, error) { p, err := r.ir.Postings(ctx, name, values...) if err != nil { - return p, errors.Wrapf(err, "block: %s", r.b.Meta().ULID) + return p, fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err) } return p, nil } @@ -514,7 +519,7 @@ func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings { func (r blockIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error { if err := r.ir.Series(ref, builder, chks); err != nil { - return errors.Wrapf(err, "block: %s", r.b.Meta().ULID) + return fmt.Errorf("block: %s: %w", r.b.Meta().ULID, err) } return nil } @@ -566,7 +571,7 @@ func (pb *Block) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Mat p, err := PostingsForMatchers(ctx, pb.indexr, ms...) 
if err != nil { - return errors.Wrap(err, "select series") + return fmt.Errorf("select series: %w", err) } ir := pb.indexr @@ -654,12 +659,12 @@ func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, bool, er func (pb *Block) Snapshot(dir string) error { blockDir := filepath.Join(dir, pb.meta.ULID.String()) if err := os.MkdirAll(blockDir, 0o777); err != nil { - return errors.Wrap(err, "create snapshot block dir") + return fmt.Errorf("create snapshot block dir: %w", err) } chunksDir := chunkDir(blockDir) if err := os.MkdirAll(chunksDir, 0o777); err != nil { - return errors.Wrap(err, "create snapshot chunk dir") + return fmt.Errorf("create snapshot chunk dir: %w", err) } // Hardlink meta, index and tombstones @@ -669,7 +674,7 @@ func (pb *Block) Snapshot(dir string) error { tombstones.TombstonesFilename, } { if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil { - return errors.Wrapf(err, "create snapshot %s", fname) + return fmt.Errorf("create snapshot %s: %w", fname, err) } } @@ -677,13 +682,13 @@ func (pb *Block) Snapshot(dir string) error { curChunkDir := chunkDir(pb.dir) files, err := os.ReadDir(curChunkDir) if err != nil { - return errors.Wrap(err, "ReadDir the current chunk dir") + return fmt.Errorf("ReadDir the current chunk dir: %w", err) } for _, f := range files { err := os.Link(filepath.Join(curChunkDir, f.Name()), filepath.Join(chunksDir, f.Name())) if err != nil { - return errors.Wrap(err, "hardlink a chunk") + return fmt.Errorf("hardlink a chunk: %w", err) } } diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 0d017e095..73bc5f1e3 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -15,13 +15,14 @@ package tsdb import ( "context" + "errors" + "fmt" "math" "os" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/oklog/ulid" - "github.com/pkg/errors" "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/storage" @@ -65,7 +66,7 @@ func 
NewBlockWriter(logger log.Logger, dir string, blockSize int64) (*BlockWrite func (w *BlockWriter) initHead() error { chunkDir, err := os.MkdirTemp(os.TempDir(), "head") if err != nil { - return errors.Wrap(err, "create temp dir") + return fmt.Errorf("create temp dir: %w", err) } w.chunkDir = chunkDir opts := DefaultHeadOptions() @@ -74,7 +75,7 @@ func (w *BlockWriter) initHead() error { opts.EnableNativeHistograms.Store(true) h, err := NewHead(nil, w.logger, nil, nil, opts, NewHeadStats()) if err != nil { - return errors.Wrap(err, "tsdb.NewHead") + return fmt.Errorf("tsdb.NewHead: %w", err) } w.head = h @@ -102,11 +103,11 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { []int64{w.blockSize}, chunkenc.NewPool(), nil) if err != nil { - return ulid.ULID{}, errors.Wrap(err, "create leveled compactor") + return ulid.ULID{}, fmt.Errorf("create leveled compactor: %w", err) } id, err := compactor.Write(w.destinationDir, w.head, mint, maxt, nil) if err != nil { - return ulid.ULID{}, errors.Wrap(err, "compactor write") + return ulid.ULID{}, fmt.Errorf("compactor write: %w", err) } return id, nil diff --git a/tsdb/chunkenc/chunk_test.go b/tsdb/chunkenc/chunk_test.go index 3d22f74cc..9db1bf364 100644 --- a/tsdb/chunkenc/chunk_test.go +++ b/tsdb/chunkenc/chunk_test.go @@ -14,6 +14,7 @@ package chunkenc import ( + "errors" "fmt" "io" "math/rand" @@ -153,8 +154,8 @@ func benchmarkIterator(b *testing.B, newChunk func() Chunk) { res = v i++ } - if it.Err() != io.EOF { - require.NoError(b, it.Err()) + if err := it.Err(); err != nil && !errors.Is(err, io.EOF) { + require.NoError(b, err) } _ = res } diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index b495b6182..12c3e7b90 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -111,6 +111,10 @@ func (e *CorruptionErr) Error() string { return fmt.Errorf("corruption in head chunk file %s: %w", segmentFile(e.Dir, e.FileIndex), e.Err).Error() } +func (e *CorruptionErr) 
Unwrap() error { + return e.Err +} + // chunkPos keeps track of the position in the head chunk files. // chunkPos is not thread-safe, a lock must be used to protect it. type chunkPos struct { @@ -400,7 +404,7 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro buf := make([]byte, MagicChunksSize) size, err := f.Read(buf) - if err != nil && err != io.EOF { + if err != nil && !errors.Is(err, io.EOF) { return files, fmt.Errorf("failed to read magic number during last head chunk file repair: %w", err) } if err := f.Close(); err != nil { @@ -892,7 +896,8 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu // Extract the encoding from the byte. ChunkDiskMapper uses only the last 7 bits for the encoding. chkEnc = cdm.RemoveMasks(chkEnc) if err := f(seriesRef, chunkRef, mint, maxt, numSamples, chkEnc, isOOO); err != nil { - if cerr, ok := err.(*CorruptionErr); ok { + var cerr *CorruptionErr + if errors.As(err, &cerr) { cerr.Dir = cdm.dir.Name() cerr.FileIndex = segID return cerr diff --git a/tsdb/compact.go b/tsdb/compact.go index 32c88d2cc..7b4ff9c1b 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -16,6 +16,7 @@ package tsdb import ( "context" "crypto/rand" + "errors" "fmt" "io" "os" @@ -25,7 +26,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/oklog/ulid" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "golang.org/x/exp/slices" @@ -485,7 +485,7 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, if !errors.Is(err, context.Canceled) { for _, b := range bs { if err := b.setCompactionFailed(); err != nil { - errs.Add(errors.Wrapf(err, "setting compaction failed for block: %s", b.Dir())) + errs.Add(fmt.Errorf("setting compaction failed for block: %s: %w", b.Dir(), err)) } } } @@ -586,7 +586,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl chunkw, err = 
chunks.NewWriterWithSegSize(chunkDir(tmp), c.maxBlockChunkSegmentSize) if err != nil { - return errors.Wrap(err, "open chunk writer") + return fmt.Errorf("open chunk writer: %w", err) } closers = append(closers, chunkw) // Record written chunk sizes on level 1 compactions. @@ -601,12 +601,12 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl indexw, err := index.NewWriter(c.ctx, filepath.Join(tmp, indexFilename)) if err != nil { - return errors.Wrap(err, "open index writer") + return fmt.Errorf("open index writer: %w", err) } closers = append(closers, indexw) if err := blockPopulator.PopulateBlock(c.ctx, c.metrics, c.logger, c.chunkPool, c.mergeFunc, blocks, meta, indexw, chunkw); err != nil { - return errors.Wrap(err, "populate block") + return fmt.Errorf("populate block: %w", err) } select { @@ -634,17 +634,17 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl } if _, err = writeMetaFile(c.logger, tmp, meta); err != nil { - return errors.Wrap(err, "write merged meta") + return fmt.Errorf("write merged meta: %w", err) } // Create an empty tombstones file. if _, err := tombstones.WriteFile(c.logger, tmp, tombstones.NewMemTombstones()); err != nil { - return errors.Wrap(err, "write new tombstones file") + return fmt.Errorf("write new tombstones file: %w", err) } df, err := fileutil.OpenDir(tmp) if err != nil { - return errors.Wrap(err, "open temporary block dir") + return fmt.Errorf("open temporary block dir: %w", err) } defer func() { if df != nil { @@ -653,18 +653,18 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl }() if err := df.Sync(); err != nil { - return errors.Wrap(err, "sync temporary dir file") + return fmt.Errorf("sync temporary dir file: %w", err) } // Close temp dir before rename block dir (for windows platform). 
if err = df.Close(); err != nil { - return errors.Wrap(err, "close temporary dir") + return fmt.Errorf("close temporary dir: %w", err) } df = nil // Block successfully written, make it visible in destination dir by moving it from tmp one. if err := fileutil.Replace(tmp, dir); err != nil { - return errors.Wrap(err, "rename block dir") + return fmt.Errorf("rename block dir: %w", err) } return nil @@ -693,7 +693,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa defer func() { errs := tsdb_errors.NewMulti(err) if cerr := tsdb_errors.CloseAll(closers); cerr != nil { - errs.Add(errors.Wrap(cerr, "close")) + errs.Add(fmt.Errorf("close: %w", cerr)) } err = errs.Err() metrics.PopulatingBlocks.Set(0) @@ -721,19 +721,19 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa indexr, err := b.Index() if err != nil { - return errors.Wrapf(err, "open index reader for block %+v", b.Meta()) + return fmt.Errorf("open index reader for block %+v: %w", b.Meta(), err) } closers = append(closers, indexr) chunkr, err := b.Chunks() if err != nil { - return errors.Wrapf(err, "open chunk reader for block %+v", b.Meta()) + return fmt.Errorf("open chunk reader for block %+v: %w", b.Meta(), err) } closers = append(closers, chunkr) tombsr, err := b.Tombstones() if err != nil { - return errors.Wrapf(err, "open tombstone reader for block %+v", b.Meta()) + return fmt.Errorf("open tombstone reader for block %+v: %w", b.Meta(), err) } closers = append(closers, tombsr) @@ -755,11 +755,11 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa for symbols.Next() { if err := indexw.AddSymbol(symbols.At()); err != nil { - return errors.Wrap(err, "add symbol") + return fmt.Errorf("add symbol: %w", err) } } - if symbols.Err() != nil { - return errors.Wrap(symbols.Err(), "next symbol") + if err := symbols.Err(); err != nil { + return fmt.Errorf("next symbol: %w", err) } var ( @@ -791,8 +791,8 @@ func (c 
DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa // chunk file purposes. chks = append(chks, chksIter.At()) } - if chksIter.Err() != nil { - return errors.Wrap(chksIter.Err(), "chunk iter") + if err := chksIter.Err(); err != nil { + return fmt.Errorf("chunk iter: %w", err) } // Skip the series with all deleted chunks. @@ -801,10 +801,10 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa } if err := chunkw.WriteChunks(chks...); err != nil { - return errors.Wrap(err, "write chunks") + return fmt.Errorf("write chunks: %w", err) } if err := indexw.AddSeries(ref, s.Labels(), chks...); err != nil { - return errors.Wrap(err, "add series") + return fmt.Errorf("add series: %w", err) } meta.Stats.NumChunks += uint64(len(chks)) @@ -815,13 +815,13 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa for _, chk := range chks { if err := chunkPool.Put(chk.Chunk); err != nil { - return errors.Wrap(err, "put chunk") + return fmt.Errorf("put chunk: %w", err) } } ref++ } - if set.Err() != nil { - return errors.Wrap(set.Err(), "iterate compaction set") + if err := set.Err(); err != nil { + return fmt.Errorf("iterate compaction set: %w", err) } return nil diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 75d564139..f33bb73c1 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -15,6 +15,7 @@ package tsdb import ( "context" + "errors" "fmt" "math" "math/rand" @@ -27,7 +28,6 @@ import ( "github.com/go-kit/log" "github.com/oklog/ulid" - "github.com/pkg/errors" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" diff --git a/tsdb/db.go b/tsdb/db.go index 2e3801a9e..6d283fdd8 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -16,6 +16,7 @@ package tsdb import ( "context" + "errors" "fmt" "io" "io/fs" @@ -30,7 +31,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/oklog/ulid" - "github.com/pkg/errors" 
"github.com/prometheus/client_golang/prometheus" "go.uber.org/atomic" "golang.org/x/exp/slices" @@ -386,7 +386,7 @@ type DBReadOnly struct { // OpenDBReadOnly opens DB in the given directory for read only operations. func OpenDBReadOnly(dir string, l log.Logger) (*DBReadOnly, error) { if _, err := os.Stat(dir); err != nil { - return nil, errors.Wrap(err, "opening the db dir") + return nil, fmt.Errorf("opening the db dir: %w", err) } if l == nil { @@ -407,7 +407,7 @@ func OpenDBReadOnly(dir string, l log.Logger) (*DBReadOnly, error) { func (db *DBReadOnly) FlushWAL(dir string) (returnErr error) { blockReaders, err := db.Blocks() if err != nil { - return errors.Wrap(err, "read blocks") + return fmt.Errorf("read blocks: %w", err) } maxBlockTime := int64(math.MinInt64) if len(blockReaders) > 0 { @@ -432,15 +432,16 @@ func (db *DBReadOnly) FlushWAL(dir string) (returnErr error) { return err } defer func() { - returnErr = tsdb_errors.NewMulti( - returnErr, - errors.Wrap(head.Close(), "closing Head"), - ).Err() + errs := tsdb_errors.NewMulti(returnErr) + if err := head.Close(); err != nil { + errs.Add(fmt.Errorf("closing Head: %w", err)) + } + returnErr = errs.Err() }() // Set the min valid time for the ingested wal samples // to be no lower than the maxt of the last block. if err := head.Init(maxBlockTime); err != nil { - return errors.Wrap(err, "read WAL") + return fmt.Errorf("read WAL: %w", err) } mint := head.MinTime() maxt := head.MaxTime() @@ -454,12 +455,15 @@ func (db *DBReadOnly) FlushWAL(dir string) (returnErr error) { nil, ) if err != nil { - return errors.Wrap(err, "create leveled compactor") + return fmt.Errorf("create leveled compactor: %w", err) } // Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime). // Because of this block intervals are always +1 than the total samples it includes. 
_, err = compactor.Write(dir, rh, mint, maxt+1, nil) - return errors.Wrap(err, "writing WAL") + if err != nil { + return fmt.Errorf("writing WAL: %w", err) + } + return nil } func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQueryable, error) { @@ -518,7 +522,7 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue // Set the min valid time for the ingested wal samples // to be no lower than the maxt of the last block. if err := head.Init(maxBlockTime); err != nil { - return nil, errors.Wrap(err, "read WAL") + return nil, fmt.Errorf("read WAL: %w", err) } // Set the wal to nil to disable all wal operations. // This is mainly to avoid blocking when closing the head. @@ -580,7 +584,9 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { } errs := tsdb_errors.NewMulti() for ulid, err := range corrupted { - errs.Add(errors.Wrapf(err, "corrupted block %s", ulid.String())) + if err != nil { + errs.Add(fmt.Errorf("corrupted block %s: %w", ulid.String(), err)) + } } return nil, errs.Err() } @@ -761,7 +767,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs // Fixup bad format written by Prometheus 2.1. if err := repairBadIndexVersion(l, dir); err != nil { - return nil, errors.Wrap(err, "repair bad index version") + return nil, fmt.Errorf("repair bad index version: %w", err) } walDir := filepath.Join(dir, "wal") @@ -769,12 +775,12 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs // Migrate old WAL if one exists. if err := MigrateWAL(l, walDir); err != nil { - return nil, errors.Wrap(err, "migrate WAL") + return nil, fmt.Errorf("migrate WAL: %w", err) } for _, tmpDir := range []string{walDir, dir} { // Remove tmp dirs. 
if err := removeBestEffortTmpDirs(l, tmpDir); err != nil { - return nil, errors.Wrap(err, "remove tmp dirs") + return nil, fmt.Errorf("remove tmp dirs: %w", err) } } @@ -797,11 +803,11 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs } close(db.donec) // DB is never run if it was an error, so close this channel here. - - returnedErr = tsdb_errors.NewMulti( - returnedErr, - errors.Wrap(db.Close(), "close DB after failed startup"), - ).Err() + errs := tsdb_errors.NewMulti(returnedErr) + if err := db.Close(); err != nil { + errs.Add(fmt.Errorf("close DB after failed startup: %w", err)) + } + returnedErr = errs.Err() }() if db.blocksToDelete == nil { @@ -823,7 +829,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs db.compactor, err = NewLeveledCompactorWithChunkSize(ctx, r, l, rngs, db.chunkPool, opts.MaxBlockChunkSegmentSize, nil) if err != nil { cancel() - return nil, errors.Wrap(err, "create leveled compactor") + return nil, fmt.Errorf("create leveled compactor: %w", err) } db.compactCancel = cancel @@ -905,17 +911,17 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs if initErr := db.head.Init(minValidTime); initErr != nil { db.head.metrics.walCorruptionsTotal.Inc() - e, ok := initErr.(*errLoadWbl) - if ok { + var e *errLoadWbl + if errors.As(initErr, &e) { level.Warn(db.logger).Log("msg", "Encountered WBL read error, attempting repair", "err", initErr) if err := wbl.Repair(e.err); err != nil { - return nil, errors.Wrap(err, "repair corrupted WBL") + return nil, fmt.Errorf("repair corrupted WBL: %w", err) } level.Info(db.logger).Log("msg", "Successfully repaired WBL") } else { level.Warn(db.logger).Log("msg", "Encountered WAL read error, attempting repair", "err", initErr) if err := wal.Repair(initErr); err != nil { - return nil, errors.Wrap(err, "repair corrupted WAL") + return nil, fmt.Errorf("repair corrupted WAL: %w", err) } level.Info(db.logger).Log("msg", 
"Successfully repaired WAL") } @@ -1131,10 +1137,11 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { lastBlockMaxt := int64(math.MinInt64) defer func() { - returnErr = tsdb_errors.NewMulti( - returnErr, - errors.Wrap(db.head.truncateWAL(lastBlockMaxt), "WAL truncation in Compact defer"), - ).Err() + errs := tsdb_errors.NewMulti(returnErr) + if err := db.head.truncateWAL(lastBlockMaxt); err != nil { + errs.Add(fmt.Errorf("WAL truncation in Compact defer: %w", err)) + } + returnErr = errs.Err() }() start := time.Now() @@ -1168,7 +1175,7 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { db.head.WaitForAppendersOverlapping(rh.MaxTime()) if err := db.compactHead(rh); err != nil { - return errors.Wrap(err, "compact head") + return fmt.Errorf("compact head: %w", err) } // Consider only successful compactions for WAL truncation. lastBlockMaxt = maxt @@ -1177,7 +1184,7 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { // Clear some disk space before compacting blocks, especially important // when Head compaction happened over a long time range. if err := db.head.truncateWAL(lastBlockMaxt); err != nil { - return errors.Wrap(err, "WAL truncation in Compact") + return fmt.Errorf("WAL truncation in Compact: %w", err) } compactionDuration := time.Since(start) @@ -1192,7 +1199,7 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { if lastBlockMaxt != math.MinInt64 { // The head was compacted, so we compact OOO head as well. 
if err := db.compactOOOHead(ctx); err != nil { - return errors.Wrap(err, "compact ooo head") + return fmt.Errorf("compact ooo head: %w", err) } } @@ -1205,11 +1212,11 @@ func (db *DB) CompactHead(head *RangeHead) error { defer db.cmtx.Unlock() if err := db.compactHead(head); err != nil { - return errors.Wrap(err, "compact head") + return fmt.Errorf("compact head: %w", err) } if err := db.head.truncateWAL(head.BlockMaxTime()); err != nil { - return errors.Wrap(err, "WAL truncation") + return fmt.Errorf("WAL truncation: %w", err) } return nil } @@ -1228,12 +1235,12 @@ func (db *DB) compactOOOHead(ctx context.Context) error { } oooHead, err := NewOOOCompactionHead(ctx, db.head) if err != nil { - return errors.Wrap(err, "get ooo compaction head") + return fmt.Errorf("get ooo compaction head: %w", err) } ulids, err := db.compactOOO(db.dir, oooHead) if err != nil { - return errors.Wrap(err, "compact ooo head") + return fmt.Errorf("compact ooo head: %w", err) } if err := db.reloadBlocks(); err != nil { errs := tsdb_errors.NewMulti(err) @@ -1242,7 +1249,7 @@ func (db *DB) compactOOOHead(ctx context.Context) error { errs.Add(errRemoveAll) } } - return errors.Wrap(errs.Err(), "reloadBlocks blocks after failed compact ooo head") + return fmt.Errorf("reloadBlocks blocks after failed compact ooo head: %w", errs.Err()) } lastWBLFile, minOOOMmapRef := oooHead.LastWBLFile(), oooHead.LastMmapRef() @@ -1262,7 +1269,7 @@ func (db *DB) compactOOOHead(ctx context.Context) error { } if err := db.head.truncateOOO(lastWBLFile, minOOOMmapRef); err != nil { - return errors.Wrap(err, "truncate ooo wbl") + return fmt.Errorf("truncate ooo wbl: %w", err) } } @@ -1298,12 +1305,12 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID blockDir := filepath.Join(dest, uid.String()) meta, _, err := readMetaFile(blockDir) if err != nil { - return ulids, errors.Wrap(err, "read meta") + return ulids, fmt.Errorf("read meta: %w", err) } meta.Compaction.SetOutOfOrder() _, err 
= writeMetaFile(db.logger, blockDir, meta) if err != nil { - return ulids, errors.Wrap(err, "write meta") + return ulids, fmt.Errorf("write meta: %w", err) } } } @@ -1329,20 +1336,20 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID func (db *DB) compactHead(head *RangeHead) error { uid, err := db.compactor.Write(db.dir, head, head.MinTime(), head.BlockMaxTime(), nil) if err != nil { - return errors.Wrap(err, "persist head block") + return fmt.Errorf("persist head block: %w", err) } if err := db.reloadBlocks(); err != nil { if errRemoveAll := os.RemoveAll(filepath.Join(db.dir, uid.String())); errRemoveAll != nil { return tsdb_errors.NewMulti( - errors.Wrap(err, "reloadBlocks blocks"), - errors.Wrapf(errRemoveAll, "delete persisted head block after failed db reloadBlocks:%s", uid), + fmt.Errorf("reloadBlocks blocks: %w", err), + fmt.Errorf("delete persisted head block after failed db reloadBlocks:%s: %w", uid, errRemoveAll), ).Err() } - return errors.Wrap(err, "reloadBlocks blocks") + return fmt.Errorf("reloadBlocks blocks: %w", err) } if err = db.head.truncateMemory(head.BlockMaxTime()); err != nil { - return errors.Wrap(err, "head memory truncate") + return fmt.Errorf("head memory truncate: %w", err) } return nil } @@ -1354,7 +1361,7 @@ func (db *DB) compactBlocks() (err error) { for { plan, err := db.compactor.Plan(db.dir) if err != nil { - return errors.Wrap(err, "plan compaction") + return fmt.Errorf("plan compaction: %w", err) } if len(plan) == 0 { break @@ -1368,14 +1375,14 @@ func (db *DB) compactBlocks() (err error) { uid, err := db.compactor.Compact(db.dir, plan, db.blocks) if err != nil { - return errors.Wrapf(err, "compact %s", plan) + return fmt.Errorf("compact %s: %w", plan, err) } if err := db.reloadBlocks(); err != nil { if err := os.RemoveAll(filepath.Join(db.dir, uid.String())); err != nil { - return errors.Wrapf(err, "delete compacted block after failed db reloadBlocks:%s", uid) + return fmt.Errorf("delete 
compacted block after failed db reloadBlocks:%s: %w", uid, err) } - return errors.Wrap(err, "reloadBlocks blocks") + return fmt.Errorf("reloadBlocks blocks: %w", err) } } @@ -1396,14 +1403,14 @@ func getBlock(allBlocks []*Block, id ulid.ULID) (*Block, bool) { // reload reloads blocks and truncates the head and its WAL. func (db *DB) reload() error { if err := db.reloadBlocks(); err != nil { - return errors.Wrap(err, "reloadBlocks") + return fmt.Errorf("reloadBlocks: %w", err) } maxt, ok := db.inOrderBlocksMaxTime() if !ok { return nil } if err := db.head.Truncate(maxt); err != nil { - return errors.Wrap(err, "head truncate") + return fmt.Errorf("head truncate: %w", err) } return nil } @@ -1457,7 +1464,9 @@ func (db *DB) reloadBlocks() (err error) { } errs := tsdb_errors.NewMulti() for ulid, err := range corrupted { - errs.Add(errors.Wrapf(err, "corrupted block %s", ulid.String())) + if err != nil { + errs.Add(fmt.Errorf("corrupted block %s: %w", ulid.String(), err)) + } } return errs.Err() } @@ -1509,7 +1518,7 @@ func (db *DB) reloadBlocks() (err error) { } } if err := db.deleteBlocks(deletable); err != nil { - return errors.Wrapf(err, "delete %v blocks", len(deletable)) + return fmt.Errorf("delete %v blocks: %w", len(deletable), err) } return nil } @@ -1517,7 +1526,7 @@ func (db *DB) reloadBlocks() (err error) { func openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { - return nil, nil, errors.Wrap(err, "find blocks") + return nil, nil, fmt.Errorf("find blocks: %w", err) } corrupted = make(map[ulid.ULID]error) @@ -1651,16 +1660,16 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { // Noop. continue case err != nil: - return errors.Wrapf(err, "stat dir %v", toDelete) + return fmt.Errorf("stat dir %v: %w", toDelete, err) } // Replace atomically to avoid partial block when process would crash during deletion. 
tmpToDelete := filepath.Join(db.dir, fmt.Sprintf("%s%s", ulid, tmpForDeletionBlockDirSuffix)) if err := fileutil.Replace(toDelete, tmpToDelete); err != nil { - return errors.Wrapf(err, "replace of obsolete block for deletion %s", ulid) + return fmt.Errorf("replace of obsolete block for deletion %s: %w", ulid, err) } if err := os.RemoveAll(tmpToDelete); err != nil { - return errors.Wrapf(err, "delete obsolete block %s", ulid) + return fmt.Errorf("delete obsolete block %s: %w", ulid, err) } level.Info(db.logger).Log("msg", "Deleting obsolete block", "block", ulid) } @@ -1868,7 +1877,7 @@ func (db *DB) Snapshot(dir string, withHead bool) error { level.Info(db.logger).Log("msg", "Snapshotting block", "block", b) if err := b.Snapshot(dir); err != nil { - return errors.Wrapf(err, "error snapshotting block: %s", b.Dir()) + return fmt.Errorf("error snapshotting block: %s: %w", b.Dir(), err) } } if !withHead { @@ -1881,7 +1890,7 @@ func (db *DB) Snapshot(dir string, withHead bool) error { // Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime). // Because of this block intervals are always +1 than the total samples it includes. if _, err := db.compactor.Write(dir, head, mint, maxt+1, nil); err != nil { - return errors.Wrap(err, "snapshot head block") + return fmt.Errorf("snapshot head block: %w", err) } return nil } @@ -1916,7 +1925,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { var err error inOrderHeadQuerier, err := NewBlockQuerier(rh, mint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open block querier for head %s", rh) + return nil, fmt.Errorf("open block querier for head %s: %w", rh, err) } // Getting the querier above registers itself in the queue that the truncation waits on. 
@@ -1925,7 +1934,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { shouldClose, getNew, newMint := db.head.IsQuerierCollidingWithTruncation(mint, maxt) if shouldClose { if err := inOrderHeadQuerier.Close(); err != nil { - return nil, errors.Wrapf(err, "closing head block querier %s", rh) + return nil, fmt.Errorf("closing head block querier %s: %w", rh, err) } inOrderHeadQuerier = nil } @@ -1933,7 +1942,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { rh := NewRangeHead(db.head, newMint, maxt) inOrderHeadQuerier, err = NewBlockQuerier(rh, newMint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open block querier for head while getting new querier %s", rh) + return nil, fmt.Errorf("open block querier for head while getting new querier %s: %w", rh, err) } } @@ -1950,7 +1959,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { // If NewBlockQuerier() failed, make sure to clean up the pending read created by NewOOORangeHead. 
rh.isoState.Close() - return nil, errors.Wrapf(err, "open block querier for ooo head %s", rh) + return nil, fmt.Errorf("open block querier for ooo head %s: %w", rh, err) } blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) @@ -1959,7 +1968,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { for _, b := range blocks { q, err := NewBlockQuerier(b, mint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open querier for block %s", b) + return nil, fmt.Errorf("open querier for block %s: %w", b, err) } blockQueriers = append(blockQueriers, q) } @@ -1997,7 +2006,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer rh := NewRangeHead(db.head, mint, maxt) inOrderHeadQuerier, err := NewBlockChunkQuerier(rh, mint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open querier for head %s", rh) + return nil, fmt.Errorf("open querier for head %s: %w", rh, err) } // Getting the querier above registers itself in the queue that the truncation waits on. 
@@ -2006,7 +2015,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer shouldClose, getNew, newMint := db.head.IsQuerierCollidingWithTruncation(mint, maxt) if shouldClose { if err := inOrderHeadQuerier.Close(); err != nil { - return nil, errors.Wrapf(err, "closing head querier %s", rh) + return nil, fmt.Errorf("closing head querier %s: %w", rh, err) } inOrderHeadQuerier = nil } @@ -2014,7 +2023,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer rh := NewRangeHead(db.head, newMint, maxt) inOrderHeadQuerier, err = NewBlockChunkQuerier(rh, newMint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open querier for head while getting new querier %s", rh) + return nil, fmt.Errorf("open querier for head while getting new querier %s: %w", rh, err) } } @@ -2027,7 +2036,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef) outOfOrderHeadQuerier, err := NewBlockChunkQuerier(rh, mint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open block chunk querier for ooo head %s", rh) + return nil, fmt.Errorf("open block chunk querier for ooo head %s: %w", rh, err) } blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) @@ -2036,7 +2045,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer for _, b := range blocks { q, err := NewBlockChunkQuerier(b, mint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open querier for block %s", b) + return nil, fmt.Errorf("open querier for block %s: %w", b, err) } blockQueriers = append(blockQueriers, q) } @@ -2105,7 +2114,7 @@ func (db *DB) CleanTombstones() (err error) { for _, pb := range db.Blocks() { uid, safeToDelete, cleanErr := pb.CleanTombstones(db.Dir(), db.compactor) if cleanErr != nil { - return errors.Wrapf(cleanErr, "clean tombstones: %s", pb.Dir()) + return fmt.Errorf("clean tombstones: %s: %w", pb.Dir(), 
cleanErr) } if !safeToDelete { // There was nothing to clean. @@ -2133,7 +2142,10 @@ func (db *DB) CleanTombstones() (err error) { level.Error(db.logger).Log("msg", "failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) } } - return errors.Wrap(err, "reload blocks") + if err != nil { + return fmt.Errorf("reload blocks: %w", err) + } + return nil } } return nil diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 4dcdef858..cb12f8630 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -34,7 +34,6 @@ import ( "github.com/go-kit/log" "github.com/oklog/ulid" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" @@ -355,7 +354,7 @@ func TestDBAppenderAddRef(t *testing.T) { // Missing labels & invalid refs should fail. _, err = app2.Append(9999999, labels.EmptyLabels(), 1, 1) - require.Equal(t, ErrInvalidSample, errors.Cause(err)) + require.ErrorIs(t, err, ErrInvalidSample) require.NoError(t, app2.Commit()) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 8eaf42653..805de70da 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -15,6 +15,7 @@ package tsdb import ( "context" + "errors" "sync" "unicode/utf8" @@ -363,7 +364,7 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp err := ce.validateExemplar(seriesLabels, e, true) if err != nil { - if err == storage.ErrDuplicateExemplar { + if errors.Is(err, storage.ErrDuplicateExemplar) { // Duplicate exemplar, noop. 
return nil } diff --git a/tsdb/head.go b/tsdb/head.go index 848357359..63d8e9ea1 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -15,6 +15,7 @@ package tsdb import ( "context" + "errors" "fmt" "io" "math" @@ -27,7 +28,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/oklog/ulid" - "github.com/pkg/errors" "go.uber.org/atomic" "github.com/prometheus/client_golang/prometheus" @@ -623,11 +623,11 @@ func (h *Head) Init(minValidTime int64) error { if h.wal != nil { _, endAt, err := wlog.Segments(h.wal.Dir()) if err != nil { - return errors.Wrap(err, "finding WAL segments") + return fmt.Errorf("finding WAL segments: %w", err) } _, idx, _, err := LastChunkSnapshot(h.opts.ChunkDirRoot) - if err != nil && err != record.ErrNotFound { + if err != nil && !errors.Is(err, record.ErrNotFound) { level.Error(h.logger).Log("msg", "Could not find last snapshot", "err", err) } @@ -674,7 +674,8 @@ func (h *Head) Init(minValidTime int64) error { if err != nil { // TODO(codesome): clear out all m-map chunks here for refSeries. level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) - if _, ok := errors.Cause(err).(*chunks.CorruptionErr); ok { + var cerr *chunks.CorruptionErr + if errors.As(err, &cerr) { h.metrics.mmapChunkCorruptionTotal.Inc() } @@ -701,14 +702,14 @@ func (h *Head) Init(minValidTime int64) error { checkpointReplayStart := time.Now() // Backfill the checkpoint first if it exists. dir, startFrom, err := wlog.LastCheckpoint(h.wal.Dir()) - if err != nil && err != record.ErrNotFound { - return errors.Wrap(err, "find last checkpoint") + if err != nil && !errors.Is(err, record.ErrNotFound) { + return fmt.Errorf("find last checkpoint: %w", err) } // Find the last segment. 
_, endAt, e := wlog.Segments(h.wal.Dir()) if e != nil { - return errors.Wrap(e, "finding WAL segments") + return fmt.Errorf("finding WAL segments: %w", e) } h.startWALReplayStatus(startFrom, endAt) @@ -717,7 +718,7 @@ func (h *Head) Init(minValidTime int64) error { if err == nil && startFrom >= snapIdx { sr, err := wlog.NewSegmentsReader(dir) if err != nil { - return errors.Wrap(err, "open checkpoint") + return fmt.Errorf("open checkpoint: %w", err) } defer func() { if err := sr.Close(); err != nil { @@ -728,7 +729,7 @@ func (h *Head) Init(minValidTime int64) error { // A corrupted checkpoint is a hard error for now and requires user // intervention. There's likely little data that can be recovered anyway. if err := h.loadWAL(wlog.NewReader(sr), multiRef, mmappedChunks, oooMmappedChunks); err != nil { - return errors.Wrap(err, "backfill checkpoint") + return fmt.Errorf("backfill checkpoint: %w", err) } h.updateWALReplayStatusRead(startFrom) startFrom++ @@ -745,7 +746,7 @@ func (h *Head) Init(minValidTime int64) error { for i := startFrom; i <= endAt; i++ { s, err := wlog.OpenReadSegment(wlog.SegmentName(h.wal.Dir(), i)) if err != nil { - return errors.Wrap(err, fmt.Sprintf("open WAL segment: %d", i)) + return fmt.Errorf("open WAL segment: %d: %w", i, err) } offset := 0 @@ -758,7 +759,7 @@ func (h *Head) Init(minValidTime int64) error { continue } if err != nil { - return errors.Wrapf(err, "segment reader (offset=%d)", offset) + return fmt.Errorf("segment reader (offset=%d): %w", offset, err) } err = h.loadWAL(wlog.NewReader(sr), multiRef, mmappedChunks, oooMmappedChunks) if err := sr.Close(); err != nil { @@ -777,14 +778,14 @@ func (h *Head) Init(minValidTime int64) error { // Replay WBL. 
startFrom, endAt, e = wlog.Segments(h.wbl.Dir()) if e != nil { - return &errLoadWbl{errors.Wrap(e, "finding WBL segments")} + return &errLoadWbl{fmt.Errorf("finding WBL segments: %w", e)} } h.startWALReplayStatus(startFrom, endAt) for i := startFrom; i <= endAt; i++ { s, err := wlog.OpenReadSegment(wlog.SegmentName(h.wbl.Dir(), i)) if err != nil { - return &errLoadWbl{errors.Wrap(err, fmt.Sprintf("open WBL segment: %d", i))} + return &errLoadWbl{fmt.Errorf("open WBL segment: %d: %w", i, err)} } sr := wlog.NewSegmentBufReader(s) @@ -905,7 +906,7 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) return nil }); err != nil { // secondLastRef because the lastRef caused an error. - return nil, nil, secondLastRef, errors.Wrap(err, "iterate on on-disk chunks") + return nil, nil, secondLastRef, fmt.Errorf("iterate on on-disk chunks: %w", err) } return mmappedChunks, oooMmappedChunks, lastRef, nil } @@ -1224,12 +1225,12 @@ func (h *Head) truncateWAL(mint int64) error { first, last, err := wlog.Segments(h.wal.Dir()) if err != nil { - return errors.Wrap(err, "get segment range") + return fmt.Errorf("get segment range: %w", err) } // Start a new segment, so low ingestion volume TSDB don't have more WAL than // needed. if _, err := h.wal.NextSegment(); err != nil { - return errors.Wrap(err, "next segment") + return fmt.Errorf("next segment: %w", err) } last-- // Never consider last segment for checkpoint. 
if last < 0 { @@ -1256,10 +1257,11 @@ func (h *Head) truncateWAL(mint int64) error { h.metrics.checkpointCreationTotal.Inc() if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, keep, mint); err != nil { h.metrics.checkpointCreationFail.Inc() - if _, ok := errors.Cause(err).(*wlog.CorruptionErr); ok { + var cerr *wlog.CorruptionErr + if errors.As(err, &cerr) { h.metrics.walCorruptionsTotal.Inc() } - return errors.Wrap(err, "create checkpoint") + return fmt.Errorf("create checkpoint: %w", err) } if err := h.wal.Truncate(last + 1); err != nil { // If truncating fails, we'll just try again at the next checkpoint. @@ -1352,7 +1354,7 @@ func (h *Head) truncateSeriesAndChunkDiskMapper(caller string) error { // Truncate the chunk m-mapper. if err := h.chunkDiskMapper.Truncate(uint32(minMmapFile)); err != nil { - return errors.Wrap(err, "truncate chunks.HeadReadWriter by file number") + return fmt.Errorf("truncate chunks.HeadReadWriter by file number: %w", err) } return nil } @@ -1467,13 +1469,13 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match p, err := PostingsForMatchers(ctx, ir, ms...) 
if err != nil { - return errors.Wrap(err, "select series") + return fmt.Errorf("select series: %w", err) } var stones []tombstones.Stone for p.Next() { if err := ctx.Err(); err != nil { - return errors.Wrap(err, "select series") + return fmt.Errorf("select series: %w", err) } series := h.series.getByID(chunks.HeadSeriesRef(p.At())) @@ -1495,8 +1497,8 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match if p.Err() != nil { return p.Err() } - if ctx.Err() != nil { - return errors.Wrap(err, "select series") + if err := ctx.Err(); err != nil { + return fmt.Errorf("select series: %w", err) } if h.wal != nil { diff --git a/tsdb/head_append.go b/tsdb/head_append.go index afb461afe..f509317c8 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -15,11 +15,11 @@ package tsdb import ( "context" + "errors" "fmt" "math" "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" @@ -358,10 +358,10 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 a.head.metrics.oooHistogram.Observe(float64(delta) / 1000) } if err != nil { - switch err { - case storage.ErrOutOfOrderSample: + switch { + case errors.Is(err, storage.ErrOutOfOrderSample): a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() - case storage.ErrTooOldSample: + case errors.Is(err, storage.ErrTooOldSample): a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Inc() } return 0, err @@ -428,10 +428,10 @@ func (a *headAppender) getOrCreate(lset labels.Labels) (*memSeries, error) { // Ensure no empty labels have gotten through. 
lset = lset.WithoutEmpty() if lset.IsEmpty() { - return nil, errors.Wrap(ErrInvalidSample, "empty labelset") + return nil, fmt.Errorf("empty labelset: %w", ErrInvalidSample) } if l, dup := lset.HasDuplicateLabelNames(); dup { - return nil, errors.Wrap(ErrInvalidSample, fmt.Sprintf(`label name "%s" is not unique`, l)) + return nil, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) } var created bool var err error @@ -557,7 +557,7 @@ func (a *headAppender) AppendExemplar(ref storage.SeriesRef, lset labels.Labels, err := a.head.exemplars.ValidateExemplar(s.lset, e) if err != nil { - if err == storage.ErrDuplicateExemplar || err == storage.ErrExemplarsDisabled { + if errors.Is(err, storage.ErrDuplicateExemplar) || errors.Is(err, storage.ErrExemplarsDisabled) { // Duplicate, don't return an error but don't accept the exemplar. return 0, nil } @@ -596,11 +596,11 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels // Ensure no empty labels have gotten through. 
lset = lset.WithoutEmpty() if lset.IsEmpty() { - return 0, errors.Wrap(ErrInvalidSample, "empty labelset") + return 0, fmt.Errorf("empty labelset: %w", ErrInvalidSample) } if l, dup := lset.HasDuplicateLabelNames(); dup { - return 0, errors.Wrap(ErrInvalidSample, fmt.Sprintf(`label name "%s" is not unique`, l)) + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) } var created bool @@ -628,7 +628,7 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels s.Lock() if err := s.appendableHistogram(t, h); err != nil { s.Unlock() - if err == storage.ErrOutOfOrderSample { + if errors.Is(err, storage.ErrOutOfOrderSample) { a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() } return 0, err @@ -645,7 +645,7 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels s.Lock() if err := s.appendableFloatHistogram(t, fh); err != nil { s.Unlock() - if err == storage.ErrOutOfOrderSample { + if errors.Is(err, storage.ErrOutOfOrderSample) { a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() } return 0, err @@ -729,7 +729,7 @@ func (a *headAppender) log() error { buf = rec[:0] if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log series") + return fmt.Errorf("log series: %w", err) } } if len(a.metadata) > 0 { @@ -737,7 +737,7 @@ func (a *headAppender) log() error { buf = rec[:0] if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log metadata") + return fmt.Errorf("log metadata: %w", err) } } if len(a.samples) > 0 { @@ -745,21 +745,21 @@ func (a *headAppender) log() error { buf = rec[:0] if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log samples") + return fmt.Errorf("log samples: %w", err) } } if len(a.histograms) > 0 { rec = enc.HistogramSamples(a.histograms, buf) buf = rec[:0] if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log histograms") + return 
fmt.Errorf("log histograms: %w", err) } } if len(a.floatHistograms) > 0 { rec = enc.FloatHistogramSamples(a.floatHistograms, buf) buf = rec[:0] if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log float histograms") + return fmt.Errorf("log float histograms: %w", err) } } // Exemplars should be logged after samples (float/native histogram/etc), @@ -771,7 +771,7 @@ func (a *headAppender) log() error { buf = rec[:0] if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log exemplars") + return fmt.Errorf("log exemplars: %w", err) } } return nil @@ -800,7 +800,7 @@ func (a *headAppender) Commit() (err error) { if err := a.log(); err != nil { _ = a.Rollback() // Most likely the same error will happen again. - return errors.Wrap(err, "write to WAL") + return fmt.Errorf("write to WAL: %w", err) } if a.head.writeNotified != nil { @@ -818,7 +818,7 @@ func (a *headAppender) Commit() (err error) { } // We don't instrument exemplar appends here, all is instrumented by storage. if err := a.head.exemplars.AddExemplar(s.lset, e.exemplar); err != nil { - if err == storage.ErrOutOfOrderExemplar { + if errors.Is(err, storage.ErrOutOfOrderExemplar) { continue } level.Debug(a.head.logger).Log("msg", "Unknown error while adding exemplar", "err", err) @@ -898,16 +898,16 @@ func (a *headAppender) Commit() (err error) { series.Lock() oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) - switch err { - case nil: + switch { + case err == nil: // Do nothing. 
- case storage.ErrOutOfOrderSample: + case errors.Is(err, storage.ErrOutOfOrderSample): samplesAppended-- oooRejected++ - case storage.ErrOutOfBounds: + case errors.Is(err, storage.ErrOutOfBounds): samplesAppended-- oobRejected++ - case storage.ErrTooOldSample: + case errors.Is(err, storage.ErrTooOldSample): samplesAppended-- tooOldRejected++ default: @@ -1487,7 +1487,7 @@ func (s *memSeries) mmapChunks(chunkDiskMapper *chunks.ChunkDiskMapper) (count i } func handleChunkWriteError(err error) { - if err != nil && err != chunks.ErrChunkDiskMapperClosed { + if err != nil && !errors.Is(err, chunks.ErrChunkDiskMapperClosed) { panic(err) } } diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index 8fdf94db0..a03794810 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -14,10 +14,10 @@ package tsdb import ( + "errors" "strconv" "testing" - "github.com/pkg/errors" "github.com/stretchr/testify/require" "go.uber.org/atomic" diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 35ef26a58..362764480 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -15,11 +15,12 @@ package tsdb import ( "context" + "errors" + "fmt" "math" "sync" "github.com/go-kit/log/level" - "github.com/pkg/errors" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/labels" @@ -133,7 +134,7 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { } } if err := p.Err(); err != nil { - return index.ErrPostings(errors.Wrap(err, "expand postings")) + return index.ErrPostings(fmt.Errorf("expand postings: %w", err)) } slices.SortFunc(series, func(a, b *memSeries) int { @@ -388,7 +389,8 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi if ix < len(s.mmappedChunks) { chk, err := chunkDiskMapper.Chunk(s.mmappedChunks[ix].ref) if err != nil { - if _, ok := err.(*chunks.CorruptionErr); ok { + var cerr *chunks.CorruptionErr + if errors.As(err, &cerr) { panic(err) } return nil, false, false, err @@ -516,14 
+518,15 @@ func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMappe xor, err = s.ooo.oooHeadChunk.chunk.ToXORBetweenTimestamps(meta.OOOLastMinTime, meta.OOOLastMaxTime) } if err != nil { - return nil, errors.Wrap(err, "failed to convert ooo head chunk to xor chunk") + return nil, fmt.Errorf("failed to convert ooo head chunk to xor chunk: %w", err) } iterable = xor } else { chk, err := cdm.Chunk(c.ref) if err != nil { - if _, ok := err.(*chunks.CorruptionErr); ok { - return nil, errors.Wrap(err, "invalid ooo mmapped chunk") + var cerr *chunks.CorruptionErr + if errors.As(err, &cerr) { + return nil, fmt.Errorf("invalid ooo mmapped chunk: %w", err) } return nil, err } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 535647d3a..5c2749bed 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -30,7 +30,6 @@ import ( "testing" "time" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" @@ -2056,9 +2055,8 @@ func TestWalRepair_DecodingError(t *testing.T) { require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) initErr := h.Init(math.MinInt64) - err = errors.Cause(initErr) // So that we can pick up errors even if wrapped. - _, corrErr := err.(*wlog.CorruptionErr) - require.True(t, corrErr, "reading the wal didn't return corruption error") + var cerr *wlog.CorruptionErr + require.ErrorAs(t, initErr, &cerr, "reading the wal didn't return corruption error") require.NoError(t, h.Close()) // Head will close the wal as well. } @@ -2129,12 +2127,11 @@ func TestWblRepair_DecodingError(t *testing.T) { require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) initErr := h.Init(math.MinInt64) - _, ok := initErr.(*errLoadWbl) - require.True(t, ok) // Wbl errors are wrapped into errLoadWbl, make sure we can unwrap it. 
+ var elb *errLoadWbl + require.ErrorAs(t, initErr, &elb) // Wbl errors are wrapped into errLoadWbl, make sure we can unwrap it. - err = errors.Cause(initErr) // So that we can pick up errors even if wrapped. - _, corrErr := err.(*wlog.CorruptionErr) - require.True(t, corrErr, "reading the wal didn't return corruption error") + var cerr *wlog.CorruptionErr + require.ErrorAs(t, initErr, &cerr, "reading the wal didn't return corruption error") require.NoError(t, h.Close()) // Head will close the wal as well. } diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 07fa8280c..a492a85a0 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -14,6 +14,7 @@ package tsdb import ( + "errors" "fmt" "math" "os" @@ -24,7 +25,6 @@ import ( "time" "github.com/go-kit/log/level" - "github.com/pkg/errors" "go.uber.org/atomic" "github.com/prometheus/prometheus/model/exemplar" @@ -128,7 +128,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. // At the moment the only possible error here is out of order exemplars, which we shouldn't see when // replaying the WAL, so lets just log the error if it's not that type. err = h.exemplars.AddExemplar(ms.lset, exemplar.Exemplar{Ts: e.T, Value: e.V, Labels: e.Labels}) - if err != nil && err == storage.ErrOutOfOrderExemplar { + if err != nil && errors.Is(err, storage.ErrOutOfOrderExemplar) { level.Warn(h.logger).Log("msg", "Unexpected error when replaying WAL on exemplar record", "err", err) } } @@ -145,7 +145,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. series, err = dec.Series(rec, series) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode series"), + Err: fmt.Errorf("decode series: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -157,7 +157,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. 
samples, err = dec.Samples(rec, samples) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode samples"), + Err: fmt.Errorf("decode samples: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -169,7 +169,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. tstones, err = dec.Tombstones(rec, tstones) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode tombstones"), + Err: fmt.Errorf("decode tombstones: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -181,7 +181,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. exemplars, err = dec.Exemplars(rec, exemplars) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode exemplars"), + Err: fmt.Errorf("decode exemplars: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -193,7 +193,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. hists, err = dec.HistogramSamples(rec, hists) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode histograms"), + Err: fmt.Errorf("decode histograms: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -205,7 +205,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. hists, err = dec.FloatHistogramSamples(rec, hists) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode float histograms"), + Err: fmt.Errorf("decode float histograms: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -217,7 +217,7 @@ func (h *Head) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. 
meta, err := dec.Metadata(rec, meta) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode metadata"), + Err: fmt.Errorf("decode metadata: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -416,8 +416,8 @@ Outer: close(exemplarsInput) wg.Wait() - if r.Err() != nil { - return errors.Wrap(r.Err(), "read records") + if err := r.Err(); err != nil { + return fmt.Errorf("read records: %w", err) } if unknownRefs.Load()+unknownExemplarRefs.Load()+unknownHistogramRefs.Load()+unknownMetadataRefs.Load() > 0 { @@ -708,7 +708,7 @@ func (h *Head) loadWBL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. samples, err = dec.Samples(rec, samples) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode samples"), + Err: fmt.Errorf("decode samples: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -720,7 +720,7 @@ func (h *Head) loadWBL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. markers, err = dec.MmapMarkers(rec, markers) if err != nil { decodeErr = &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode mmap markers"), + Err: fmt.Errorf("decode mmap markers: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -806,8 +806,8 @@ func (h *Head) loadWBL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks. 
} wg.Wait() - if r.Err() != nil { - return errors.Wrap(r.Err(), "read records") + if err := r.Err(); err != nil { + return fmt.Errorf("read records: %w", err) } if unknownRefs.Load() > 0 || mmapMarkerUnknownRefs.Load() > 0 { @@ -995,7 +995,7 @@ func decodeSeriesFromChunkSnapshot(d *record.Decoder, b []byte) (csr chunkSnapsh chk, err := chunkenc.FromData(enc, chunkBytesCopy) if err != nil { - return csr, errors.Wrap(err, "chunk from data") + return csr, fmt.Errorf("chunk from data: %w", err) } csr.mc.chunk = chk @@ -1030,7 +1030,7 @@ func encodeTombstonesToSnapshotRecord(tr tombstones.Reader) ([]byte, error) { buf.PutByte(chunkSnapshotRecordTypeTombstones) b, err := tombstones.Encode(tr) if err != nil { - return nil, errors.Wrap(err, "encode tombstones") + return nil, fmt.Errorf("encode tombstones: %w", err) } buf.PutUvarintBytes(b) @@ -1045,7 +1045,10 @@ func decodeTombstonesSnapshotRecord(b []byte) (tombstones.Reader, error) { } tr, err := tombstones.Decode(dec.UvarintBytes()) - return tr, errors.Wrap(err, "decode tombstones") + if err != nil { + return tr, fmt.Errorf("decode tombstones: %w", err) + } + return tr, nil } const chunkSnapshotPrefix = "chunk_snapshot." 
@@ -1072,13 +1075,13 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { stats := &ChunkSnapshotStats{} wlast, woffset, err := h.wal.LastSegmentAndOffset() - if err != nil && err != record.ErrNotFound { - return stats, errors.Wrap(err, "get last wal segment and offset") + if err != nil && !errors.Is(err, record.ErrNotFound) { + return stats, fmt.Errorf("get last wal segment and offset: %w", err) } _, cslast, csoffset, err := LastChunkSnapshot(h.opts.ChunkDirRoot) - if err != nil && err != record.ErrNotFound { - return stats, errors.Wrap(err, "find last chunk snapshot") + if err != nil && !errors.Is(err, record.ErrNotFound) { + return stats, fmt.Errorf("find last chunk snapshot: %w", err) } if wlast == cslast && woffset == csoffset { @@ -1093,11 +1096,11 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { stats.Dir = cpdir if err := os.MkdirAll(cpdirtmp, 0o777); err != nil { - return stats, errors.Wrap(err, "create chunk snapshot dir") + return stats, fmt.Errorf("create chunk snapshot dir: %w", err) } cp, err := wlog.New(nil, nil, cpdirtmp, h.wal.CompressionType()) if err != nil { - return stats, errors.Wrap(err, "open chunk snapshot") + return stats, fmt.Errorf("open chunk snapshot: %w", err) } // Ensures that an early return caused by an error doesn't leave any tmp files. @@ -1126,7 +1129,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { if len(buf) > 10*1024*1024 { if err := cp.Log(recs...); err != nil { h.series.locks[i].RUnlock() - return stats, errors.Wrap(err, "flush records") + return stats, fmt.Errorf("flush records: %w", err) } buf, recs = buf[:0], recs[:0] } @@ -1139,16 +1142,16 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { // Add tombstones to the snapshot. 
tombstonesReader, err := h.Tombstones() if err != nil { - return stats, errors.Wrap(err, "get tombstones") + return stats, fmt.Errorf("get tombstones: %w", err) } rec, err := encodeTombstonesToSnapshotRecord(tombstonesReader) if err != nil { - return stats, errors.Wrap(err, "encode tombstones") + return stats, fmt.Errorf("encode tombstones: %w", err) } recs = append(recs, rec) // Flush remaining series records and tombstones. if err := cp.Log(recs...); err != nil { - return stats, errors.Wrap(err, "flush records") + return stats, fmt.Errorf("flush records: %w", err) } buf = buf[:0] @@ -1167,7 +1170,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { encbuf.PutByte(chunkSnapshotRecordTypeExemplars) enc.EncodeExemplarsIntoBuffer(batch, &encbuf) if err := cp.Log(encbuf.Get()); err != nil { - return errors.Wrap(err, "log exemplars") + return fmt.Errorf("log exemplars: %w", err) } buf, batch = buf[:0], batch[:0] return nil @@ -1175,7 +1178,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { err = h.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error { if len(batch) >= maxExemplarsPerRecord { if err := flushExemplars(); err != nil { - return errors.Wrap(err, "flush exemplars") + return fmt.Errorf("flush exemplars: %w", err) } } @@ -1193,19 +1196,19 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { return nil }) if err != nil { - return stats, errors.Wrap(err, "iterate exemplars") + return stats, fmt.Errorf("iterate exemplars: %w", err) } // Flush remaining exemplars. 
if err := flushExemplars(); err != nil { - return stats, errors.Wrap(err, "flush exemplars at the end") + return stats, fmt.Errorf("flush exemplars at the end: %w", err) } if err := cp.Close(); err != nil { - return stats, errors.Wrap(err, "close chunk snapshot") + return stats, fmt.Errorf("close chunk snapshot: %w", err) } if err := fileutil.Replace(cpdirtmp, cpdir); err != nil { - return stats, errors.Wrap(err, "rename chunk snapshot directory") + return stats, fmt.Errorf("rename chunk snapshot directory: %w", err) } if err := DeleteChunkSnapshots(h.opts.ChunkDirRoot, wlast, woffset); err != nil { @@ -1229,7 +1232,10 @@ func (h *Head) performChunkSnapshot() error { if err == nil { level.Info(h.logger).Log("msg", "chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) } - return errors.Wrap(err, "chunk snapshot") + if err != nil { + return fmt.Errorf("chunk snapshot: %w", err) + } + return nil } // ChunkSnapshotStats returns stats about a created chunk snapshot. 
@@ -1327,16 +1333,16 @@ func DeleteChunkSnapshots(dir string, maxIndex, maxOffset int) error { func (h *Head) loadChunkSnapshot() (int, int, map[chunks.HeadSeriesRef]*memSeries, error) { dir, snapIdx, snapOffset, err := LastChunkSnapshot(h.opts.ChunkDirRoot) if err != nil { - if err == record.ErrNotFound { + if errors.Is(err, record.ErrNotFound) { return snapIdx, snapOffset, nil, nil } - return snapIdx, snapOffset, nil, errors.Wrap(err, "find last chunk snapshot") + return snapIdx, snapOffset, nil, fmt.Errorf("find last chunk snapshot: %w", err) } start := time.Now() sr, err := wlog.NewSegmentsReader(dir) if err != nil { - return snapIdx, snapOffset, nil, errors.Wrap(err, "open chunk snapshot") + return snapIdx, snapOffset, nil, fmt.Errorf("open chunk snapshot: %w", err) } defer func() { if err := sr.Close(); err != nil { @@ -1424,7 +1430,7 @@ Outer: numSeries++ csr, err := decodeSeriesFromChunkSnapshot(&dec, rec) if err != nil { - loopErr = errors.Wrap(err, "decode series record") + loopErr = fmt.Errorf("decode series record: %w", err) break Outer } recordChan <- csr @@ -1432,7 +1438,7 @@ Outer: case chunkSnapshotRecordTypeTombstones: tr, err := decodeTombstonesSnapshotRecord(rec) if err != nil { - loopErr = errors.Wrap(err, "decode tombstones") + loopErr = fmt.Errorf("decode tombstones: %w", err) break Outer } @@ -1440,7 +1446,7 @@ Outer: h.tombstones.AddInterval(ref, ivs...) 
return nil }); err != nil { - loopErr = errors.Wrap(err, "iterate tombstones") + loopErr = fmt.Errorf("iterate tombstones: %w", err) break Outer } @@ -1468,7 +1474,7 @@ Outer: exemplarBuf = exemplarBuf[:0] exemplarBuf, err = dec.ExemplarsFromBuffer(&decbuf, exemplarBuf) if err != nil { - loopErr = errors.Wrap(err, "exemplars from buffer") + loopErr = fmt.Errorf("exemplars from buffer: %w", err) break Outer } @@ -1484,7 +1490,7 @@ Outer: Value: e.V, Ts: e.T, }); err != nil { - loopErr = errors.Wrap(err, "add exemplar") + loopErr = fmt.Errorf("add exemplar: %w", err) break Outer } } @@ -1502,16 +1508,19 @@ Outer: } close(errChan) - merr := tsdb_errors.NewMulti(errors.Wrap(loopErr, "decode loop")) + merr := tsdb_errors.NewMulti() + if loopErr != nil { + merr.Add(fmt.Errorf("decode loop: %w", loopErr)) + } for err := range errChan { - merr.Add(errors.Wrap(err, "record processing")) + merr.Add(fmt.Errorf("record processing: %w", err)) } if err := merr.Err(); err != nil { return -1, -1, nil, err } - if r.Err() != nil { - return -1, -1, nil, errors.Wrap(r.Err(), "read records") + if err := r.Err(); err != nil { + return -1, -1, nil, fmt.Errorf("read records: %w", err) } if len(refSeries) == 0 { diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 44ee66386..2b025a352 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -425,7 +425,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... // We add padding to 16 bytes to increase the addressable space we get through 4 byte // series references. if err := w.addPadding(16); err != nil { - return fmt.Errorf("failed to write padding bytes: %v", err) + return fmt.Errorf("failed to write padding bytes: %w", err) } if w.f.pos%16 != 0 { @@ -442,7 +442,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... 
if !ok { nameIndex, err = w.symbols.ReverseLookup(l.Name) if err != nil { - return fmt.Errorf("symbol entry for %q does not exist, %v", l.Name, err) + return fmt.Errorf("symbol entry for %q does not exist, %w", l.Name, err) } } w.labelNames[l.Name]++ @@ -452,7 +452,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... if !ok || cacheEntry.lastValue != l.Value { valueIndex, err = w.symbols.ReverseLookup(l.Value) if err != nil { - return fmt.Errorf("symbol entry for %q does not exist, %v", l.Value, err) + return fmt.Errorf("symbol entry for %q does not exist, %w", l.Value, err) } w.symbolCache[l.Name] = symbolCacheEntry{ index: nameIndex, diff --git a/tsdb/mocks_test.go b/tsdb/mocks_test.go index 268017caa..d7c2b0a4f 100644 --- a/tsdb/mocks_test.go +++ b/tsdb/mocks_test.go @@ -14,7 +14,7 @@ package tsdb import ( - "github.com/pkg/errors" + "fmt" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" @@ -41,7 +41,7 @@ func (m *mockIndexWriter) AddSeries(_ storage.SeriesRef, l labels.Labels, chks . 
for i, chk := range chks { c, err := copyChunk(chk.Chunk) if err != nil { - return errors.Wrap(err, "mockIndexWriter: copy chunk") + return fmt.Errorf("mockIndexWriter: copy chunk: %w", err) } chksNew[i] = chunks.Meta{MaxTime: chk.MaxTime, MinTime: chk.MinTime, Chunk: c} } diff --git a/tsdb/querier.go b/tsdb/querier.go index 6584d7da0..f88e4415e 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -15,13 +15,13 @@ package tsdb import ( "context" + "errors" "fmt" "math" "strings" "unicode/utf8" "github.com/oklog/ulid" - "github.com/pkg/errors" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/histogram" @@ -63,18 +63,18 @@ type blockBaseQuerier struct { func newBlockBaseQuerier(b BlockReader, mint, maxt int64) (*blockBaseQuerier, error) { indexr, err := b.Index() if err != nil { - return nil, errors.Wrap(err, "open index reader") + return nil, fmt.Errorf("open index reader: %w", err) } chunkr, err := b.Chunks() if err != nil { indexr.Close() - return nil, errors.Wrap(err, "open chunk reader") + return nil, fmt.Errorf("open chunk reader: %w", err) } tombsr, err := b.Tombstones() if err != nil { indexr.Close() chunkr.Close() - return nil, errors.Wrap(err, "open tombstone reader") + return nil, fmt.Errorf("open tombstone reader: %w", err) } if tombsr == nil { @@ -442,12 +442,12 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, matchers ...*labels.Matcher) ([]string, error) { p, err := PostingsForMatchers(ctx, r, matchers...) 
if err != nil { - return nil, errors.Wrap(err, "fetching postings for matchers") + return nil, fmt.Errorf("fetching postings for matchers: %w", err) } allValues, err := r.LabelValues(ctx, name) if err != nil { - return nil, errors.Wrapf(err, "fetching values of label %s", name) + return nil, fmt.Errorf("fetching values of label %s: %w", name, err) } // If we have a matcher for the label name, we can filter out values that don't match @@ -473,12 +473,12 @@ func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, ma for i, value := range allValues { valuesPostings[i], err = r.Postings(ctx, name, value) if err != nil { - return nil, errors.Wrapf(err, "fetching postings for %s=%q", name, value) + return nil, fmt.Errorf("fetching postings for %s=%q: %w", name, value, err) } } indexes, err := index.FindIntersectingPostings(p, valuesPostings) if err != nil { - return nil, errors.Wrap(err, "intersecting postings") + return nil, fmt.Errorf("intersecting postings: %w", err) } values := make([]string, 0, len(indexes)) @@ -499,8 +499,8 @@ func labelNamesWithMatchers(ctx context.Context, r IndexReader, matchers ...*lab for p.Next() { postings = append(postings, p.At()) } - if p.Err() != nil { - return nil, errors.Wrapf(p.Err(), "postings for label names with matchers") + if err := p.Err(); err != nil { + return nil, fmt.Errorf("postings for label names with matchers: %w", err) } return r.LabelNamesFor(ctx, postings...) @@ -539,10 +539,10 @@ func (b *blockBaseSeriesSet) Next() bool { for b.p.Next() { if err := b.index.Series(b.p.At(), &b.builder, &b.bufChks); err != nil { // Postings may be stale. Skip if no underlying series exists. 
- if errors.Cause(err) == storage.ErrNotFound { + if errors.Is(err, storage.ErrNotFound) { continue } - b.err = errors.Wrapf(err, "get series %d", b.p.At()) + b.err = fmt.Errorf("get series %d: %w", b.p.At(), err) return false } @@ -552,7 +552,7 @@ func (b *blockBaseSeriesSet) Next() bool { intervals, err := b.tombstones.Get(b.p.At()) if err != nil { - b.err = errors.Wrap(err, "get tombstones") + b.err = fmt.Errorf("get tombstones: %w", err) return false } @@ -702,7 +702,7 @@ func (p *populateWithDelGenericSeriesIterator) next(copyHeadChunk bool) bool { } if p.err != nil { - p.err = errors.Wrapf(p.err, "cannot populate chunk %d from block %s", p.currMeta.Ref, p.blockID.String()) + p.err = fmt.Errorf("cannot populate chunk %d from block %s: %w", p.currMeta.Ref, p.blockID.String(), p.err) return false } @@ -900,7 +900,7 @@ func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool { valueType := p.currDelIter.Next() if valueType == chunkenc.ValNone { if err := p.currDelIter.Err(); err != nil { - p.err = errors.Wrap(err, "iterate chunk while re-encoding") + p.err = fmt.Errorf("iterate chunk while re-encoding: %w", err) } return false } @@ -968,11 +968,11 @@ func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool { } if err != nil { - p.err = errors.Wrap(err, "iterate chunk while re-encoding") + p.err = fmt.Errorf("iterate chunk while re-encoding: %w", err) return false } if err := p.currDelIter.Err(); err != nil { - p.err = errors.Wrap(err, "iterate chunk while re-encoding") + p.err = fmt.Errorf("iterate chunk while re-encoding: %w", err) return false } @@ -991,7 +991,7 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { firstValueType := p.currDelIter.Next() if firstValueType == chunkenc.ValNone { if err := p.currDelIter.Err(); err != nil { - p.err = errors.Wrap(err, "populateChunksFromIterable: no samples could be read") + p.err = fmt.Errorf("populateChunksFromIterable: no samples could be 
read: %w", err) return false } return false @@ -1075,11 +1075,11 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { } if err != nil { - p.err = errors.Wrap(err, "populateChunksFromIterable: error when writing new chunks") + p.err = fmt.Errorf("populateChunksFromIterable: error when writing new chunks: %w", err) return false } if err = p.currDelIter.Err(); err != nil { - p.err = errors.Wrap(err, "populateChunksFromIterable: currDelIter error when writing new chunks") + p.err = fmt.Errorf("populateChunksFromIterable: currDelIter error when writing new chunks: %w", err) return false } diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 64e16392d..18d81b85b 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -15,6 +15,7 @@ package tsdb import ( "context" + "errors" "fmt" "math" "math/rand" @@ -26,7 +27,6 @@ import ( "time" "github.com/oklog/ulid" - "github.com/pkg/errors" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -2317,7 +2317,7 @@ func (m mockIndex) Postings(ctx context.Context, name string, values ...string) func (m mockIndex) SortedPostings(p index.Postings) index.Postings { ep, err := index.ExpandPostings(p) if err != nil { - return index.ErrPostings(errors.Wrap(err, "expand postings")) + return index.ErrPostings(fmt.Errorf("expand postings: %w", err)) } sort.Slice(ep, func(i, j int) bool { diff --git a/tsdb/repair.go b/tsdb/repair.go index 081116454..9d2c5738d 100644 --- a/tsdb/repair.go +++ b/tsdb/repair.go @@ -22,7 +22,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/fileutil" @@ -35,7 +34,7 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { // We must actually set the index file version to 2 and revert the meta.json version back to 1. 
dirs, err := blockDirs(dir) if err != nil { - return errors.Wrapf(err, "list block dirs in %q", dir) + return fmt.Errorf("list block dirs in %q: %w", dir, err) } tmpFiles := make([]string, 0, len(dirs)) @@ -71,44 +70,54 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { repl, err := os.Create(filepath.Join(d, "index.repaired")) if err != nil { - return errors.Wrapf(err, "create index.repaired for block dir: %v", d) + return fmt.Errorf("create index.repaired for block dir: %v: %w", d, err) } tmpFiles = append(tmpFiles, repl.Name()) broken, err := os.Open(filepath.Join(d, indexFilename)) if err != nil { - return errors.Wrapf(err, "open broken index for block dir: %v", d) + return fmt.Errorf("open broken index for block dir: %v: %w", d, err) } if _, err := io.Copy(repl, broken); err != nil { - return errors.Wrapf(err, "copy content of index to index.repaired for block dir: %v", d) + return fmt.Errorf("copy content of index to index.repaired for block dir: %v: %w", d, err) } // Set the 5th byte to 2 to indicate the correct file format version. 
if _, err := repl.WriteAt([]byte{2}, 4); err != nil { - return tsdb_errors.NewMulti( - errors.Wrapf(err, "rewrite of index.repaired for block dir: %v", d), - errors.Wrap(repl.Close(), "close"), - ).Err() + errs := tsdb_errors.NewMulti( + fmt.Errorf("rewrite of index.repaired for block dir: %v: %w", d, err)) + if err := repl.Close(); err != nil { + errs.Add(fmt.Errorf("close: %w", err)) + } + return errs.Err() } if err := repl.Sync(); err != nil { - return tsdb_errors.NewMulti( - errors.Wrapf(err, "sync of index.repaired for block dir: %v", d), - errors.Wrap(repl.Close(), "close"), - ).Err() + errs := tsdb_errors.NewMulti( + fmt.Errorf("sync of index.repaired for block dir: %v: %w", d, err)) + if err := repl.Close(); err != nil { + errs.Add(fmt.Errorf("close: %w", err)) + } + return errs.Err() } if err := repl.Close(); err != nil { - return errors.Wrapf(repl.Close(), "close repaired index for block dir: %v", d) + return fmt.Errorf("close repaired index for block dir: %v: %w", d, err) } if err := broken.Close(); err != nil { - return errors.Wrapf(repl.Close(), "close broken index for block dir: %v", d) + if err := repl.Close(); err != nil { + return fmt.Errorf("close broken index for block dir: %v: %w", d, err) + } } if err := fileutil.Replace(repl.Name(), broken.Name()); err != nil { - return errors.Wrapf(repl.Close(), "replaced broken index with index.repaired for block dir: %v", d) + if err := repl.Close(); err != nil { + return fmt.Errorf("replaced broken index with index.repaired for block dir: %v: %w", d, err) + } } // Reset version of meta.json to 1. 
meta.Version = metaVersion1 if _, err := writeMetaFile(logger, d, meta); err != nil { - return errors.Wrapf(repl.Close(), "write meta for block dir: %v", d) + if err := repl.Close(); err != nil { + return fmt.Errorf("write meta for block dir: %v: %w", d, err) + } } } return nil diff --git a/tsdb/wal.go b/tsdb/wal.go index bc7db35bf..1509c9cd9 100644 --- a/tsdb/wal.go +++ b/tsdb/wal.go @@ -16,6 +16,7 @@ package tsdb import ( "bufio" "encoding/binary" + "errors" "fmt" "hash" "hash/crc32" @@ -28,7 +29,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/storage" @@ -210,7 +210,7 @@ func OpenSegmentWAL(dir string, logger log.Logger, flushInterval time.Duration, for _, fn := range fns[i:] { if err := os.Remove(fn); err != nil { - return w, errors.Wrap(err, "removing segment failed") + return w, fmt.Errorf("removing segment failed: %w", err) } } break @@ -237,8 +237,8 @@ func (r *repairingWALReader) Read( if err == nil { return nil } - cerr, ok := errors.Cause(err).(walCorruptionErr) - if !ok { + var cerr *walCorruptionErr + if !errors.As(err, &cerr) { return err } r.wal.metrics.corruptions.Inc() @@ -309,7 +309,7 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(chunks.HeadSeriesRef) bool) // Past WAL files are closed. We have to reopen them for another read. f, err := w.openSegmentFile(sf.Name()) if err != nil { - return errors.Wrap(err, "open old WAL segment for read") + return fmt.Errorf("open old WAL segment for read: %w", err) } candidates = append(candidates, &segmentFile{ File: f, @@ -326,7 +326,7 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(chunks.HeadSeriesRef) bool) // Create a new tmp file. 
f, err := w.createSegmentFile(filepath.Join(w.dirFile.Name(), "compact.tmp")) if err != nil { - return errors.Wrap(err, "create compaction segment") + return fmt.Errorf("create compaction segment: %w", err) } defer func() { if err := os.RemoveAll(f.Name()); err != nil { @@ -352,7 +352,7 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(chunks.HeadSeriesRef) bool) err := r.decodeSeries(flag, byt, &decSeries) if err != nil { - return errors.Wrap(err, "decode samples while truncating") + return fmt.Errorf("decode samples while truncating: %w", err) } for _, s := range decSeries { if keep(s.Ref) { @@ -367,11 +367,11 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(chunks.HeadSeriesRef) bool) w.putBuffer(buf) if err != nil { - return errors.Wrap(err, "write to compaction segment") + return fmt.Errorf("write to compaction segment: %w", err) } } - if r.Err() != nil { - return errors.Wrap(r.Err(), "read candidate WAL files") + if err := r.Err(); err != nil { + return fmt.Errorf("read candidate WAL files: %w", err) } off, err := csf.Seek(0, io.SeekCurrent) @@ -390,12 +390,12 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(chunks.HeadSeriesRef) bool) _ = candidates[0].Close() // need close before remove on platform windows if err := fileutil.Replace(csf.Name(), candidates[0].Name()); err != nil { - return errors.Wrap(err, "rename compaction segment") + return fmt.Errorf("rename compaction segment: %w", err) } for _, f := range candidates[1:] { f.Close() // need close before remove on platform windows if err := os.RemoveAll(f.Name()); err != nil { - return errors.Wrap(err, "delete WAL segment file") + return fmt.Errorf("delete WAL segment file: %w", err) } } if err := w.dirFile.Sync(); err != nil { @@ -435,7 +435,7 @@ func (w *SegmentWAL) LogSeries(series []record.RefSeries) error { w.putBuffer(buf) if err != nil { - return errors.Wrap(err, "log series") + return fmt.Errorf("log series: %w", err) } tf := w.head() @@ -462,7 +462,7 @@ func (w *SegmentWAL) 
LogSamples(samples []record.RefSample) error { w.putBuffer(buf) if err != nil { - return errors.Wrap(err, "log series") + return fmt.Errorf("log series: %w", err) } tf := w.head() @@ -488,7 +488,7 @@ func (w *SegmentWAL) LogDeletes(stones []tombstones.Stone) error { w.putBuffer(buf) if err != nil { - return errors.Wrap(err, "log series") + return fmt.Errorf("log series: %w", err) } tf := w.head() @@ -523,7 +523,7 @@ func (w *SegmentWAL) openSegmentFile(name string) (*os.File, error) { switch n, err := f.Read(metab); { case err != nil: - return nil, errors.Wrapf(err, "validate meta %q", f.Name()) + return nil, fmt.Errorf("validate meta %q: %w", f.Name(), err) case n != 8: return nil, fmt.Errorf("invalid header size %d in %q", n, f.Name()) } @@ -573,16 +573,16 @@ func (w *SegmentWAL) cut() error { w.actorc <- func() error { off, err := hf.Seek(0, io.SeekCurrent) if err != nil { - return errors.Wrapf(err, "finish old segment %s", hf.Name()) + return fmt.Errorf("finish old segment %s: %w", hf.Name(), err) } if err := hf.Truncate(off); err != nil { - return errors.Wrapf(err, "finish old segment %s", hf.Name()) + return fmt.Errorf("finish old segment %s: %w", hf.Name(), err) } if err := hf.Sync(); err != nil { - return errors.Wrapf(err, "finish old segment %s", hf.Name()) + return fmt.Errorf("finish old segment %s: %w", hf.Name(), err) } if err := hf.Close(); err != nil { - return errors.Wrapf(err, "finish old segment %s", hf.Name()) + return fmt.Errorf("finish old segment %s: %w", hf.Name(), err) } return nil } @@ -600,7 +600,10 @@ func (w *SegmentWAL) cut() error { go func() { w.actorc <- func() error { - return errors.Wrap(w.dirFile.Sync(), "sync WAL directory") + if err := w.dirFile.Sync(); err != nil { + return fmt.Errorf("sync WAL directory: %w", err) + } + return nil } }() @@ -635,7 +638,7 @@ func (w *SegmentWAL) Sync() error { head = w.head() }() if err != nil { - return errors.Wrap(err, "flush buffer") + return fmt.Errorf("flush buffer: %w", err) } if head != 
nil { // But only fsync the head segment after releasing the mutex as it will block on disk I/O. @@ -726,11 +729,13 @@ func (w *SegmentWAL) Close() error { // only the current segment will still be open. if hf := w.head(); hf != nil { if err := hf.Close(); err != nil { - return errors.Wrapf(err, "closing WAL head %s", hf.Name()) + return fmt.Errorf("closing WAL head %s: %w", hf.Name(), err) } } - - return errors.Wrapf(w.dirFile.Close(), "closing WAL dir %s", w.dirFile.Name()) + if err := w.dirFile.Close(); err != nil { + return fmt.Errorf("closing WAL dir %s: %w", w.dirFile.Name(), err) + } + return nil } func (w *SegmentWAL) write(t WALEntryType, flag uint8, buf []byte) error { @@ -921,7 +926,7 @@ func (r *walReader) Read( err = r.decodeSeries(flag, b, &series) if err != nil { - err = errors.Wrap(err, "decode series entry") + err = fmt.Errorf("decode series entry: %w", err) break } datac <- series @@ -940,7 +945,7 @@ func (r *walReader) Read( err = r.decodeSamples(flag, b, &samples) if err != nil { - err = errors.Wrap(err, "decode samples entry") + err = fmt.Errorf("decode samples entry: %w", err) break } datac <- samples @@ -960,7 +965,7 @@ func (r *walReader) Read( err = r.decodeDeletes(flag, b, &deletes) if err != nil { - err = errors.Wrap(err, "decode delete entry") + err = fmt.Errorf("decode delete entry: %w", err) break } datac <- deletes @@ -982,8 +987,8 @@ func (r *walReader) Read( if err != nil { return err } - if r.Err() != nil { - return errors.Wrap(r.Err(), "read entry") + if err := r.Err(); err != nil { + return fmt.Errorf("read entry: %w", err) } return nil } @@ -1046,12 +1051,16 @@ type walCorruptionErr struct { lastOffset int64 } -func (e walCorruptionErr) Error() string { +func (e *walCorruptionErr) Error() string { return fmt.Sprintf("%s <file: %d, lastOffset: %d>", e.err, e.file, e.lastOffset) } +func (e *walCorruptionErr) Unwrap() error { + return e.err +} + func (r *walReader) corruptionErr(s string, args ...interface{}) error { - return walCorruptionErr{ + return 
&walCorruptionErr{ err: fmt.Errorf(s, args...), file: r.cur, lastOffset: r.lastOffset, @@ -1152,8 +1161,8 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]record.RefSample) }) } - if dec.Err() != nil { - return errors.Wrapf(dec.Err(), "decode error after %d samples", len(*res)) + if err := dec.Err(); err != nil { + return fmt.Errorf("decode error after %d samples: %w", len(*res), err) } if len(dec.B) > 0 { return fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) @@ -1185,7 +1194,7 @@ func deprecatedWALExists(logger log.Logger, dir string) (bool, error) { // Detect whether we still have the old WAL. fns, err := sequenceFiles(dir) if err != nil && !os.IsNotExist(err) { - return false, errors.Wrap(err, "list sequence files") + return false, fmt.Errorf("list sequence files: %w", err) } if len(fns) == 0 { return false, nil // No WAL at all yet. @@ -1194,13 +1203,13 @@ func deprecatedWALExists(logger log.Logger, dir string) (bool, error) { // old WAL. f, err := os.Open(fns[0]) if err != nil { - return false, errors.Wrap(err, "check first existing segment") + return false, fmt.Errorf("check first existing segment: %w", err) } defer f.Close() var hdr [4]byte - if _, err := f.Read(hdr[:]); err != nil && err != io.EOF { - return false, errors.Wrap(err, "read header from first segment") + if _, err := f.Read(hdr[:]); err != nil && !errors.Is(err, io.EOF) { + return false, fmt.Errorf("read header from first segment: %w", err) } // If we cannot read the magic header for segments of the old WAL, abort. 
// Either it's migrated already or there's a corruption issue with which @@ -1223,11 +1232,11 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { tmpdir := dir + ".tmp" if err := os.RemoveAll(tmpdir); err != nil { - return errors.Wrap(err, "cleanup replacement dir") + return fmt.Errorf("cleanup replacement dir: %w", err) } repl, err := wlog.New(logger, nil, tmpdir, wlog.CompressionNone) if err != nil { - return errors.Wrap(err, "open new WAL") + return fmt.Errorf("open new WAL: %w", err) } // It should've already been closed as part of the previous finalization. @@ -1240,7 +1249,7 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { w, err := OpenSegmentWAL(dir, logger, time.Minute, nil) if err != nil { - return errors.Wrap(err, "open old WAL") + return fmt.Errorf("open old WAL: %w", err) } defer w.Close() @@ -1271,22 +1280,22 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { }, ) if decErr != nil { - return errors.Wrap(err, "decode old entries") + return fmt.Errorf("decode old entries: %w", err) } if err != nil { - return errors.Wrap(err, "write new entries") + return fmt.Errorf("write new entries: %w", err) } // We explicitly close even when there is a defer for Windows to be // able to delete it. The defer is in place to close it in-case there // are errors above. 
if err := w.Close(); err != nil { - return errors.Wrap(err, "close old WAL") + return fmt.Errorf("close old WAL: %w", err) } if err := repl.Close(); err != nil { - return errors.Wrap(err, "close new WAL") + return fmt.Errorf("close new WAL: %w", err) } if err := fileutil.Replace(tmpdir, dir); err != nil { - return errors.Wrap(err, "replace old WAL") + return fmt.Errorf("replace old WAL: %w", err) } return nil } diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index c3ae001d9..fdea75694 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -116,6 +116,10 @@ func (e *CorruptionErr) Error() string { return fmt.Sprintf("corruption in segment %s at %d: %s", SegmentName(e.Dir, e.Segment), e.Offset, e.Err) } +func (e *CorruptionErr) Unwrap() error { + return e.Err +} + // OpenWriteSegment opens segment k in dir. The returned segment is ready for new appends. func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { segName := SegmentName(dir, k) From 108a749a459e346de0d76c109f9cded21d4b0932 Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Mon, 11 Dec 2023 13:12:43 +0000 Subject: [PATCH 25/52] Set up labels for counters in advance Signed-off-by: Paulin Todev --- discovery/kubernetes/endpoints.go | 21 +++++++++++++++------ discovery/kubernetes/endpointslice.go | 21 +++++++++++++++------ discovery/kubernetes/ingress.go | 19 +++++++++++++++---- discovery/kubernetes/node.go | 19 +++++++++++++++---- discovery/kubernetes/pod.go | 10 +++++++--- discovery/kubernetes/service.go | 19 +++++++++++++++---- 6 files changed, 82 insertions(+), 27 deletions(-) diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 801a45f7c..f97f1ee56 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -53,6 +53,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca if l == nil { l = log.NewNopLogger() } + + epAddCount := eventCount.WithLabelValues("endpoints", "add") + 
epUpdateCount := eventCount.WithLabelValues("endpoints", "update") + epDeleteCount := eventCount.WithLabelValues("endpoints", "delete") + + svcAddCount := eventCount.WithLabelValues("service", "add") + svcUpdateCount := eventCount.WithLabelValues("service", "update") + svcDeleteCount := eventCount.WithLabelValues("service", "delete") + e := &Endpoints{ logger: l, endpointsInf: eps, @@ -68,15 +77,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca _, err := e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - eventCount.WithLabelValues("endpoints", "add").Inc() + epAddCount.Inc() e.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("endpoints", "update").Inc() + epUpdateCount.Inc() e.enqueue(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("endpoints", "delete").Inc() + epDeleteCount.Inc() e.enqueue(o) }, }) @@ -107,15 +116,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca // TODO(fabxc): potentially remove add and delete event handlers. Those should // be triggered via the endpoint handlers already. 
AddFunc: func(o interface{}) { - eventCount.WithLabelValues("service", "add").Inc() + svcAddCount.Inc() serviceUpdate(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("service", "update").Inc() + svcUpdateCount.Inc() serviceUpdate(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("service", "delete").Inc() + svcDeleteCount.Inc() serviceUpdate(o) }, }) diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index e2ac1de42..a7e0ea98c 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -56,6 +56,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod if l == nil { l = log.NewNopLogger() } + + epslAddCount := eventCount.WithLabelValues("endpointslice", "add") + epslUpdateCount := eventCount.WithLabelValues("endpointslice", "update") + epslDeleteCount := eventCount.WithLabelValues("endpointslice", "delete") + + svcAddCount := eventCount.WithLabelValues("service", "add") + svcUpdateCount := eventCount.WithLabelValues("service", "update") + svcDeleteCount := eventCount.WithLabelValues("service", "delete") + e := &EndpointSlice{ logger: l, endpointSliceInf: eps, @@ -71,15 +80,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod _, err := e.endpointSliceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - eventCount.WithLabelValues("endpointslice", "add").Inc() + epslAddCount.Inc() e.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("endpointslice", "update").Inc() + epslUpdateCount.Inc() e.enqueue(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("endpointslice", "delete").Inc() + epslDeleteCount.Inc() e.enqueue(o) }, }) @@ -110,15 +119,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod } _, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: 
func(o interface{}) { - eventCount.WithLabelValues("service", "add").Inc() + svcAddCount.Inc() serviceUpdate(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("service", "update").Inc() + svcUpdateCount.Inc() serviceUpdate(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("service", "delete").Inc() + svcDeleteCount.Inc() serviceUpdate(o) }, }) diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 91b1fb0ee..72712245a 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -41,18 +41,29 @@ type Ingress struct { // NewIngress returns a new ingress discovery. func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { - s := &Ingress{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")} + + ingressAddCount := eventCount.WithLabelValues("ingress", "add") + ingressUpdateCount := eventCount.WithLabelValues("ingress", "update") + ingressDeleteCount := eventCount.WithLabelValues("ingress", "delete") + + s := &Ingress{ + logger: l, + informer: inf, + store: inf.GetStore(), + queue: workqueue.NewNamed("ingress"), + } + _, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - eventCount.WithLabelValues("ingress", "add").Inc() + ingressAddCount.Inc() s.enqueue(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("ingress", "delete").Inc() + ingressDeleteCount.Inc() s.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("ingress", "update").Inc() + ingressUpdateCount.Inc() s.enqueue(o) }, }) diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index f1e37e6fa..f9a318336 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -49,18 +49,29 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun if l == nil { l = log.NewNopLogger() } - n := 
&Node{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("node")} + + nodeAddCount := eventCount.WithLabelValues("node", "add") + nodeUpdateCount := eventCount.WithLabelValues("node", "update") + nodeDeleteCount := eventCount.WithLabelValues("node", "delete") + + n := &Node{ + logger: l, + informer: inf, + store: inf.GetStore(), + queue: workqueue.NewNamed("node"), + } + _, err := n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - eventCount.WithLabelValues("node", "add").Inc() + nodeAddCount.Inc() n.enqueue(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("node", "delete").Inc() + nodeDeleteCount.Inc() n.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("node", "update").Inc() + nodeUpdateCount.Inc() n.enqueue(o) }, }) diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index cc809b29c..31936f9c0 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -51,6 +51,10 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo l = log.NewNopLogger() } + podAddCount := eventCount.WithLabelValues("pod", "add") + podDeleteCount := eventCount.WithLabelValues("pod", "delete") + podUpdateCount := eventCount.WithLabelValues("pod", "update") + p := &Pod{ podInf: pods, nodeInf: nodes, @@ -61,15 +65,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo } _, err := p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - eventCount.WithLabelValues("pod", "add").Inc() + podAddCount.Inc() p.enqueue(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("pod", "delete").Inc() + podDeleteCount.Inc() p.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("pod", "update").Inc() + podUpdateCount.Inc() p.enqueue(o) }, }) diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go 
index a680ebee8..02158b124 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -44,18 +44,29 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C if l == nil { l = log.NewNopLogger() } - s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")} + + svcAddCount := eventCount.WithLabelValues("service", "add") + svcUpdateCount := eventCount.WithLabelValues("service", "update") + svcDeleteCount := eventCount.WithLabelValues("service", "delete") + + s := &Service{ + logger: l, + informer: inf, + store: inf.GetStore(), + queue: workqueue.NewNamed("service"), + } + _, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { - eventCount.WithLabelValues("service", "add").Inc() + svcAddCount.Inc() s.enqueue(o) }, DeleteFunc: func(o interface{}) { - eventCount.WithLabelValues("service", "delete").Inc() + svcDeleteCount.Inc() s.enqueue(o) }, UpdateFunc: func(_, o interface{}) { - eventCount.WithLabelValues("service", "update").Inc() + svcUpdateCount.Inc() s.enqueue(o) }, }) From 27bb57a37bd92523f932f17b202c98985888e21f Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Mon, 11 Dec 2023 13:17:47 +0000 Subject: [PATCH 26/52] Define metric label values in one place Signed-off-by: Paulin Todev --- discovery/kubernetes/endpoints.go | 14 +++++++------- discovery/kubernetes/endpointslice.go | 14 +++++++------- discovery/kubernetes/ingress.go | 8 ++++---- discovery/kubernetes/kubernetes.go | 24 ++++++++++++++++++++++-- discovery/kubernetes/node.go | 8 ++++---- discovery/kubernetes/pod.go | 8 ++++---- discovery/kubernetes/service.go | 8 ++++---- 7 files changed, 52 insertions(+), 32 deletions(-) diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index f97f1ee56..512d77552 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -54,13 +54,13 @@ func NewEndpoints(l log.Logger, 
eps cache.SharedIndexInformer, svc, pod, node ca l = log.NewNopLogger() } - epAddCount := eventCount.WithLabelValues("endpoints", "add") - epUpdateCount := eventCount.WithLabelValues("endpoints", "update") - epDeleteCount := eventCount.WithLabelValues("endpoints", "delete") + epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd) + epUpdateCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleUpdate) + epDeleteCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleDelete) - svcAddCount := eventCount.WithLabelValues("service", "add") - svcUpdateCount := eventCount.WithLabelValues("service", "update") - svcDeleteCount := eventCount.WithLabelValues("service", "delete") + svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd) + svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate) + svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete) e := &Endpoints{ logger: l, @@ -72,7 +72,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca podStore: pod.GetStore(), nodeInf: node, withNodeMetadata: node != nil, - queue: workqueue.NewNamed("endpoints"), + queue: workqueue.NewNamed(RoleEndpoint.String()), } _, err := e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index a7e0ea98c..21095df4a 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -57,13 +57,13 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod l = log.NewNopLogger() } - epslAddCount := eventCount.WithLabelValues("endpointslice", "add") - epslUpdateCount := eventCount.WithLabelValues("endpointslice", "update") - epslDeleteCount := eventCount.WithLabelValues("endpointslice", "delete") + epslAddCount := 
eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd) + epslUpdateCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleUpdate) + epslDeleteCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleDelete) - svcAddCount := eventCount.WithLabelValues("service", "add") - svcUpdateCount := eventCount.WithLabelValues("service", "update") - svcDeleteCount := eventCount.WithLabelValues("service", "delete") + svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd) + svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate) + svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete) e := &EndpointSlice{ logger: l, @@ -75,7 +75,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod podStore: pod.GetStore(), nodeInf: node, withNodeMetadata: node != nil, - queue: workqueue.NewNamed("endpointSlice"), + queue: workqueue.NewNamed(RoleEndpointSlice.String()), } _, err := e.endpointSliceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 72712245a..80f508a64 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -42,15 +42,15 @@ type Ingress struct { // NewIngress returns a new ingress discovery. 
func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { - ingressAddCount := eventCount.WithLabelValues("ingress", "add") - ingressUpdateCount := eventCount.WithLabelValues("ingress", "update") - ingressDeleteCount := eventCount.WithLabelValues("ingress", "delete") + ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd) + ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate) + ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete) s := &Ingress{ logger: l, informer: inf, store: inf.GetStore(), - queue: workqueue.NewNamed("ingress"), + queue: workqueue.NewNamed(RoleIngress.String()), } _, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index 4deaf3f68..1e59c0db6 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -102,6 +102,16 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error { } } +func (c Role) String() string { + return string(c) +} + +const ( + MetricLabelRoleAdd = "add" + MetricLabelRoleDelete = "delete" + MetricLabelRoleUpdate = "update" +) + // SDConfig is the configuration for Kubernetes service discovery. type SDConfig struct { APIServer config.URL `yaml:"api_server,omitempty"` @@ -351,8 +361,18 @@ func New(l log.Logger, reg prometheus.Registerer, conf *SDConfig) (*Discovery, e d.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{d.eventCount}) // Initialize metric vectors. 
- for _, role := range []string{"endpointslice", "endpoints", "node", "pod", "service", "ingress"} { - for _, evt := range []string{"add", "delete", "update"} { + for _, role := range []string{ + RoleEndpointSlice.String(), + RoleEndpoint.String(), + RoleNode.String(), + RolePod.String(), + RoleService.String(), + RoleIngress.String()} { + for _, evt := range []string{ + MetricLabelRoleAdd, + MetricLabelRoleDelete, + MetricLabelRoleUpdate, + } { d.eventCount.WithLabelValues(role, evt) } } diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index f9a318336..74d87e22c 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -50,15 +50,15 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun l = log.NewNopLogger() } - nodeAddCount := eventCount.WithLabelValues("node", "add") - nodeUpdateCount := eventCount.WithLabelValues("node", "update") - nodeDeleteCount := eventCount.WithLabelValues("node", "delete") + nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd) + nodeUpdateCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleUpdate) + nodeDeleteCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleDelete) n := &Node{ logger: l, informer: inf, store: inf.GetStore(), - queue: workqueue.NewNamed("node"), + queue: workqueue.NewNamed(RoleNode.String()), } _, err := n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 31936f9c0..615717c13 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -51,9 +51,9 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo l = log.NewNopLogger() } - podAddCount := eventCount.WithLabelValues("pod", "add") - podDeleteCount := eventCount.WithLabelValues("pod", "delete") - podUpdateCount := eventCount.WithLabelValues("pod", "update") + podAddCount := 
eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd) + podDeleteCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleDelete) + podUpdateCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleUpdate) p := &Pod{ podInf: pods, @@ -61,7 +61,7 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo withNodeMetadata: nodes != nil, store: pods.GetStore(), logger: l, - queue: workqueue.NewNamed("pod"), + queue: workqueue.NewNamed(RolePod.String()), } _, err := p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(o interface{}) { diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go index 02158b124..51204a5a1 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -45,15 +45,15 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C l = log.NewNopLogger() } - svcAddCount := eventCount.WithLabelValues("service", "add") - svcUpdateCount := eventCount.WithLabelValues("service", "update") - svcDeleteCount := eventCount.WithLabelValues("service", "delete") + svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd) + svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate) + svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete) s := &Service{ logger: l, informer: inf, store: inf.GetStore(), - queue: workqueue.NewNamed("service"), + queue: workqueue.NewNamed(RoleService.String()), } _, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ From d2e997030ee2cbb4ee9a5c32d77ae2bdf644ec77 Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Mon, 11 Dec 2023 14:05:15 +0000 Subject: [PATCH 27/52] Fix linter issues Signed-off-by: Paulin Todev --- discovery/kubernetes/ingress.go | 1 - discovery/kubernetes/kubernetes.go | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 80f508a64..7b6366b25 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -41,7 +41,6 @@ type Ingress struct { // NewIngress returns a new ingress discovery. func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { - ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd) ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate) ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete) diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index 1e59c0db6..5c5f3dfb6 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -367,7 +367,8 @@ func New(l log.Logger, reg prometheus.Registerer, conf *SDConfig) (*Discovery, e RoleNode.String(), RolePod.String(), RoleService.String(), - RoleIngress.String()} { + RoleIngress.String(), + } { for _, evt := range []string{ MetricLabelRoleAdd, MetricLabelRoleDelete, From d12ccf9fa29dbc2d0f40013f04dbe7adb8a1113e Mon Sep 17 00:00:00 2001 From: Marcin Skalski Date: Mon, 11 Dec 2023 16:33:42 +0100 Subject: [PATCH 28/52] kuma_sd: Extend Kuma SD configuration to allow users to specify ClientId Signed-off-by: Marcin Skalski --- config/config_test.go | 1 + config/testdata/conf.good.yml | 1 + config/testdata/roundtrip.good.yml | 1 + discovery/xds/kuma.go | 18 ++++++++++++++---- discovery/xds/kuma_test.go | 2 +- discovery/xds/xds.go | 1 + discovery/xds/xds_test.go | 1 + docs/configuration/configuration.md | 5 +++++ 8 files changed, 25 insertions(+), 5 deletions(-) diff --git a/config/config_test.go b/config/config_test.go index 5d753a0f7..7c061fd54 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -568,6 +568,7 @@ var expectedConf = &Config{ ServiceDiscoveryConfigs: discovery.Configs{ &xds.KumaSDConfig{ 
Server: "http://kuma-control-plane.kuma-system.svc:5676", + ClientId: "main-prometheus", HTTPClientConfig: config.DefaultHTTPClientConfig, RefreshInterval: model.Duration(15 * time.Second), FetchTimeout: model.Duration(2 * time.Minute), diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index e034eff43..972099800 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -221,6 +221,7 @@ scrape_configs: kuma_sd_configs: - server: http://kuma-control-plane.kuma-system.svc:5676 + clientId: main-prometheus - job_name: service-marathon marathon_sd_configs: diff --git a/config/testdata/roundtrip.good.yml b/config/testdata/roundtrip.good.yml index f2634d257..26589ad1b 100644 --- a/config/testdata/roundtrip.good.yml +++ b/config/testdata/roundtrip.good.yml @@ -108,6 +108,7 @@ scrape_configs: kuma_sd_configs: - server: http://kuma-control-plane.kuma-system.svc:5676 + clientId: main-prometheus marathon_sd_configs: - servers: diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index bc88ba554..1cec053e4 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -178,10 +178,11 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Discoverer, error) { // Default to "prometheus" if hostname is unavailable. 
- clientID, err := osutil.GetFQDN() - if err != nil { - level.Debug(logger).Log("msg", "error getting FQDN", "err", err) - clientID = "prometheus" + var clientID string + if conf.ClientId == "" { + clientID = defaultClientId(logger) + } else { + clientID = conf.ClientId } clientConfig := &HTTPResourceClientConfig{ @@ -215,3 +216,12 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Disc return d, nil } + +func defaultClientId(logger log.Logger) string { + clientID, err := osutil.GetFQDN() + if err != nil { + level.Debug(logger).Log("msg", "error getting FQDN", "err", err) + clientID = "prometheus" + } + return clientID +} diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go index 581be9fb1..7f2b0ce3b 100644 --- a/discovery/xds/kuma_test.go +++ b/discovery/xds/kuma_test.go @@ -204,7 +204,7 @@ func TestNewKumaHTTPDiscovery(t *testing.T) { require.True(t, ok) require.Equal(t, kumaConf.Server, resClient.Server()) require.Equal(t, KumaMadsV1ResourceTypeURL, resClient.ResourceTypeURL()) - require.NotEmpty(t, resClient.ID()) + require.Equal(t, kumaConf.ClientId, resClient.ID()) require.Equal(t, KumaMadsV1ResourceType, resClient.config.ResourceType) } diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go index 48bdbab02..47baece78 100644 --- a/discovery/xds/xds.go +++ b/discovery/xds/xds.go @@ -55,6 +55,7 @@ type SDConfig struct { RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` FetchTimeout model.Duration `yaml:"fetch_timeout,omitempty"` Server string `yaml:"server,omitempty"` + ClientId string `yaml:"clientId,omitempty"` } // mustRegisterMessage registers the provided message type in the typeRegistry, and panics diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go index 974a47342..2e0f24e19 100644 --- a/discovery/xds/xds_test.go +++ b/discovery/xds/xds_test.go @@ -36,6 +36,7 @@ var ( sdConf = SDConfig{ Server: "http://127.0.0.1", RefreshInterval: model.Duration(10 * time.Second), + ClientId: 
"test-id", } testFetchFailuresCount = prometheus.NewCounter( diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index f05925d2b..d3ee459be 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2230,6 +2230,11 @@ See below for the configuration options for Kuma MonitoringAssignment discovery: # Address of the Kuma Control Plane's MADS xDS server. server: +# Client id is used by Kuma Control Plane to compute Monitoring Assignment for specific Prometheus backend. +# This is useful when migrating between multiple Prometheus backends, or having separate backend for each Mesh +# When not specified, system hostname/fqdn will be used if available, if not `prometheus` will be used. +clientId: + # The time to wait between polling update requests. [ refresh_interval: | default = 30s ] From 0af810aa718a9cb9a9466785715103ad6117e959 Mon Sep 17 00:00:00 2001 From: Marcin Skalski Date: Mon, 11 Dec 2023 17:01:52 +0100 Subject: [PATCH 29/52] fix go lint Signed-off-by: Marcin Skalski --- config/testdata/conf.good.yml | 2 +- config/testdata/roundtrip.good.yml | 2 +- discovery/xds/kuma.go | 8 ++++---- discovery/xds/kuma_test.go | 2 +- discovery/xds/xds.go | 2 +- discovery/xds/xds_test.go | 2 +- docs/configuration/configuration.md | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 972099800..b58430164 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -221,7 +221,7 @@ scrape_configs: kuma_sd_configs: - server: http://kuma-control-plane.kuma-system.svc:5676 - clientId: main-prometheus + client_id: main-prometheus - job_name: service-marathon marathon_sd_configs: diff --git a/config/testdata/roundtrip.good.yml b/config/testdata/roundtrip.good.yml index 26589ad1b..24ab7d259 100644 --- a/config/testdata/roundtrip.good.yml +++ b/config/testdata/roundtrip.good.yml @@ -108,7 +108,7 @@ scrape_configs: 
kuma_sd_configs: - server: http://kuma-control-plane.kuma-system.svc:5676 - clientId: main-prometheus + client_id: main-prometheus marathon_sd_configs: - servers: diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index 1cec053e4..e79195bb2 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -179,10 +179,10 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Discoverer, error) { // Default to "prometheus" if hostname is unavailable. var clientID string - if conf.ClientId == "" { - clientID = defaultClientId(logger) + if conf.ClientID == "" { + clientID = defaultClientID(logger) } else { - clientID = conf.ClientId + clientID = conf.ClientID } clientConfig := &HTTPResourceClientConfig{ @@ -217,7 +217,7 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Disc return d, nil } -func defaultClientId(logger log.Logger) string { +func defaultClientID(logger log.Logger) string { clientID, err := osutil.GetFQDN() if err != nil { level.Debug(logger).Log("msg", "error getting FQDN", "err", err) diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go index 7f2b0ce3b..6b4bb8784 100644 --- a/discovery/xds/kuma_test.go +++ b/discovery/xds/kuma_test.go @@ -204,7 +204,7 @@ func TestNewKumaHTTPDiscovery(t *testing.T) { require.True(t, ok) require.Equal(t, kumaConf.Server, resClient.Server()) require.Equal(t, KumaMadsV1ResourceTypeURL, resClient.ResourceTypeURL()) - require.Equal(t, kumaConf.ClientId, resClient.ID()) + require.Equal(t, kumaConf.ClientID, resClient.ID()) require.Equal(t, KumaMadsV1ResourceType, resClient.config.ResourceType) } diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go index 47baece78..16aa3f148 100644 --- a/discovery/xds/xds.go +++ b/discovery/xds/xds.go @@ -55,7 +55,7 @@ type SDConfig struct { RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` FetchTimeout model.Duration 
`yaml:"fetch_timeout,omitempty"` Server string `yaml:"server,omitempty"` - ClientId string `yaml:"clientId,omitempty"` + ClientID string `yaml:"client_id,omitempty"` } // mustRegisterMessage registers the provided message type in the typeRegistry, and panics diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go index 2e0f24e19..f57fff996 100644 --- a/discovery/xds/xds_test.go +++ b/discovery/xds/xds_test.go @@ -36,7 +36,7 @@ var ( sdConf = SDConfig{ Server: "http://127.0.0.1", RefreshInterval: model.Duration(10 * time.Second), - ClientId: "test-id", + ClientID: "test-id", } testFetchFailuresCount = prometheus.NewCounter( diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index d3ee459be..41f54dbd2 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2233,7 +2233,7 @@ server: # Client id is used by Kuma Control Plane to compute Monitoring Assignment for specific Prometheus backend. # This is useful when migrating between multiple Prometheus backends, or having separate backend for each Mesh # When not specified, system hostname/fqdn will be used if available, if not `prometheus` will be used. -clientId: +client_id: # The time to wait between polling update requests. 
[ refresh_interval: | default = 30s ] From 48934aaef3c8e18b91d232a436c00c802c99a77a Mon Sep 17 00:00:00 2001 From: Marcin Skalski Date: Mon, 11 Dec 2023 17:04:56 +0100 Subject: [PATCH 30/52] fix go lint Signed-off-by: Marcin Skalski --- config/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config_test.go b/config/config_test.go index 7c061fd54..e614a4463 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -568,7 +568,7 @@ var expectedConf = &Config{ ServiceDiscoveryConfigs: discovery.Configs{ &xds.KumaSDConfig{ Server: "http://kuma-control-plane.kuma-system.svc:5676", - ClientId: "main-prometheus", + ClientID: "main-prometheus", HTTPClientConfig: config.DefaultHTTPClientConfig, RefreshInterval: model.Duration(15 * time.Second), FetchTimeout: model.Duration(2 * time.Minute), From e27232614a37edbc40f7778c659806a23a3dd61b Mon Sep 17 00:00:00 2001 From: Marcin Skalski Date: Tue, 12 Dec 2023 08:32:46 +0100 Subject: [PATCH 31/52] code review Signed-off-by: Marcin Skalski --- discovery/xds/kuma.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index e79195bb2..567e5ab7c 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -178,11 +178,14 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Discoverer, error) { // Default to "prometheus" if hostname is unavailable. 
- var clientID string - if conf.ClientID == "" { - clientID = defaultClientID(logger) - } else { - clientID = conf.ClientID + clientID := conf.ClientID + if clientID == "" { + var err error + clientID, err = osutil.GetFQDN() + if err != nil { + level.Debug(logger).Log("msg", "error getting FQDN", "err", err) + clientID = "prometheus" + } } clientConfig := &HTTPResourceClientConfig{ @@ -216,12 +219,3 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Disc return d, nil } - -func defaultClientID(logger log.Logger) string { - clientID, err := osutil.GetFQDN() - if err != nil { - level.Debug(logger).Log("msg", "error getting FQDN", "err", err) - clientID = "prometheus" - } - return clientID -} From f36b56a62ce01e9852d73bd25b310985b486a17c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Tue, 12 Dec 2023 11:58:54 +0200 Subject: [PATCH 32/52] tsdb: remove unused option (#13282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Digging around the TSDB code and I've found that this flag is unused so let's remove it. 
Signed-off-by: Giedrius Statkevičius --- cmd/prometheus/main.go | 1 - tsdb/db.go | 35 +++++++++++++---------------------- tsdb/db_test.go | 3 --- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 106f9d05c..7e8e23444 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -1597,7 +1597,6 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond), MaxBytes: int64(opts.MaxBytes), NoLockfile: opts.NoLockfile, - AllowOverlappingCompaction: true, WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType), HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize, SamplesPerChunk: opts.SamplesPerChunk, diff --git a/tsdb/db.go b/tsdb/db.go index 6d283fdd8..b2cc37a19 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -70,20 +70,19 @@ var ErrNotReady = errors.New("TSDB not ready") // millisecond precision timestamps. 
func DefaultOptions() *Options { return &Options{ - WALSegmentSize: wlog.DefaultSegmentSize, - MaxBlockChunkSegmentSize: chunks.DefaultChunkSegmentSize, - RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), - MinBlockDuration: DefaultBlockDuration, - MaxBlockDuration: DefaultBlockDuration, - NoLockfile: false, - AllowOverlappingCompaction: true, - SamplesPerChunk: DefaultSamplesPerChunk, - WALCompression: wlog.CompressionNone, - StripeSize: DefaultStripeSize, - HeadChunksWriteBufferSize: chunks.DefaultWriteBufferSize, - IsolationDisabled: defaultIsolationDisabled, - HeadChunksWriteQueueSize: chunks.DefaultWriteQueueSize, - OutOfOrderCapMax: DefaultOutOfOrderCapMax, + WALSegmentSize: wlog.DefaultSegmentSize, + MaxBlockChunkSegmentSize: chunks.DefaultChunkSegmentSize, + RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), + MinBlockDuration: DefaultBlockDuration, + MaxBlockDuration: DefaultBlockDuration, + NoLockfile: false, + SamplesPerChunk: DefaultSamplesPerChunk, + WALCompression: wlog.CompressionNone, + StripeSize: DefaultStripeSize, + HeadChunksWriteBufferSize: chunks.DefaultWriteBufferSize, + IsolationDisabled: defaultIsolationDisabled, + HeadChunksWriteQueueSize: chunks.DefaultWriteQueueSize, + OutOfOrderCapMax: DefaultOutOfOrderCapMax, } } @@ -115,14 +114,6 @@ type Options struct { // NoLockfile disables creation and consideration of a lock file. NoLockfile bool - // Compaction of overlapping blocks are allowed if AllowOverlappingCompaction is true. - // This is an optional flag for overlapping blocks. - // The reason why this flag exists is because there are various users of the TSDB - // that do not want vertical compaction happening on ingest time. Instead, - // they'd rather keep overlapping blocks and let another component do the overlapping compaction later. - // For Prometheus, this will always be true. - AllowOverlappingCompaction bool - // WALCompression configures the compression type to use on records in the WAL. 
WALCompression wlog.CompressionType diff --git a/tsdb/db_test.go b/tsdb/db_test.go index cb12f8630..3bc094a3d 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -4877,7 +4877,6 @@ func Test_Querier_OOOQuery(t *testing.T) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() - opts.AllowOverlappingCompaction = false series1 := labels.FromStrings("foo", "bar1") @@ -4962,7 +4961,6 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() - opts.AllowOverlappingCompaction = false series1 := labels.FromStrings("foo", "bar1") @@ -6646,7 +6644,6 @@ func TestQueryHistogramFromBlocksWithCompaction(t *testing.T) { t.Helper() opts := DefaultOptions() - opts.AllowOverlappingCompaction = true // TODO(jesusvazquez): This replaced AllowOverlappingBlocks, make sure that works. db := openTestDB(t, opts, nil) t.Cleanup(func() { require.NoError(t, db.Close()) From 19709f75d05deb8f24c69788fa7b5a4939394734 Mon Sep 17 00:00:00 2001 From: Marcin Skalski Date: Tue, 12 Dec 2023 14:49:43 +0100 Subject: [PATCH 33/52] fix kuma_sd docs Signed-off-by: Marcin Skalski --- docs/configuration/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 41f54dbd2..5e2f31c1c 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2231,9 +2231,9 @@ See below for the configuration options for Kuma MonitoringAssignment discovery: server: # Client id is used by Kuma Control Plane to compute Monitoring Assignment for specific Prometheus backend. -# This is useful when migrating between multiple Prometheus backends, or having separate backend for each Mesh +# This is useful when migrating between multiple Prometheus backends, or having separate backend for each Mesh. 
# When not specified, system hostname/fqdn will be used if available, if not `prometheus` will be used. -client_id: +[ client_id: ] # The time to wait between polling update requests. [ refresh_interval: | default = 30s ] From bb8363dbb30fc3864de8e837db11713022713958 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Wed, 13 Dec 2023 08:30:02 +0100 Subject: [PATCH 34/52] Add comment on SampleRingIterator Signed-off-by: Filip Petkovski --- promql/engine.go | 7 +------ storage/buffer.go | 10 ++-------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 8b4987921..16b8ee500 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2114,12 +2114,7 @@ loop: if floats == nil { floats = getFPointSlice(16) } - if n := len(floats); n < cap(floats) { - floats = floats[:n+1] - floats[n].T, floats[n].F = t, f - } else { - floats = append(floats, FPoint{T: t, F: f}) - } + floats = append(floats, FPoint{T: t, F: f}) } } } diff --git a/storage/buffer.go b/storage/buffer.go index 75a935f9d..234771a77 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -310,6 +310,8 @@ func (r *sampleRing) iterator() *SampleRingIterator { return &r.it } +// SampleRingIterator is returned by BufferedSeriesIterator.Buffer() and can be +// used to iterate samples buffered in the lookback window. 
type SampleRingIterator struct { r *sampleRing i int @@ -358,14 +360,6 @@ func (it *SampleRingIterator) Next() chunkenc.ValueType { } } -func (it *SampleRingIterator) Seek(int64) chunkenc.ValueType { - return chunkenc.ValNone -} - -func (it *SampleRingIterator) Err() error { - return nil -} - func (it *SampleRingIterator) At() (int64, float64) { return it.t, it.f } From ea356c472e9926d707f873ff1bef2f8aa5bd19f8 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Wed, 13 Dec 2023 08:35:02 +0100 Subject: [PATCH 35/52] Add comment on SampleRingIterator methods Signed-off-by: Filip Petkovski --- storage/buffer.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/storage/buffer.go b/storage/buffer.go index 234771a77..d19f841d4 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -360,14 +360,20 @@ func (it *SampleRingIterator) Next() chunkenc.ValueType { } } +// At returns the current float element of the iterator. func (it *SampleRingIterator) At() (int64, float64) { return it.t, it.f } +// AtHistogram returns the current histogram element of the iterator. func (it *SampleRingIterator) AtHistogram() (int64, *histogram.Histogram) { return it.t, it.h } +// AtFloatHistogram returns the current histogram element of the iterator. If the +// current sample is an integer histogram, it will be converted to a float histogram. +// An optional histogram.FloatHistogram can be provided to avoid allocating a new +// object for the conversion. func (it *SampleRingIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { if it.fh == nil { return it.t, it.h.ToFloat(fh) From 103133124a55ad902b15bf02abe7eae5aecf4cc6 Mon Sep 17 00:00:00 2001 From: daniel-resdiary <109083091+daniel-resdiary@users.noreply.github.com> Date: Wed, 13 Dec 2023 12:38:16 +0000 Subject: [PATCH 36/52] Get VM Scale Set NIC (#13283) Calling `*armnetwork.InterfacesClient.Get()` doesn't work for Scale Set VM NIC, because these use a different Resource ID format. 
Use `*armnetwork.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface()` instead. This needs both the scale set name and the instance ID, so add an `InstanceID` field to the `virtualMachine` struct. `InstanceID` is empty for a VM that isn't a ScaleSetVM. Signed-off-by: Daniel Nicholls --- discovery/azure/azure.go | 62 +++++++++++++++++++++++++++-------- discovery/azure/azure_test.go | 6 ++++ 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 3b67a8102..4a85db232 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -95,7 +95,7 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) { name = strings.ToUpper(name) env, ok := environments[name] if !ok { - return env, fmt.Errorf("There is no cloud configuration matching the name %q", name) + return env, fmt.Errorf("there is no cloud configuration matching the name %q", name) } return env, nil @@ -308,6 +308,7 @@ type virtualMachine struct { Location string OsType string ScaleSet string + InstanceID string Tags map[string]*string NetworkInterfaces []string Size string @@ -408,17 +409,31 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { networkInterface = v d.cacheHitCount.Add(1) } else { - networkInterface, err = client.getNetworkInterfaceByID(ctx, nicID) - if err != nil { - if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) - } else { - ch <- target{labelSet: nil, err: err} + if vm.ScaleSet == "" { + networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID) + if err != nil { + if errors.Is(err, errorNotFound) { + level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + } else { + ch <- target{labelSet: nil, err: err} + } + // Get out of this routine because we cannot continue without a network interface. 
+ return } - // Get out of this routine because we cannot continue without a network interface. - return + d.addToCache(nicID, networkInterface) + } else { + networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID) + if err != nil { + if errors.Is(err, errorNotFound) { + level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + } else { + ch <- target{labelSet: nil, err: err} + } + // Get out of this routine because we cannot continue without a network interface. + return + } + d.addToCache(nicID, networkInterface) } - d.addToCache(nicID, networkInterface) } if networkInterface.Properties == nil { @@ -626,6 +641,7 @@ func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName st Location: *(vm.Location), OsType: osType, ScaleSet: scaleSetName, + InstanceID: *(vm.InstanceID), Tags: tags, NetworkInterfaces: networkInterfaces, Size: size, @@ -634,9 +650,9 @@ func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName st var errorNotFound = errors.New("network interface does not exist") -// getNetworkInterfaceByID gets the network interface. +// getVMNetworkInterfaceByID gets the network interface. // If a 404 is returned from the Azure API, `errorNotFound` is returned. 
-func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) { +func (client *azureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) { r, err := newAzureResourceFromID(networkInterfaceID, client.logger) if err != nil { return nil, fmt.Errorf("could not parse network interface ID: %w", err) @@ -648,7 +664,27 @@ func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkI if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound { return nil, errorNotFound } - return nil, fmt.Errorf("Failed to retrieve Interface %v with error: %w", networkInterfaceID, err) + return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err) + } + + return &resp.Interface, nil +} + +// getVMScaleSetVMNetworkInterfaceByID gets the network interface. +// If a 404 is returned from the Azure API, `errorNotFound` is returned. 
+func (client *azureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) { + r, err := newAzureResourceFromID(networkInterfaceID, client.logger) + if err != nil { + return nil, fmt.Errorf("could not parse network interface ID: %w", err) + } + + resp, err := client.nic.GetVirtualMachineScaleSetNetworkInterface(ctx, r.ResourceGroupName, scaleSetName, instanceID, r.Name, &armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")}) + if err != nil { + var responseError *azcore.ResponseError + if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound { + return nil, errorNotFound + } + return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err) } return &resp.Interface, nil diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go index 48f5b076c..024cf7591 100644 --- a/discovery/azure/azure_test.go +++ b/discovery/azure/azure_test.go @@ -142,6 +142,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) { vmSize := armcompute.VirtualMachineSizeTypes(size) osType := armcompute.OperatingSystemTypesLinux vmType := "type" + instanceID := "123" location := "westeurope" computerName := "computer_name" networkProfile := armcompute.NetworkProfile{ @@ -166,6 +167,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) { ID: &id, Name: &name, Type: &vmType, + InstanceID: &instanceID, Location: &location, Tags: nil, Properties: properties, @@ -182,6 +184,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) { Tags: map[string]*string{}, NetworkInterfaces: []string{}, ScaleSet: scaleSet, + InstanceID: instanceID, Size: size, } @@ -197,6 +200,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) { vmSize := armcompute.VirtualMachineSizeTypes(size) osType := armcompute.OperatingSystemTypesLinux vmType := "type" + 
instanceID := "123" location := "westeurope" computerName := "computer_name" tags := map[string]*string{ @@ -224,6 +228,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) { ID: &id, Name: &name, Type: &vmType, + InstanceID: &instanceID, Location: &location, Tags: tags, Properties: properties, @@ -240,6 +245,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) { Tags: tags, NetworkInterfaces: []string{}, ScaleSet: scaleSet, + InstanceID: instanceID, Size: size, } From 0704c7254876fefbdde85fecb8ad389106312af2 Mon Sep 17 00:00:00 2001 From: Daniel Nicholls Date: Wed, 13 Dec 2023 14:14:01 +0000 Subject: [PATCH 37/52] Dedup code handling getting network interface Signed-off-by: Daniel Nicholls --- discovery/azure/azure.go | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 4a85db232..2a0f8380b 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -411,29 +411,19 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { } else { if vm.ScaleSet == "" { networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID) - if err != nil { - if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) - } else { - ch <- target{labelSet: nil, err: err} - } - // Get out of this routine because we cannot continue without a network interface. - return - } - d.addToCache(nicID, networkInterface) } else { networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID) - if err != nil { - if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) - } else { - ch <- target{labelSet: nil, err: err} - } - // Get out of this routine because we cannot continue without a network interface. 
- return - } - d.addToCache(nicID, networkInterface) } + if err != nil { + if errors.Is(err, errorNotFound) { + level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + } else { + ch <- target{labelSet: nil, err: err} + } + // Get out of this routine because we cannot continue without a network interface. + return + } + d.addToCache(nicID, networkInterface) } if networkInterface.Properties == nil { From 69abd6d9f65a6358025461510237366f1d939c70 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Wed, 13 Dec 2023 12:12:50 -0300 Subject: [PATCH 38/52] Document feature flag 'created-timestamp-zero-ingestion' (#13279) Signed-off-by: Arthur Silva Sens --- docs/feature_flags.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 0e585f13c..bcf8309b5 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -194,3 +194,13 @@ won't work when you push OTLP metrics. Enables PromQL functions that are considered experimental and whose name or semantics could change. + +## Created Timestamps Zero Injection + +`--enable-feature=created-timestamp-zero-ingestion` + +Enables ingestion of created timestamp. Created timestamps are injected as 0 valued samples when appropriate. See [PromCon talk](https://youtu.be/nWf0BfQ5EEA) for details. + +Currently Prometheus supports created timestamps only on the traditional Prometheus Protobuf protocol (WIP for other protocols). As a result, when enabling this feature, the Prometheus protobuf scrape protocol will be prioritized (See `scrape_config.scrape_protocols` settings for more details). + +Besides enabling this feature in Prometheus, created timestamps need to be exposed by the application being scraped. 
From 1a8381a5011e17328d059ab80d3963cac3e507b4 Mon Sep 17 00:00:00 2001 From: Diogo Teles Sant'Anna Date: Wed, 13 Dec 2023 16:30:06 -0300 Subject: [PATCH 39/52] Add minimal permissions to script golangci-lint.yml Signed-off-by: Diogo Teles Sant'Anna --- scripts/golangci-lint.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index 805c59fb7..4b292229d 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -12,8 +12,14 @@ on: - ".golangci.yml" pull_request: +permissions: # added using https://github.com/step-security/secure-repo + contents: read + jobs: golangci: + permissions: + contents: read # for actions/checkout to fetch code + pull-requests: read # for golangci/golangci-lint-action to fetch pull requests name: lint runs-on: ubuntu-latest steps: From 1f69dcfa6bfb5c53dacbe33b8aca45a6b7540cc8 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Thu, 14 Dec 2023 11:28:15 +0100 Subject: [PATCH 40/52] Fix reusing float histograms In https://github.com/prometheus/prometheus/pull/13276 we started reusing float histogram objects to reduce allocations in PromQL. That PR introduces a bug where histogram pointers get copied to the beginning of the histograms slice, but are still kept at the end of the slice. When a new histogram is read into the last element, it can overwrite a previous element because the pointer is the same. This commit fixes the issue by moving outdated points to the end of the slice so that we don't end up with duplicate pointers in the same buffer. In other words, the slice gets rotated so that old objects can get reused. 
Signed-off-by: Filip Petkovski --- promql/engine.go | 5 +++ promql/engine_test.go | 91 +++++++++++++++++++++++++++++--------- tsdb/tsdbutil/histogram.go | 8 ++++ 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 16b8ee500..12755663d 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2052,7 +2052,12 @@ func (ev *evaluator) matrixIterSlice( var drop int for drop = 0; histograms[drop].T < mint; drop++ { } + // Rotate the buffer around the drop index so that points before mint can be + // reused to store new histograms. + tail := make([]HPoint, drop) + copy(tail, histograms[:drop]) copy(histograms, histograms[drop:]) + copy(histograms[len(histograms)-drop:], tail) histograms = histograms[:len(histograms)-drop] ev.currentSamples -= totalHPointSize(histograms) // Only append points with timestamps after the last timestamp we have. diff --git a/promql/engine_test.go b/promql/engine_test.go index 9ab54dd16..105cdc10d 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3169,28 +3169,75 @@ func TestNativeHistogramRate(t *testing.T) { } require.NoError(t, app.Commit()) - queryString := fmt.Sprintf("rate(%s[1m])", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.226666666666667, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. 
/ 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) + queryString := fmt.Sprintf("rate(%s[45s])", seriesName) + t.Run("instant_query", func(t *testing.T) { + qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) + require.NoError(t, err) + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + vector, err := res.Vector() + require.NoError(t, err) + require.Len(t, vector, 1) + actualHistogram := vector[0].H + expectedHistogram := &histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 1, + ZeroThreshold: 0.001, + ZeroCount: 1. / 15., + Count: 9. / 15., + Sum: 1.2266666666666663, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, + PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, + NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, + NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, + } + require.Equal(t, expectedHistogram, actualHistogram) + }) + + t.Run("range_query", func(t *testing.T) { + step := 30 * time.Second + start := timestamp.Time(int64(5 * time.Minute / time.Millisecond)) + end := start.Add(step) + qry, err := engine.NewRangeQuery(context.Background(), storage, nil, queryString, start, end, step) + require.NoError(t, err) + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + matrix, err := res.Matrix() + require.NoError(t, err) + require.Len(t, matrix, 1) + require.Len(t, matrix[0].Histograms, 2) + actualHistograms := matrix[0].Histograms + expectedHistograms := []HPoint{{ + T: 300000, + H: &histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 1, + ZeroThreshold: 0.001, + ZeroCount: 1. / 15., + Count: 9. 
/ 15., + Sum: 1.2266666666666663, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, + PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, + NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, + NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, + }, + }, { + T: 330000, + H: &histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 1, + ZeroThreshold: 0.001, + ZeroCount: 1. / 15., + Count: 9. / 15., + Sum: 1.2266666666666663, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, + PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, + NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, + NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, + }, + }} + require.Equal(t, expectedHistograms, actualHistograms) + }) } func TestNativeFloatHistogramRate(t *testing.T) { diff --git a/tsdb/tsdbutil/histogram.go b/tsdb/tsdbutil/histogram.go index 0847f81a8..bb8d49b20 100644 --- a/tsdb/tsdbutil/histogram.go +++ b/tsdb/tsdbutil/histogram.go @@ -30,6 +30,14 @@ func GenerateTestHistograms(n int) (r []*histogram.Histogram) { return r } +func GenerateTestHistogramsWithUnknownResetHint(n int) []*histogram.Histogram { + hs := GenerateTestHistograms(n) + for i := range hs { + hs[i].CounterResetHint = histogram.UnknownCounterReset + } + return hs +} + // GenerateTestHistogram but it is up to the user to set any known counter reset hint. 
func GenerateTestHistogram(i int) *histogram.Histogram { return &histogram.Histogram{ From 952cb41373c00a46c060ade00e8ecc85603ef758 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Fri, 15 Dec 2023 10:21:18 +0000 Subject: [PATCH 41/52] build(deps): bump github.com/Azure/azure-sdk-for-go/sdk/resourcemanager Signed-off-by: Matthieu MOREL --- discovery/azure/azure.go | 4 ++-- discovery/azure/azure_test.go | 2 +- go.mod | 7 +++---- go.sum | 14 ++++++-------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 4a85db232..6637e9800 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -30,8 +30,8 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" - "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4" - "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" "github.com/go-kit/log" diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go index 024cf7591..4ff937e0b 100644 --- a/discovery/azure/azure_test.go +++ b/discovery/azure/azure_test.go @@ -17,7 +17,7 @@ import ( "testing" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" - "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" "github.com/stretchr/testify/require" "go.uber.org/goleak" ) diff --git a/go.mod b/go.mod index 770607e9c..9ea078083 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,9 @@ go 1.20 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.0 
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0 - github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1 - github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1 + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.3.0 + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0 + github.com/Code-Hex/go-generics-cache v1.3.1 github.com/alecthomas/kingpin/v2 v2.4.0 github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 github.com/aws/aws-sdk-go v1.48.14 @@ -93,9 +94,7 @@ require ( cloud.google.com/go/compute v1.23.3 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 // indirect - github.com/Code-Hex/go-generics-cache v1.3.1 github.com/Microsoft/go-winio v0.6.1 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect diff --git a/go.sum b/go.sum index ae367bb90..a6f535dd7 100644 --- a/go.sum +++ b/go.sum @@ -40,14 +40,12 @@ github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0 h1:BMAjVKJM0U/CYF27gA0ZM github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0/go.mod h1:1fXstnBMas5kzG+S3q8UoJcmyU6nUeunJcMDHcRYHhs= github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0 h1:d81/ng9rET2YqdVkVwkb6EXeRrLJIwyGnJcAlAWKwhs= github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0/go.mod h1:s4kgfzA0covAXNicZHDMN58jExvcng2mC/DepXiF1EI= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1 h1:UPeCRD+XY7QlaGQte2EVI2iOcWvUYA2XY8w5T/8v0NQ= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4 v4.2.1/go.mod h1:oGV6NlB0cvi1ZbYRR2UN44QHxWFyGk+iylgD0qaMXjA= 
-github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.1.2 h1:mLY+pNLjCUeKhgnAJWAKhEUQM+RJQo2H1fuGSw1Ky1E= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0 h1:QM6sE5k2ZT/vI5BEe0r7mqjsUSnhVBFbOsVkEuaEfiA= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0/go.mod h1:243D9iHbcQXoFUtgHJwL7gl2zx1aDuDMjvBZVGr2uW0= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1 h1:bWh0Z2rOEDfB/ywv/l0iHN1JgyazE6kW/aIA89+CEK0= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1/go.mod h1:Bzf34hhAE9NSxailk8xVeLEZbUjOXcC+GnU1mMKdhLw= -github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.0.0 h1:ECsQtyERDVz3NP3kvDOTLvbQhqWp/x9EsGKtb4ogUr8= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.3.0 h1:qgs/VAMSR+9qFhwTw4OwF2NbVuw+2m83pVZJjqkKQMw= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.3.0/go.mod h1:uYt4CfhkJA9o0FN7jfE5minm/i4nUE4MjGUJkzB6Zs8= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0 h1:bXwSugBiSbgtz7rOtbfGf+woewp4f06orW9OP5BjHLA= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0/go.mod h1:Y/HgrePTmGy9HjdSGTqZNa+apUpTVIEVKXJyARP2lrk= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1 h1:7CBQ+Ei8SP2c6ydQTGCCrS35bDxgTMfoP2miAwK++OU= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 h1:WpB/QDNLpMw72xHJc34BNNykqSOeEJDAWkhf0u12/Jk= From 9ab7e3b3dec4b82870487154dce53baa3266a614 Mon Sep 17 00:00:00 
2001 From: Bryan Boreham Date: Mon, 18 Dec 2023 14:52:42 +0000 Subject: [PATCH 42/52] relabel: refactor: extract config.Validate method And add a test for it, which fails because validation is not strong enough. Signed-off-by: Bryan Boreham --- model/relabel/relabel.go | 4 ++ model/relabel/relabel_test.go | 72 +++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index fadf35b86..fa0d809de 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -108,6 +108,10 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { if c.Regex.Regexp == nil { c.Regex = MustNewRegexp("") } + return c.Validate() +} + +func (c *Config) Validate() error { if c.Action == "" { return fmt.Errorf("relabel action cannot be empty") } diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index b50ff4010..fe040be3a 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -14,6 +14,7 @@ package relabel import ( + "fmt" "testing" "github.com/prometheus/common/model" @@ -575,6 +576,77 @@ func TestRelabel(t *testing.T) { } } +func TestRelabelValidate(t *testing.T) { + tests := []struct { + config Config + expected string + }{ + { + config: Config{}, + expected: `relabel action cannot be empty`, + }, + { + config: Config{ + Action: Replace, + }, + expected: `requires 'target_label' value`, + }, + { + config: Config{ + Action: Lowercase, + }, + expected: `requires 'target_label' value`, + }, + { + config: Config{ + Action: Lowercase, + Replacement: DefaultRelabelConfig.Replacement, + TargetLabel: "${3}", + }, + expected: `"${3}" is invalid 'target_label'`, + }, + { + config: Config{ + SourceLabels: model.LabelNames{"a"}, + Regex: MustNewRegexp("some-([^-]+)-([^,]+)"), + Action: Replace, + Replacement: "${1}", + TargetLabel: "${3}", + }, + }, + { + config: Config{ + SourceLabels: model.LabelNames{"a"}, + Regex: 
MustNewRegexp("some-([^-]+)-([^,]+)"), + Action: Replace, + Replacement: "${1}", + TargetLabel: "0${3}", + }, + expected: `"0${3}" is invalid 'target_label'`, + }, + { + config: Config{ + SourceLabels: model.LabelNames{"a"}, + Regex: MustNewRegexp("some-([^-]+)-([^,]+)"), + Action: Replace, + Replacement: "${1}", + TargetLabel: "-${3}", + }, + expected: `"-${3}" is invalid 'target_label' for replace action`, + }, + } + for i, test := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + err := test.config.Validate() + if test.expected == "" { + require.NoError(t, err) + } else { + require.ErrorContains(t, err, test.expected) + } + }) + } +} + func TestTargetLabelValidity(t *testing.T) { tests := []struct { str string From 2d4c367d87cdd99eac1b4786cf1621c58d43735d Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 18 Dec 2023 14:58:56 +0000 Subject: [PATCH 43/52] relabel: stricter check that target labels are valid For `Lowercase`, `KeepEqual`, etc., we do not expand a regexp, so the target label name must not contain anything like `${1}`. Also for the common case that the `Replace` target does not require any template expansion, check that the entire string passes label name validity rules. 
Signed-off-by: Bryan Boreham --- model/relabel/relabel.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index fa0d809de..1947e6273 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -121,7 +121,13 @@ func (c *Config) Validate() error { if (c.Action == Replace || c.Action == HashMod || c.Action == Lowercase || c.Action == Uppercase || c.Action == KeepEqual || c.Action == DropEqual) && c.TargetLabel == "" { return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) } - if (c.Action == Replace || c.Action == Lowercase || c.Action == Uppercase || c.Action == KeepEqual || c.Action == DropEqual) && !relabelTarget.MatchString(c.TargetLabel) { + if c.Action == Replace && !strings.Contains(c.TargetLabel, "$") && !model.LabelName(c.TargetLabel).IsValid() { + return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) + } + if c.Action == Replace && strings.Contains(c.TargetLabel, "$") && !relabelTarget.MatchString(c.TargetLabel) { + return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) + } + if (c.Action == Lowercase || c.Action == Uppercase || c.Action == KeepEqual || c.Action == DropEqual) && !model.LabelName(c.TargetLabel).IsValid() { return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) } if (c.Action == Lowercase || c.Action == Uppercase || c.Action == KeepEqual || c.Action == DropEqual) && c.Replacement != DefaultRelabelConfig.Replacement { From 000182e4b8638bfe781a583c27a1e8a366ac82bf Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 18 Dec 2023 15:03:21 +0000 Subject: [PATCH 44/52] relabel: check validity of all test cases Thought this would be a nice check on the `Validate()` function, but some of the test cases needed tweaking to pass. 
Signed-off-by: Bryan Boreham --- model/relabel/relabel_test.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index fe040be3a..2b8fc911d 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -335,7 +335,7 @@ func TestRelabel(t *testing.T) { }, { // invalid target_labels input: labels.FromMap(map[string]string{ - "a": "some-name-value", + "a": "some-name-0", }), relabel: []*Config{ { @@ -350,18 +350,18 @@ func TestRelabel(t *testing.T) { Regex: MustNewRegexp("some-([^-]+)-([^,]+)"), Action: Replace, Replacement: "${1}", - TargetLabel: "0${3}", + TargetLabel: "${3}", }, { SourceLabels: model.LabelNames{"a"}, - Regex: MustNewRegexp("some-([^-]+)-([^,]+)"), + Regex: MustNewRegexp("some-([^-]+)(-[^,]+)"), Action: Replace, Replacement: "${1}", - TargetLabel: "-${3}", + TargetLabel: "${3}", }, }, output: labels.FromMap(map[string]string{ - "a": "some-name-value", + "a": "some-name-0", }), }, { // more complex real-life like usecase @@ -566,6 +566,7 @@ func TestRelabel(t *testing.T) { if cfg.Replacement == "" { cfg.Replacement = DefaultRelabelConfig.Replacement } + require.NoError(t, cfg.Validate()) } res, keep := Process(test.input, test.relabel...) From 0289dd61571c7a812a235ecec5bc3f74fd7ccf50 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 18 Dec 2023 16:38:59 +0000 Subject: [PATCH 45/52] relabel: blank replacement deletes label post-regexp If `cfg.TargetLabel` is a template like `$1`, it won't match any labels, so no point calling `lb.Del` with it. Similarly if `target` is not a valid label name, it won't match any labels, so don't call with that either. The intention seems to have been that a blank _value_ would delete the target, so change that code to use `target` instead of `cfg.TargetLabel`. 
Signed-off-by: Bryan Boreham --- model/relabel/relabel.go | 3 +-- model/relabel/relabel_test.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index 1947e6273..d29c3d07a 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -274,12 +274,11 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { } target := model.LabelName(cfg.Regex.ExpandString([]byte{}, cfg.TargetLabel, val, indexes)) if !target.IsValid() { - lb.Del(cfg.TargetLabel) break } res := cfg.Regex.ExpandString([]byte{}, cfg.Replacement, val, indexes) if len(res) == 0 { - lb.Del(cfg.TargetLabel) + lb.Del(string(target)) break } lb.Set(string(target), string(res)) diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index 2b8fc911d..517b9b822 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -214,6 +214,25 @@ func TestRelabel(t *testing.T) { "a": "boo", }), }, + { + // Blank replacement should delete the label. + input: labels.FromMap(map[string]string{ + "a": "foo", + "f": "baz", + }), + relabel: []*Config{ + { + SourceLabels: model.LabelNames{"a"}, + Regex: MustNewRegexp("(f).*"), + TargetLabel: "$1", + Replacement: "$2", + Action: Replace, + }, + }, + output: labels.FromMap(map[string]string{ + "a": "foo", + }), + }, { input: labels.FromMap(map[string]string{ "a": "foo", From 8065bef172e8d88e22399504b175a8c9115e9da3 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Wed, 22 Nov 2023 14:39:21 +0000 Subject: [PATCH 46/52] Move metric type definitions to common/model They are used in multiple repos, so common is a better place for them. Several packages now don't depend on `model/textparse`, e.g. `storage/remote`. Also remove `metadata` struct from `api.go`, since it was identical to a struct in the `metadata` package. 
Signed-off-by: Bryan Boreham --- go.mod | 2 +- go.sum | 4 +- model/metadata/metadata.go | 4 +- model/textparse/interface.go | 16 +--- model/textparse/openmetricsparse.go | 20 +++-- model/textparse/openmetricsparse_test.go | 25 +++--- model/textparse/promparse.go | 12 +-- model/textparse/promparse_test.go | 6 +- model/textparse/protobufparse.go | 14 +-- model/textparse/protobufparse_test.go | 69 +++++++-------- scrape/scrape.go | 6 +- scrape/scrape_test.go | 6 +- storage/remote/codec.go | 3 +- storage/remote/codec_test.go | 8 +- storage/remote/metadata_watcher_test.go | 7 +- storage/remote/queue_manager_test.go | 3 +- tsdb/head_append.go | 2 +- tsdb/head_wal.go | 2 +- tsdb/record/record.go | 37 ++++---- web/api/v1/api.go | 32 +++---- web/api/v1/api_test.go | 104 +++++++++++------------ 21 files changed, 185 insertions(+), 197 deletions(-) diff --git a/go.mod b/go.mod index 9ea078083..0ecfd4374 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require ( github.com/prometheus/alertmanager v0.26.0 github.com/prometheus/client_golang v1.17.0 github.com/prometheus/client_model v0.5.0 - github.com/prometheus/common v0.45.0 + github.com/prometheus/common v0.45.1-0.20231122191551-832cd6e99f99 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 github.com/prometheus/exporter-toolkit v0.10.0 diff --git a/go.sum b/go.sum index a6f535dd7..fb31c7a84 100644 --- a/go.sum +++ b/go.sum @@ -653,8 +653,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= -github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= 
+github.com/prometheus/common v0.45.1-0.20231122191551-832cd6e99f99 h1:V5ajRiLiCQGO+ggTr+07gMUcTqlIMMkDBfrJe5zKLmc= +github.com/prometheus/common v0.45.1-0.20231122191551-832cd6e99f99/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= diff --git a/model/metadata/metadata.go b/model/metadata/metadata.go index f227af0b9..f6f2827a4 100644 --- a/model/metadata/metadata.go +++ b/model/metadata/metadata.go @@ -13,11 +13,11 @@ package metadata -import "github.com/prometheus/prometheus/model/textparse" +import "github.com/prometheus/common/model" // Metadata stores a series' metadata information. type Metadata struct { - Type textparse.MetricType + Type model.MetricType Unit string Help string } diff --git a/model/textparse/interface.go b/model/textparse/interface.go index df4259c85..f6d93f063 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -16,6 +16,8 @@ package textparse import ( "mime" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -110,16 +112,4 @@ const ( EntryHistogram Entry = 5 // A series with a native histogram as a value. ) -// MetricType represents metric type values. 
-type MetricType string - -const ( - MetricTypeCounter = MetricType("counter") - MetricTypeGauge = MetricType("gauge") - MetricTypeHistogram = MetricType("histogram") - MetricTypeGaugeHistogram = MetricType("gaugehistogram") - MetricTypeSummary = MetricType("summary") - MetricTypeInfo = MetricType("info") - MetricTypeStateset = MetricType("stateset") - MetricTypeUnknown = MetricType("unknown") -) +type MetricType = model.MetricType diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index f0c383723..d6b01eb8e 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -24,6 +24,8 @@ import ( "strings" "unicode/utf8" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -74,7 +76,7 @@ type OpenMetricsParser struct { builder labels.ScratchBuilder series []byte text []byte - mtype MetricType + mtype model.MetricType val float64 ts int64 hasTS bool @@ -272,21 +274,21 @@ func (p *OpenMetricsParser) Next() (Entry, error) { case tType: switch s := yoloString(p.text); s { case "counter": - p.mtype = MetricTypeCounter + p.mtype = model.MetricTypeCounter case "gauge": - p.mtype = MetricTypeGauge + p.mtype = model.MetricTypeGauge case "histogram": - p.mtype = MetricTypeHistogram + p.mtype = model.MetricTypeHistogram case "gaugehistogram": - p.mtype = MetricTypeGaugeHistogram + p.mtype = model.MetricTypeGaugeHistogram case "summary": - p.mtype = MetricTypeSummary + p.mtype = model.MetricTypeSummary case "info": - p.mtype = MetricTypeInfo + p.mtype = model.MetricTypeInfo case "stateset": - p.mtype = MetricTypeStateset + p.mtype = model.MetricTypeStateset case "unknown": - p.mtype = MetricTypeUnknown + p.mtype = model.MetricTypeUnknown default: return EntryInvalid, fmt.Errorf("invalid metric type %q", s) } diff --git a/model/textparse/openmetricsparse_test.go 
b/model/textparse/openmetricsparse_test.go index eed30364c..2b1d909f3 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -18,6 +18,7 @@ import ( "io" "testing" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" @@ -77,7 +78,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` m string t *int64 v float64 - typ MetricType + typ model.MetricType help string unit string comment string @@ -88,7 +89,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` help: "A summary of the GC invocation durations.", }, { m: "go_gc_duration_seconds", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { m: "go_gc_duration_seconds", unit: "seconds", @@ -130,7 +131,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` help: "Number of goroutines that currently exist.", }, { m: "go_goroutines", - typ: MetricTypeGauge, + typ: model.MetricTypeGauge, }, { m: `go_goroutines`, v: 33, @@ -138,21 +139,21 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` lset: labels.FromStrings("__name__", "go_goroutines"), }, { m: "hh", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: `hh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "hh_bucket", "le", "+Inf"), }, { m: "gh", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { m: `gh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"), }, { m: "hhh", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: `hhh_bucket{le="+Inf"}`, v: 1, @@ -165,7 +166,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, }, { m: "ggh", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { m: `ggh_bucket{le="+Inf"}`, v: 1, @@ -178,7 +179,7 @@ foo_total 17.0 1520879607.789 # 
{id="counter-test"} 5` e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, }, { m: "smr_seconds", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { m: `smr_seconds_count`, v: 2, @@ -191,14 +192,14 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, }, { m: "ii", - typ: MetricTypeInfo, + typ: model.MetricTypeInfo, }, { m: `ii{foo="bar"}`, v: 1, lset: labels.FromStrings("__name__", "ii", "foo", "bar"), }, { m: "ss", - typ: MetricTypeStateset, + typ: model.MetricTypeStateset, }, { m: `ss{ss="foo"}`, v: 1, @@ -213,7 +214,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` lset: labels.FromStrings("A", "a", "__name__", "ss"), }, { m: "un", - typ: MetricTypeUnknown, + typ: model.MetricTypeUnknown, }, { m: "_metric_starting_with_underscore", v: 1, @@ -228,7 +229,7 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), }, { m: "foo", - typ: MetricTypeCounter, + typ: model.MetricTypeCounter, }, { m: "foo_total", v: 17, diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 935801fb9..cd028ef90 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -26,6 +26,8 @@ import ( "unicode/utf8" "unsafe" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -305,15 +307,15 @@ func (p *PromParser) Next() (Entry, error) { case tType: switch s := yoloString(p.text); s { case "counter": - p.mtype = MetricTypeCounter + p.mtype = model.MetricTypeCounter case "gauge": - p.mtype = MetricTypeGauge + p.mtype = model.MetricTypeGauge case "histogram": - p.mtype = MetricTypeHistogram + p.mtype = model.MetricTypeHistogram 
case "summary": - p.mtype = MetricTypeSummary + p.mtype = model.MetricTypeSummary case "untyped": - p.mtype = MetricTypeUnknown + p.mtype = model.MetricTypeUnknown default: return EntryInvalid, fmt.Errorf("invalid metric type %q", s) } diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index ac79a1394..d34b26ba5 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -65,7 +65,7 @@ testmetric{label="\"bar\""} 1` m string t *int64 v float64 - typ MetricType + typ model.MetricType help string comment string }{ @@ -74,7 +74,7 @@ testmetric{label="\"bar\""} 1` help: "A summary of the GC invocation durations.", }, { m: "go_gc_duration_seconds", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, @@ -142,7 +142,7 @@ testmetric{label="\"bar\""} 1` help: "Number of goroutines that currently exist.", }, { m: "go_goroutines", - typ: MetricTypeGauge, + typ: model.MetricTypeGauge, }, { m: `go_goroutines`, v: 33, diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index baede7e1d..534bbebb2 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -252,21 +252,21 @@ func (p *ProtobufParser) Help() ([]byte, []byte) { // Type returns the metric name and type in the current entry. // Must only be called after Next returned a type entry. // The returned byte slices become invalid after the next call to Next. 
-func (p *ProtobufParser) Type() ([]byte, MetricType) { +func (p *ProtobufParser) Type() ([]byte, model.MetricType) { n := p.metricBytes.Bytes() switch p.mf.GetType() { case dto.MetricType_COUNTER: - return n, MetricTypeCounter + return n, model.MetricTypeCounter case dto.MetricType_GAUGE: - return n, MetricTypeGauge + return n, model.MetricTypeGauge case dto.MetricType_HISTOGRAM: - return n, MetricTypeHistogram + return n, model.MetricTypeHistogram case dto.MetricType_GAUGE_HISTOGRAM: - return n, MetricTypeGaugeHistogram + return n, model.MetricTypeGaugeHistogram case dto.MetricType_SUMMARY: - return n, MetricTypeSummary + return n, model.MetricTypeSummary } - return n, MetricTypeUnknown + return n, model.MetricTypeUnknown } // Unit always returns (nil, nil) because units aren't supported by the protobuf diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index c5b672dbc..f994ff966 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/gogo/protobuf/proto" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" @@ -649,7 +650,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "go_build_info", - typ: MetricTypeGauge, + typ: model.MetricTypeGauge, }, { m: "go_build_info\xFFchecksum\xFF\xFFpath\xFFgithub.com/prometheus/client_golang\xFFversion\xFF(devel)", @@ -667,7 +668,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "go_memstats_alloc_bytes_total", - typ: MetricTypeCounter, + typ: model.MetricTypeCounter, }, { m: "go_memstats_alloc_bytes_total", @@ -685,7 +686,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "something_untyped", - typ: MetricTypeUnknown, + typ: model.MetricTypeUnknown, }, { m: "something_untyped", @@ -701,7 +702,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, 
{ m: "test_histogram", @@ -736,7 +737,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_histogram", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { m: "test_gauge_histogram", @@ -772,7 +773,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_float_histogram", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: "test_float_histogram", @@ -807,7 +808,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_float_histogram", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { m: "test_gauge_float_histogram", @@ -843,7 +844,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram2", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: "test_histogram2_count", @@ -903,7 +904,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_family", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: "test_histogram_family\xfffoo\xffbar", @@ -947,7 +948,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_float_histogram_with_zerothreshold_zero", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: "test_float_histogram_with_zerothreshold_zero", @@ -971,7 +972,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "rpc_durations_seconds", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { m: "rpc_durations_seconds_count\xffservice\xffexponential", @@ -1022,7 +1023,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "without_quantiles", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { m: "without_quantiles_count", @@ -1044,7 +1045,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "empty_histogram", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: "empty_histogram", @@ -1063,7 +1064,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_counter_with_createdtimestamp", - typ: MetricTypeCounter, + typ: model.MetricTypeCounter, }, { m: 
"test_counter_with_createdtimestamp", @@ -1079,7 +1080,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_summary_with_createdtimestamp", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { m: "test_summary_with_createdtimestamp_count", @@ -1103,7 +1104,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_createdtimestamp", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { m: "test_histogram_with_createdtimestamp", @@ -1123,7 +1124,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gaugehistogram_with_createdtimestamp", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { m: "test_gaugehistogram_with_createdtimestamp", @@ -1149,7 +1150,7 @@ func TestProtobufParse(t *testing.T) { }, { // 1 m: "go_build_info", - typ: MetricTypeGauge, + typ: model.MetricTypeGauge, }, { // 2 m: "go_build_info\xFFchecksum\xFF\xFFpath\xFFgithub.com/prometheus/client_golang\xFFversion\xFF(devel)", @@ -1167,7 +1168,7 @@ func TestProtobufParse(t *testing.T) { }, { // 4 m: "go_memstats_alloc_bytes_total", - typ: MetricTypeCounter, + typ: model.MetricTypeCounter, }, { // 5 m: "go_memstats_alloc_bytes_total", @@ -1185,7 +1186,7 @@ func TestProtobufParse(t *testing.T) { }, { // 7 m: "something_untyped", - typ: MetricTypeUnknown, + typ: model.MetricTypeUnknown, }, { // 8 m: "something_untyped", @@ -1201,7 +1202,7 @@ func TestProtobufParse(t *testing.T) { }, { // 10 m: "test_histogram", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 11 m: "test_histogram", @@ -1294,7 +1295,7 @@ func TestProtobufParse(t *testing.T) { }, { // 19 m: "test_gauge_histogram", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { // 20 m: "test_gauge_histogram", @@ -1388,7 +1389,7 @@ func TestProtobufParse(t *testing.T) { }, { // 28 m: "test_float_histogram", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 29 m: "test_float_histogram", @@ -1481,7 +1482,7 @@ func 
TestProtobufParse(t *testing.T) { }, { // 37 m: "test_gauge_float_histogram", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { // 38 m: "test_gauge_float_histogram", @@ -1575,7 +1576,7 @@ func TestProtobufParse(t *testing.T) { }, { // 46 m: "test_histogram2", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 47 m: "test_histogram2_count", @@ -1635,7 +1636,7 @@ func TestProtobufParse(t *testing.T) { }, { // 54 m: "test_histogram_family", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 55 m: "test_histogram_family\xfffoo\xffbar", @@ -1765,7 +1766,7 @@ func TestProtobufParse(t *testing.T) { }, { // 68 m: "test_float_histogram_with_zerothreshold_zero", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 69 m: "test_float_histogram_with_zerothreshold_zero", @@ -1789,7 +1790,7 @@ func TestProtobufParse(t *testing.T) { }, { // 71 m: "rpc_durations_seconds", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { // 72 m: "rpc_durations_seconds_count\xffservice\xffexponential", @@ -1840,7 +1841,7 @@ func TestProtobufParse(t *testing.T) { }, { // 78 m: "without_quantiles", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { // 79 m: "without_quantiles_count", @@ -1862,7 +1863,7 @@ func TestProtobufParse(t *testing.T) { }, { // 79 m: "empty_histogram", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 80 m: "empty_histogram", @@ -1881,7 +1882,7 @@ func TestProtobufParse(t *testing.T) { }, { // 82 m: "test_counter_with_createdtimestamp", - typ: MetricTypeCounter, + typ: model.MetricTypeCounter, }, { // 83 m: "test_counter_with_createdtimestamp", @@ -1897,7 +1898,7 @@ func TestProtobufParse(t *testing.T) { }, { // 85 m: "test_summary_with_createdtimestamp", - typ: MetricTypeSummary, + typ: model.MetricTypeSummary, }, { // 86 m: "test_summary_with_createdtimestamp_count", @@ -1921,7 +1922,7 @@ func TestProtobufParse(t *testing.T) { }, { // 89 m: 
"test_histogram_with_createdtimestamp", - typ: MetricTypeHistogram, + typ: model.MetricTypeHistogram, }, { // 90 m: "test_histogram_with_createdtimestamp", @@ -1941,7 +1942,7 @@ func TestProtobufParse(t *testing.T) { }, { // 92 m: "test_gaugehistogram_with_createdtimestamp", - typ: MetricTypeGaugeHistogram, + typ: model.MetricTypeGaugeHistogram, }, { // 93 m: "test_gaugehistogram_with_createdtimestamp", diff --git a/scrape/scrape.go b/scrape/scrape.go index be27a5d48..de74987cc 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -966,7 +966,7 @@ func (c *scrapeCache) setType(metric []byte, t textparse.MetricType) { e, ok := c.metadata[string(metric)] if !ok { - e = &metaEntry{Metadata: metadata.Metadata{Type: textparse.MetricTypeUnknown}} + e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} c.metadata[string(metric)] = e } if e.Type != t { @@ -983,7 +983,7 @@ func (c *scrapeCache) setHelp(metric, help []byte) { e, ok := c.metadata[string(metric)] if !ok { - e = &metaEntry{Metadata: metadata.Metadata{Type: textparse.MetricTypeUnknown}} + e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} c.metadata[string(metric)] = e } if e.Help != string(help) { @@ -1000,7 +1000,7 @@ func (c *scrapeCache) setUnit(metric, unit []byte) { e, ok := c.metadata[string(metric)] if !ok { - e = &metaEntry{Metadata: metadata.Metadata{Type: textparse.MetricTypeUnknown}} + e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} c.metadata[string(metric)] = e } if e.Unit != string(unit) { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 90578f2e9..1a416eeb6 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -971,19 +971,19 @@ test_metric 1 md, ok := cache.GetMetadata("test_metric") require.True(t, ok, "expected metadata to be present") - require.Equal(t, textparse.MetricTypeCounter, md.Type, "unexpected metric type") + require.Equal(t, model.MetricTypeCounter, md.Type, "unexpected metric 
type") require.Equal(t, "some help text", md.Help) require.Equal(t, "metric", md.Unit) md, ok = cache.GetMetadata("test_metric_no_help") require.True(t, ok, "expected metadata to be present") - require.Equal(t, textparse.MetricTypeGauge, md.Type, "unexpected metric type") + require.Equal(t, model.MetricTypeGauge, md.Type, "unexpected metric type") require.Equal(t, "", md.Help) require.Equal(t, "", md.Unit) md, ok = cache.GetMetadata("test_metric_no_type") require.True(t, ok, "expected metadata to be present") - require.Equal(t, textparse.MetricTypeUnknown, md.Type, "unexpected metric type") + require.Equal(t, model.MetricTypeUnknown, md.Type, "unexpected metric type") require.Equal(t, "other help text", md.Help) require.Equal(t, "", md.Unit) } diff --git a/storage/remote/codec.go b/storage/remote/codec.go index 67035cd8e..ffab821a5 100644 --- a/storage/remote/codec.go +++ b/storage/remote/codec.go @@ -33,7 +33,6 @@ import ( "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -784,7 +783,7 @@ func labelsToLabelsProto(lbls labels.Labels, buf []prompb.Label) []prompb.Label } // metricTypeToMetricTypeProto transforms a Prometheus metricType into prompb metricType. Since the former is a string we need to transform it to an enum. 
-func metricTypeToMetricTypeProto(t textparse.MetricType) prompb.MetricMetadata_MetricType { +func metricTypeToMetricTypeProto(t model.MetricType) prompb.MetricMetadata_MetricType { mt := strings.ToUpper(string(t)) v, ok := prompb.MetricMetadata_MetricType_value[mt] if !ok { diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index d2a7d45be..ac8b0f0b5 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -20,11 +20,11 @@ import ( "testing" "github.com/gogo/protobuf/proto" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -488,17 +488,17 @@ func TestMergeLabels(t *testing.T) { func TestMetricTypeToMetricTypeProto(t *testing.T) { tc := []struct { desc string - input textparse.MetricType + input model.MetricType expected prompb.MetricMetadata_MetricType }{ { desc: "with a single-word metric", - input: textparse.MetricTypeCounter, + input: model.MetricTypeCounter, expected: prompb.MetricMetadata_COUNTER, }, { desc: "with a two-word metric", - input: textparse.MetricTypeStateset, + input: model.MetricTypeStateset, expected: prompb.MetricMetadata_STATESET, }, { diff --git a/storage/remote/metadata_watcher_test.go b/storage/remote/metadata_watcher_test.go index cd664bc8b..0cd6027a8 100644 --- a/storage/remote/metadata_watcher_test.go +++ b/storage/remote/metadata_watcher_test.go @@ -22,7 +22,6 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/scrape" ) @@ -108,13 +107,13 @@ func TestWatchScrapeManager_ReadyForCollection(t *testing.T) { Metadata: []scrape.MetricMetadata{ { Metric: 
"prometheus_tsdb_head_chunks_created_total", - Type: textparse.MetricTypeCounter, + Type: model.MetricTypeCounter, Help: "Total number", Unit: "", }, { Metric: "prometheus_remote_storage_retried_samples_total", - Type: textparse.MetricTypeCounter, + Type: model.MetricTypeCounter, Help: "Total number", Unit: "", }, @@ -124,7 +123,7 @@ func TestWatchScrapeManager_ReadyForCollection(t *testing.T) { Metadata: []scrape.MetricMetadata{ { Metric: "prometheus_tsdb_head_chunks_created_total", - Type: textparse.MetricTypeCounter, + Type: model.MetricTypeCounter, Help: "Total number", Unit: "", }, diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index c878c750b..17a904fcd 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -38,7 +38,6 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/scrape" @@ -180,7 +179,7 @@ func TestMetadataDelivery(t *testing.T) { for i := 0; i < numMetadata; i++ { metadata = append(metadata, scrape.MetricMetadata{ Metric: "prometheus_remote_storage_sent_metadata_bytes_total_" + strconv.Itoa(i), - Type: textparse.MetricTypeCounter, + Type: model.MetricTypeCounter, Help: "a nice help text", Unit: "", }) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index f509317c8..f112ffa3a 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -1038,7 +1038,7 @@ func (a *headAppender) Commit() (err error) { for i, m := range a.metadata { series = a.metadataSeries[i] series.Lock() - series.meta = &metadata.Metadata{Type: record.ToTextparseMetricType(m.Type), Unit: m.Unit, Help: m.Help} + series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} 
series.Unlock() } diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index a492a85a0..1be65f134 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -388,7 +388,7 @@ Outer: continue } s.meta = &metadata.Metadata{ - Type: record.ToTextparseMetricType(m.Type), + Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help, } diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 42a656dfe..3931ad05d 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -20,9 +20,10 @@ import ( "fmt" "math" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/encoding" @@ -90,45 +91,45 @@ const ( Stateset MetricType = 7 ) -func GetMetricType(t textparse.MetricType) uint8 { +func GetMetricType(t model.MetricType) uint8 { switch t { - case textparse.MetricTypeCounter: + case model.MetricTypeCounter: return uint8(Counter) - case textparse.MetricTypeGauge: + case model.MetricTypeGauge: return uint8(Gauge) - case textparse.MetricTypeHistogram: + case model.MetricTypeHistogram: return uint8(HistogramSample) - case textparse.MetricTypeGaugeHistogram: + case model.MetricTypeGaugeHistogram: return uint8(GaugeHistogram) - case textparse.MetricTypeSummary: + case model.MetricTypeSummary: return uint8(Summary) - case textparse.MetricTypeInfo: + case model.MetricTypeInfo: return uint8(Info) - case textparse.MetricTypeStateset: + case model.MetricTypeStateset: return uint8(Stateset) default: return uint8(UnknownMT) } } -func ToTextparseMetricType(m uint8) textparse.MetricType { +func ToMetricType(m uint8) model.MetricType { switch m { case uint8(Counter): - return textparse.MetricTypeCounter + return model.MetricTypeCounter case uint8(Gauge): - return textparse.MetricTypeGauge + return model.MetricTypeGauge 
case uint8(HistogramSample): - return textparse.MetricTypeHistogram + return model.MetricTypeHistogram case uint8(GaugeHistogram): - return textparse.MetricTypeGaugeHistogram + return model.MetricTypeGaugeHistogram case uint8(Summary): - return textparse.MetricTypeSummary + return model.MetricTypeSummary case uint8(Info): - return textparse.MetricTypeInfo + return model.MetricTypeInfo case uint8(Stateset): - return textparse.MetricTypeStateset + return model.MetricTypeStateset default: - return textparse.MetricTypeUnknown + return model.MetricTypeUnknown } } diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 8fa7ce14a..dd35d1fe9 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -41,7 +41,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/textparse" + "github.com/prometheus/prometheus/model/metadata" "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" @@ -1141,11 +1141,11 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult { } type metricMetadata struct { - Target labels.Labels `json:"target"` - Metric string `json:"metric,omitempty"` - Type textparse.MetricType `json:"type"` - Help string `json:"help"` - Unit string `json:"unit"` + Target labels.Labels `json:"target"` + Metric string `json:"metric,omitempty"` + Type model.MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` } // AlertmanagerDiscovery has all the active Alertmanagers. 
@@ -1221,14 +1221,8 @@ func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert { return apiAlerts } -type metadata struct { - Type textparse.MetricType `json:"type"` - Help string `json:"help"` - Unit string `json:"unit"` -} - func (api *API) metricMetadata(r *http.Request) apiFuncResult { - metrics := map[string]map[metadata]struct{}{} + metrics := map[string]map[metadata.Metadata]struct{}{} limit := -1 if s := r.FormValue("limit"); s != "" { @@ -1250,7 +1244,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { for _, t := range tt { if metric == "" { for _, mm := range t.ListMetadata() { - m := metadata{Type: mm.Type, Help: mm.Help, Unit: mm.Unit} + m := metadata.Metadata{Type: mm.Type, Help: mm.Help, Unit: mm.Unit} ms, ok := metrics[mm.Metric] if limitPerMetric > 0 && len(ms) >= limitPerMetric { @@ -1258,7 +1252,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { } if !ok { - ms = map[metadata]struct{}{} + ms = map[metadata.Metadata]struct{}{} metrics[mm.Metric] = ms } ms[m] = struct{}{} @@ -1267,7 +1261,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { } if md, ok := t.GetMetadata(metric); ok { - m := metadata{Type: md.Type, Help: md.Help, Unit: md.Unit} + m := metadata.Metadata{Type: md.Type, Help: md.Help, Unit: md.Unit} ms, ok := metrics[md.Metric] if limitPerMetric > 0 && len(ms) >= limitPerMetric { @@ -1275,7 +1269,7 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { } if !ok { - ms = map[metadata]struct{}{} + ms = map[metadata.Metadata]struct{}{} metrics[md.Metric] = ms } ms[m] = struct{}{} @@ -1284,13 +1278,13 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { } // Put the elements from the pseudo-set into a slice for marshaling. 
- res := map[string][]metadata{} + res := map[string][]metadata.Metadata{} for name, set := range metrics { if limit >= 0 && len(res) >= limit { break } - s := []metadata{} + s := []metadata.Metadata{} for metadata := range set { s = append(s, metadata) } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index d4da05e46..c9ab84087 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -44,7 +44,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/textparse" + "github.com/prometheus/prometheus/model/metadata" "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" @@ -1584,7 +1584,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created.", Unit: "", }, @@ -1597,7 +1597,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "job": "test", }), Help: "Number of OS threads created.", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Unit: "", }, }, @@ -1614,7 +1614,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "prometheus_tsdb_storage_blocks_bytes", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "The number of bytes that are currently used for local storage by all blocks.", Unit: "", }, @@ -1628,7 +1628,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }), Metric: "prometheus_tsdb_storage_blocks_bytes", Help: "The number of bytes that are currently used for local storage by all blocks.", - Type: textparse.MetricTypeGauge, + Type: 
model.MetricTypeGauge, Unit: "", }, }, @@ -1642,7 +1642,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created.", Unit: "", }, @@ -1653,7 +1653,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "prometheus_tsdb_storage_blocks_bytes", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "The number of bytes that are currently used for local storage by all blocks.", Unit: "", }, @@ -1667,7 +1667,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }), Metric: "go_threads", Help: "Number of OS threads created.", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Unit: "", }, { @@ -1676,7 +1676,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }), Metric: "prometheus_tsdb_storage_blocks_bytes", Help: "The number of bytes that are currently used for local storage by all blocks.", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Unit: "", }, }, @@ -1719,22 +1719,22 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "prometheus_engine_query_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "Query timings", Unit: "", }, { Metric: "go_info", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Information about the Go environment.", Unit: "", }, }, }, }, - response: map[string][]metadata{ - "prometheus_engine_query_duration_seconds": {{textparse.MetricTypeSummary, "Query timings", ""}}, - "go_info": {{textparse.MetricTypeGauge, "Information about the Go environment.", ""}}, + response: map[string][]metadata.Metadata{ + 
"prometheus_engine_query_duration_seconds": {{Type: model.MetricTypeSummary, Help: "Query timings", Unit: ""}}, + "go_info": {{Type: model.MetricTypeGauge, Help: "Information about the Go environment.", Unit: ""}}, }, }, // With duplicate metadata for a metric that comes from different targets. @@ -1746,7 +1746,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, @@ -1757,15 +1757,15 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, }, }, }, - response: map[string][]metadata{ - "go_threads": {{textparse.MetricTypeGauge, "Number of OS threads created", ""}}, + response: map[string][]metadata.Metadata{ + "go_threads": {{Type: model.MetricTypeGauge, Help: "Number of OS threads created"}}, }, }, // With non-duplicate metadata for the same metric from different targets. 
@@ -1777,7 +1777,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, @@ -1788,21 +1788,21 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads that were created.", Unit: "", }, }, }, }, - response: map[string][]metadata{ + response: map[string][]metadata.Metadata{ "go_threads": { - {textparse.MetricTypeGauge, "Number of OS threads created", ""}, - {textparse.MetricTypeGauge, "Number of OS threads that were created.", ""}, + {Type: model.MetricTypeGauge, Help: "Number of OS threads created"}, + {Type: model.MetricTypeGauge, Help: "Number of OS threads that were created."}, }, }, sorter: func(m interface{}) { - v := m.(map[string][]metadata)["go_threads"] + v := m.(map[string][]metadata.Metadata)["go_threads"] sort.Slice(v, func(i, j int) bool { return v[i].Help < v[j].Help @@ -1821,13 +1821,13 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, { Metric: "prometheus_engine_query_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "Query Timmings.", Unit: "", }, @@ -1838,7 +1838,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_gc_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations.", Unit: "", }, @@ -1857,31 +1857,31 @@ func testEndpoints(t *testing.T, api *API, tr 
*testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Repeated metadata", Unit: "", }, { Metric: "go_gc_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations.", Unit: "", }, }, }, }, - response: map[string][]metadata{ + response: map[string][]metadata.Metadata{ "go_threads": { - {textparse.MetricTypeGauge, "Number of OS threads created", ""}, + {Type: model.MetricTypeGauge, Help: "Number of OS threads created"}, }, "go_gc_duration_seconds": { - {textparse.MetricTypeSummary, "A summary of the GC invocation durations.", ""}, + {Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations."}, }, }, }, @@ -1895,19 +1895,19 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Repeated metadata", Unit: "", }, { Metric: "go_gc_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations.", Unit: "", }, @@ -1928,19 +1928,19 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Repeated metadata", Unit: "", }, { Metric: "go_gc_duration_seconds", - Type: 
textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations.", Unit: "", }, @@ -1951,13 +1951,13 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created, but from a different target", Unit: "", }, { Metric: "go_gc_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations, but from a different target.", Unit: "", }, @@ -1977,7 +1977,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, @@ -1988,27 +1988,27 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_gc_duration_seconds", - Type: textparse.MetricTypeSummary, + Type: model.MetricTypeSummary, Help: "A summary of the GC invocation durations.", Unit: "", }, { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads that were created.", Unit: "", }, }, }, }, - response: map[string][]metadata{ + response: map[string][]metadata.Metadata{ "go_threads": { - {textparse.MetricTypeGauge, "Number of OS threads created", ""}, - {textparse.MetricTypeGauge, "Number of OS threads that were created.", ""}, + {Type: model.MetricTypeGauge, Help: "Number of OS threads created"}, + {Type: model.MetricTypeGauge, Help: "Number of OS threads that were created."}, }, }, sorter: func(m interface{}) { - v := m.(map[string][]metadata)["go_threads"] + v := m.(map[string][]metadata.Metadata)["go_threads"] sort.Slice(v, func(i, j int) bool { return v[i].Help < v[j].Help @@ -2025,19 
+2025,19 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata: []scrape.MetricMetadata{ { Metric: "go_threads", - Type: textparse.MetricTypeGauge, + Type: model.MetricTypeGauge, Help: "Number of OS threads created", Unit: "", }, }, }, }, - response: map[string][]metadata{}, + response: map[string][]metadata.Metadata{}, }, // With no available metadata. { endpoint: api.metricMetadata, - response: map[string][]metadata{}, + response: map[string][]metadata.Metadata{}, }, { endpoint: api.serveConfig, @@ -2931,7 +2931,7 @@ func assertAPIResponseMetadataLen(t *testing.T, got interface{}, expLen int) { t.Helper() var gotLen int - response := got.(map[string][]metadata) + response := got.(map[string][]metadata.Metadata) for _, m := range response { gotLen += len(m) } From c83e1fc5748be3bd35bf0a31eb53690b412846a4 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 12 Dec 2023 12:14:36 +0000 Subject: [PATCH 47/52] textparse: remove MetricType alias No backwards-compatibility; make a clean break. Signed-off-by: Bryan Boreham --- model/textparse/interface.go | 4 +--- model/textparse/openmetricsparse.go | 2 +- model/textparse/promparse.go | 4 ++-- model/textparse/protobufparse_test.go | 2 +- scrape/scrape.go | 2 +- scrape/target.go | 3 +-- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/model/textparse/interface.go b/model/textparse/interface.go index f6d93f063..3a363ebfb 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -44,7 +44,7 @@ type Parser interface { // Type returns the metric name and type in the current entry. // Must only be called after Next returned a type entry. // The returned byte slices become invalid after the next call to Next. - Type() ([]byte, MetricType) + Type() ([]byte, model.MetricType) // Unit returns the metric name and unit in the current entry. // Must only be called after Next returned a unit entry. 
@@ -111,5 +111,3 @@ const ( EntryUnit Entry = 4 EntryHistogram Entry = 5 // A series with a native histogram as a value. ) - -type MetricType = model.MetricType diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index d6b01eb8e..ddfbe4fc5 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -128,7 +128,7 @@ func (p *OpenMetricsParser) Help() ([]byte, []byte) { // Type returns the metric name and type in the current entry. // Must only be called after Next returned a type entry. // The returned byte slices become invalid after the next call to Next. -func (p *OpenMetricsParser) Type() ([]byte, MetricType) { +func (p *OpenMetricsParser) Type() ([]byte, model.MetricType) { return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype } diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index cd028ef90..7123e52c3 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -148,7 +148,7 @@ type PromParser struct { builder labels.ScratchBuilder series []byte text []byte - mtype MetricType + mtype model.MetricType val float64 ts int64 hasTS bool @@ -192,7 +192,7 @@ func (p *PromParser) Help() ([]byte, []byte) { // Type returns the metric name and type in the current entry. // Must only be called after Next returned a type entry. // The returned byte slices become invalid after the next call to Next. 
-func (p *PromParser) Type() ([]byte, MetricType) { +func (p *PromParser) Type() ([]byte, model.MetricType) { return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype } diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index f994ff966..7dcc85f54 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -623,7 +623,7 @@ func TestProtobufParse(t *testing.T) { m string t int64 v float64 - typ MetricType + typ model.MetricType help string unit string comment string diff --git a/scrape/scrape.go b/scrape/scrape.go index de74987cc..dd425db90 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -961,7 +961,7 @@ func (c *scrapeCache) forEachStale(f func(labels.Labels) bool) { } } -func (c *scrapeCache) setType(metric []byte, t textparse.MetricType) { +func (c *scrapeCache) setType(metric []byte, t model.MetricType) { c.metaMtx.Lock() e, ok := c.metadata[string(metric)] diff --git a/scrape/target.go b/scrape/target.go index fd984f5a6..0605f5349 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -30,7 +30,6 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" ) @@ -87,7 +86,7 @@ type MetricMetadataStore interface { // MetricMetadata is a piece of metadata for a metric. 
type MetricMetadata struct { Metric string - Type textparse.MetricType + Type model.MetricType Help string Unit string } From 096ec129120b124ea14c78c8721c80dd0961b8ce Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 19 Dec 2023 18:53:09 +0000 Subject: [PATCH 48/52] Update comment about metadata in types.proto Signed-off-by: Bryan Boreham --- prompb/types.pb.go | 2 +- prompb/types.proto | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prompb/types.pb.go b/prompb/types.pb.go index 125f868e9..93883daa1 100644 --- a/prompb/types.pb.go +++ b/prompb/types.pb.go @@ -164,7 +164,7 @@ func (Chunk_Encoding) EnumDescriptor() ([]byte, []int) { type MetricMetadata struct { // Represents the metric type, these match the set from Prometheus. - // Refer to model/textparse/interface.go for details. + // Refer to github.com/prometheus/common/model/metadata.go for details. Type MetricMetadata_MetricType `protobuf:"varint,1,opt,name=type,proto3,enum=prometheus.MetricMetadata_MetricType" json:"type,omitempty"` MetricFamilyName string `protobuf:"bytes,2,opt,name=metric_family_name,json=metricFamilyName,proto3" json:"metric_family_name,omitempty"` Help string `protobuf:"bytes,4,opt,name=help,proto3" json:"help,omitempty"` diff --git a/prompb/types.proto b/prompb/types.proto index aa322515c..61fc1e014 100644 --- a/prompb/types.proto +++ b/prompb/types.proto @@ -31,7 +31,7 @@ message MetricMetadata { } // Represents the metric type, these match the set from Prometheus. - // Refer to model/textparse/interface.go for details. + // Refer to github.com/prometheus/common/model/metadata.go for details. 
MetricType type = 1; string metric_family_name = 2; string help = 4; From b012366c33cb673110421b9e20e35fde99c992db Mon Sep 17 00:00:00 2001 From: Kumar Kalpadiptya Roy Date: Wed, 20 Dec 2023 00:28:59 +0530 Subject: [PATCH 49/52] Issue #13268: fix quality value in accept header Signed-off-by: Kumar Kalpadiptya Roy --- scrape/scrape.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index be27a5d48..cb65053ba 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -675,7 +675,7 @@ func acceptHeader(sps []config.ScrapeProtocol) string { weight-- } // Default match anything. - vals = append(vals, fmt.Sprintf("*/*;q=%d", weight)) + vals = append(vals, fmt.Sprintf("*/*;q=0.%d", weight)) return strings.Join(vals, ",") } From 5df3820c7a5e099f0cdf8c503a01ac4b0498cf96 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 25 Dec 2023 11:19:16 +0100 Subject: [PATCH 50/52] Copy last histogram point Signed-off-by: Filip Petkovski --- promql/engine.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 12755663d..5d6f7b9a0 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2126,17 +2126,20 @@ loop: // The sought sample might also be in the range. 
switch soughtValueType { case chunkenc.ValFloatHistogram, chunkenc.ValHistogram: - t, h := it.AtFloatHistogram() - if t == maxt && !value.IsStaleNaN(h.Sum) { - if ev.currentSamples >= ev.maxSamples { - ev.error(ErrTooManySamples(env)) + t := it.AtT() + if t == maxt { + _, h := it.AtFloatHistogram() + if !value.IsStaleNaN(h.Sum) { + if ev.currentSamples >= ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if histograms == nil { + histograms = getHPointSlice(16) + } + point := HPoint{T: t, H: h.Copy()} + histograms = append(histograms, point) + ev.currentSamples += point.size() } - if histograms == nil { - histograms = getHPointSlice(16) - } - point := HPoint{T: t, H: h} - histograms = append(histograms, point) - ev.currentSamples += point.size() } case chunkenc.ValFloat: t, f := it.At() From 35f9620cd1f212b37cc3074bbd59bdfae713f87d Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 25 Dec 2023 11:30:29 +0100 Subject: [PATCH 51/52] Expand benchmark Signed-off-by: Filip Petkovski --- promql/bench_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/promql/bench_test.go b/promql/bench_test.go index 13eba3714..b7a4978de 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -296,8 +296,12 @@ func BenchmarkNativeHistograms(b *testing.B) { query: "sum(native_histogram_series)", }, { - name: "sum rate", - query: "sum(rate(native_histogram_series[1m]))", + name: "sum rate with short rate interval", + query: "sum(rate(native_histogram_series[2m]))", + }, + { + name: "sum rate with long rate interval", + query: "sum(rate(native_histogram_series[20m]))", }, } From 0e1ae1d1caa65c398d0024a4ecb7977405e15a07 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 25 Dec 2023 11:41:07 +0100 Subject: [PATCH 52/52] Add comment Signed-off-by: Filip Petkovski --- promql/engine.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index 5d6f7b9a0..2ea37dae6 100644 --- a/promql/engine.go +++ 
b/promql/engine.go @@ -2136,6 +2136,8 @@ loop: if histograms == nil { histograms = getHPointSlice(16) } + // The last sample comes directly from the iterator, so we need to copy it to + // avoid having the same reference twice in the buffer. point := HPoint{T: t, H: h.Copy()} histograms = append(histograms, point) ev.currentSamples += point.size()