Merge pull request #14292 from zenador/nhcb-review-2

[nhcb branch] update missed suggested change from code review
This commit is contained in:
George Krajcsovits 2024-06-20 17:06:28 +02:00 committed by GitHub
commit a3d92dd72f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 1180 additions and 819 deletions

View file

@ -29,6 +29,7 @@ linters:
- unused
- usestdlibvars
- whitespace
- loggercheck
issues:
max-same-issues: 0

View file

@ -2,16 +2,23 @@
## unreleased
This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 50.
## 2.53.0 / 2024-06-16
* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980
* [CHANGE] Runtime: Change GOGC threshold from 100 to 50 #14176
* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061
* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974
This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 75.
* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 #14048
* [CHANGE] Runtime: Change GOGC threshold from 100 to 75 #14176 #14285
* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 #14216 #14273
* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` metric to measure the time it takes to restore a rule group. #13974
* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991
* [ENHANCEMENT] TSDB: Optimize querying with regexp matchers. #13620
* [BUGFIX] OTLP: Don't generate target_info unless at least one identifying label is defined. #13991
* [BUGFIX] OTLP: Don't generate target_info unless there are metrics. #13991
* [BUGFIX] OTLP: Don't generate target_info unless there are metrics and at least one identifying label is defined. #13991
* [BUGFIX] Scrape: Do no try to ingest native histograms when the native histograms feature is turned off. This happened when protobuf scrape was enabled by for example the created time feature. #13987
* [BUGFIX] Scaleway SD: Use the instance's public IP if no private IP is available as the `__address__` meta label. #13941
* [BUGFIX] Query logger: Do not leak file descriptors on error. #13948
* [BUGFIX] TSDB: Let queries with heavy regex matches be cancelled and not use up the CPU. #14096 #14103 #14118 #14199
* [BUGFIX] API: Do not warn if result count is equal to the limit, only when exceeding the limit for the series, label-names and label-values APIs. #14116
* [BUGFIX] TSDB: Fix head stats and hooks when replaying a corrupted snapshot. #14079
## 2.52.1 / 2024-05-29

View file

@ -1,8 +1,8 @@
<h1 align="center" style="border-bottom: none">
<a href="//prometheus.io" target="_blank"><img alt="Prometheus" src="/documentation/images/prometheus-logo.svg"></a><br>Prometheus
<a href="https://prometheus.io" target="_blank"><img alt="Prometheus" src="/documentation/images/prometheus-logo.svg"></a><br>Prometheus
</h1>
<p align="center">Visit <a href="//prometheus.io" target="_blank">prometheus.io</a> for the full documentation,
<p align="center">Visit <a href="https://prometheus.io" target="_blank">prometheus.io</a> for the full documentation,
examples and guides.</p>
<div align="center">

View file

@ -149,6 +149,8 @@ Changes for a patch release or release candidate should be merged into the previ
Bump the version in the `VERSION` file and update `CHANGELOG.md`. Do this in a proper PR pointing to the release branch as this gives others the opportunity to chime in on the release in general and on the addition to the changelog in particular. For a release candidate, append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.).
When updating the `CHANGELOG.md` look at all PRs included in the release since the last release and verify if they need a changelog entry.
Note that `CHANGELOG.md` should only document changes relevant to users of Prometheus, including external API changes, performance improvements, and new features. Do not document changes of internal interfaces, code refactorings and clean-ups, changes to the build process, etc. People interested in these are asked to refer to the git history.
For release candidates still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update.

View file

@ -1 +1 @@
2.52.1
2.53.0

View file

@ -1197,7 +1197,7 @@ func main() {
}
if agentMode {
// WAL storage.
opts := cfg.agent.ToAgentOptions()
opts := cfg.agent.ToAgentOptions(cfg.tsdb.OutOfOrderTimeWindow)
cancel := make(chan struct{})
g.Add(
func() error {
@ -1233,6 +1233,7 @@ func main() {
"TruncateFrequency", cfg.agent.TruncateFrequency,
"MinWALTime", cfg.agent.MinWALTime,
"MaxWALTime", cfg.agent.MaxWALTime,
"OutOfOrderTimeWindow", cfg.agent.OutOfOrderTimeWindow,
)
localStorage.Set(db, 0)
@ -1736,17 +1737,22 @@ type agentOptions struct {
TruncateFrequency model.Duration
MinWALTime, MaxWALTime model.Duration
NoLockfile bool
OutOfOrderTimeWindow int64
}
func (opts agentOptions) ToAgentOptions() agent.Options {
func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Options {
if outOfOrderTimeWindow < 0 {
outOfOrderTimeWindow = 0
}
return agent.Options{
WALSegmentSize: int(opts.WALSegmentSize),
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
StripeSize: opts.StripeSize,
TruncateFrequency: time.Duration(opts.TruncateFrequency),
MinWALTime: durationToInt64Millis(time.Duration(opts.MinWALTime)),
MaxWALTime: durationToInt64Millis(time.Duration(opts.MaxWALTime)),
NoLockfile: opts.NoLockfile,
WALSegmentSize: int(opts.WALSegmentSize),
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
StripeSize: opts.StripeSize,
TruncateFrequency: time.Duration(opts.TruncateFrequency),
MinWALTime: durationToInt64Millis(time.Duration(opts.MinWALTime)),
MaxWALTime: durationToInt64Millis(time.Duration(opts.MaxWALTime)),
NoLockfile: opts.NoLockfile,
OutOfOrderTimeWindow: outOfOrderTimeWindow,
}
}

View file

@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/oklog/ulid"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/textparse"
@ -191,6 +192,10 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
if quiet {
break
}
// Empty block, don't print.
if block.Compare(ulid.ULID{}) == 0 {
break
}
blocks, err := db.Blocks()
if err != nil {
return fmt.Errorf("get blocks: %w", err)

View file

@ -154,7 +154,7 @@ var (
DefaultRuntimeConfig = RuntimeConfig{
// Go runtime tuning.
GoGC: 50,
GoGC: 75,
}
// DefaultScrapeConfig is the default scrape configuration.

View file

@ -42,28 +42,29 @@ import (
)
const (
ec2Label = model.MetaLabelPrefix + "ec2_"
ec2LabelAMI = ec2Label + "ami"
ec2LabelAZ = ec2Label + "availability_zone"
ec2LabelAZID = ec2Label + "availability_zone_id"
ec2LabelArch = ec2Label + "architecture"
ec2LabelIPv6Addresses = ec2Label + "ipv6_addresses"
ec2LabelInstanceID = ec2Label + "instance_id"
ec2LabelInstanceLifecycle = ec2Label + "instance_lifecycle"
ec2LabelInstanceState = ec2Label + "instance_state"
ec2LabelInstanceType = ec2Label + "instance_type"
ec2LabelOwnerID = ec2Label + "owner_id"
ec2LabelPlatform = ec2Label + "platform"
ec2LabelPrimarySubnetID = ec2Label + "primary_subnet_id"
ec2LabelPrivateDNS = ec2Label + "private_dns_name"
ec2LabelPrivateIP = ec2Label + "private_ip"
ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelRegion = ec2Label + "region"
ec2LabelSubnetID = ec2Label + "subnet_id"
ec2LabelTag = ec2Label + "tag_"
ec2LabelVPCID = ec2Label + "vpc_id"
ec2LabelSeparator = ","
ec2Label = model.MetaLabelPrefix + "ec2_"
ec2LabelAMI = ec2Label + "ami"
ec2LabelAZ = ec2Label + "availability_zone"
ec2LabelAZID = ec2Label + "availability_zone_id"
ec2LabelArch = ec2Label + "architecture"
ec2LabelIPv6Addresses = ec2Label + "ipv6_addresses"
ec2LabelInstanceID = ec2Label + "instance_id"
ec2LabelInstanceLifecycle = ec2Label + "instance_lifecycle"
ec2LabelInstanceState = ec2Label + "instance_state"
ec2LabelInstanceType = ec2Label + "instance_type"
ec2LabelOwnerID = ec2Label + "owner_id"
ec2LabelPlatform = ec2Label + "platform"
ec2LabelPrimaryIPv6Addresses = ec2Label + "primary_ipv6_addresses"
ec2LabelPrimarySubnetID = ec2Label + "primary_subnet_id"
ec2LabelPrivateDNS = ec2Label + "private_dns_name"
ec2LabelPrivateIP = ec2Label + "private_ip"
ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelRegion = ec2Label + "region"
ec2LabelSubnetID = ec2Label + "subnet_id"
ec2LabelTag = ec2Label + "tag_"
ec2LabelVPCID = ec2Label + "vpc_id"
ec2LabelSeparator = ","
)
// DefaultEC2SDConfig is the default EC2 SD configuration.
@ -317,6 +318,7 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
var subnets []string
var ipv6addrs []string
var primaryipv6addrs []string
subnetsMap := make(map[string]struct{})
for _, eni := range inst.NetworkInterfaces {
if eni.SubnetId == nil {
@ -330,6 +332,15 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
for _, ipv6addr := range eni.Ipv6Addresses {
ipv6addrs = append(ipv6addrs, *ipv6addr.Ipv6Address)
if *ipv6addr.IsPrimaryIpv6 {
// we might have to extend the slice with more than one element
// that could leave empty strings in the list which is intentional
// to keep the position/device index information
for int64(len(primaryipv6addrs)) <= *eni.Attachment.DeviceIndex {
primaryipv6addrs = append(primaryipv6addrs, "")
}
primaryipv6addrs[*eni.Attachment.DeviceIndex] = *ipv6addr.Ipv6Address
}
}
}
labels[ec2LabelSubnetID] = model.LabelValue(
@ -342,6 +353,12 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
strings.Join(ipv6addrs, ec2LabelSeparator) +
ec2LabelSeparator)
}
if len(primaryipv6addrs) > 0 {
labels[ec2LabelPrimaryIPv6Addresses] = model.LabelValue(
ec2LabelSeparator +
strings.Join(primaryipv6addrs, ec2LabelSeparator) +
ec2LabelSeparator)
}
}
for _, t := range inst.Tags {

View file

@ -120,6 +120,16 @@ func Name(n string) func(*Manager) {
}
}
// Updatert sets the updatert of the manager.
// Used to speed up tests.
func Updatert(u time.Duration) func(*Manager) {
return func(m *Manager) {
m.mtx.Lock()
defer m.mtx.Unlock()
m.updatert = u
}
}
// HTTPClientOptions sets the list of HTTP client options to expose to
// Discoverers. It is up to Discoverers to choose to use the options provided.
func HTTPClientOptions(opts ...config.HTTPClientOption) func(*Manager) {

View file

@ -125,7 +125,7 @@ runtime:
# Configure the Go garbage collector GOGC parameter
# See: https://tip.golang.org/doc/gc-guide#GOGC
# Lowering this number increases CPU usage.
[ gogc: <int> | default = 50 ]
[ gogc: <int> | default = 75 ]
# Rule files specifies a list of globs. Rules and alerts are read from
# all matching files.
@ -1229,6 +1229,7 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_ec2_ipv6_addresses`: comma separated list of IPv6 addresses assigned to the instance's network interfaces, if present
* `__meta_ec2_owner_id`: the ID of the AWS account that owns the EC2 instance
* `__meta_ec2_platform`: the Operating System platform, set to 'windows' on Windows servers, absent otherwise
* `__meta_ec2_primary_ipv6_addresses`: comma separated list of the Primary IPv6 addresses of the instance, if present. The list is ordered based on the position of each corresponding network interface in the attachment order.
* `__meta_ec2_primary_subnet_id`: the subnet ID of the primary network interface, if available
* `__meta_ec2_private_dns_name`: the private DNS name of the instance, if available
* `__meta_ec2_private_ip`: the private IP address of the instance, if present
@ -1608,7 +1609,16 @@ and serves as an interface to plug in custom service discovery mechanisms.
It reads a set of files containing a list of zero or more
`<static_config>`s. Changes to all defined files are detected via disk watches
and applied immediately. Files may be provided in YAML or JSON format. Only
and applied immediately.
While those individual files are watched for changes,
the parent directory is also watched implicitly. This is to handle [atomic
renaming](https://github.com/fsnotify/fsnotify/blob/c1467c02fba575afdb5f4201072ab8403bbf00f4/README.md?plain=1#L128) efficiently and to detect new files that match the configured globs.
This may cause issues if the parent directory contains a large number of other files,
as each of these files will be watched too, even though the events related
to them are not relevant.
Files may be provided in YAML or JSON format. Only
changes resulting in well-formed target groups are applied.
Files must contain a list of static configs, using these formats:
@ -3813,6 +3823,10 @@ NOTE: Out-of-order ingestion is an experimental feature, but you do not need any
# into the TSDB, i.e. it is an in-order sample or an out-of-order/out-of-bounds sample
# that is within the out-of-order window, or (b) too-old, i.e. not in-order
# and before the out-of-order window.
#
# When out_of_order_time_window is greater than 0, it also affects experimental agent. It allows
# the agent's WAL to accept out-of-order samples that fall within the specified time window relative
# to the timestamp of the last appended sample for the same series.
[ out_of_order_time_window: <duration> | default = 0s ]
```

View file

@ -31,11 +31,19 @@ production deployments it is highly recommended to use a
[named volume](https://docs.docker.com/storage/volumes/)
to ease managing the data on Prometheus upgrades.
To provide your own configuration, there are several options. Here are
two examples.
### Setting command line parameters
The Docker image is started with a number of default command line parameters, which
can be found in the [Dockerfile](https://github.com/prometheus/prometheus/blob/main/Dockerfile) (adjust the link to correspond with the version in use).
If you want to add extra command line parameters to the `docker run` command,
you will need to re-add these yourself as they will be overwritten.
### Volumes & bind-mount
To provide your own configuration, there are several options. Here are
two examples.
Bind-mount your `prometheus.yml` from the host by running:
```bash

View file

@ -61,8 +61,11 @@ A Prometheus server's data directory looks something like this:
Note that a limitation of local storage is that it is not clustered or
replicated. Thus, it is not arbitrarily scalable or durable in the face of
drive or node outages and should be managed like any other single node
database. The use of RAID is suggested for storage availability, and
[snapshots](querying/api.md#snapshot) are recommended for backups. With proper
database.
[Snapshots](querying/api.md#snapshot) are recommended for backups. Backups
made without snapshots run the risk of losing data that was recorded since
the last WAL sync, which typically happens every two hours. With proper
architecture, it is possible to retain years of data in local storage.
Alternatively, external storage may be used via the

View file

@ -34,6 +34,20 @@
description: 'Prometheus %(prometheusName)s has failed to refresh SD with mechanism {{$labels.mechanism}}.' % $._config,
},
},
{
alert: 'PrometheusKubernetesListWatchFailures',
expr: |||
increase(prometheus_sd_kubernetes_failures_total{%(prometheusSelector)s}[5m]) > 0
||| % $._config,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
summary: 'Requests in Kubernetes SD are failing.',
description: 'Kubernetes service discovery of Prometheus %(prometheusName)s is experiencing {{ printf "%%.0f" $value }} failures with LIST/WATCH requests to the Kubernetes API in the last 5 minutes.' % $._config,
},
},
{
alert: 'PrometheusNotificationQueueRunningFull',
expr: |||

2
go.mod
View file

@ -77,6 +77,7 @@ require (
golang.org/x/oauth2 v0.21.0
golang.org/x/sync v0.7.0
golang.org/x/sys v0.21.0
golang.org/x/text v0.16.0
golang.org/x/time v0.5.0
golang.org/x/tools v0.22.0
google.golang.org/api v0.183.0
@ -188,7 +189,6 @@ require (
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
golang.org/x/mod v0.18.0 // indirect
golang.org/x/term v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect

View file

@ -18,6 +18,7 @@ import (
"encoding/json"
"slices"
"strconv"
"unsafe"
"github.com/prometheus/common/model"
)
@ -215,3 +216,7 @@ func contains(s []Label, n string) bool {
}
return false
}
func yoloString(b []byte) string {
return *((*string)(unsafe.Pointer(&b)))
}

View file

@ -20,7 +20,6 @@ import (
"slices"
"strings"
"sync"
"unsafe"
"github.com/cespare/xxhash/v2"
)
@ -426,10 +425,6 @@ func EmptyLabels() Labels {
return Labels{}
}
func yoloString(b []byte) string {
return *((*string)(unsafe.Pointer(&b)))
}
// New returns a sorted Labels from the given labels.
// The caller has to guarantee that all label names are unique.
// Note this function is not efficient; should not be used in performance-critical places.

View file

@ -299,11 +299,6 @@ func Equal(ls, o Labels) bool {
func EmptyLabels() Labels {
return Labels{}
}
func yoloString(b []byte) string {
return *((*string)(unsafe.Pointer(&b)))
}
func yoloBytes(s string) (b []byte) {
*(*string)(unsafe.Pointer(&b)) = s
(*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s)

View file

@ -101,7 +101,7 @@ func (m *Matcher) shouldQuoteName() bool {
}
return true
}
return false
return len(m.Name) == 0
}
// Matches returns whether the matcher matches the given string value.

View file

@ -16,10 +16,12 @@ package labels
import (
"slices"
"strings"
"unicode"
"unicode/utf8"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
"golang.org/x/text/unicode/norm"
)
const (
@ -766,7 +768,7 @@ type equalMultiStringMapMatcher struct {
func (m *equalMultiStringMapMatcher) add(s string) {
if !m.caseSensitive {
s = strings.ToLower(s)
s = toNormalisedLower(s)
}
m.values[s] = struct{}{}
@ -786,13 +788,35 @@ func (m *equalMultiStringMapMatcher) setMatches() []string {
func (m *equalMultiStringMapMatcher) Matches(s string) bool {
if !m.caseSensitive {
s = strings.ToLower(s)
s = toNormalisedLower(s)
}
_, ok := m.values[s]
return ok
}
// toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
// it to lower case.
func toNormalisedLower(s string) string {
var buf []byte
for i := 0; i < len(s); i++ {
c := s[i]
if c >= utf8.RuneSelf {
return strings.Map(unicode.ToLower, norm.NFKD.String(s))
}
if 'A' <= c && c <= 'Z' {
if buf == nil {
buf = []byte(s)
}
buf[i] = c + 'a' - 'A'
}
}
if buf == nil {
return s
}
return yoloString(buf)
}
// anyStringWithoutNewlineMatcher is a stringMatcher which matches any string
// (including an empty one) as far as it doesn't contain any newline character.
type anyStringWithoutNewlineMatcher struct{}

File diff suppressed because one or more lines are too long

View file

@ -298,25 +298,14 @@ func (n *Manager) nextBatch() []*Alert {
return alerts
}
// Run dispatches notifications continuously.
func (n *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) {
// sendLoop continuously consumes the notifications queue and sends alerts to
// the configured Alertmanagers.
func (n *Manager) sendLoop() {
for {
// The select is split in two parts, such as we will first try to read
// new alertmanager targets if they are available, before sending new
// alerts.
select {
case <-n.ctx.Done():
return
case ts := <-tsets:
n.reload(ts)
default:
select {
case <-n.ctx.Done():
return
case ts := <-tsets:
n.reload(ts)
case <-n.more:
}
case <-n.more:
}
alerts := n.nextBatch()
@ -330,6 +319,21 @@ func (n *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) {
}
}
// Run receives updates of target groups and triggers a reload.
// The dispatching of notifications occurs in the background to prevent blocking the receipt of target updates.
// Refer to https://github.com/prometheus/prometheus/issues/13676 for more details.
func (n *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) {
go n.sendLoop()
for {
select {
case <-n.ctx.Done():
return
case ts := <-tsets:
n.reload(ts)
}
}
}
func (n *Manager) reload(tgs map[string][]*targetgroup.Group) {
n.mtx.Lock()
defer n.mtx.Unlock()
@ -471,10 +475,6 @@ func (n *Manager) sendAll(alerts ...*Alert) bool {
numSuccess atomic.Uint64
)
for _, ams := range amSets {
if len(ams.ams) == 0 {
continue
}
var (
payload []byte
err error
@ -483,6 +483,11 @@ func (n *Manager) sendAll(alerts ...*Alert) bool {
ams.mtx.RLock()
if len(ams.ams) == 0 {
ams.mtx.RUnlock()
continue
}
if len(ams.cfg.AlertRelabelConfigs) > 0 {
amAlerts = relabelAlerts(ams.cfg.AlertRelabelConfigs, labels.Labels{}, alerts)
if len(amAlerts) == 0 {

View file

@ -26,13 +26,17 @@ import (
"testing"
"time"
"github.com/go-kit/log"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/atomic"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/labels"
@ -697,117 +701,149 @@ func TestLabelsToOpenAPILabelSet(t *testing.T) {
require.Equal(t, models.LabelSet{"aaa": "111", "bbb": "222"}, labelsToOpenAPILabelSet(labels.FromStrings("aaa", "111", "bbb", "222")))
}
// TestHangingNotifier validates that targets updates happen even when there are
// queued alerts.
// TestHangingNotifier ensures that the notifier takes into account SD changes even when there are
// queued alerts. This test reproduces the issue described in https://github.com/prometheus/prometheus/issues/13676.
// and https://github.com/prometheus/prometheus/issues/8768.
func TestHangingNotifier(t *testing.T) {
// Note: When targets are not updated in time, this test is flaky because go
// selects are not deterministic. Therefore we run 10 subtests to run into the issue.
for i := 0; i < 10; i++ {
t.Run(strconv.Itoa(i), func(t *testing.T) {
var (
done = make(chan struct{})
changed = make(chan struct{})
syncCh = make(chan map[string][]*targetgroup.Group)
)
const (
batches = 100
alertsCount = maxBatchSize * batches
)
defer func() {
close(done)
}()
var (
sendTimeout = 10 * time.Millisecond
sdUpdatert = sendTimeout / 2
var calledOnce bool
// Setting up a bad server. This server hangs for 2 seconds.
badServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if calledOnce {
t.Fatal("hanging server called multiple times")
}
calledOnce = true
select {
case <-done:
case <-time.After(2 * time.Second):
}
}))
badURL, err := url.Parse(badServer.URL)
require.NoError(t, err)
badAddress := badURL.Host // Used for __name__ label in targets.
done = make(chan struct{})
)
// Setting up a bad server. This server returns fast, signaling requests on
// by closing the changed channel.
goodServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
close(changed)
}))
goodURL, err := url.Parse(goodServer.URL)
require.NoError(t, err)
goodAddress := goodURL.Host // Used for __name__ label in targets.
defer func() {
close(done)
}()
h := NewManager(
&Options{
QueueCapacity: 20 * maxBatchSize,
},
nil,
)
// Set up a faulty Alertmanager.
var faultyCalled atomic.Bool
faultyServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
faultyCalled.Store(true)
select {
case <-done:
case <-time.After(time.Hour):
}
}))
faultyURL, err := url.Parse(faultyServer.URL)
require.NoError(t, err)
h.alertmanagers = make(map[string]*alertmanagerSet)
// Set up a functional Alertmanager.
var functionalCalled atomic.Bool
functionalServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
functionalCalled.Store(true)
}))
functionalURL, err := url.Parse(functionalServer.URL)
require.NoError(t, err)
am1Cfg := config.DefaultAlertmanagerConfig
am1Cfg.Timeout = model.Duration(200 * time.Millisecond)
// Initialize the discovery manager
// This is relevant as the updates aren't sent continually in real life, but only each updatert.
// The old implementation of TestHangingNotifier didn't take that into acount.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
reg := prometheus.NewRegistry()
sdMetrics, err := discovery.RegisterSDMetrics(reg, discovery.NewRefreshMetrics(reg))
require.NoError(t, err)
sdManager := discovery.NewManager(
ctx,
log.NewNopLogger(),
reg,
sdMetrics,
discovery.Name("sd-manager"),
discovery.Updatert(sdUpdatert),
)
go sdManager.Run()
h.alertmanagers["config-0"] = &alertmanagerSet{
ams: []alertmanager{},
cfg: &am1Cfg,
metrics: h.metrics,
}
go h.Run(syncCh)
defer h.Stop()
// Set up the notifier with both faulty and functional Alertmanagers.
notifier := NewManager(
&Options{
QueueCapacity: alertsCount,
},
nil,
)
notifier.alertmanagers = make(map[string]*alertmanagerSet)
amCfg := config.DefaultAlertmanagerConfig
amCfg.Timeout = model.Duration(sendTimeout)
notifier.alertmanagers["config-0"] = &alertmanagerSet{
ams: []alertmanager{
alertmanagerMock{
urlf: func() string { return faultyURL.String() },
},
alertmanagerMock{
urlf: func() string { return functionalURL.String() },
},
},
cfg: &amCfg,
metrics: notifier.metrics,
}
go notifier.Run(sdManager.SyncCh())
defer notifier.Stop()
var alerts []*Alert
for i := range make([]struct{}, 20*maxBatchSize) {
alerts = append(alerts, &Alert{
Labels: labels.FromStrings("alertname", strconv.Itoa(i)),
})
}
require.Len(t, notifier.Alertmanagers(), 2)
// Injecting the hanging server URL.
syncCh <- map[string][]*targetgroup.Group{
"config-0": {
{
Targets: []model.LabelSet{
{
model.AddressLabel: model.LabelValue(badAddress),
},
},
},
},
}
// Queing alerts.
h.Send(alerts...)
// Updating with a working alertmanager target.
go func() {
select {
case syncCh <- map[string][]*targetgroup.Group{
"config-0": {
{
Targets: []model.LabelSet{
{
model.AddressLabel: model.LabelValue(goodAddress),
},
},
},
},
}:
case <-done:
}
}()
select {
case <-time.After(1 * time.Second):
t.Fatalf("Timeout after 1 second, targets not synced in time.")
case <-changed:
// The good server has been hit in less than 3 seconds, therefore
// targets have been updated before a second call could be made to the
// bad server.
}
// Enqueue the alerts.
var alerts []*Alert
for i := range make([]struct{}, alertsCount) {
alerts = append(alerts, &Alert{
Labels: labels.FromStrings("alertname", strconv.Itoa(i)),
})
}
notifier.Send(alerts...)
// Wait for the Alertmanagers to start receiving alerts.
// 10*sdUpdatert is used as an arbitrary timeout here.
timeout := time.After(10 * sdUpdatert)
loop1:
for {
select {
case <-timeout:
t.Fatalf("Timeout waiting for the alertmanagers to be reached for the first time.")
default:
if faultyCalled.Load() && functionalCalled.Load() {
break loop1
}
}
}
// Request to remove the faulty Alertmanager.
c := map[string]discovery.Configs{
"config-0": {
discovery.StaticConfig{
&targetgroup.Group{
Targets: []model.LabelSet{
{
model.AddressLabel: model.LabelValue(functionalURL.Host),
},
},
},
},
},
}
require.NoError(t, sdManager.ApplyConfig(c))
// The notifier should not wait until the alerts queue is empty to apply the discovery changes
// A faulty Alertmanager could cause each alert sending cycle to take up to AlertmanagerConfig.Timeout
// The queue may never be emptied, as the arrival rate could be larger than the departure rate
// It could even overflow and alerts could be dropped.
timeout = time.After(batches * sendTimeout)
loop2:
for {
select {
case <-timeout:
t.Fatalf("Timeout, the faulty alertmanager not removed on time.")
default:
// The faulty alertmanager was dropped.
if len(notifier.Alertmanagers()) == 1 {
// Prevent from TOCTOU.
require.Positive(t, notifier.queueLen())
break loop2
}
require.Positive(t, notifier.queueLen(), "The faulty alertmanager wasn't dropped before the alerts queue was emptied.")
}
}
}

View file

@ -2015,47 +2015,6 @@ func TestSubquerySelector(t *testing.T) {
}
}
func TestTimestampFunction_StepsMoreOftenThanSamples(t *testing.T) {
engine := newTestEngine()
storage := promqltest.LoadedStorage(t, `
load 1m
metric 0+1x1000
`)
t.Cleanup(func() { storage.Close() })
query := "timestamp(metric)"
start := time.Unix(0, 0)
end := time.Unix(61, 0)
interval := time.Second
// We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s.
expectedPoints := []promql.FPoint{}
for t := 0; t <= 59; t++ {
expectedPoints = append(expectedPoints, promql.FPoint{F: 0, T: int64(t * 1000)})
}
expectedPoints = append(
expectedPoints,
promql.FPoint{F: 60, T: 60_000},
promql.FPoint{F: 60, T: 61_000},
)
expectedResult := promql.Matrix{
promql.Series{
Floats: expectedPoints,
Metric: labels.EmptyLabels(),
},
}
qry, err := engine.NewRangeQuery(context.Background(), storage, nil, query, start, end, interval)
require.NoError(t, err)
res := qry.Exec(context.Background())
require.NoError(t, res.Err)
testutil.RequireEqual(t, expectedResult, res.Value)
}
type FakeQueryLogger struct {
closed bool
logs []interface{}
@ -3061,167 +3020,6 @@ func TestEngineOptsValidation(t *testing.T) {
}
}
func TestRangeQuery(t *testing.T) {
cases := []struct {
Name string
Load string
Query string
Result parser.Value
Start time.Time
End time.Time
Interval time.Duration
}{
{
Name: "sum_over_time with all values",
Load: `load 30s
bar 0 1 10 100 1000`,
Query: "sum_over_time(bar[30s])",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
Start: time.Unix(0, 0),
End: time.Unix(120, 0),
Interval: 60 * time.Second,
},
{
Name: "sum_over_time with trailing values",
Load: `load 30s
bar 0 1 10 100 1000 0 0 0 0`,
Query: "sum_over_time(bar[30s])",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
Start: time.Unix(0, 0),
End: time.Unix(120, 0),
Interval: 60 * time.Second,
},
{
Name: "sum_over_time with all values long",
Load: `load 30s
bar 0 1 10 100 1000 10000 100000 1000000 10000000`,
Query: "sum_over_time(bar[30s])",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}, {F: 110000, T: 180000}, {F: 11000000, T: 240000}},
Metric: labels.EmptyLabels(),
},
},
Start: time.Unix(0, 0),
End: time.Unix(240, 0),
Interval: 60 * time.Second,
},
{
Name: "sum_over_time with all values random",
Load: `load 30s
bar 5 17 42 2 7 905 51`,
Query: "sum_over_time(bar[30s])",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 5, T: 0}, {F: 59, T: 60000}, {F: 9, T: 120000}, {F: 956, T: 180000}},
Metric: labels.EmptyLabels(),
},
},
Start: time.Unix(0, 0),
End: time.Unix(180, 0),
Interval: 60 * time.Second,
},
{
Name: "metric query",
Load: `load 30s
metric 1+1x4`,
Query: "metric",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
Start: time.Unix(0, 0),
End: time.Unix(120, 0),
Interval: 1 * time.Minute,
},
{
Name: "metric query with trailing values",
Load: `load 30s
metric 1+1x8`,
Query: "metric",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
Start: time.Unix(0, 0),
End: time.Unix(120, 0),
Interval: 1 * time.Minute,
},
{
Name: "short-circuit",
Load: `load 30s
foo{job="1"} 1+1x4
bar{job="2"} 1+1x4`,
Query: `foo > 2 or bar`,
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}},
Metric: labels.FromStrings(
"__name__", "bar",
"job", "2",
),
},
promql.Series{
Floats: []promql.FPoint{{F: 3, T: 60000}, {F: 5, T: 120000}},
Metric: labels.FromStrings(
"__name__", "foo",
"job", "1",
),
},
},
Start: time.Unix(0, 0),
End: time.Unix(120, 0),
Interval: 1 * time.Minute,
},
{
Name: "drop-metric-name",
Load: `load 30s
requests{job="1", __address__="bar"} 100`,
Query: `requests * 2`,
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 200, T: 0}, {F: 200, T: 60000}, {F: 200, T: 120000}},
Metric: labels.FromStrings(
"__address__", "bar",
"job", "1",
),
},
},
Start: time.Unix(0, 0),
End: time.Unix(120, 0),
Interval: 1 * time.Minute,
},
}
for _, c := range cases {
t.Run(c.Name, func(t *testing.T) {
engine := newTestEngine()
storage := promqltest.LoadedStorage(t, c.Load)
t.Cleanup(func() { storage.Close() })
qry, err := engine.NewRangeQuery(context.Background(), storage, nil, c.Query, c.Start, c.End, c.Interval)
require.NoError(t, err)
res := qry.Exec(context.Background())
require.NoError(t, res.Err)
testutil.RequireEqual(t, c.Result, res.Value)
})
}
}
func TestInstantQueryWithRangeVectorSelector(t *testing.T) {
engine := newTestEngine()

View file

@ -148,6 +148,13 @@ func TestExprString(t *testing.T) {
in: `{"_0"="1"}`,
out: `{_0="1"}`,
},
{
in: `{""="0"}`,
},
{
in: "{``=\"0\"}",
out: `{""="0"}`,
},
}
for _, test := range inputs {

View file

@ -130,14 +130,3 @@ eval_fail instant at 1m ceil({__name__=~'testmetric1|testmetric2'})
eval_fail instant at 1m ceil({__name__=~'testmetric1|testmetric2'})
expected_fail_regexp (vector cannot contain metrics .*|something else went wrong)
```
### Native histograms with custom buckets (NHCB)
For native histogram with custom buckets (NHCB) series that have been loaded with `load_with_nhcb`, you can use `eval_with_nhcb` instead on the queries of the classic histogram float bucket series to run additional queries on the
NHCB version. We use best effort heuristics to convert the query to its NHCB equivalent, and raise an error if it looks like the conversion was not effective.
For example, `eval_with_nhcb instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))` is shorthand for running these queries:
```
eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) # Classic histogram
eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (job)) # NHCB
```

View file

@ -44,38 +44,10 @@ import (
)
var (
patSpace = regexp.MustCompile("[\t ]+")
patLoad = regexp.MustCompile(`^load(?:_(with_nhcb))?\s+(.+?)$`)
patEvalInstant = regexp.MustCompile(`^eval(?:_(with_nhcb))?(?:_(fail|warn|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`)
patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`)
patWhitespace = regexp.MustCompile(`\s+`)
patBucket = regexp.MustCompile(`_bucket\b`)
patLE = regexp.MustCompile(`\ble\b`)
histogramBucketReplacements = []struct {
pattern *regexp.Regexp
repl string
}{
{
pattern: regexp.MustCompile(`_bucket\b`),
repl: "",
},
{
pattern: regexp.MustCompile(`\s+by\s+\(\s*le\s*\)`),
repl: "",
},
{
pattern: regexp.MustCompile(`\(\s*le\s*,\s*`),
repl: "(",
},
{
pattern: regexp.MustCompile(`,\s*le\s*,\s*`),
repl: ", ",
},
{
pattern: regexp.MustCompile(`,\s*le\s*\)`),
repl: ")",
},
}
patSpace = regexp.MustCompile("[\t ]+")
patLoad = regexp.MustCompile(`^load(?:_(with_nhcb))?\s+(.+?)$`)
patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`)
patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`)
)
const (
@ -251,19 +223,17 @@ func (t *test) parseEval(lines []string, i int) (int, *evalCmd, error) {
rangeParts := patEvalRange.FindStringSubmatch(lines[i])
if instantParts == nil && rangeParts == nil {
return i, nil, raise(i, "invalid evaluation command. Must be either 'eval[_with_nhcb][_fail|_warn|_ordered] instant [at <offset:duration>] <query>' or 'eval[_fail|_warn] range from <from> to <to> step <step> <query>'")
return i, nil, raise(i, "invalid evaluation command. Must be either 'eval[_fail|_warn|_ordered] instant [at <offset:duration>] <query>' or 'eval[_fail|_warn] range from <from> to <to> step <step> <query>'")
}
isInstant := instantParts != nil
var withNHCB bool
var mod string
var expr string
if isInstant {
withNHCB = instantParts[1] == "with_nhcb"
mod = instantParts[2]
expr = instantParts[4]
mod = instantParts[1]
expr = instantParts[3]
} else {
mod = rangeParts[1]
expr = rangeParts[5]
@ -291,7 +261,7 @@ func (t *test) parseEval(lines []string, i int) (int, *evalCmd, error) {
var cmd *evalCmd
if isInstant {
at := instantParts[3]
at := instantParts[2]
offset, err := model.ParseDuration(at)
if err != nil {
return i, nil, formatErr("invalid timestamp definition %q: %s", at, err)
@ -335,7 +305,6 @@ func (t *test) parseEval(lines []string, i int) (int, *evalCmd, error) {
case "warn":
cmd.warn = true
}
cmd.withNHCB = withNHCB
for j := 1; i+1 < len(lines); j++ {
i++
@ -673,7 +642,6 @@ type evalCmd struct {
isRange bool // if false, instant query
fail, warn, ordered bool
withNHCB bool
expectedFailMessage string
expectedFailRegexp *regexp.Regexp
@ -1066,26 +1034,6 @@ func (t *test) execInstantEval(cmd *evalCmd, engine promql.QueryEngine) error {
if err := t.runInstantQuery(iq, cmd, engine); err != nil {
return err
}
if cmd.withNHCB {
if !strings.Contains(iq.expr, "_bucket") {
return fmt.Errorf("expected '_bucket' in the expression '%q'", iq.expr)
}
origExpr := iq.expr
for _, rep := range histogramBucketReplacements {
iq.expr = rep.pattern.ReplaceAllString(iq.expr, rep.repl)
}
switch {
case patWhitespace.ReplaceAllString(iq.expr, "") == patWhitespace.ReplaceAllString(origExpr, ""):
return fmt.Errorf("query rewrite of '%q' had no effect", iq.expr)
case patBucket.MatchString(iq.expr):
return fmt.Errorf("rewritten query '%q' still has '_bucket'", iq.expr)
case patLE.MatchString(iq.expr):
return fmt.Errorf("rewritten query '%q' still has 'le'", iq.expr)
}
if err := t.runInstantQuery(iq, cmd, engine); err != nil {
return err
}
}
}
return nil
}

View file

@ -1213,3 +1213,11 @@ eval instant at 5m log10(exp_root_log - 20)
{l="y"} -Inf
clear
# Test that timestamp() handles the scenario where there are more steps than samples.
load 1m
metric 0+1x1000
# We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s.
eval range from 0 to 61s step 1s timestamp(metric)
{} 0x59 60 60

View file

@ -25,9 +25,8 @@ load_with_nhcb 5m
testhistogram2_bucket{le="6"} 0+3x10
testhistogram2_bucket{le="+Inf"} 0+3x10
# Another test histogram, where there are 0 counts where there is
# an infinite bound, allowing us to calculate standard deviation
# and variance properly.
# Another test histogram, this time without any observations in the +Inf bucket.
# This enables a meaningful calculation of standard deviation and variance.
load_with_nhcb 5m
testhistogram3_bucket{le="0", start="positive"} 0+0x10
testhistogram3_bucket{le="0.1", start="positive"} 0+5x10
@ -106,149 +105,149 @@ eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[5m]))
# Test histogram_quantile.
eval_with_nhcb instant at 50m histogram_quantile(0, testhistogram3_bucket)
eval instant at 50m histogram_quantile(0, testhistogram3_bucket)
{start="positive"} 0
{start="negative"} -0.25
eval_with_nhcb instant at 50m histogram_quantile(0.25, testhistogram3_bucket)
eval instant at 50m histogram_quantile(0.25, testhistogram3_bucket)
{start="positive"} 0.055
{start="negative"} -0.225
eval_with_nhcb instant at 50m histogram_quantile(0.5, testhistogram3_bucket)
eval instant at 50m histogram_quantile(0.5, testhistogram3_bucket)
{start="positive"} 0.125
{start="negative"} -0.2
eval_with_nhcb instant at 50m histogram_quantile(0.75, testhistogram3_bucket)
eval instant at 50m histogram_quantile(0.75, testhistogram3_bucket)
{start="positive"} 0.45
{start="negative"} -0.15
eval_with_nhcb instant at 50m histogram_quantile(1, testhistogram3_bucket)
eval instant at 50m histogram_quantile(1, testhistogram3_bucket)
{start="positive"} 1
{start="negative"} -0.1
# Quantile too low.
eval_with_nhcb_warn instant at 50m histogram_quantile(-0.1, testhistogram_bucket)
eval_warn instant at 50m histogram_quantile(-0.1, testhistogram_bucket)
{start="positive"} -Inf
{start="negative"} -Inf
# Quantile too high.
eval_with_nhcb_warn instant at 50m histogram_quantile(1.01, testhistogram_bucket)
eval_warn instant at 50m histogram_quantile(1.01, testhistogram_bucket)
{start="positive"} +Inf
{start="negative"} +Inf
# Quantile invalid.
eval_with_nhcb_warn instant at 50m histogram_quantile(NaN, testhistogram_bucket)
eval_warn instant at 50m histogram_quantile(NaN, testhistogram_bucket)
{start="positive"} NaN
{start="negative"} NaN
# Quantile value in lowest bucket.
eval_with_nhcb instant at 50m histogram_quantile(0, testhistogram_bucket)
eval instant at 50m histogram_quantile(0, testhistogram_bucket)
{start="positive"} 0
{start="negative"} -0.2
# Quantile value in highest bucket.
eval_with_nhcb instant at 50m histogram_quantile(1, testhistogram_bucket)
eval instant at 50m histogram_quantile(1, testhistogram_bucket)
{start="positive"} 1
{start="negative"} 0.3
# Finally some useful quantiles.
eval_with_nhcb instant at 50m histogram_quantile(0.2, testhistogram_bucket)
eval instant at 50m histogram_quantile(0.2, testhistogram_bucket)
{start="positive"} 0.048
{start="negative"} -0.2
eval_with_nhcb instant at 50m histogram_quantile(0.5, testhistogram_bucket)
eval instant at 50m histogram_quantile(0.5, testhistogram_bucket)
{start="positive"} 0.15
{start="negative"} -0.15
eval_with_nhcb instant at 50m histogram_quantile(0.8, testhistogram_bucket)
eval instant at 50m histogram_quantile(0.8, testhistogram_bucket)
{start="positive"} 0.72
{start="negative"} 0.3
# More realistic with rates.
eval_with_nhcb instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[5m]))
eval instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[5m]))
{start="positive"} 0.048
{start="negative"} -0.2
eval_with_nhcb instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[5m]))
eval instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[5m]))
{start="positive"} 0.15
{start="negative"} -0.15
eval_with_nhcb instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[5m]))
eval instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[5m]))
{start="positive"} 0.72
{start="negative"} 0.3
# Want results exactly in the middle of the bucket.
eval_with_nhcb instant at 7m histogram_quantile(1./6., testhistogram2_bucket)
eval instant at 7m histogram_quantile(1./6., testhistogram2_bucket)
{} 1
eval_with_nhcb instant at 7m histogram_quantile(0.5, testhistogram2_bucket)
eval instant at 7m histogram_quantile(0.5, testhistogram2_bucket)
{} 3
eval_with_nhcb instant at 7m histogram_quantile(5./6., testhistogram2_bucket)
eval instant at 7m histogram_quantile(5./6., testhistogram2_bucket)
{} 5
eval_with_nhcb instant at 47m histogram_quantile(1./6., rate(testhistogram2_bucket[15m]))
eval instant at 47m histogram_quantile(1./6., rate(testhistogram2_bucket[15m]))
{} 1
eval_with_nhcb instant at 47m histogram_quantile(0.5, rate(testhistogram2_bucket[15m]))
eval instant at 47m histogram_quantile(0.5, rate(testhistogram2_bucket[15m]))
{} 3
eval_with_nhcb instant at 47m histogram_quantile(5./6., rate(testhistogram2_bucket[15m]))
eval instant at 47m histogram_quantile(5./6., rate(testhistogram2_bucket[15m]))
{} 5
# Aggregated histogram: Everything in one.
eval_with_nhcb instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le))
eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le))
{} 0.075
eval_with_nhcb instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le))
eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le))
{} 0.1277777777777778
# Aggregated histogram: Everything in one. Now with avg, which does not change anything.
eval_with_nhcb instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[5m])) by (le))
eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[5m])) by (le))
{} 0.075
eval_with_nhcb instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[5m])) by (le))
eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[5m])) by (le))
{} 0.12777777777777778
# Aggregated histogram: By instance.
eval_with_nhcb instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance))
eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance))
{instance="ins1"} 0.075
{instance="ins2"} 0.075
eval_with_nhcb instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance))
eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance))
{instance="ins1"} 0.1333333333
{instance="ins2"} 0.125
# Aggregated histogram: By job.
eval_with_nhcb instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))
eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))
{job="job1"} 0.1
{job="job2"} 0.0642857142857143
eval_with_nhcb instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))
eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job))
{job="job1"} 0.14
{job="job2"} 0.1125
# Aggregated histogram: By job and instance.
eval_with_nhcb instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))
eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))
{instance="ins1", job="job1"} 0.11
{instance="ins2", job="job1"} 0.09
{instance="ins1", job="job2"} 0.06
{instance="ins2", job="job2"} 0.0675
eval_with_nhcb instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))
eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance))
{instance="ins1", job="job1"} 0.15
{instance="ins2", job="job1"} 0.1333333333333333
{instance="ins1", job="job2"} 0.1
{instance="ins2", job="job2"} 0.1166666666666667
# The unaggregated histogram for comparison. Same result as the previous one.
eval_with_nhcb instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[5m]))
eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[5m]))
{instance="ins1", job="job1"} 0.11
{instance="ins2", job="job1"} 0.09
{instance="ins1", job="job2"} 0.06
{instance="ins2", job="job2"} 0.0675
eval_with_nhcb instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[5m]))
eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[5m]))
{instance="ins1", job="job1"} 0.15
{instance="ins2", job="job1"} 0.13333333333333333
{instance="ins1", job="job2"} 0.1
@ -288,11 +287,11 @@ eval instant at 50m histogram_quantile(0.5, rate(mixed[5m]))
{instance="ins1", job="job1"} 0.2
{instance="ins2", job="job1"} NaN
eval_with_nhcb instant at 50m histogram_quantile(0.75, rate(mixed_bucket[5m]))
eval instant at 50m histogram_quantile(0.75, rate(mixed_bucket[5m]))
{instance="ins1", job="job1"} 0.2
{instance="ins2", job="job1"} NaN
eval_with_nhcb instant at 50m histogram_quantile(1, rate(mixed_bucket[5m]))
eval instant at 50m histogram_quantile(1, rate(mixed_bucket[5m]))
{instance="ins1", job="job1"} 0.2
{instance="ins2", job="job1"} NaN
@ -301,7 +300,7 @@ load_with_nhcb 5m
empty_bucket{le="0.2", job="job1", instance="ins1"} 0x10
empty_bucket{le="+Inf", job="job1", instance="ins1"} 0x10
eval_with_nhcb instant at 50m histogram_quantile(0.2, rate(empty_bucket[5m]))
eval instant at 50m histogram_quantile(0.2, rate(empty_bucket[5m]))
{instance="ins1", job="job1"} NaN
# Load a duplicate histogram with a different name to test failure scenario on multiple histograms with the same label set
@ -311,4 +310,4 @@ load_with_nhcb 5m
request_duration_seconds2_bucket{job="job1", instance="ins1", le="0.2"} 0+3x10
request_duration_seconds2_bucket{job="job1", instance="ins1", le="+Inf"} 0+4x10
eval_with_nhcb_fail instant at 50m histogram_quantile(0.99, {__name__=~"request_duration_seconds\\d*_bucket$"})
eval_fail instant at 50m histogram_quantile(0.99, {__name__=~"request_duration_seconds\\d*_bucket$"})

View file

@ -715,6 +715,24 @@ eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_4)
eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4)
{} 1
clear
# Counter reset only noticeable in a single bucket.
load 5m
reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}}
eval instant at 10m increase(reset_in_bucket[15m])
{} {{count:9 sum:10.5 buckets:[1.5 3 4.5]}}
# The following two test the "fast path" where only sum and count is decoded.
eval instant at 10m histogram_count(increase(reset_in_bucket[15m]))
{} 9
eval instant at 10m histogram_sum(increase(reset_in_bucket[15m]))
{} 10.5
clear
# Test native histograms with custom buckets.
load 5m
custom_buckets_histogram {{schema:-53 sum:5 count:4 custom_values:[5 10] buckets:[1 2 1]}}x10

View file

@ -0,0 +1,73 @@
# sum_over_time with all values
load 30s
bar 0 1 10 100 1000
eval range from 0 to 2m step 1m sum_over_time(bar[30s])
{} 0 11 1100
clear
# sum_over_time with trailing values
load 30s
bar 0 1 10 100 1000 0 0 0 0
eval range from 0 to 2m step 1m sum_over_time(bar[30s])
{} 0 11 1100
clear
# sum_over_time with all values long
load 30s
bar 0 1 10 100 1000 10000 100000 1000000 10000000
eval range from 0 to 4m step 1m sum_over_time(bar[30s])
{} 0 11 1100 110000 11000000
clear
# sum_over_time with all values random
load 30s
bar 5 17 42 2 7 905 51
eval range from 0 to 3m step 1m sum_over_time(bar[30s])
{} 5 59 9 956
clear
# metric query
load 30s
metric 1+1x4
eval range from 0 to 2m step 1m metric
metric 1 3 5
clear
# metric query with trailing values
load 30s
metric 1+1x8
eval range from 0 to 2m step 1m metric
metric 1 3 5
clear
# short-circuit
load 30s
foo{job="1"} 1+1x4
bar{job="2"} 1+1x4
eval range from 0 to 2m step 1m foo > 2 or bar
foo{job="1"} _ 3 5
bar{job="2"} 1 3 5
clear
# Drop metric name
load 30s
requests{job="1", __address__="bar"} 100
eval range from 0 to 2m step 1m requests * 2
{job="1", __address__="bar"} 200 200 200
clear

View file

@ -1911,18 +1911,12 @@ func TestDependencyMapUpdatesOnGroupUpdate(t *testing.T) {
}
func TestAsyncRuleEvaluation(t *testing.T) {
storage := teststorage.New(t)
t.Cleanup(func() { storage.Close() })
var (
inflightQueries atomic.Int32
maxInflight atomic.Int32
)
t.Run("synchronous evaluation with independent rules", func(t *testing.T) {
// Reset.
inflightQueries.Store(0)
maxInflight.Store(0)
t.Parallel()
storage := teststorage.New(t)
t.Cleanup(func() { storage.Close() })
inflightQueries := atomic.Int32{}
maxInflight := atomic.Int32{}
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
@ -1950,9 +1944,11 @@ func TestAsyncRuleEvaluation(t *testing.T) {
})
t.Run("asynchronous evaluation with independent and dependent rules", func(t *testing.T) {
// Reset.
inflightQueries.Store(0)
maxInflight.Store(0)
t.Parallel()
storage := teststorage.New(t)
t.Cleanup(func() { storage.Close() })
inflightQueries := atomic.Int32{}
maxInflight := atomic.Int32{}
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
@ -1986,9 +1982,11 @@ func TestAsyncRuleEvaluation(t *testing.T) {
})
t.Run("asynchronous evaluation of all independent rules, insufficient concurrency", func(t *testing.T) {
// Reset.
inflightQueries.Store(0)
maxInflight.Store(0)
t.Parallel()
storage := teststorage.New(t)
t.Cleanup(func() { storage.Close() })
inflightQueries := atomic.Int32{}
maxInflight := atomic.Int32{}
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
@ -2022,9 +2020,11 @@ func TestAsyncRuleEvaluation(t *testing.T) {
})
t.Run("asynchronous evaluation of all independent rules, sufficient concurrency", func(t *testing.T) {
// Reset.
inflightQueries.Store(0)
maxInflight.Store(0)
t.Parallel()
storage := teststorage.New(t)
t.Cleanup(func() { storage.Close() })
inflightQueries := atomic.Int32{}
maxInflight := atomic.Int32{}
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
@ -2099,7 +2099,7 @@ func TestBoundedRuleEvalConcurrency(t *testing.T) {
require.EqualValues(t, maxInflight.Load(), int32(maxConcurrency)+int32(groupCount))
}
const artificialDelay = 15 * time.Millisecond
const artificialDelay = 250 * time.Millisecond
func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.Int32, maxConcurrent int64) *ManagerOptions {
var inflightMu sync.Mutex

View file

@ -95,7 +95,7 @@ func EncodeReadResponse(resp *prompb.ReadResponse, w http.ResponseWriter) error
// ToQuery builds a Query proto.
func ToQuery(from, to int64, matchers []*labels.Matcher, hints *storage.SelectHints) (*prompb.Query, error) {
ms, err := toLabelMatchers(matchers)
ms, err := ToLabelMatchers(matchers)
if err != nil {
return nil, err
}
@ -166,7 +166,7 @@ func ToQueryResult(ss storage.SeriesSet, sampleLimit int) (*prompb.QueryResult,
}
resp.Timeseries = append(resp.Timeseries, &prompb.TimeSeries{
Labels: labelsToLabelsProto(series.Labels(), nil),
Labels: LabelsToLabelsProto(series.Labels(), nil),
Samples: samples,
Histograms: histograms,
})
@ -182,7 +182,7 @@ func FromQueryResult(sortSeries bool, res *prompb.QueryResult) storage.SeriesSet
if err := validateLabelsAndMetricName(ts.Labels); err != nil {
return errSeriesSet{err: err}
}
lbls := labelProtosToLabels(&b, ts.Labels)
lbls := LabelProtosToLabels(&b, ts.Labels)
series = append(series, &concreteSeries{labels: lbls, floats: ts.Samples, histograms: ts.Histograms})
}
@ -235,7 +235,7 @@ func StreamChunkedReadResponses(
for ss.Next() {
series := ss.At()
iter = series.Iterator(iter)
lbls = MergeLabels(labelsToLabelsProto(series.Labels(), lbls), sortedExternalLabels)
lbls = MergeLabels(LabelsToLabelsProto(series.Labels(), lbls), sortedExternalLabels)
maxDataLength := maxBytesInFrame
for _, lbl := range lbls {
@ -566,7 +566,8 @@ func validateLabelsAndMetricName(ls []prompb.Label) error {
return nil
}
func toLabelMatchers(matchers []*labels.Matcher) ([]*prompb.LabelMatcher, error) {
// ToLabelMatchers converts Prometheus label matchers to protobuf label matchers.
func ToLabelMatchers(matchers []*labels.Matcher) ([]*prompb.LabelMatcher, error) {
pbMatchers := make([]*prompb.LabelMatcher, 0, len(matchers))
for _, m := range matchers {
var mType prompb.LabelMatcher_Type
@ -591,7 +592,7 @@ func toLabelMatchers(matchers []*labels.Matcher) ([]*prompb.LabelMatcher, error)
return pbMatchers, nil
}
// FromLabelMatchers parses protobuf label matchers to Prometheus label matchers.
// FromLabelMatchers converts protobuf label matchers to Prometheus label matchers.
func FromLabelMatchers(matchers []*prompb.LabelMatcher) ([]*labels.Matcher, error) {
result := make([]*labels.Matcher, 0, len(matchers))
for _, matcher := range matchers {
@ -621,7 +622,7 @@ func exemplarProtoToExemplar(b *labels.ScratchBuilder, ep prompb.Exemplar) exemp
timestamp := ep.Timestamp
return exemplar.Exemplar{
Labels: labelProtosToLabels(b, ep.Labels),
Labels: LabelProtosToLabels(b, ep.Labels),
Value: ep.Value,
Ts: timestamp,
HasTs: timestamp != 0,
@ -761,7 +762,9 @@ func LabelProtosToMetric(labelPairs []*prompb.Label) model.Metric {
return metric
}
func labelProtosToLabels(b *labels.ScratchBuilder, labelPairs []prompb.Label) labels.Labels {
// LabelProtosToLabels transforms prompb labels into labels. The labels builder
// will be used to build the returned labels.
func LabelProtosToLabels(b *labels.ScratchBuilder, labelPairs []prompb.Label) labels.Labels {
b.Reset()
for _, l := range labelPairs {
b.Add(l.Name, l.Value)
@ -770,9 +773,9 @@ func labelProtosToLabels(b *labels.ScratchBuilder, labelPairs []prompb.Label) la
return b.Labels()
}
// labelsToLabelsProto transforms labels into prompb labels. The buffer slice
// LabelsToLabelsProto transforms labels into prompb labels. The buffer slice
// will be used to avoid allocations if it is big enough to store the labels.
func labelsToLabelsProto(lbls labels.Labels, buf []prompb.Label) []prompb.Label {
func LabelsToLabelsProto(lbls labels.Labels, buf []prompb.Label) []prompb.Label {
result := buf[:0]
lbls.Range(func(l labels.Label) {
result = append(result, prompb.Label{

View file

@ -729,8 +729,8 @@ func TestFloatHistogramToProtoConvert(t *testing.T) {
}
func TestStreamResponse(t *testing.T) {
lbs1 := labelsToLabelsProto(labels.FromStrings("instance", "localhost1", "job", "demo1"), nil)
lbs2 := labelsToLabelsProto(labels.FromStrings("instance", "localhost2", "job", "demo2"), nil)
lbs1 := LabelsToLabelsProto(labels.FromStrings("instance", "localhost1", "job", "demo1"), nil)
lbs2 := LabelsToLabelsProto(labels.FromStrings("instance", "localhost2", "job", "demo2"), nil)
chunk := prompb.Chunk{
Type: prompb.Chunk_XOR,
Data: make([]byte, 100),
@ -802,7 +802,7 @@ func (c *mockChunkSeriesSet) Next() bool {
func (c *mockChunkSeriesSet) At() storage.ChunkSeries {
return &storage.ChunkSeriesEntry{
Lset: labelProtosToLabels(&c.builder, c.chunkedSeries[c.index].Labels),
Lset: LabelProtosToLabels(&c.builder, c.chunkedSeries[c.index].Labels),
ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
return &mockChunkIterator{
chunks: c.chunkedSeries[c.index].Chunks,

View file

@ -1507,7 +1507,7 @@ func (s *shards) populateTimeSeries(batch []timeSeries, pendingData []prompb.Tim
// Number of pending samples is limited by the fact that sendSamples (via sendSamplesWithBackoff)
// retries endlessly, so once we reach max samples, if we can never send to the endpoint we'll
// stop reading from the queue. This makes it safe to reference pendingSamples by index.
pendingData[nPending].Labels = labelsToLabelsProto(d.seriesLabels, pendingData[nPending].Labels)
pendingData[nPending].Labels = LabelsToLabelsProto(d.seriesLabels, pendingData[nPending].Labels)
switch d.sType {
case tSample:
pendingData[nPending].Samples = append(pendingData[nPending].Samples, prompb.Sample{
@ -1517,7 +1517,7 @@ func (s *shards) populateTimeSeries(batch []timeSeries, pendingData []prompb.Tim
nPendingSamples++
case tExemplar:
pendingData[nPending].Exemplars = append(pendingData[nPending].Exemplars, prompb.Exemplar{
Labels: labelsToLabelsProto(d.exemplarLabels, nil),
Labels: LabelsToLabelsProto(d.exemplarLabels, nil),
Value: d.value,
Timestamp: d.timestamp,
})

View file

@ -742,7 +742,7 @@ func (c *TestWriteClient) expectExemplars(ss []record.RefExemplar, series []reco
for _, s := range ss {
seriesName := getSeriesNameFromRef(series[s.Ref])
e := prompb.Exemplar{
Labels: labelsToLabelsProto(s.Labels, nil),
Labels: LabelsToLabelsProto(s.Labels, nil),
Timestamp: s.T,
Value: s.V,
}
@ -826,7 +826,7 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) error {
builder := labels.NewScratchBuilder(0)
count := 0
for _, ts := range reqProto.Timeseries {
labels := labelProtosToLabels(&builder, ts.Labels)
labels := LabelProtosToLabels(&builder, ts.Labels)
seriesName := labels.Get("__name__")
for _, sample := range ts.Samples {
count++

View file

@ -172,12 +172,12 @@ func TestSeriesSetFilter(t *testing.T) {
toRemove: []string{"foo"},
in: &prompb.QueryResult{
Timeseries: []*prompb.TimeSeries{
{Labels: labelsToLabelsProto(labels.FromStrings("foo", "bar", "a", "b"), nil)},
{Labels: LabelsToLabelsProto(labels.FromStrings("foo", "bar", "a", "b"), nil)},
},
},
expected: &prompb.QueryResult{
Timeseries: []*prompb.TimeSeries{
{Labels: labelsToLabelsProto(labels.FromStrings("a", "b"), nil)},
{Labels: LabelsToLabelsProto(labels.FromStrings("a", "b"), nil)},
},
},
},
@ -211,7 +211,7 @@ func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query) (*prom
q := &prompb.QueryResult{}
for _, s := range c.store {
l := labelProtosToLabels(&c.b, s.Labels)
l := LabelProtosToLabels(&c.b, s.Labels)
var notMatch bool
for _, m := range matchers {

View file

@ -116,7 +116,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err
b := labels.NewScratchBuilder(0)
var exemplarErr error
for _, ts := range req.Timeseries {
labels := labelProtosToLabels(&b, ts.Labels)
labels := LabelProtosToLabels(&b, ts.Labels)
if !labels.IsValid() {
level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", labels.String())
samplesWithInvalidLabels++

View file

@ -60,14 +60,14 @@ func TestRemoteWriteHandler(t *testing.T) {
j := 0
k := 0
for _, ts := range writeRequestFixture.Timeseries {
labels := labelProtosToLabels(&b, ts.Labels)
labels := LabelProtosToLabels(&b, ts.Labels)
for _, s := range ts.Samples {
requireEqual(t, mockSample{labels, s.Timestamp, s.Value}, appendable.samples[i])
i++
}
for _, e := range ts.Exemplars {
exemplarLabels := labelProtosToLabels(&b, e.Labels)
exemplarLabels := LabelProtosToLabels(&b, e.Labels)
requireEqual(t, mockExemplar{labels, exemplarLabels, e.Timestamp, e.Value}, appendable.exemplars[j])
j++
}

View file

@ -32,6 +32,8 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
common_templates "github.com/prometheus/common/helpers/templates"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/util/strutil"
)
@ -263,51 +265,7 @@ func NewTemplateExpander(
}
return fmt.Sprintf("%.4g%s", v, prefix), nil
},
"humanizeDuration": func(i interface{}) (string, error) {
v, err := convertToFloat(i)
if err != nil {
return "", err
}
if math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v), nil
}
if v == 0 {
return fmt.Sprintf("%.4gs", v), nil
}
if math.Abs(v) >= 1 {
sign := ""
if v < 0 {
sign = "-"
v = -v
}
duration := int64(v)
seconds := duration % 60
minutes := (duration / 60) % 60
hours := (duration / 60 / 60) % 24
days := duration / 60 / 60 / 24
// For days to minutes, we display seconds as an integer.
if days != 0 {
return fmt.Sprintf("%s%dd %dh %dm %ds", sign, days, hours, minutes, seconds), nil
}
if hours != 0 {
return fmt.Sprintf("%s%dh %dm %ds", sign, hours, minutes, seconds), nil
}
if minutes != 0 {
return fmt.Sprintf("%s%dm %ds", sign, minutes, seconds), nil
}
// For seconds, we display 4 significant digits.
return fmt.Sprintf("%s%.4gs", sign, v), nil
}
prefix := ""
for _, p := range []string{"m", "u", "n", "p", "f", "a", "z", "y"} {
if math.Abs(v) >= 1 {
break
}
prefix = p
v *= 1000
}
return fmt.Sprintf("%.4g%ss", v, prefix), nil
},
"humanizeDuration": common_templates.HumanizeDuration,
"humanizePercentage": func(i interface{}) (string, error) {
v, err := convertToFloat(i)
if err != nil {

View file

@ -81,19 +81,23 @@ type Options struct {
// NoLockfile disables creation and consideration of a lock file.
NoLockfile bool
// OutOfOrderTimeWindow specifies how much out of order is allowed, if any.
OutOfOrderTimeWindow int64
}
// DefaultOptions used for the WAL storage. They are reasonable for setups using
// millisecond-precision timestamps.
func DefaultOptions() *Options {
return &Options{
WALSegmentSize: wlog.DefaultSegmentSize,
WALCompression: wlog.CompressionNone,
StripeSize: tsdb.DefaultStripeSize,
TruncateFrequency: DefaultTruncateFrequency,
MinWALTime: DefaultMinWALTime,
MaxWALTime: DefaultMaxWALTime,
NoLockfile: false,
WALSegmentSize: wlog.DefaultSegmentSize,
WALCompression: wlog.CompressionNone,
StripeSize: tsdb.DefaultStripeSize,
TruncateFrequency: DefaultTruncateFrequency,
MinWALTime: DefaultMinWALTime,
MaxWALTime: DefaultMaxWALTime,
NoLockfile: false,
OutOfOrderTimeWindow: 0,
}
}
@ -812,6 +816,11 @@ func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v flo
series.Lock()
defer series.Unlock()
if t <= a.minValidTime(series.lastTs) {
a.metrics.totalOutOfOrderSamples.Inc()
return 0, storage.ErrOutOfOrderSample
}
// NOTE: always modify pendingSamples and sampleSeries together.
a.pendingSamples = append(a.pendingSamples, record.RefSample{
Ref: series.ref,
@ -935,6 +944,11 @@ func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int
series.Lock()
defer series.Unlock()
if t <= a.minValidTime(series.lastTs) {
a.metrics.totalOutOfOrderSamples.Inc()
return 0, storage.ErrOutOfOrderSample
}
switch {
case h != nil:
// NOTE: always modify pendingHistograms and histogramSeries together
@ -1103,3 +1117,13 @@ func (a *appender) logSeries() error {
return nil
}
// mintTs returns the minimum timestamp that a sample can have
// and is needed for preventing underflow.
func (a *appender) minValidTime(lastTs int64) int64 {
if lastTs < math.MinInt64+a.opts.OutOfOrderTimeWindow {
return math.MinInt64
}
return lastTs - a.opts.OutOfOrderTimeWindow
}

View file

@ -16,6 +16,7 @@ package agent
import (
"context"
"fmt"
"math"
"path/filepath"
"strconv"
"testing"
@ -761,7 +762,9 @@ func TestDBAllowOOOSamples(t *testing.T) {
)
reg := prometheus.NewRegistry()
s := createTestAgentDB(t, reg, DefaultOptions())
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = math.MaxInt64
s := createTestAgentDB(t, reg, opts)
app := s.Appender(context.TODO())
// Let's add some samples in the [offset, offset+numDatapoints) range.
@ -879,6 +882,56 @@ func TestDBAllowOOOSamples(t *testing.T) {
require.NoError(t, db.Close())
}
func TestDBOutOfOrderTimeWindow(t *testing.T) {
tc := []struct {
outOfOrderTimeWindow, firstTs, secondTs int64
expectedError error
}{
{0, 100, 101, nil},
{0, 100, 100, storage.ErrOutOfOrderSample},
{0, 100, 99, storage.ErrOutOfOrderSample},
{100, 100, 1, nil},
{100, 100, 0, storage.ErrOutOfOrderSample},
}
for _, c := range tc {
t.Run(fmt.Sprintf("outOfOrderTimeWindow=%d, firstTs=%d, secondTs=%d, expectedError=%s", c.outOfOrderTimeWindow, c.firstTs, c.secondTs, c.expectedError), func(t *testing.T) {
reg := prometheus.NewRegistry()
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = c.outOfOrderTimeWindow
s := createTestAgentDB(t, reg, opts)
app := s.Appender(context.TODO())
lbls := labelsForTest(t.Name()+"_histogram", 1)
lset := labels.New(lbls[0]...)
_, err := app.AppendHistogram(0, lset, c.firstTs, tsdbutil.GenerateTestHistograms(1)[0], nil)
require.NoError(t, err)
err = app.Commit()
require.NoError(t, err)
_, err = app.AppendHistogram(0, lset, c.secondTs, tsdbutil.GenerateTestHistograms(1)[0], nil)
require.ErrorIs(t, err, c.expectedError)
lbls = labelsForTest(t.Name(), 1)
lset = labels.New(lbls[0]...)
_, err = app.Append(0, lset, c.firstTs, 0)
require.NoError(t, err)
err = app.Commit()
require.NoError(t, err)
_, err = app.Append(0, lset, c.secondTs, 0)
require.ErrorIs(t, err, c.expectedError)
expectedAppendedSamples := float64(2)
if c.expectedError != nil {
expectedAppendedSamples = 1
}
m := gatherFamily(t, reg, "prometheus_agent_samples_appended_total")
require.Equal(t, expectedAppendedSamples, m.Metric[0].Counter.GetValue(), "agent wal mismatch of total appended samples")
require.Equal(t, expectedAppendedSamples, m.Metric[1].Counter.GetValue(), "agent wal mismatch of total appended histograms")
require.NoError(t, s.Close())
})
}
}
func BenchmarkCreateSeries(b *testing.B) {
s := createTestAgentDB(b, nil, DefaultOptions())
defer s.Close()

View file

@ -103,9 +103,9 @@ type IndexReader interface {
// storage.ErrNotFound is returned as error.
LabelValueFor(ctx context.Context, id storage.SeriesRef, label string) (string, error)
// LabelNamesFor returns all the label names for the series referred to by IDs.
// LabelNamesFor returns all the label names for the series referred to by the postings.
// The names returned are sorted.
LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error)
LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error)
// Close releases the underlying resources of the reader.
Close() error
@ -551,10 +551,10 @@ func (r blockIndexReader) LabelValueFor(ctx context.Context, id storage.SeriesRe
return r.ir.LabelValueFor(ctx, id, label)
}
// LabelNamesFor returns all the label names for the series referred to by IDs.
// LabelNamesFor returns all the label names for the series referred to by the postings.
// The names returned are sorted.
func (r blockIndexReader) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error) {
return r.ir.LabelNamesFor(ctx, ids...)
func (r blockIndexReader) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) {
return r.ir.LabelNamesFor(ctx, postings)
}
type blockTombstoneReader struct {
@ -646,10 +646,10 @@ Outer:
}
// CleanTombstones will remove the tombstones and rewrite the block (only if there are any tombstones).
// If there was a rewrite, then it returns the ULID of the new block written, else nil.
// If the resultant block is empty (tombstones covered the whole block), then it deletes the new block and return nil UID.
// If there was a rewrite, then it returns the ULID of new blocks written, else nil.
// If a resultant block is empty (tombstones covered the whole block), then it returns an empty slice.
// It returns a boolean indicating if the parent block can be deleted safely of not.
func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, bool, error) {
func (pb *Block) CleanTombstones(dest string, c Compactor) ([]ulid.ULID, bool, error) {
numStones := 0
if err := pb.tombstones.Iter(func(id storage.SeriesRef, ivs tombstones.Intervals) error {
@ -664,12 +664,12 @@ func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, bool, er
}
meta := pb.Meta()
uid, err := c.Write(dest, pb, pb.meta.MinTime, pb.meta.MaxTime, &meta)
uids, err := c.Write(dest, pb, pb.meta.MinTime, pb.meta.MaxTime, &meta)
if err != nil {
return nil, false, err
}
return &uid, true, nil
return uids, true, nil
}
// Snapshot creates snapshot of the block into dir.

View file

@ -346,9 +346,10 @@ func TestBlockSize(t *testing.T) {
c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil)
require.NoError(t, err)
blockDirAfterCompact, err := c.Compact(tmpdir, []string{blockInit.Dir()}, nil)
blockDirsAfterCompact, err := c.Compact(tmpdir, []string{blockInit.Dir()}, nil)
require.NoError(t, err)
blockAfterCompact, err := OpenBlock(nil, filepath.Join(tmpdir, blockDirAfterCompact.String()), nil)
require.Len(t, blockDirsAfterCompact, 1)
blockAfterCompact, err := OpenBlock(nil, filepath.Join(tmpdir, blockDirsAfterCompact[0].String()), nil)
require.NoError(t, err)
defer func() {
require.NoError(t, blockAfterCompact.Close())
@ -605,9 +606,10 @@ func createBlockFromHead(tb testing.TB, dir string, head *Head) string {
// Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime).
// Because of this block intervals are always +1 than the total samples it includes.
ulid, err := compactor.Write(dir, head, head.MinTime(), head.MaxTime()+1, nil)
ulids, err := compactor.Write(dir, head, head.MinTime(), head.MaxTime()+1, nil)
require.NoError(tb, err)
return filepath.Join(dir, ulid.String())
require.Len(tb, ulids, 1)
return filepath.Join(dir, ulids[0].String())
}
func createBlockFromOOOHead(tb testing.TB, dir string, head *OOOCompactionHead) string {
@ -618,9 +620,10 @@ func createBlockFromOOOHead(tb testing.TB, dir string, head *OOOCompactionHead)
// Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime).
// Because of this block intervals are always +1 than the total samples it includes.
ulid, err := compactor.Write(dir, head, head.MinTime(), head.MaxTime()+1, nil)
ulids, err := compactor.Write(dir, head, head.MinTime(), head.MaxTime()+1, nil)
require.NoError(tb, err)
return filepath.Join(dir, ulid.String())
require.Len(tb, ulids, 1)
return filepath.Join(dir, ulids[0].String())
}
func createHead(tb testing.TB, w *wlog.WL, series []storage.Series, chunkDir string) *Head {

View file

@ -105,12 +105,17 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) {
if err != nil {
return ulid.ULID{}, fmt.Errorf("create leveled compactor: %w", err)
}
id, err := compactor.Write(w.destinationDir, w.head, mint, maxt, nil)
ids, err := compactor.Write(w.destinationDir, w.head, mint, maxt, nil)
if err != nil {
return ulid.ULID{}, fmt.Errorf("compactor write: %w", err)
}
return id, nil
// No block was produced. Caller is responsible to check empty
// ulid.ULID based on its use case.
if len(ids) == 0 {
return ulid.ULID{}, nil
}
return ids[0], nil
}
func (w *BlockWriter) Close() error {

View file

@ -58,19 +58,23 @@ type Compactor interface {
// Results returned when compactions are in progress are undefined.
Plan(dir string) ([]string, error)
// Write persists a Block into a directory.
// No Block is written when resulting Block has 0 samples, and returns empty ulid.ULID{}.
Write(dest string, b BlockReader, mint, maxt int64, base *BlockMeta) (ulid.ULID, error)
// Write persists one or more Blocks into a directory.
// No Block is written when resulting Block has 0 samples and returns an empty slice.
// Prometheus always return one or no block. The interface allows returning more than one
// block for downstream users to experiment with compactor.
Write(dest string, b BlockReader, mint, maxt int64, base *BlockMeta) ([]ulid.ULID, error)
// Compact runs compaction against the provided directories. Must
// only be called concurrently with results of Plan().
// Can optionally pass a list of already open blocks,
// to avoid having to reopen them.
// When resulting Block has 0 samples
// Prometheus always return one or no block. The interface allows returning more than one
// block for downstream users to experiment with compactor.
// When one resulting Block has 0 samples
// * No block is written.
// * The source dirs are marked Deletable.
// * Returns empty ulid.ULID{}.
Compact(dest string, dirs []string, open []*Block) (ulid.ULID, error)
// * Block is not included in the result.
Compact(dest string, dirs []string, open []*Block) ([]ulid.ULID, error)
}
// LeveledCompactor implements the Compactor interface.
@ -441,11 +445,11 @@ func CompactBlockMetas(uid ulid.ULID, blocks ...*BlockMeta) *BlockMeta {
// Compact creates a new block in the compactor's directory from the blocks in the
// provided directories.
func (c *LeveledCompactor) Compact(dest string, dirs []string, open []*Block) (uid ulid.ULID, err error) {
func (c *LeveledCompactor) Compact(dest string, dirs []string, open []*Block) ([]ulid.ULID, error) {
return c.CompactWithBlockPopulator(dest, dirs, open, DefaultBlockPopulator{})
}
func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, open []*Block, blockPopulator BlockPopulator) (uid ulid.ULID, err error) {
func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, open []*Block, blockPopulator BlockPopulator) ([]ulid.ULID, error) {
var (
blocks []BlockReader
bs []*Block
@ -457,7 +461,7 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string,
for _, d := range dirs {
meta, _, err := readMetaFile(d)
if err != nil {
return uid, err
return nil, err
}
var b *Block
@ -475,7 +479,7 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string,
var err error
b, err = OpenBlock(c.logger, d, c.chunkPool)
if err != nil {
return uid, err
return nil, err
}
defer b.Close()
}
@ -486,10 +490,10 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string,
uids = append(uids, meta.ULID.String())
}
uid = ulid.MustNew(ulid.Now(), rand.Reader)
uid := ulid.MustNew(ulid.Now(), rand.Reader)
meta := CompactBlockMetas(uid, metas...)
err = c.write(dest, meta, blockPopulator, blocks...)
err := c.write(dest, meta, blockPopulator, blocks...)
if err == nil {
if meta.Stats.NumSamples == 0 {
for _, b := range bs {
@ -503,25 +507,25 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string,
}
b.numBytesMeta = n
}
uid = ulid.ULID{}
level.Info(c.logger).Log(
"msg", "compact blocks resulted in empty block",
"count", len(blocks),
"sources", fmt.Sprintf("%v", uids),
"duration", time.Since(start),
)
} else {
level.Info(c.logger).Log(
"msg", "compact blocks",
"count", len(blocks),
"mint", meta.MinTime,
"maxt", meta.MaxTime,
"ulid", meta.ULID,
"sources", fmt.Sprintf("%v", uids),
"duration", time.Since(start),
)
return nil, nil
}
return uid, nil
level.Info(c.logger).Log(
"msg", "compact blocks",
"count", len(blocks),
"mint", meta.MinTime,
"maxt", meta.MaxTime,
"ulid", meta.ULID,
"sources", fmt.Sprintf("%v", uids),
"duration", time.Since(start),
)
return []ulid.ULID{uid}, nil
}
errs := tsdb_errors.NewMulti(err)
@ -533,10 +537,10 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string,
}
}
return uid, errs.Err()
return nil, errs.Err()
}
func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, base *BlockMeta) (ulid.ULID, error) {
func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, base *BlockMeta) ([]ulid.ULID, error) {
start := time.Now()
uid := ulid.MustNew(ulid.Now(), rand.Reader)
@ -560,7 +564,7 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b
err := c.write(dest, meta, DefaultBlockPopulator{}, b)
if err != nil {
return uid, err
return nil, err
}
if meta.Stats.NumSamples == 0 {
@ -570,7 +574,7 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b
"maxt", meta.MaxTime,
"duration", time.Since(start),
)
return ulid.ULID{}, nil
return nil, nil
}
level.Info(c.logger).Log(
@ -581,7 +585,7 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b
"duration", time.Since(start),
"ooo", meta.Compaction.FromOutOfOrder(),
)
return uid, nil
return []ulid.ULID{uid}, nil
}
// instrumentedChunkWriter is used for level 1 compactions to record statistics

View file

@ -1484,12 +1484,12 @@ func TestHeadCompactionWithHistograms(t *testing.T) {
maxt := head.MaxTime() + 1 // Block intervals are half-open: [b.MinTime, b.MaxTime).
compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{DefaultBlockDuration}, chunkenc.NewPool(), nil)
require.NoError(t, err)
id, err := compactor.Write(head.opts.ChunkDirRoot, head, mint, maxt, nil)
ids, err := compactor.Write(head.opts.ChunkDirRoot, head, mint, maxt, nil)
require.NoError(t, err)
require.NotEqual(t, ulid.ULID{}, id)
require.Len(t, ids, 1)
// Open the block and query it and check the histograms.
block, err := OpenBlock(nil, path.Join(head.opts.ChunkDirRoot, id.String()), nil)
block, err := OpenBlock(nil, path.Join(head.opts.ChunkDirRoot, ids[0].String()), nil)
require.NoError(t, err)
t.Cleanup(func() {
require.NoError(t, block.Close())
@ -1598,8 +1598,8 @@ func TestSparseHistogramSpaceSavings(t *testing.T) {
sparseApp := sparseHead.Appender(context.Background())
numOldSeriesPerHistogram := 0
var oldULID ulid.ULID
var sparseULID ulid.ULID
var oldULIDs []ulid.ULID
var sparseULIDs []ulid.ULID
var wg sync.WaitGroup
@ -1626,9 +1626,9 @@ func TestSparseHistogramSpaceSavings(t *testing.T) {
maxt := sparseHead.MaxTime() + 1 // Block intervals are half-open: [b.MinTime, b.MaxTime).
compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{DefaultBlockDuration}, chunkenc.NewPool(), nil)
require.NoError(t, err)
sparseULID, err = compactor.Write(sparseHead.opts.ChunkDirRoot, sparseHead, mint, maxt, nil)
sparseULIDs, err = compactor.Write(sparseHead.opts.ChunkDirRoot, sparseHead, mint, maxt, nil)
require.NoError(t, err)
require.NotEqual(t, ulid.ULID{}, sparseULID)
require.Len(t, sparseULIDs, 1)
}()
wg.Add(1)
@ -1677,15 +1677,15 @@ func TestSparseHistogramSpaceSavings(t *testing.T) {
maxt := oldHead.MaxTime() + 1 // Block intervals are half-open: [b.MinTime, b.MaxTime).
compactor, err := NewLeveledCompactor(context.Background(), nil, nil, []int64{DefaultBlockDuration}, chunkenc.NewPool(), nil)
require.NoError(t, err)
oldULID, err = compactor.Write(oldHead.opts.ChunkDirRoot, oldHead, mint, maxt, nil)
oldULIDs, err = compactor.Write(oldHead.opts.ChunkDirRoot, oldHead, mint, maxt, nil)
require.NoError(t, err)
require.NotEqual(t, ulid.ULID{}, oldULID)
require.Len(t, oldULIDs, 1)
}()
wg.Wait()
oldBlockDir := filepath.Join(oldHead.opts.ChunkDirRoot, oldULID.String())
sparseBlockDir := filepath.Join(sparseHead.opts.ChunkDirRoot, sparseULID.String())
oldBlockDir := filepath.Join(oldHead.opts.ChunkDirRoot, oldULIDs[0].String())
sparseBlockDir := filepath.Join(sparseHead.opts.ChunkDirRoot, sparseULIDs[0].String())
oldSize, err := fileutil.DirSize(oldBlockDir)
require.NoError(t, err)
@ -1846,3 +1846,22 @@ func TestCompactBlockMetas(t *testing.T) {
}
require.Equal(t, expected, output)
}
func TestCompactEmptyResultBlockWithTombstone(t *testing.T) {
ctx := context.Background()
tmpdir := t.TempDir()
blockDir := createBlock(t, tmpdir, genSeries(1, 1, 0, 10))
block, err := OpenBlock(nil, blockDir, nil)
require.NoError(t, err)
// Write tombstone covering the whole block.
err = block.Delete(ctx, 0, 10, labels.MustNewMatcher(labels.MatchEqual, defaultLabelName, "0"))
require.NoError(t, err)
c, err := NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{0}, nil, nil)
require.NoError(t, err)
ulids, err := c.Compact(tmpdir, []string{blockDir}, []*Block{block})
require.NoError(t, err)
require.Nil(t, ulids)
require.NoError(t, block.Close())
}

View file

@ -1336,13 +1336,11 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID
for t := blockSize * (oooHeadMint / blockSize); t <= oooHeadMaxt; t += blockSize {
mint, maxt := t, t+blockSize
// Block intervals are half-open: [b.MinTime, b.MaxTime). Block intervals are always +1 than the total samples it includes.
uid, err := db.compactor.Write(dest, oooHead.CloneForTimeRange(mint, maxt-1), mint, maxt, meta)
uids, err := db.compactor.Write(dest, oooHead.CloneForTimeRange(mint, maxt-1), mint, maxt, meta)
if err != nil {
return nil, err
}
if uid.Compare(ulid.ULID{}) != 0 {
ulids = append(ulids, uid)
}
ulids = append(ulids, uids...)
}
if len(ulids) == 0 {
@ -1364,19 +1362,19 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID
// compactHead compacts the given RangeHead.
// The compaction mutex should be held before calling this method.
func (db *DB) compactHead(head *RangeHead) error {
uid, err := db.compactor.Write(db.dir, head, head.MinTime(), head.BlockMaxTime(), nil)
uids, err := db.compactor.Write(db.dir, head, head.MinTime(), head.BlockMaxTime(), nil)
if err != nil {
return fmt.Errorf("persist head block: %w", err)
}
if err := db.reloadBlocks(); err != nil {
if errRemoveAll := os.RemoveAll(filepath.Join(db.dir, uid.String())); errRemoveAll != nil {
return tsdb_errors.NewMulti(
fmt.Errorf("reloadBlocks blocks: %w", err),
fmt.Errorf("delete persisted head block after failed db reloadBlocks:%s: %w", uid, errRemoveAll),
).Err()
multiErr := tsdb_errors.NewMulti(fmt.Errorf("reloadBlocks blocks: %w", err))
for _, uid := range uids {
if errRemoveAll := os.RemoveAll(filepath.Join(db.dir, uid.String())); errRemoveAll != nil {
multiErr.Add(fmt.Errorf("delete persisted head block after failed db reloadBlocks:%s: %w", uid, errRemoveAll))
}
}
return fmt.Errorf("reloadBlocks blocks: %w", err)
return multiErr.Err()
}
if err = db.head.truncateMemory(head.BlockMaxTime()); err != nil {
return fmt.Errorf("head memory truncate: %w", err)
@ -1411,16 +1409,19 @@ func (db *DB) compactBlocks() (err error) {
default:
}
uid, err := db.compactor.Compact(db.dir, plan, db.blocks)
uids, err := db.compactor.Compact(db.dir, plan, db.blocks)
if err != nil {
return fmt.Errorf("compact %s: %w", plan, err)
}
if err := db.reloadBlocks(); err != nil {
if err := os.RemoveAll(filepath.Join(db.dir, uid.String())); err != nil {
return fmt.Errorf("delete compacted block after failed db reloadBlocks:%s: %w", uid, err)
errs := tsdb_errors.NewMulti(fmt.Errorf("reloadBlocks blocks: %w", err))
for _, uid := range uids {
if errRemoveAll := os.RemoveAll(filepath.Join(db.dir, uid.String())); errRemoveAll != nil {
errs.Add(fmt.Errorf("delete persisted block after failed db reloadBlocks:%s: %w", uid, errRemoveAll))
}
}
return fmt.Errorf("reloadBlocks blocks: %w", err)
return errs.Err()
}
}
@ -1541,12 +1542,15 @@ func (db *DB) reloadBlocks() (err error) {
oldBlocks := db.blocks
db.blocks = toLoad
blockMetas := make([]BlockMeta, 0, len(toLoad))
for _, b := range toLoad {
blockMetas = append(blockMetas, b.Meta())
}
if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 {
level.Warn(db.logger).Log("msg", "Overlapping blocks found during reloadBlocks", "detail", overlaps.String())
// Only check overlapping blocks when overlapping compaction is enabled.
if db.opts.EnableOverlappingCompaction {
blockMetas := make([]BlockMeta, 0, len(toLoad))
for _, b := range toLoad {
blockMetas = append(blockMetas, b.Meta())
}
if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 {
level.Warn(db.logger).Log("msg", "Overlapping blocks found during reloadBlocks", "detail", overlaps.String())
}
}
// Append blocks to old, deletable blocks, so we can close them.
@ -2149,7 +2153,7 @@ func (db *DB) CleanTombstones() (err error) {
cleanUpCompleted = true
for _, pb := range db.Blocks() {
uid, safeToDelete, cleanErr := pb.CleanTombstones(db.Dir(), db.compactor)
uids, safeToDelete, cleanErr := pb.CleanTombstones(db.Dir(), db.compactor)
if cleanErr != nil {
return fmt.Errorf("clean tombstones: %s: %w", pb.Dir(), cleanErr)
}
@ -2173,7 +2177,7 @@ func (db *DB) CleanTombstones() (err error) {
}
// Delete new block if it was created.
if uid != nil && *uid != (ulid.ULID{}) {
for _, uid := range uids {
dir := filepath.Join(db.Dir(), uid.String())
if err := os.RemoveAll(dir); err != nil {
level.Error(db.logger).Log("msg", "failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err)

View file

@ -1431,9 +1431,9 @@ func (*mockCompactorFailing) Plan(string) ([]string, error) {
return nil, nil
}
func (c *mockCompactorFailing) Write(dest string, _ BlockReader, _, _ int64, _ *BlockMeta) (ulid.ULID, error) {
func (c *mockCompactorFailing) Write(dest string, _ BlockReader, _, _ int64, _ *BlockMeta) ([]ulid.ULID, error) {
if len(c.blocks) >= c.max {
return ulid.ULID{}, fmt.Errorf("the compactor already did the maximum allowed blocks so it is time to fail")
return []ulid.ULID{}, fmt.Errorf("the compactor already did the maximum allowed blocks so it is time to fail")
}
block, err := OpenBlock(nil, createBlock(c.t, dest, genSeries(1, 1, 0, 1)), nil)
@ -1452,11 +1452,11 @@ func (c *mockCompactorFailing) Write(dest string, _ BlockReader, _, _ int64, _ *
require.Equal(c.t, expectedBlocks, actualBlockDirs)
return block.Meta().ULID, nil
return []ulid.ULID{block.Meta().ULID}, nil
}
func (*mockCompactorFailing) Compact(string, []string, []*Block) (ulid.ULID, error) {
return ulid.ULID{}, nil
func (*mockCompactorFailing) Compact(string, []string, []*Block) ([]ulid.ULID, error) {
return []ulid.ULID{}, nil
}
func (*mockCompactorFailing) CompactOOO(string, *OOOCompactionHead) (result []ulid.ULID, err error) {
@ -6804,9 +6804,9 @@ func TestQueryHistogramFromBlocksWithCompaction(t *testing.T) {
for _, b := range blocks {
blockDirs = append(blockDirs, b.Dir())
}
id, err := db.compactor.Compact(db.Dir(), blockDirs, blocks)
ids, err := db.compactor.Compact(db.Dir(), blockDirs, blocks)
require.NoError(t, err)
require.NotEqual(t, ulid.ULID{}, id)
require.Len(t, ids, 1)
require.NoError(t, db.reload())
require.Len(t, db.Blocks(), 1)
@ -7068,19 +7068,19 @@ func requireEqualOOOSamples(t *testing.T, expectedSamples int, db *DB) {
type mockCompactorFn struct {
planFn func() ([]string, error)
compactFn func() (ulid.ULID, error)
writeFn func() (ulid.ULID, error)
compactFn func() ([]ulid.ULID, error)
writeFn func() ([]ulid.ULID, error)
}
func (c *mockCompactorFn) Plan(_ string) ([]string, error) {
return c.planFn()
}
func (c *mockCompactorFn) Compact(_ string, _ []string, _ []*Block) (ulid.ULID, error) {
func (c *mockCompactorFn) Compact(_ string, _ []string, _ []*Block) ([]ulid.ULID, error) {
return c.compactFn()
}
func (c *mockCompactorFn) Write(_ string, _ BlockReader, _, _ int64, _ *BlockMeta) (ulid.ULID, error) {
func (c *mockCompactorFn) Write(_ string, _ BlockReader, _, _ int64, _ *BlockMeta) ([]ulid.ULID, error) {
return c.writeFn()
}
@ -7112,11 +7112,11 @@ func TestAbortBlockCompactions(t *testing.T) {
// Our custom Plan() will always return something to compact.
return []string{"1", "2", "3"}, nil
},
compactFn: func() (ulid.ULID, error) {
return ulid.ULID{}, nil
compactFn: func() ([]ulid.ULID, error) {
return []ulid.ULID{}, nil
},
writeFn: func() (ulid.ULID, error) {
return ulid.ULID{}, nil
writeFn: func() ([]ulid.ULID, error) {
return []ulid.ULID{}, nil
},
}
@ -7135,11 +7135,11 @@ func TestNewCompactorFunc(t *testing.T) {
planFn: func() ([]string, error) {
return []string{block1.String(), block2.String()}, nil
},
compactFn: func() (ulid.ULID, error) {
return block1, nil
compactFn: func() ([]ulid.ULID, error) {
return []ulid.ULID{block1}, nil
},
writeFn: func() (ulid.ULID, error) {
return block2, nil
writeFn: func() ([]ulid.ULID, error) {
return []ulid.ULID{block2}, nil
},
}, nil
}
@ -7150,10 +7150,12 @@ func TestNewCompactorFunc(t *testing.T) {
plans, err := db.compactor.Plan("")
require.NoError(t, err)
require.Equal(t, []string{block1.String(), block2.String()}, plans)
ulid, err := db.compactor.Compact("", nil, nil)
ulids, err := db.compactor.Compact("", nil, nil)
require.NoError(t, err)
require.Equal(t, block1, ulid)
ulid, err = db.compactor.Write("", nil, 0, 1, nil)
require.Len(t, ulids, 1)
require.Equal(t, block1, ulids[0])
ulids, err = db.compactor.Write("", nil, 0, 1, nil)
require.NoError(t, err)
require.Equal(t, block2, ulid)
require.Len(t, ulids, 1)
require.Equal(t, block2, ulids[0])
}

View file

@ -1552,7 +1552,7 @@ func (h *Head) gc() (actualInOrderMint, minOOOTime int64, minMmapFile int) {
// Drop old chunks and remember series IDs and hashes if they can be
// deleted entirely.
deleted, chunksRemoved, actualInOrderMint, minOOOTime, minMmapFile := h.series.gc(mint, minOOOMmapRef)
deleted, affected, chunksRemoved, actualInOrderMint, minOOOTime, minMmapFile := h.series.gc(mint, minOOOMmapRef)
seriesRemoved := len(deleted)
h.metrics.seriesRemoved.Add(float64(seriesRemoved))
@ -1561,7 +1561,7 @@ func (h *Head) gc() (actualInOrderMint, minOOOTime int64, minMmapFile int) {
h.numSeries.Sub(uint64(seriesRemoved))
// Remove deleted series IDs from the postings lists.
h.postings.Delete(deleted)
h.postings.Delete(deleted, affected)
// Remove tombstones referring to the deleted series.
h.tombstones.DeleteTombstones(deleted)
@ -1869,9 +1869,10 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st
// but the returned map goes into postings.Delete() which expects a map[storage.SeriesRef]struct
// and there's no easy way to cast maps.
// minMmapFile is the min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series.
func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ int, _, _ int64, minMmapFile int) {
func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ map[labels.Label]struct{}, _ int, _, _ int64, minMmapFile int) {
var (
deleted = map[storage.SeriesRef]struct{}{}
affected = map[labels.Label]struct{}{}
rmChunks = 0
actualMint int64 = math.MaxInt64
minOOOTime int64 = math.MaxInt64
@ -1927,6 +1928,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (
}
deleted[storage.SeriesRef(series.ref)] = struct{}{}
series.lset.Range(func(l labels.Label) { affected[l] = struct{}{} })
s.hashes[hashShard].del(hash, series.ref)
delete(s.series[refShard], series.ref)
deletedForCallback[series.ref] = series.lset
@ -1938,7 +1940,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (
actualMint = mint
}
return deleted, rmChunks, actualMint, minOOOTime, minMmapFile
return deleted, affected, rmChunks, actualMint, minOOOTime, minMmapFile
}
// The iterForDeletion function iterates through all series, invoking the checkDeletedFunc for each.

View file

@ -267,22 +267,29 @@ func (h *headIndexReader) LabelValueFor(_ context.Context, id storage.SeriesRef,
return value, nil
}
// LabelNamesFor returns all the label names for the series referred to by IDs.
// LabelNamesFor returns all the label names for the series referred to by the postings.
// The names returned are sorted.
func (h *headIndexReader) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error) {
func (h *headIndexReader) LabelNamesFor(ctx context.Context, series index.Postings) ([]string, error) {
namesMap := make(map[string]struct{})
for _, id := range ids {
if ctx.Err() != nil {
i := 0
for series.Next() {
i++
if i%checkContextEveryNIterations == 0 && ctx.Err() != nil {
return nil, ctx.Err()
}
memSeries := h.head.series.getByID(chunks.HeadSeriesRef(id))
memSeries := h.head.series.getByID(chunks.HeadSeriesRef(series.At()))
if memSeries == nil {
return nil, storage.ErrNotFound
// Series not found, this happens during compaction,
// when series was garbage collected after the caller got the series IDs.
continue
}
memSeries.lset.Range(func(lbl labels.Label) {
namesMap[lbl.Name] = struct{}{}
})
}
if err := series.Err(); err != nil {
return nil, err
}
names := make([]string, 0, len(namesMap))
for name := range namesMap {
names = append(names, name)

View file

@ -814,6 +814,80 @@ func TestHead_UnknownWALRecord(t *testing.T) {
require.NoError(t, head.Close())
}
// BenchmarkHead_Truncate is quite heavy, so consider running it with
// -benchtime=10x or similar to get more stable and comparable results.
func BenchmarkHead_Truncate(b *testing.B) {
const total = 1e6
prepare := func(b *testing.B, churn int) *Head {
h, _ := newTestHead(b, 1000, wlog.CompressionNone, false)
b.Cleanup(func() {
require.NoError(b, h.Close())
})
h.initTime(0)
internedItoa := map[int]string{}
var mtx sync.RWMutex
itoa := func(i int) string {
mtx.RLock()
s, ok := internedItoa[i]
mtx.RUnlock()
if ok {
return s
}
mtx.Lock()
s = strconv.Itoa(i)
internedItoa[i] = s
mtx.Unlock()
return s
}
allSeries := [total]labels.Labels{}
nameValues := make([]string, 0, 100)
for i := 0; i < total; i++ {
nameValues = nameValues[:0]
// A thousand labels like lbl_x_of_1000, each with total/1000 values
thousand := "lbl_" + itoa(i%1000) + "_of_1000"
nameValues = append(nameValues, thousand, itoa(i/1000))
// A hundred labels like lbl_x_of_100, each with total/100 values.
hundred := "lbl_" + itoa(i%100) + "_of_100"
nameValues = append(nameValues, hundred, itoa(i/100))
if i%13 == 0 {
ten := "lbl_" + itoa(i%10) + "_of_10"
nameValues = append(nameValues, ten, itoa(i%10))
}
allSeries[i] = labels.FromStrings(append(nameValues, "first", "a", "second", "a", "third", "a")...)
s, _, _ := h.getOrCreate(allSeries[i].Hash(), allSeries[i])
s.mmappedChunks = []*mmappedChunk{
{minTime: 1000 * int64(i/churn), maxTime: 999 + 1000*int64(i/churn)},
}
}
return h
}
for _, churn := range []int{10, 100, 1000} {
b.Run(fmt.Sprintf("churn=%d", churn), func(b *testing.B) {
if b.N > total/churn {
// Just to make sure that benchmark still makes sense.
panic("benchmark not prepared")
}
h := prepare(b, churn)
b.ResetTimer()
for i := 0; i < b.N; i++ {
require.NoError(b, h.Truncate(1000*int64(i)))
// Make sure the benchmark is meaningful and it's actually truncating the expected amount of series.
require.Equal(b, total-churn*i, int(h.NumSeries()))
}
})
}
}
func TestHead_Truncate(t *testing.T) {
h, _ := newTestHead(t, 1000, wlog.CompressionNone, false)
defer func() {

View file

@ -1551,12 +1551,18 @@ func (r *Reader) LabelValues(ctx context.Context, name string, matchers ...*labe
// LabelNamesFor returns all the label names for the series referred to by IDs.
// The names returned are sorted.
func (r *Reader) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error) {
func (r *Reader) LabelNamesFor(ctx context.Context, postings Postings) ([]string, error) {
// Gather offsetsMap the name offsetsMap in the symbol table first
offsetsMap := make(map[uint32]struct{})
for _, id := range ids {
if ctx.Err() != nil {
return nil, ctx.Err()
i := 0
for postings.Next() {
id := postings.At()
i++
if i%checkContextEveryNIterations == 0 {
if ctxErr := ctx.Err(); ctxErr != nil {
return nil, ctxErr
}
}
offset := id

View file

@ -634,6 +634,31 @@ func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) {
require.Equal(t, failAfter, ctx.Count())
}
func TestReader_LabelNamesForHonorsContextCancel(t *testing.T) {
const seriesCount = 1000
var input indexWriterSeriesSlice
for i := 1; i <= seriesCount; i++ {
input = append(input, &indexWriterSeries{
labels: labels.FromStrings(labels.MetricName, fmt.Sprintf("%4d", i)),
chunks: []chunks.Meta{
{Ref: 1, MinTime: 0, MaxTime: 10},
},
})
}
ir, _, _ := createFileReader(context.Background(), t, input)
name, value := AllPostingsKey()
p, err := ir.Postings(context.Background(), name, value)
require.NoError(t, err)
// We check context cancellation every 128 iterations so 3 will fail after
// iterating 3 * 128 series.
failAfter := uint64(3)
ctx := &testutil.MockContextErrAfter{FailAfter: failAfter}
_, err = ir.LabelNamesFor(ctx, p)
require.Error(t, err)
require.Equal(t, failAfter, ctx.Count())
}
// createFileReader creates a temporary index file. It writes the provided input to this file.
// It returns a Reader for this file, the file's name, and the symbol map.
func createFileReader(ctx context.Context, tb testing.TB, input indexWriterSeriesSlice) (*Reader, string, map[string]struct{}) {

View file

@ -288,62 +288,34 @@ func (p *MemPostings) EnsureOrder(numberOfConcurrentProcesses int) {
}
// Delete removes all ids in the given map from the postings lists.
func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}) {
var keys, vals []string
// affectedLabels contains all the labels that are affected by the deletion, there's no need to check other labels.
func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected map[labels.Label]struct{}) {
p.mtx.Lock()
defer p.mtx.Unlock()
// Collect all keys relevant for deletion once. New keys added afterwards
// can by definition not be affected by any of the given deletes.
p.mtx.RLock()
for n := range p.m {
keys = append(keys, n)
process := func(l labels.Label) {
orig := p.m[l.Name][l.Value]
repl := make([]storage.SeriesRef, 0, len(orig))
for _, id := range orig {
if _, ok := deleted[id]; !ok {
repl = append(repl, id)
}
}
if len(repl) > 0 {
p.m[l.Name][l.Value] = repl
} else {
delete(p.m[l.Name], l.Value)
// Delete the key if we removed all values.
if len(p.m[l.Name]) == 0 {
delete(p.m, l.Name)
}
}
}
p.mtx.RUnlock()
for _, n := range keys {
p.mtx.RLock()
vals = vals[:0]
for v := range p.m[n] {
vals = append(vals, v)
}
p.mtx.RUnlock()
// For each posting we first analyse whether the postings list is affected by the deletes.
// If yes, we actually reallocate a new postings list.
for _, l := range vals {
// Only lock for processing one postings list so we don't block reads for too long.
p.mtx.Lock()
found := false
for _, id := range p.m[n][l] {
if _, ok := deleted[id]; ok {
found = true
break
}
}
if !found {
p.mtx.Unlock()
continue
}
repl := make([]storage.SeriesRef, 0, len(p.m[n][l]))
for _, id := range p.m[n][l] {
if _, ok := deleted[id]; !ok {
repl = append(repl, id)
}
}
if len(repl) > 0 {
p.m[n][l] = repl
} else {
delete(p.m[n], l)
}
p.mtx.Unlock()
}
p.mtx.Lock()
if len(p.m[n]) == 0 {
delete(p.m, n)
}
p.mtx.Unlock()
for l := range affected {
process(l)
}
process(allPostingsKey)
}
// Iter calls f for each postings list. It aborts if f returns an error and returns it.
@ -398,16 +370,62 @@ func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) {
}
func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings {
p.mtx.RLock()
// We'll copy the values into a slice and then match over that,
// this way we don't need to hold the mutex while we're matching,
// which can be slow (seconds) if the match function is a huge regex.
// Holding this lock prevents new series from being added (slows down the write path)
// and blocks the compaction process.
vals := p.labelValues(name)
for i, count := 0, 1; i < len(vals); count++ {
if count%checkContextEveryNIterations == 0 && ctx.Err() != nil {
return ErrPostings(ctx.Err())
}
e := p.m[name]
if len(e) == 0 {
p.mtx.RUnlock()
if match(vals[i]) {
i++
continue
}
// Didn't match, bring the last value to this position, make the slice shorter and check again.
// The order of the slice doesn't matter as it comes from a map iteration.
vals[i], vals = vals[len(vals)-1], vals[:len(vals)-1]
}
// If none matched (or this label had no values), no need to grab the lock again.
if len(vals) == 0 {
return EmptyPostings()
}
// Benchmarking shows that first copying the values into a slice and then matching over that is
// faster than matching over the map keys directly, at least on AMD64.
// Now `vals` only contains the values that matched, get their postings.
its := make([]Postings, 0, len(vals))
p.mtx.RLock()
e := p.m[name]
for _, v := range vals {
if refs, ok := e[v]; ok {
// Some of the values may have been garbage-collected in the meantime this is fine, we'll just skip them.
// If we didn't let the mutex go, we'd have these postings here, but they would be pointing nowhere
// because there would be a `MemPostings.Delete()` call waiting for the lock to delete these labels,
// because the series were deleted already.
its = append(its, NewListPostings(refs))
}
}
// Let the mutex go before merging.
p.mtx.RUnlock()
return Merge(ctx, its...)
}
// labelValues returns a slice of label values for the given label name.
// It will take the read lock.
func (p *MemPostings) labelValues(name string) []string {
p.mtx.RLock()
defer p.mtx.RUnlock()
e := p.m[name]
if len(e) == 0 {
return nil
}
vals := make([]string, 0, len(e))
for v, srs := range e {
if len(srs) > 0 {
@ -415,21 +433,7 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string,
}
}
var its []Postings
count := 1
for _, v := range vals {
if count%checkContextEveryNIterations == 0 && ctx.Err() != nil {
p.mtx.RUnlock()
return ErrPostings(ctx.Err())
}
count++
if match(v) {
its = append(its, NewListPostings(e[v]))
}
}
p.mtx.RUnlock()
return Merge(ctx, its...)
return vals
}
// ExpandPostings returns the postings expanded as a slice.

View file

@ -23,6 +23,7 @@ import (
"sort"
"strconv"
"strings"
"sync"
"testing"
"github.com/grafana/regexp"
@ -978,9 +979,13 @@ func TestMemPostings_Delete(t *testing.T) {
p.Add(3, labels.FromStrings("lbl2", "a"))
before := p.Get(allPostingsKey.Name, allPostingsKey.Value)
p.Delete(map[storage.SeriesRef]struct{}{
deletedRefs := map[storage.SeriesRef]struct{}{
2: {},
})
}
affectedLabels := map[labels.Label]struct{}{
{Name: "lbl1", Value: "b"}: {},
}
p.Delete(deletedRefs, affectedLabels)
after := p.Get(allPostingsKey.Name, allPostingsKey.Value)
// Make sure postings gotten before the delete have the old data when
@ -1001,6 +1006,101 @@ func TestMemPostings_Delete(t *testing.T) {
require.Empty(t, expanded, "expected empty postings, got %v", expanded)
}
// BenchmarkMemPostings_Delete is quite heavy, so consider running it with
// -benchtime=10x or similar to get more stable and comparable results.
func BenchmarkMemPostings_Delete(b *testing.B) {
internedItoa := map[int]string{}
var mtx sync.RWMutex
itoa := func(i int) string {
mtx.RLock()
s, ok := internedItoa[i]
mtx.RUnlock()
if ok {
return s
}
mtx.Lock()
s = strconv.Itoa(i)
internedItoa[i] = s
mtx.Unlock()
return s
}
const total = 1e6
allSeries := [total]labels.Labels{}
nameValues := make([]string, 0, 100)
for i := 0; i < total; i++ {
nameValues = nameValues[:0]
// A thousand labels like lbl_x_of_1000, each with total/1000 values
thousand := "lbl_" + itoa(i%1000) + "_of_1000"
nameValues = append(nameValues, thousand, itoa(i/1000))
// A hundred labels like lbl_x_of_100, each with total/100 values.
hundred := "lbl_" + itoa(i%100) + "_of_100"
nameValues = append(nameValues, hundred, itoa(i/100))
if i < 100 {
ten := "lbl_" + itoa(i%10) + "_of_10"
nameValues = append(nameValues, ten, itoa(i%10))
}
allSeries[i] = labels.FromStrings(append(nameValues, "first", "a", "second", "a", "third", "a")...)
}
for _, refs := range []int{1, 100, 10_000} {
b.Run(fmt.Sprintf("refs=%d", refs), func(b *testing.B) {
for _, reads := range []int{0, 1, 10} {
b.Run(fmt.Sprintf("readers=%d", reads), func(b *testing.B) {
if b.N > total/refs {
// Just to make sure that benchmark still makes sense.
panic("benchmark not prepared")
}
p := NewMemPostings()
for i := range allSeries {
p.Add(storage.SeriesRef(i), allSeries[i])
}
stop := make(chan struct{})
wg := sync.WaitGroup{}
for i := 0; i < reads; i++ {
wg.Add(1)
go func(i int) {
lbl := "lbl_" + itoa(i) + "_of_100"
defer wg.Done()
for {
select {
case <-stop:
return
default:
// Get a random value of this label.
p.Get(lbl, itoa(rand.Intn(10000))).Next()
}
}
}(i)
}
b.Cleanup(func() {
close(stop)
wg.Wait()
})
b.ResetTimer()
for n := 0; n < b.N; n++ {
deleted := make(map[storage.SeriesRef]struct{}, refs)
affected := make(map[labels.Label]struct{}, refs)
for i := 0; i < refs; i++ {
ref := storage.SeriesRef(n*refs + i)
deleted[ref] = struct{}{}
allSeries[ref].Range(func(l labels.Label) {
affected[l] = struct{}{}
})
}
p.Delete(deleted, affected)
}
})
}
})
}
}
func TestFindIntersectingPostings(t *testing.T) {
t.Run("multiple intersections", func(t *testing.T) {
p := NewListPostings([]storage.SeriesRef{10, 15, 20, 25, 30, 35, 40, 45, 50})
@ -1338,6 +1438,28 @@ func BenchmarkMemPostings_PostingsForLabelMatching(b *testing.B) {
}
}
func TestMemPostings_PostingsForLabelMatching(t *testing.T) {
mp := NewMemPostings()
mp.Add(1, labels.FromStrings("foo", "1"))
mp.Add(2, labels.FromStrings("foo", "2"))
mp.Add(3, labels.FromStrings("foo", "3"))
mp.Add(4, labels.FromStrings("foo", "4"))
isEven := func(v string) bool {
iv, err := strconv.Atoi(v)
if err != nil {
panic(err)
}
return iv%2 == 0
}
p := mp.PostingsForLabelMatching(context.Background(), "foo", isEven)
require.NoError(t, p.Err())
refs, err := ExpandPostings(p)
require.NoError(t, err)
require.Equal(t, []storage.SeriesRef{2, 4}, refs)
}
func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) {
memP := NewMemPostings()
seriesCount := 10 * checkContextEveryNIterations

View file

@ -483,7 +483,7 @@ func (ir *OOOCompactionHeadIndexReader) LabelValueFor(context.Context, storage.S
return "", errors.New("not implemented")
}
func (ir *OOOCompactionHeadIndexReader) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error) {
func (ir *OOOCompactionHeadIndexReader) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) {
return nil, errors.New("not implemented")
}

View file

@ -447,16 +447,7 @@ func labelNamesWithMatchers(ctx context.Context, r IndexReader, matchers ...*lab
if err != nil {
return nil, err
}
var postings []storage.SeriesRef
for p.Next() {
postings = append(postings, p.At())
}
if err := p.Err(); err != nil {
return nil, fmt.Errorf("postings for label names with matchers: %w", err)
}
return r.LabelNamesFor(ctx, postings...)
return r.LabelNamesFor(ctx, p)
}
// seriesData, used inside other iterators, are updated when we move from one series to another.

View file

@ -2292,13 +2292,16 @@ func (m mockIndex) LabelValueFor(_ context.Context, id storage.SeriesRef, label
return m.series[id].l.Get(label), nil
}
func (m mockIndex) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error) {
func (m mockIndex) LabelNamesFor(_ context.Context, postings index.Postings) ([]string, error) {
namesMap := make(map[string]bool)
for _, id := range ids {
m.series[id].l.Range(func(lbl labels.Label) {
for postings.Next() {
m.series[postings.At()].l.Range(func(lbl labels.Label) {
namesMap[lbl.Name] = true
})
}
if err := postings.Err(); err != nil {
return nil, err
}
names := make([]string, 0, len(namesMap))
for name := range namesMap {
names = append(names, name)
@ -3232,7 +3235,7 @@ func (m mockMatcherIndex) LabelValueFor(context.Context, storage.SeriesRef, stri
return "", errors.New("label value for called")
}
func (m mockMatcherIndex) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([]string, error) {
func (m mockMatcherIndex) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) {
return nil, errors.New("label names for for called")
}
@ -3693,7 +3696,7 @@ func (m mockReaderOfLabels) LabelNames(context.Context, ...*labels.Matcher) ([]s
panic("LabelNames called")
}
func (m mockReaderOfLabels) LabelNamesFor(context.Context, ...storage.SeriesRef) ([]string, error) {
func (m mockReaderOfLabels) LabelNamesFor(context.Context, index.Postings) ([]string, error) {
panic("LabelNamesFor called")
}

View file

@ -200,7 +200,7 @@ func (tc *ZookeeperTreeCache) loop(path string) {
failure()
} else {
tc.resyncState(tc.prefix, tc.head, previousState)
level.Info(tc.logger).Log("Zookeeper resync successful")
level.Info(tc.logger).Log("msg", "Zookeeper resync successful")
failureMode = false
}
case <-tc.stop:

View file

@ -1761,7 +1761,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface
b, err := codec.Encode(resp)
if err != nil {
level.Error(api.logger).Log("msg", "error marshaling response", "err", err)
level.Error(api.logger).Log("msg", "error marshaling response", "url", req.URL, "err", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
@ -1769,7 +1769,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface
w.Header().Set("Content-Type", codec.ContentType().String())
w.WriteHeader(http.StatusOK)
if n, err := w.Write(b); err != nil {
level.Error(api.logger).Log("msg", "error writing response", "bytesWritten", n, "err", err)
level.Error(api.logger).Log("msg", "error writing response", "url", req.URL, "bytesWritten", n, "err", err)
}
}

View file

@ -1,6 +1,6 @@
{
"name": "@prometheus-io/codemirror-promql",
"version": "0.52.1",
"version": "0.53.0",
"description": "a CodeMirror mode for the PromQL language",
"types": "dist/esm/index.d.ts",
"module": "dist/esm/index.js",
@ -29,7 +29,7 @@
},
"homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md",
"dependencies": {
"@prometheus-io/lezer-promql": "0.52.1",
"@prometheus-io/lezer-promql": "0.53.0",
"lru-cache": "^7.18.3"
},
"devDependencies": {

View file

@ -1,6 +1,6 @@
{
"name": "@prometheus-io/lezer-promql",
"version": "0.52.1",
"version": "0.53.0",
"description": "lezer-based PromQL grammar",
"main": "dist/index.cjs",
"type": "module",

View file

@ -1,12 +1,12 @@
{
"name": "prometheus-io",
"version": "0.52.1",
"version": "0.53.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "prometheus-io",
"version": "0.52.1",
"version": "0.53.0",
"workspaces": [
"react-app",
"module/*"
@ -30,10 +30,10 @@
},
"module/codemirror-promql": {
"name": "@prometheus-io/codemirror-promql",
"version": "0.52.1",
"version": "0.53.0",
"license": "Apache-2.0",
"dependencies": {
"@prometheus-io/lezer-promql": "0.52.1",
"@prometheus-io/lezer-promql": "0.53.0",
"lru-cache": "^7.18.3"
},
"devDependencies": {
@ -69,7 +69,7 @@
},
"module/lezer-promql": {
"name": "@prometheus-io/lezer-promql",
"version": "0.52.1",
"version": "0.53.0",
"license": "Apache-2.0",
"devDependencies": {
"@lezer/generator": "^1.7.0",
@ -19331,7 +19331,7 @@
},
"react-app": {
"name": "@prometheus-io/app",
"version": "0.52.1",
"version": "0.53.0",
"dependencies": {
"@codemirror/autocomplete": "^6.16.2",
"@codemirror/commands": "^6.6.0",
@ -19349,7 +19349,7 @@
"@lezer/lr": "^1.4.1",
"@nexucis/fuzzy": "^0.4.1",
"@nexucis/kvsearch": "^0.8.1",
"@prometheus-io/codemirror-promql": "0.52.1",
"@prometheus-io/codemirror-promql": "0.53.0",
"bootstrap": "^4.6.2",
"css.escape": "^1.5.1",
"downshift": "^9.0.6",

View file

@ -28,5 +28,5 @@
"ts-jest": "^29.1.4",
"typescript": "^4.9.5"
},
"version": "0.52.1"
"version": "0.53.0"
}

View file

@ -1,6 +1,6 @@
{
"name": "@prometheus-io/app",
"version": "0.52.1",
"version": "0.53.0",
"private": true,
"dependencies": {
"@codemirror/autocomplete": "^6.16.2",
@ -19,7 +19,7 @@
"@lezer/lr": "^1.4.1",
"@nexucis/fuzzy": "^0.4.1",
"@nexucis/kvsearch": "^0.8.1",
"@prometheus-io/codemirror-promql": "0.52.1",
"@prometheus-io/codemirror-promql": "0.53.0",
"bootstrap": "^4.6.2",
"css.escape": "^1.5.1",
"downshift": "^9.0.6",