prometheus/model/labels/labels.go
Bryan Boreham ac4f8a5e23
Some checks are pending
buf.build / lint and publish (push) Waiting to run
CI / Go tests (push) Waiting to run
CI / More Go tests (push) Waiting to run
CI / Go tests with previous Go version (push) Waiting to run
CI / UI tests (push) Waiting to run
CI / Go tests on Windows (push) Waiting to run
CI / Mixins tests (push) Waiting to run
CI / Build Prometheus for common architectures (0) (push) Waiting to run
CI / Build Prometheus for common architectures (1) (push) Waiting to run
CI / Build Prometheus for common architectures (2) (push) Waiting to run
CI / Build Prometheus for all architectures (0) (push) Waiting to run
CI / Build Prometheus for all architectures (1) (push) Waiting to run
CI / Build Prometheus for all architectures (10) (push) Waiting to run
CI / Build Prometheus for all architectures (11) (push) Waiting to run
CI / Build Prometheus for all architectures (2) (push) Waiting to run
CI / Build Prometheus for all architectures (3) (push) Waiting to run
CI / Build Prometheus for all architectures (4) (push) Waiting to run
CI / Build Prometheus for all architectures (5) (push) Waiting to run
CI / Build Prometheus for all architectures (6) (push) Waiting to run
CI / Build Prometheus for all architectures (7) (push) Waiting to run
CI / Build Prometheus for all architectures (8) (push) Waiting to run
CI / Build Prometheus for all architectures (9) (push) Waiting to run
CI / Report status of build Prometheus for all architectures (push) Blocked by required conditions
CI / Check generated parser (push) Waiting to run
CI / golangci-lint (push) Waiting to run
CI / fuzzing (push) Waiting to run
CI / codeql (push) Waiting to run
CI / Publish main branch artifacts (push) Blocked by required conditions
CI / Publish release artefacts (push) Blocked by required conditions
CI / Publish UI on npm Registry (push) Blocked by required conditions
Scorecards supply-chain security / Scorecards analysis (push) Waiting to run
[ENHANCEMENT] TSDB: Improve calculation of space used by labels (#13880)
* [ENHANCEMENT] TSDB: Improve calculation of space used by labels

The labels for each series in the Head take up some some space in the
Postings index, but far more space in the `memSeries` structure.

Instead of having the Postings index calculate this overhead, which is
a layering violation, have the caller pass in a function to do it.

Provide three implementations of this function for the three Labels
versions.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
2024-12-16 09:42:52 +00:00

497 lines
13 KiB
Go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !stringlabels && !dedupelabels
package labels
import (
"bytes"
"slices"
"strings"
"unsafe"
"github.com/cespare/xxhash/v2"
)
// Labels is a sorted set of labels. Order has to be guaranteed upon
// instantiation.
type Labels []Label
func (ls Labels) Len() int { return len(ls) }
func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] }
func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name }
// Bytes returns ls as a byte slice.
// It uses an byte invalid character as a separator and so should not be used for printing.
func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep)
for i, l := range ls {
if i > 0 {
b.WriteByte(sep)
}
b.WriteString(l.Name)
b.WriteByte(sep)
b.WriteString(l.Value)
}
return b.Bytes()
}
// MatchLabels returns a subset of Labels that matches/does not match with the provided label names based on the 'on' boolean.
// If on is set to true, it returns the subset of labels that match with the provided label names and its inverse when 'on' is set to false.
func (ls Labels) MatchLabels(on bool, names ...string) Labels {
matchedLabels := Labels{}
nameSet := make(map[string]struct{}, len(names))
for _, n := range names {
nameSet[n] = struct{}{}
}
for _, v := range ls {
if _, ok := nameSet[v.Name]; on == ok && (on || v.Name != MetricName) {
matchedLabels = append(matchedLabels, v)
}
}
return matchedLabels
}
// Hash returns a hash value for the label set.
// Note: the result is not guaranteed to be consistent across different runs of Prometheus.
func (ls Labels) Hash() uint64 {
// Use xxhash.Sum64(b) for fast path as it's faster.
b := make([]byte, 0, 1024)
for i, v := range ls {
if len(b)+len(v.Name)+len(v.Value)+2 >= cap(b) {
// If labels entry is 1KB+ do not allocate whole entry.
h := xxhash.New()
_, _ = h.Write(b)
for _, v := range ls[i:] {
_, _ = h.WriteString(v.Name)
_, _ = h.Write(seps)
_, _ = h.WriteString(v.Value)
_, _ = h.Write(seps)
}
return h.Sum64()
}
b = append(b, v.Name...)
b = append(b, sep)
b = append(b, v.Value...)
b = append(b, sep)
}
return xxhash.Sum64(b)
}
// HashForLabels returns a hash value for the labels matching the provided names.
// 'names' have to be sorted in ascending order.
func (ls Labels) HashForLabels(b []byte, names ...string) (uint64, []byte) {
b = b[:0]
i, j := 0, 0
for i < len(ls) && j < len(names) {
switch {
case names[j] < ls[i].Name:
j++
case ls[i].Name < names[j]:
i++
default:
b = append(b, ls[i].Name...)
b = append(b, sep)
b = append(b, ls[i].Value...)
b = append(b, sep)
i++
j++
}
}
return xxhash.Sum64(b), b
}
// HashWithoutLabels returns a hash value for all labels except those matching
// the provided names.
// 'names' have to be sorted in ascending order.
func (ls Labels) HashWithoutLabels(b []byte, names ...string) (uint64, []byte) {
b = b[:0]
j := 0
for i := range ls {
for j < len(names) && names[j] < ls[i].Name {
j++
}
if ls[i].Name == MetricName || (j < len(names) && ls[i].Name == names[j]) {
continue
}
b = append(b, ls[i].Name...)
b = append(b, sep)
b = append(b, ls[i].Value...)
b = append(b, sep)
}
return xxhash.Sum64(b), b
}
// BytesWithLabels is just as Bytes(), but only for labels matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) BytesWithLabels(buf []byte, names ...string) []byte {
b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep)
i, j := 0, 0
for i < len(ls) && j < len(names) {
switch {
case names[j] < ls[i].Name:
j++
case ls[i].Name < names[j]:
i++
default:
if b.Len() > 1 {
b.WriteByte(sep)
}
b.WriteString(ls[i].Name)
b.WriteByte(sep)
b.WriteString(ls[i].Value)
i++
j++
}
}
return b.Bytes()
}
// BytesWithoutLabels is just as Bytes(), but only for labels not matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) BytesWithoutLabels(buf []byte, names ...string) []byte {
b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep)
j := 0
for i := range ls {
for j < len(names) && names[j] < ls[i].Name {
j++
}
if j < len(names) && ls[i].Name == names[j] {
continue
}
if b.Len() > 1 {
b.WriteByte(sep)
}
b.WriteString(ls[i].Name)
b.WriteByte(sep)
b.WriteString(ls[i].Value)
}
return b.Bytes()
}
// Copy returns a copy of the labels.
func (ls Labels) Copy() Labels {
res := make(Labels, len(ls))
copy(res, ls)
return res
}
// Get returns the value for the label with the given name.
// Returns an empty string if the label doesn't exist.
func (ls Labels) Get(name string) string {
for _, l := range ls {
if l.Name == name {
return l.Value
}
}
return ""
}
// Has returns true if the label with the given name is present.
func (ls Labels) Has(name string) bool {
for _, l := range ls {
if l.Name == name {
return true
}
}
return false
}
// HasDuplicateLabelNames returns whether ls has duplicate label names.
// It assumes that the labelset is sorted.
func (ls Labels) HasDuplicateLabelNames() (string, bool) {
for i, l := range ls {
if i == 0 {
continue
}
if l.Name == ls[i-1].Name {
return l.Name, true
}
}
return "", false
}
// WithoutEmpty returns the labelset without empty labels.
// May return the same labelset.
func (ls Labels) WithoutEmpty() Labels {
for _, v := range ls {
if v.Value != "" {
continue
}
// Do not copy the slice until it's necessary.
els := make(Labels, 0, len(ls)-1)
for _, v := range ls {
if v.Value != "" {
els = append(els, v)
}
}
return els
}
return ls
}
// Equal returns whether the two label sets are equal.
func Equal(ls, o Labels) bool {
if len(ls) != len(o) {
return false
}
for i, l := range ls {
if l != o[i] {
return false
}
}
return true
}
// EmptyLabels returns n empty Labels value, for convenience.
func EmptyLabels() Labels {
return Labels{}
}
// New returns a sorted Labels from the given labels.
// The caller has to guarantee that all label names are unique.
func New(ls ...Label) Labels {
set := make(Labels, 0, len(ls))
set = append(set, ls...)
slices.SortFunc(set, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
return set
}
// FromStrings creates new labels from pairs of strings.
func FromStrings(ss ...string) Labels {
if len(ss)%2 != 0 {
panic("invalid number of strings")
}
res := make(Labels, 0, len(ss)/2)
for i := 0; i < len(ss); i += 2 {
res = append(res, Label{Name: ss[i], Value: ss[i+1]})
}
slices.SortFunc(res, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
return res
}
// Compare compares the two label sets.
// The result will be 0 if a==b, <0 if a < b, and >0 if a > b.
func Compare(a, b Labels) int {
l := len(a)
if len(b) < l {
l = len(b)
}
for i := 0; i < l; i++ {
if a[i].Name != b[i].Name {
if a[i].Name < b[i].Name {
return -1
}
return 1
}
if a[i].Value != b[i].Value {
if a[i].Value < b[i].Value {
return -1
}
return 1
}
}
// If all labels so far were in common, the set with fewer labels comes first.
return len(a) - len(b)
}
// CopyFrom copies labels from b on top of whatever was in ls previously,
// reusing memory or expanding if needed.
func (ls *Labels) CopyFrom(b Labels) {
(*ls) = append((*ls)[:0], b...)
}
// IsEmpty returns true if ls represents an empty set of labels.
func (ls Labels) IsEmpty() bool {
return len(ls) == 0
}
// Range calls f on each label.
func (ls Labels) Range(f func(l Label)) {
for _, l := range ls {
f(l)
}
}
// Validate calls f on each label. If f returns a non-nil error, then it returns that error cancelling the iteration.
func (ls Labels) Validate(f func(l Label) error) error {
for _, l := range ls {
if err := f(l); err != nil {
return err
}
}
return nil
}
// DropMetricName returns Labels with "__name__" removed.
func (ls Labels) DropMetricName() Labels {
for i, l := range ls {
if l.Name == MetricName {
if i == 0 { // Make common case fast with no allocations.
return ls[1:]
}
// Avoid modifying original Labels - use [:i:i] so that left slice would not
// have any spare capacity and append would have to allocate a new slice for the result.
return append(ls[:i:i], ls[i+1:]...)
}
}
return ls
}
// InternStrings calls intern on every string value inside ls, replacing them with what it returns.
func (ls *Labels) InternStrings(intern func(string) string) {
for i, l := range *ls {
(*ls)[i].Name = intern(l.Name)
(*ls)[i].Value = intern(l.Value)
}
}
// ReleaseStrings calls release on every string value inside ls.
func (ls Labels) ReleaseStrings(release func(string)) {
for _, l := range ls {
release(l.Name)
release(l.Value)
}
}
// Builder allows modifying Labels.
type Builder struct {
base Labels
del []string
add []Label
}
// Reset clears all current state for the builder.
func (b *Builder) Reset(base Labels) {
b.base = base
b.del = b.del[:0]
b.add = b.add[:0]
b.base.Range(func(l Label) {
if l.Value == "" {
b.del = append(b.del, l.Name)
}
})
}
// Labels returns the labels from the builder.
// If no modifications were made, the original labels are returned.
func (b *Builder) Labels() Labels {
if len(b.del) == 0 && len(b.add) == 0 {
return b.base
}
expectedSize := len(b.base) + len(b.add) - len(b.del)
if expectedSize < 1 {
expectedSize = 1
}
res := make(Labels, 0, expectedSize)
for _, l := range b.base {
if slices.Contains(b.del, l.Name) || contains(b.add, l.Name) {
continue
}
res = append(res, l)
}
if len(b.add) > 0 { // Base is already in order, so we only need to sort if we add to it.
res = append(res, b.add...)
slices.SortFunc(res, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
}
return res
}
// ScratchBuilder allows efficient construction of a Labels from scratch.
type ScratchBuilder struct {
add Labels
}
// SymbolTable is no-op, just for api parity with dedupelabels.
type SymbolTable struct{}
func NewSymbolTable() *SymbolTable { return nil }
func (t *SymbolTable) Len() int { return 0 }
// NewScratchBuilder creates a ScratchBuilder initialized for Labels with n entries.
func NewScratchBuilder(n int) ScratchBuilder {
return ScratchBuilder{add: make([]Label, 0, n)}
}
// NewBuilderWithSymbolTable creates a Builder, for api parity with dedupelabels.
func NewBuilderWithSymbolTable(_ *SymbolTable) *Builder {
return NewBuilder(EmptyLabels())
}
// NewScratchBuilderWithSymbolTable creates a ScratchBuilder, for api parity with dedupelabels.
func NewScratchBuilderWithSymbolTable(_ *SymbolTable, n int) ScratchBuilder {
return NewScratchBuilder(n)
}
func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) {
// no-op
}
func (b *ScratchBuilder) Reset() {
b.add = b.add[:0]
}
// Add a name/value pair.
// Note if you Add the same name twice you will get a duplicate label, which is invalid.
func (b *ScratchBuilder) Add(name, value string) {
b.add = append(b.add, Label{Name: name, Value: value})
}
// UnsafeAddBytes adds a name/value pair, using []byte instead of string.
// The '-tags stringlabels' version of this function is unsafe, hence the name.
// This version is safe - it copies the strings immediately - but we keep the same name so everything compiles.
func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) {
b.add = append(b.add, Label{Name: string(name), Value: string(value)})
}
// Sort the labels added so far by name.
func (b *ScratchBuilder) Sort() {
slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
}
// Assign is for when you already have a Labels which you want this ScratchBuilder to return.
func (b *ScratchBuilder) Assign(ls Labels) {
b.add = append(b.add[:0], ls...) // Copy on top of our slice, so we don't retain the input slice.
}
// Labels returns the name/value pairs added so far as a Labels object.
// Note: if you want them sorted, call Sort() first.
func (b *ScratchBuilder) Labels() Labels {
// Copy the slice, so the next use of ScratchBuilder doesn't overwrite.
return append([]Label{}, b.add...)
}
// Overwrite the newly-built Labels out to ls.
// Callers must ensure that there are no other references to ls, or any strings fetched from it.
func (b *ScratchBuilder) Overwrite(ls *Labels) {
*ls = append((*ls)[:0], b.add...)
}
// SizeOfLabels returns the approximate space required for n copies of a label.
func SizeOfLabels(name, value string, n uint64) uint64 {
return (uint64(len(name)) + uint64(unsafe.Sizeof(name)) + uint64(len(value)) + uint64(unsafe.Sizeof(value))) * n
}