prometheus/model/labels/labels_stringlabels.go
Bryan Boreham 14b4fbc2ff labels: make InternStrings a no-op for stringlabels version
The current implementation of `InternStrings` will only save memory
when the whole set of labels is identical to one already seen, and this
cannot happen in the one place it is called from in Prometheus,
remote-write, which already detects identical series.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
2024-01-29 18:54:54 +00:00

665 lines
18 KiB
Go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build stringlabels
package labels
import (
"reflect"
"strings"
"unsafe"
"github.com/cespare/xxhash/v2"
"golang.org/x/exp/slices"
)
// Labels is implemented by a single flat string holding name/value pairs.
// Each name and value is preceded by its length in varint encoding.
// Names are in order.
type Labels struct {
data string
}
func decodeSize(data string, index int) (int, int) {
// Fast-path for common case of a single byte, value 0..127.
b := data[index]
index++
if b < 0x80 {
return int(b), index
}
size := int(b & 0x7F)
for shift := uint(7); ; shift += 7 {
// Just panic if we go of the end of data, since all Labels strings are constructed internally and
// malformed data indicates a bug, or memory corruption.
b := data[index]
index++
size |= int(b&0x7F) << shift
if b < 0x80 {
break
}
}
return size, index
}
func decodeString(data string, index int) (string, int) {
var size int
size, index = decodeSize(data, index)
return data[index : index+size], index + size
}
// Bytes returns ls as a byte slice.
// It uses non-printing characters and so should not be used for printing.
func (ls Labels) Bytes(buf []byte) []byte {
if cap(buf) < len(ls.data) {
buf = make([]byte, len(ls.data))
} else {
buf = buf[:len(ls.data)]
}
copy(buf, ls.data)
return buf
}
// IsZero implements yaml.IsZeroer - if we don't have this then 'omitempty' fields are always omitted.
func (ls Labels) IsZero() bool {
return len(ls.data) == 0
}
// MatchLabels returns a subset of Labels that matches/does not match with the provided label names based on the 'on' boolean.
// If on is set to true, it returns the subset of labels that match with the provided label names and its inverse when 'on' is set to false.
// TODO: This is only used in printing an error message
func (ls Labels) MatchLabels(on bool, names ...string) Labels {
b := NewBuilder(ls)
if on {
b.Keep(names...)
} else {
b.Del(MetricName)
b.Del(names...)
}
return b.Labels()
}
// Hash returns a hash value for the label set.
// Note: the result is not guaranteed to be consistent across different runs of Prometheus.
func (ls Labels) Hash() uint64 {
return xxhash.Sum64(yoloBytes(ls.data))
}
// HashForLabels returns a hash value for the labels matching the provided names.
// 'names' have to be sorted in ascending order.
func (ls Labels) HashForLabels(b []byte, names ...string) (uint64, []byte) {
b = b[:0]
j := 0
for i := 0; i < len(ls.data); {
var name, value string
name, i = decodeString(ls.data, i)
value, i = decodeString(ls.data, i)
for j < len(names) && names[j] < name {
j++
}
if j == len(names) {
break
}
if name == names[j] {
b = append(b, name...)
b = append(b, seps[0])
b = append(b, value...)
b = append(b, seps[0])
}
}
return xxhash.Sum64(b), b
}
// HashWithoutLabels returns a hash value for all labels except those matching
// the provided names.
// 'names' have to be sorted in ascending order.
func (ls Labels) HashWithoutLabels(b []byte, names ...string) (uint64, []byte) {
b = b[:0]
j := 0
for i := 0; i < len(ls.data); {
var name, value string
name, i = decodeString(ls.data, i)
value, i = decodeString(ls.data, i)
for j < len(names) && names[j] < name {
j++
}
if name == MetricName || (j < len(names) && name == names[j]) {
continue
}
b = append(b, name...)
b = append(b, seps[0])
b = append(b, value...)
b = append(b, seps[0])
}
return xxhash.Sum64(b), b
}
// BytesWithLabels is just as Bytes(), but only for labels matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) BytesWithLabels(buf []byte, names ...string) []byte {
b := buf[:0]
j := 0
for pos := 0; pos < len(ls.data); {
lName, newPos := decodeString(ls.data, pos)
_, newPos = decodeString(ls.data, newPos)
for j < len(names) && names[j] < lName {
j++
}
if j == len(names) {
break
}
if lName == names[j] {
b = append(b, ls.data[pos:newPos]...)
}
pos = newPos
}
return b
}
// BytesWithoutLabels is just as Bytes(), but only for labels not matching names.
// 'names' have to be sorted in ascending order.
func (ls Labels) BytesWithoutLabels(buf []byte, names ...string) []byte {
b := buf[:0]
j := 0
for pos := 0; pos < len(ls.data); {
lName, newPos := decodeString(ls.data, pos)
_, newPos = decodeString(ls.data, newPos)
for j < len(names) && names[j] < lName {
j++
}
if j == len(names) || lName != names[j] {
b = append(b, ls.data[pos:newPos]...)
}
pos = newPos
}
return b
}
// Copy returns a copy of the labels.
func (ls Labels) Copy() Labels {
buf := append([]byte{}, ls.data...)
return Labels{data: yoloString(buf)}
}
// Get returns the value for the label with the given name.
// Returns an empty string if the label doesn't exist.
func (ls Labels) Get(name string) string {
if name == "" { // Avoid crash in loop if someone asks for "".
return "" // Prometheus does not store blank label names.
}
for i := 0; i < len(ls.data); {
var size int
size, i = decodeSize(ls.data, i)
if ls.data[i] == name[0] {
lName := ls.data[i : i+size]
i += size
if lName == name {
lValue, _ := decodeString(ls.data, i)
return lValue
}
} else {
if ls.data[i] > name[0] { // Stop looking if we've gone past.
break
}
i += size
}
size, i = decodeSize(ls.data, i)
i += size
}
return ""
}
// Has returns true if the label with the given name is present.
func (ls Labels) Has(name string) bool {
if name == "" { // Avoid crash in loop if someone asks for "".
return false // Prometheus does not store blank label names.
}
for i := 0; i < len(ls.data); {
var size int
size, i = decodeSize(ls.data, i)
if ls.data[i] == name[0] {
lName := ls.data[i : i+size]
i += size
if lName == name {
return true
}
} else {
if ls.data[i] > name[0] { // Stop looking if we've gone past.
break
}
i += size
}
size, i = decodeSize(ls.data, i)
i += size
}
return false
}
// HasDuplicateLabelNames returns whether ls has duplicate label names.
// It assumes that the labelset is sorted.
func (ls Labels) HasDuplicateLabelNames() (string, bool) {
var lName, prevName string
for i := 0; i < len(ls.data); {
lName, i = decodeString(ls.data, i)
_, i = decodeString(ls.data, i)
if lName == prevName {
return lName, true
}
prevName = lName
}
return "", false
}
// WithoutEmpty returns the labelset without empty labels.
// May return the same labelset.
func (ls Labels) WithoutEmpty() Labels {
for pos := 0; pos < len(ls.data); {
_, newPos := decodeString(ls.data, pos)
lValue, newPos := decodeString(ls.data, newPos)
if lValue != "" {
pos = newPos
continue
}
// Do not copy the slice until it's necessary.
// TODO: could optimise the case where all blanks are at the end.
// Note: we size the new buffer on the assumption there is exactly one blank value.
buf := make([]byte, pos, pos+(len(ls.data)-newPos))
copy(buf, ls.data[:pos]) // copy the initial non-blank labels
pos = newPos // move past the first blank value
for pos < len(ls.data) {
var newPos int
_, newPos = decodeString(ls.data, pos)
lValue, newPos = decodeString(ls.data, newPos)
if lValue != "" {
buf = append(buf, ls.data[pos:newPos]...)
}
pos = newPos
}
return Labels{data: yoloString(buf)}
}
return ls
}
// Equal returns whether the two label sets are equal.
func Equal(ls, o Labels) bool {
return ls.data == o.data
}
// EmptyLabels returns an empty Labels value, for convenience.
func EmptyLabels() Labels {
return Labels{}
}
func yoloString(b []byte) string {
return *((*string)(unsafe.Pointer(&b)))
}
func yoloBytes(s string) (b []byte) {
*(*string)(unsafe.Pointer(&b)) = s
(*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s)
return
}
// New returns a sorted Labels from the given labels.
// The caller has to guarantee that all label names are unique.
func New(ls ...Label) Labels {
slices.SortFunc(ls, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
size := labelsSize(ls)
buf := make([]byte, size)
marshalLabelsToSizedBuffer(ls, buf)
return Labels{data: yoloString(buf)}
}
// FromStrings creates new labels from pairs of strings.
func FromStrings(ss ...string) Labels {
if len(ss)%2 != 0 {
panic("invalid number of strings")
}
ls := make([]Label, 0, len(ss)/2)
for i := 0; i < len(ss); i += 2 {
ls = append(ls, Label{Name: ss[i], Value: ss[i+1]})
}
return New(ls...)
}
// Compare compares the two label sets.
// The result will be 0 if a==b, <0 if a < b, and >0 if a > b.
func Compare(a, b Labels) int {
// Find the first byte in the string where a and b differ.
shorter, longer := a.data, b.data
if len(b.data) < len(a.data) {
shorter, longer = b.data, a.data
}
i := 0
// First, go 8 bytes at a time. Data strings are expected to be 8-byte aligned.
sp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&shorter)).Data)
lp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&longer)).Data)
for ; i < len(shorter)-8; i += 8 {
if *(*uint64)(unsafe.Add(sp, i)) != *(*uint64)(unsafe.Add(lp, i)) {
break
}
}
// Now go 1 byte at a time.
for ; i < len(shorter); i++ {
if shorter[i] != longer[i] {
break
}
}
if i == len(shorter) {
// One Labels was a prefix of the other; the set with fewer labels compares lower.
return len(a.data) - len(b.data)
}
// Now we know that there is some difference before the end of a and b.
// Go back through the fields and find which field that difference is in.
firstCharDifferent := i
for i = 0; ; {
size, nextI := decodeSize(a.data, i)
if nextI+size > firstCharDifferent {
break
}
i = nextI + size
}
// Difference is inside this entry.
aStr, _ := decodeString(a.data, i)
bStr, _ := decodeString(b.data, i)
if aStr < bStr {
return -1
}
return +1
}
// Copy labels from b on top of whatever was in ls previously, reusing memory or expanding if needed.
func (ls *Labels) CopyFrom(b Labels) {
ls.data = b.data // strings are immutable
}
// IsEmpty returns true if ls represents an empty set of labels.
func (ls Labels) IsEmpty() bool {
return len(ls.data) == 0
}
// Len returns the number of labels; it is relatively slow.
func (ls Labels) Len() int {
count := 0
for i := 0; i < len(ls.data); {
var size int
size, i = decodeSize(ls.data, i)
i += size
size, i = decodeSize(ls.data, i)
i += size
count++
}
return count
}
// Range calls f on each label.
func (ls Labels) Range(f func(l Label)) {
for i := 0; i < len(ls.data); {
var lName, lValue string
lName, i = decodeString(ls.data, i)
lValue, i = decodeString(ls.data, i)
f(Label{Name: lName, Value: lValue})
}
}
// Validate calls f on each label. If f returns a non-nil error, then it returns that error cancelling the iteration.
func (ls Labels) Validate(f func(l Label) error) error {
for i := 0; i < len(ls.data); {
var lName, lValue string
lName, i = decodeString(ls.data, i)
lValue, i = decodeString(ls.data, i)
err := f(Label{Name: lName, Value: lValue})
if err != nil {
return err
}
}
return nil
}
// DropMetricName returns Labels with "__name__" removed.
func (ls Labels) DropMetricName() Labels {
for i := 0; i < len(ls.data); {
lName, i2 := decodeString(ls.data, i)
size, i2 := decodeSize(ls.data, i2)
i2 += size
if lName == MetricName {
if i == 0 { // Make common case fast with no allocations.
ls.data = ls.data[i2:]
} else {
ls.data = ls.data[:i] + ls.data[i2:]
}
break
} else if lName[0] > MetricName[0] { // Stop looking if we've gone past.
break
}
i = i2
}
return ls
}
// InternStrings is a no-op because it would only save when the whole set of labels is identical.
func (ls *Labels) InternStrings(intern func(string) string) {
}
// ReleaseStrings is a no-op for the same reason as InternStrings.
func (ls Labels) ReleaseStrings(release func(string)) {
}
// Labels returns the labels from the builder.
// If no modifications were made, the original labels are returned.
func (b *Builder) Labels() Labels {
if len(b.del) == 0 && len(b.add) == 0 {
return b.base
}
slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
slices.Sort(b.del)
a, d := 0, 0
bufSize := len(b.base.data) + labelsSize(b.add)
buf := make([]byte, 0, bufSize)
for pos := 0; pos < len(b.base.data); {
oldPos := pos
var lName string
lName, pos = decodeString(b.base.data, pos)
_, pos = decodeString(b.base.data, pos)
for d < len(b.del) && b.del[d] < lName {
d++
}
if d < len(b.del) && b.del[d] == lName {
continue // This label has been deleted.
}
for ; a < len(b.add) && b.add[a].Name < lName; a++ {
buf = appendLabelTo(buf, &b.add[a]) // Insert label that was not in the base set.
}
if a < len(b.add) && b.add[a].Name == lName {
buf = appendLabelTo(buf, &b.add[a])
a++
continue // This label has been replaced.
}
buf = append(buf, b.base.data[oldPos:pos]...)
}
// We have come to the end of the base set; add any remaining labels.
for ; a < len(b.add); a++ {
buf = appendLabelTo(buf, &b.add[a])
}
return Labels{data: yoloString(buf)}
}
func marshalLabelsToSizedBuffer(lbls []Label, data []byte) int {
i := len(data)
for index := len(lbls) - 1; index >= 0; index-- {
size := marshalLabelToSizedBuffer(&lbls[index], data[:i])
i -= size
}
return len(data) - i
}
func marshalLabelToSizedBuffer(m *Label, data []byte) int {
i := len(data)
i -= len(m.Value)
copy(data[i:], m.Value)
i = encodeSize(data, i, len(m.Value))
i -= len(m.Name)
copy(data[i:], m.Name)
i = encodeSize(data, i, len(m.Name))
return len(data) - i
}
func sizeVarint(x uint64) (n int) {
// Most common case first
if x < 1<<7 {
return 1
}
if x >= 1<<56 {
return 9
}
if x >= 1<<28 {
x >>= 28
n = 4
}
if x >= 1<<14 {
x >>= 14
n += 2
}
if x >= 1<<7 {
n++
}
return n + 1
}
func encodeVarint(data []byte, offset int, v uint64) int {
offset -= sizeVarint(v)
base := offset
for v >= 1<<7 {
data[offset] = uint8(v&0x7f | 0x80)
v >>= 7
offset++
}
data[offset] = uint8(v)
return base
}
// Special code for the common case that a size is less than 128
func encodeSize(data []byte, offset, v int) int {
if v < 1<<7 {
offset--
data[offset] = uint8(v)
return offset
}
return encodeVarint(data, offset, uint64(v))
}
func labelsSize(lbls []Label) (n int) {
// we just encode name/value/name/value, without any extra tags or length bytes
for _, e := range lbls {
n += labelSize(&e)
}
return n
}
func labelSize(m *Label) (n int) {
// strings are encoded as length followed by contents.
l := len(m.Name)
n += l + sizeVarint(uint64(l))
l = len(m.Value)
n += l + sizeVarint(uint64(l))
return n
}
func appendLabelTo(buf []byte, m *Label) []byte {
size := labelSize(m)
sizeRequired := len(buf) + size
if cap(buf) >= sizeRequired {
buf = buf[:sizeRequired]
} else {
bufSize := cap(buf)
// Double size of buffer each time it needs to grow, to amortise copying cost.
for bufSize < sizeRequired {
bufSize = bufSize*2 + 1
}
newBuf := make([]byte, sizeRequired, bufSize)
copy(newBuf, buf)
buf = newBuf
}
marshalLabelToSizedBuffer(m, buf)
return buf
}
// ScratchBuilder allows efficient construction of a Labels from scratch.
type ScratchBuilder struct {
add []Label
output Labels
overwriteBuffer []byte
}
// NewScratchBuilder creates a ScratchBuilder initialized for Labels with n entries.
func NewScratchBuilder(n int) ScratchBuilder {
return ScratchBuilder{add: make([]Label, 0, n)}
}
func (b *ScratchBuilder) Reset() {
b.add = b.add[:0]
b.output = EmptyLabels()
}
// Add a name/value pair.
// Note if you Add the same name twice you will get a duplicate label, which is invalid.
func (b *ScratchBuilder) Add(name, value string) {
b.add = append(b.add, Label{Name: name, Value: value})
}
// Add a name/value pair, using []byte instead of string to reduce memory allocations.
// The values must remain live until Labels() is called.
func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) {
b.add = append(b.add, Label{Name: yoloString(name), Value: yoloString(value)})
}
// Sort the labels added so far by name.
func (b *ScratchBuilder) Sort() {
slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) })
}
// Assign is for when you already have a Labels which you want this ScratchBuilder to return.
func (b *ScratchBuilder) Assign(l Labels) {
b.output = l
}
// Labels returns the name/value pairs added as a Labels object. Calling Add() after Labels() has no effect.
// Note: if you want them sorted, call Sort() first.
func (b *ScratchBuilder) Labels() Labels {
if b.output.IsEmpty() {
size := labelsSize(b.add)
buf := make([]byte, size)
marshalLabelsToSizedBuffer(b.add, buf)
b.output = Labels{data: yoloString(buf)}
}
return b.output
}
// Write the newly-built Labels out to ls, reusing an internal buffer.
// Callers must ensure that there are no other references to ls, or any strings fetched from it.
func (b *ScratchBuilder) Overwrite(ls *Labels) {
size := labelsSize(b.add)
if size <= cap(b.overwriteBuffer) {
b.overwriteBuffer = b.overwriteBuffer[:size]
} else {
b.overwriteBuffer = make([]byte, size)
}
marshalLabelsToSizedBuffer(b.add, b.overwriteBuffer)
ls.data = yoloString(b.overwriteBuffer)
}