Add labels.MapLabels() for setting mapped labels

This makes building labels faster by having a fast lookup for string->index path via a map.
Since we now need to populate both the slice that maps index->string and a map that gives us string->index,
we add a labels.MapLabels() function which handles updating both static mappings.

Signed-off-by: Lukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Lukasz Mierzwa 2025-02-21 10:22:26 +00:00
parent 24dd39bc57
commit cf9d3a0db5
2 changed files with 121 additions and 89 deletions

View file

@ -101,9 +101,8 @@ func selectBlockStringsToMap(block *tsdb.Block) ([]string, error) {
})
mappedLabels := make([]string, 0, 256)
mappedLabels = append(mappedLabels, "") // We must always store empty string.
for i, c := range costs {
if i > 254 {
if i >= 256 {
break
}
mappedLabels = append(mappedLabels, c.name)
@ -132,6 +131,6 @@ func mapCommonLabelSymbols(db *tsdb.DB, logger *slog.Logger) error {
return err
}
logger.Info("Mapped common label strings", slog.Int("count", len(mappedLabels)))
labels.MappedLabels = mappedLabels
labels.MapLabels(mappedLabels)
return nil
}

View file

@ -23,9 +23,41 @@ import (
"github.com/cespare/xxhash/v2"
)
var (
// List of labels that should be mapped to a single byte value.
// Obviously can't have more than 256 here.
var MappedLabels = []string{
mappedLabels = []string{}
mappedLabelIndex = map[string]byte{}
)
// MapLabels takes a list of strings that should use a single byte storage
// inside labels, making them use as little memory as possible.
// Since we use a single byte mapping we can only have 256 such strings.
//
// We MUST store empty string ("") as one of the values here. If it is not
// among the first 256 names passed into MapLabels() then it will be injected
// at the front of the list, pushing out the last name if the list is full.
//
// If you pass more strings than 256 then extra strings will be ignored.
// The slice passed in by the caller is never modified.
func MapLabels(names []string) {
	// A mapped string is identified by a single byte, so 256 is the hard limit.
	const maxMapped = 256

	// Drop the excess first, so that the empty string check below only looks
	// at names that will actually be stored. Checking the full input would
	// miss the case where "" is only present past the cut-off point.
	if len(names) > maxMapped {
		names = names[:maxMapped]
	}

	// We must always store empty string. Push it to the front if not present,
	// making room for it when the list is already full.
	if !slices.Contains(names, "") {
		if len(names) == maxMapped {
			names = names[:maxMapped-1]
		}
		names = append([]string{""}, names...)
	}

	// Rebuild both directions of the mapping: index->string and string->index.
	mappedLabels = make([]string, 0, maxMapped)
	mappedLabelIndex = make(map[string]byte, maxMapped)
	for i, name := range names {
		mappedLabels = append(mappedLabels, name)
		// i is always < maxMapped here, so the conversion cannot overflow.
		mappedLabelIndex[name] = byte(i)
	}
}
func init() {
names := []string{
// Empty string, this must be present here.
"",
// These label names are always present on every time series.
@ -106,6 +138,8 @@ var MappedLabels = []string{
"node_filesystem_files",
"node_filesystem_avail_bytes",
}
MapLabels(names)
}
// Labels is implemented by a single flat string holding name/value pairs.
// Each name and value is preceded by its length in varint encoding.
@ -144,15 +178,14 @@ func decodeString(data string, index int) (string, int) {
size, index, mapped = decodeSize(data, index)
if mapped {
b := data[index]
return MappedLabels[int(b)], index + size
return mappedLabels[int(b)], index + size
}
return data[index : index+size], index + size
}
func encodeShortString(s string) (int, byte) {
i := slices.Index(MappedLabels, s)
if i >= 0 {
return 0, byte(i)
if i, ok := mappedLabelIndex[s]; ok {
return 0, i
}
return len(s), 0
}