Refactor toNormalisedLower: shorter and slightly faster. (#14299)

Refactor toNormalisedLower: shorter and slightly faster.

Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
2025-03-05 20:59:13 -08:00 · 2024-06-18 11:57:37 +02:00 · 2024-06-18 11:57:37 +02:00 · 4f78cc809c
parent 6572b1fe63
commit 4f78cc809c
5 changed files with 18 additions and 35 deletions
--- a/model/labels/labels_common.go
+++ b/model/labels/labels_common.go
@@ -18,6 +18,7 @@ import (
 	"encoding/json"
 	"slices"
 	"strconv"
+	"unsafe"

 	"github.com/prometheus/common/model"
 )
@@ -215,3 +216,7 @@ func contains(s []Label, n string) bool {
 	}
 	return false
 }
+
+func yoloString(b []byte) string {
+	return *((*string)(unsafe.Pointer(&b)))
+}
--- a/model/labels/labels_dedupelabels.go
+++ b/model/labels/labels_dedupelabels.go
@@ -20,7 +20,6 @@ import (
 	"slices"
 	"strings"
 	"sync"
-	"unsafe"

 	"github.com/cespare/xxhash/v2"
 )
@@ -426,10 +425,6 @@ func EmptyLabels() Labels {
 	return Labels{}
 }

-func yoloString(b []byte) string {
-	return *((*string)(unsafe.Pointer(&b)))
-}
-
 // New returns a sorted Labels from the given labels.
 // The caller has to guarantee that all label names are unique.
 // Note this function is not efficient; should not be used in performance-critical places.
--- a/model/labels/labels_stringlabels.go
+++ b/model/labels/labels_stringlabels.go
@@ -299,11 +299,6 @@ func Equal(ls, o Labels) bool {
 func EmptyLabels() Labels {
 	return Labels{}
 }
-
-func yoloString(b []byte) string {
-	return *((*string)(unsafe.Pointer(&b)))
-}
-
 func yoloBytes(s string) (b []byte) {
 	*(*string)(unsafe.Pointer(&b)) = s
 	(*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s)
--- a/model/labels/regexp.go
+++ b/model/labels/regexp.go
@@ -798,39 +798,23 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {
 // toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
 // it to lower case.
 func toNormalisedLower(s string) string {
-	// Check if the string is all ASCII chars and convert any upper case character to lower case character.
-	isASCII := true
-	var (
-		b   strings.Builder
-		pos int
-	)
-	b.Grow(len(s))
+	var buf []byte
 	for i := 0; i < len(s); i++ {
 		c := s[i]
-		if isASCII && c >= utf8.RuneSelf {
-			isASCII = false
-			break
+		if c >= utf8.RuneSelf {
+			return strings.Map(unicode.ToLower, norm.NFKD.String(s))
 		}
 		if 'A' <= c && c <= 'Z' {
-			c += 'a' - 'A'
-			if pos < i {
-				b.WriteString(s[pos:i])
+			if buf == nil {
+				buf = []byte(s)
 			}
-			b.WriteByte(c)
-			pos = i + 1
+			buf[i] = c + 'a' - 'A'
 		}
 	}
-	if pos < len(s) {
-		b.WriteString(s[pos:])
+	if buf == nil {
+		return s
 	}
-
-	// Optimize for ASCII-only strings. In this case we don't have to do any normalization.
-	if isASCII {
-		return b.String()
-	}
-
-	// Normalise and convert to lower.
-	return strings.Map(unicode.ToLower, norm.NFKD.String(b.String()))
+	return yoloString(buf)
 }

 // anyStringWithoutNewlineMatcher is a stringMatcher which matches any string
--- a/model/labels/regexp_test.go
+++ b/model/labels/regexp_test.go
@@ -1209,6 +1209,10 @@ func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatch
 func TestToNormalisedLower(t *testing.T) {
 	testCases := map[string]string{
 		"foo":                      "foo",
+		"FOO":                      "foo",
+		"Foo":                      "foo",
+		"foO":                      "foo",
+		"fOo":                      "foo",
 		"AAAAAAAAAAAAAAAAAAAAAAAA": "aaaaaaaaaaaaaaaaaaaaaaaa",
 		"cccccccccccccccccccccccC": "cccccccccccccccccccccccc",
 		"ſſſſſſſſſſſſſſſſſſſſſſſſS": "sssssssssssssssssssssssss",