Refactor toNormalisedLower: shorter and slightly faster. (#14299)

Refactor toNormalisedLower: shorter and slightly faster

Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
This commit is contained in:
Oleg Zaytsev 2024-06-18 11:57:37 +02:00 committed by GitHub
parent 6572b1fe63
commit 4f78cc809c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 18 additions and 35 deletions

View file

@ -18,6 +18,7 @@ import (
"encoding/json"
"slices"
"strconv"
"unsafe"
"github.com/prometheus/common/model"
)
@ -215,3 +216,7 @@ func contains(s []Label, n string) bool {
}
return false
}
// yoloString returns a string that shares b's backing array instead of
// copying it. The caller must not modify b while the returned string is in
// use: Go code assumes string contents never change.
func yoloString(b []byte) string {
	// unsafe.String/unsafe.SliceData (Go 1.20+) are the supported way to build
	// this view; they replace reinterpreting the slice header as a string
	// header through an unsafe.Pointer cast.
	return unsafe.String(unsafe.SliceData(b), len(b))
}

View file

@ -20,7 +20,6 @@ import (
"slices"
"strings"
"sync"
"unsafe"
"github.com/cespare/xxhash/v2"
)
@ -426,10 +425,6 @@ func EmptyLabels() Labels {
return Labels{}
}
// yoloString returns a string that shares b's backing array instead of
// copying it. The caller must not modify b while the returned string is in
// use: Go code assumes string contents never change.
func yoloString(b []byte) string {
	// unsafe.String/unsafe.SliceData (Go 1.20+) are the supported way to build
	// this view; they replace reinterpreting the slice header as a string
	// header through an unsafe.Pointer cast.
	return unsafe.String(unsafe.SliceData(b), len(b))
}
// New returns a sorted Labels from the given labels.
// The caller has to guarantee that all label names are unique.
// Note this function is not efficient; should not be used in performance-critical places.

View file

@ -299,11 +299,6 @@ func Equal(ls, o Labels) bool {
func EmptyLabels() Labels {
return Labels{}
}
// yoloString returns a string that shares b's backing array instead of
// copying it. The caller must not modify b while the returned string is in
// use: Go code assumes string contents never change.
func yoloString(b []byte) string {
	// unsafe.String/unsafe.SliceData (Go 1.20+) are the supported way to build
	// this view; they replace reinterpreting the slice header as a string
	// header through an unsafe.Pointer cast.
	return unsafe.String(unsafe.SliceData(b), len(b))
}
func yoloBytes(s string) (b []byte) {
*(*string)(unsafe.Pointer(&b)) = s
(*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s)

View file

@ -798,39 +798,23 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {
// toNormalisedLower normalises the input string using Unicode Normalization Form KD
// (norm.NFKD) and then converts it to lower case. ASCII-only input needs no
// normalisation and is only lower-cased.
func toNormalisedLower(s string) string {
// Check if the string is all ASCII chars and convert any upper case character to lower case character.
isASCII := true
var (
b strings.Builder
pos int
)
b.Grow(len(s))
var buf []byte
// Walk the input byte by byte; any byte >= utf8.RuneSelf means the string is not pure ASCII.
for i := 0; i < len(s); i++ {
c := s[i]
if isASCII && c >= utf8.RuneSelf {
isASCII = false
break
if c >= utf8.RuneSelf {
return strings.Map(unicode.ToLower, norm.NFKD.String(s))
}
// ASCII upper-case letter: lower-case it by adding the 'a'-'A' offset.
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
if pos < i {
b.WriteString(s[pos:i])
// Copy the input only once the first upper-case byte is seen.
if buf == nil {
buf = []byte(s)
}
b.WriteByte(c)
pos = i + 1
buf[i] = c + 'a' - 'A'
}
}
if pos < len(s) {
b.WriteString(s[pos:])
// buf was never allocated, so no byte was rewritten: return s unchanged.
if buf == nil {
return s
}
// Optimize for ASCII-only strings. In this case we don't have to do any normalization.
if isASCII {
return b.String()
}
// Normalise and convert to lower.
return strings.Map(unicode.ToLower, norm.NFKD.String(b.String()))
return yoloString(buf)
}
// anyStringWithoutNewlineMatcher is a stringMatcher which matches any string

View file

@ -1209,6 +1209,10 @@ func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatch
func TestToNormalisedLower(t *testing.T) {
testCases := map[string]string{
"foo": "foo",
"FOO": "foo",
"Foo": "foo",
"foO": "foo",
"fOo": "foo",
"AAAAAAAAAAAAAAAAAAAAAAAA": "aaaaaaaaaaaaaaaaaaaaaaaa",
"cccccccccccccccccccccccC": "cccccccccccccccccccccccc",
"ſſſſſſſſſſſſſſſſſſſſſſſſS": "sssssssssssssssssssssssss",