Refactor toNormalisedLower: shorter and slightly faster. (#14299)

Refactor toNormalisedLower: shorter and slightly faster

Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
This commit is contained in:
Oleg Zaytsev 2024-06-18 11:57:37 +02:00 committed by GitHub
parent 6572b1fe63
commit 4f78cc809c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 18 additions and 35 deletions

View file

@ -18,6 +18,7 @@ import (
"encoding/json" "encoding/json"
"slices" "slices"
"strconv" "strconv"
"unsafe"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
) )
@ -215,3 +216,7 @@ func contains(s []Label, n string) bool {
} }
return false return false
} }
// yoloString converts b to a string without copying: the returned string
// shares b's backing array. The caller must not modify b while the returned
// string is still in use, since Go code assumes strings are immutable.
// A nil or empty slice yields the empty string.
func yoloString(b []byte) string {
	// unsafe.String is the supported way to build a string over existing
	// bytes; it avoids relying on the slice and string headers sharing a layout.
	return unsafe.String(unsafe.SliceData(b), len(b))
}

View file

@ -20,7 +20,6 @@ import (
"slices" "slices"
"strings" "strings"
"sync" "sync"
"unsafe"
"github.com/cespare/xxhash/v2" "github.com/cespare/xxhash/v2"
) )
@ -426,10 +425,6 @@ func EmptyLabels() Labels {
return Labels{} return Labels{}
} }
// yoloString converts b to a string without copying: the returned string
// shares b's backing array. The caller must not modify b while the returned
// string is still in use, since Go code assumes strings are immutable.
// A nil or empty slice yields the empty string.
func yoloString(b []byte) string {
	// unsafe.String is the supported way to build a string over existing
	// bytes; it avoids relying on the slice and string headers sharing a layout.
	return unsafe.String(unsafe.SliceData(b), len(b))
}
// New returns a sorted Labels from the given labels. // New returns a sorted Labels from the given labels.
// The caller has to guarantee that all label names are unique. // The caller has to guarantee that all label names are unique.
// Note this function is not efficient; should not be used in performance-critical places. // Note this function is not efficient; should not be used in performance-critical places.

View file

@ -299,11 +299,6 @@ func Equal(ls, o Labels) bool {
func EmptyLabels() Labels { func EmptyLabels() Labels {
return Labels{} return Labels{}
} }
// yoloString converts b to a string without copying: the returned string
// shares b's backing array. The caller must not modify b while the returned
// string is still in use, since Go code assumes strings are immutable.
// A nil or empty slice yields the empty string.
func yoloString(b []byte) string {
	// unsafe.String is the supported way to build a string over existing
	// bytes; it avoids relying on the slice and string headers sharing a layout.
	return unsafe.String(unsafe.SliceData(b), len(b))
}
func yoloBytes(s string) (b []byte) { func yoloBytes(s string) (b []byte) {
*(*string)(unsafe.Pointer(&b)) = s *(*string)(unsafe.Pointer(&b)) = s
(*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s) (*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s)

View file

@ -798,39 +798,23 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {
// toNormalisedLower normalises the input string using "Unicode Normalization Form KD" and then converts // toNormalisedLower normalises the input string using "Unicode Normalization Form KD" and then converts
// it to lower case. // it to lower case.
func toNormalisedLower(s string) string { func toNormalisedLower(s string) string {
// Check if the string is all ASCII chars and convert any upper case character to lower case character. var buf []byte
isASCII := true
var (
b strings.Builder
pos int
)
b.Grow(len(s))
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
c := s[i] c := s[i]
if isASCII && c >= utf8.RuneSelf { if c >= utf8.RuneSelf {
isASCII = false return strings.Map(unicode.ToLower, norm.NFKD.String(s))
break
} }
if 'A' <= c && c <= 'Z' { if 'A' <= c && c <= 'Z' {
c += 'a' - 'A' if buf == nil {
if pos < i { buf = []byte(s)
b.WriteString(s[pos:i])
} }
b.WriteByte(c) buf[i] = c + 'a' - 'A'
pos = i + 1
} }
} }
if pos < len(s) { if buf == nil {
b.WriteString(s[pos:]) return s
} }
return yoloString(buf)
// Optimize for ASCII-only strings. In this case we don't have to do any normalization.
if isASCII {
return b.String()
}
// Normalise and convert to lower.
return strings.Map(unicode.ToLower, norm.NFKD.String(b.String()))
} }
// anyStringWithoutNewlineMatcher is a stringMatcher which matches any string // anyStringWithoutNewlineMatcher is a stringMatcher which matches any string

View file

@ -1209,6 +1209,10 @@ func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatch
func TestToNormalisedLower(t *testing.T) { func TestToNormalisedLower(t *testing.T) {
testCases := map[string]string{ testCases := map[string]string{
"foo": "foo", "foo": "foo",
"FOO": "foo",
"Foo": "foo",
"foO": "foo",
"fOo": "foo",
"AAAAAAAAAAAAAAAAAAAAAAAA": "aaaaaaaaaaaaaaaaaaaaaaaa", "AAAAAAAAAAAAAAAAAAAAAAAA": "aaaaaaaaaaaaaaaaaaaaaaaa",
"cccccccccccccccccccccccC": "cccccccccccccccccccccccc", "cccccccccccccccccccccccC": "cccccccccccccccccccccccc",
"ſſſſſſſſſſſſſſſſſſſſſſſſS": "sssssssssssssssssssssssss", "ſſſſſſſſſſſſſſſſſſſſſſſſS": "sssssssssssssssssssssssss",