// moreThanOneRune reports whether s contains more than one rune.
//
// It does not validate s: a byte that does not start a well-formed UTF-8
// sequence is counted as a rune of width one, exactly as
// utf8.RuneCountInString counts it, so the result is always equal to
// utf8.RuneCountInString(s) > 1.
//
// The function never scans past the first rune, and it avoids decoding
// entirely in the common case where the string starts with an ASCII byte.
func moreThanOneRune(s string) bool {
	// Zero or one byte can hold at most one rune. Exit through this path quickly.
	if len(s) <= 1 {
		return false
	}

	// From here on there are at least two bytes.
	// An ASCII first byte is a complete one-byte rune, so the remaining
	// byte(s) necessarily form at least one more rune.
	if s[0] < utf8.RuneSelf {
		return true
	}

	// Less common case: the first byte starts a multibyte (or malformed)
	// sequence. Decode just that first rune; any bytes left after it form at
	// least one more rune. size is 1 for a malformed or truncated sequence.
	_, size := utf8.DecodeRuneInString(s)
	return size < len(s)
}

// trueMatcher is a stringMatcher which matches any string (always returns true).
type trueMatcher struct{} diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 3a15b52b40..47d3eeb4a2 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -84,7 +84,7 @@ var ( "foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "", "FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo", "10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40", - "foofoo0", "foofoo", + "foofoo0", "foofoo", "😀foo0", // Values matching / not matching the test regexps on long alternations. "zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",