Fix lexer bug treating non-Latin Unicode digits as digits.

Fixes https://github.com/prometheus/prometheus/issues/939
2025-03-05 20:59:13 -08:00 · 2015-07-29 02:11:13 +02:00 · 2015-07-29 02:11:13 +02:00 · 4e4b468fba
parent 0d393de63b
commit 4e4b468fba
2 changed files with 15 additions and 4 deletions
--- a/promql/lex.go
+++ b/promql/lex.go
@ -16,7 +16,6 @@ package promql
 import (
 	"fmt"
 	"strings"
-	"unicode"
 	"unicode/utf8"
 )

@ -458,7 +457,7 @@ func lexStatements(l *lexer) stateFn {
 		} else {
 			l.emit(itemGTR)
 		}
-	case unicode.IsDigit(r) || (r == '.' && unicode.IsDigit(l.peek())):
+	case isDigit(r) || (r == '.' && isDigit(l.peek())):
 		l.backup()
 		return lexNumberOrDuration
 	case r == '"' || r == '\'':
@ -569,7 +568,7 @@ func lexValueSequence(l *lexer) stateFn {
 		l.emit(itemTimes)
 	case r == '_':
 		l.emit(itemBlank)
-	case unicode.IsDigit(r) || (r == '.' && unicode.IsDigit(l.peek())):
+	case isDigit(r) || (r == '.' && isDigit(l.peek())):
 		l.backup()
 		lexNumber(l)
 	case isAlpha(r):
@ -739,7 +738,14 @@ func isEndOfLine(r rune) bool {

 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
 func isAlphaNumeric(r rune) bool {
-	return isAlpha(r) || unicode.IsDigit(r)
+	return isAlpha(r) || isDigit(r)
+}
+
+// isDigit reports whether r is an ASCII decimal digit ('0' through '9'). We
+// cannot use unicode.IsDigit() here because it also classifies non-Latin
+// digits as digits. See https://github.com/prometheus/prometheus/issues/939.
+func isDigit(r rune) bool {
+	return '0' <= r && r <= '9'
 }

 // isAlpha reports whether r is an alphabetic or underscore.
--- a/promql/lex_test.go
+++ b/promql/lex_test.go
@ -99,6 +99,11 @@ var tests = []struct {
 		input:    "0x123",
 		expected: []item{{itemNumber, 0, "0x123"}},
 	},
+	{
+		// See https://github.com/prometheus/prometheus/issues/939.
+		input: ".٩",
+		fail:  true,
+	},
 	// Test duration.
 	{
 		input:    "5s",