Support escape sequences in strings and add raw strings.

This adapts some functionality from the Go standard library for string literal lexing and unquoting/unescaping. The following string types are now supported: (1) Double- or single-quoted strings: these support all escape sequences that Go supports in double-quoted string literals. The difference is that Prometheus also has single-quoted strings (instead of single-quoted runes in Go). Raw newlines are not allowed. (2) Backtick-quoted raw strings: strings quoted in backticks are treated as raw strings just like in Go and may contain raw newlines and other special characters directly. Fixes https://github.com/prometheus/prometheus/issues/1122. Fixes https://github.com/prometheus/prometheus/issues/1121.
2024-11-09 23:24:05 -08:00 · 2015-09-30 21:27:08 +02:00 · 2015-09-30 21:27:08 +02:00 · 5df52e2874
parent c5f927bd65
commit 5df52e2874
6 changed files with 161 additions and 30 deletions
--- a/promql/lex.go
+++ b/promql/lex.go
@ -16,6 +16,7 @@ package promql
 import (
 	"fmt"
 	"strings"
+	"unicode"
 	"unicode/utf8"
 )

@ -465,6 +466,9 @@ func lexStatements(l *lexer) stateFn {
 	case r == '"' || r == '\'':
 		l.stringOpen = r
 		return lexString
+	case r == '`':
+		l.stringOpen = r
+		return lexRawString
 	case isAlpha(r) || r == ':':
 		l.backup()
 		return lexKeywordOrIdentifier
@ -523,6 +527,9 @@ func lexInsideBraces(l *lexer) stateFn {
 	case r == '"' || r == '\'':
 		l.stringOpen = r
 		return lexString
+	case r == '`':
+		l.stringOpen = r
+		return lexRawString
 	case r == '=':
 		if l.next() == '~' {
 			l.emit(itemEQLRegex)
@ -583,16 +590,79 @@ func lexValueSequence(l *lexer) stateFn {
 	return lexValueSequence
 }

+// lexEscape scans a string escape sequence. The initial escaping character (\)
+// has already been seen.
+//
+// Problems are reported through l.errorf, and scanning of the escape stops at
+// the first problem found. For a well-formed escape only the runes that belong
+// to the escape sequence are consumed, so the rune that follows it (for
+// instance the closing quote) is left for the caller to read.
+//
+// NOTE: This function as well as the helper function digitVal() and associated
+// tests have been adapted from the corresponding functions in the "go/scanner"
+// package of the Go standard library to work for Prometheus-style strings.
+// None of the actual escaping/quoting logic was changed in this function - it
+// was only modified to integrate with our lexer.
+func lexEscape(l *lexer) {
+	// n is the number of digits still expected, base their radix and max the
+	// largest value the complete escape may encode.
+	var n int
+	var base, max uint32
+
+	ch := l.next()
+	switch ch {
+	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
+		// Single-character escape; nothing more to read.
+		return
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		// Octal escape: ch already holds the first of its three digits.
+		n, base, max = 3, 8, 255
+	case 'x':
+		ch = l.next()
+		n, base, max = 2, 16, 255
+	case 'u':
+		ch = l.next()
+		n, base, max = 4, 16, unicode.MaxRune
+	case 'U':
+		ch = l.next()
+		n, base, max = 8, 16, unicode.MaxRune
+	case eof:
+		l.errorf("escape sequence not terminated")
+		return
+	default:
+		l.errorf("unknown escape sequence %#U", ch)
+		return
+	}
+
+	var x uint32
+	for n > 0 {
+		d := uint32(digitVal(ch))
+		if d >= base {
+			// Report exactly one error and stop; continuing would fold the
+			// invalid digit value into x and emit follow-up errors.
+			if ch == eof {
+				l.errorf("escape sequence not terminated")
+				return
+			}
+			l.errorf("illegal character %#U in escape sequence", ch)
+			return
+		}
+		x = x*base + d
+		n--
+
+		// Don't read past the last digit: the next rune is not part of the
+		// escape sequence and must remain available to the caller.
+		if n > 0 {
+			ch = l.next()
+		}
+	}
+
+	// Reject values above the allowed maximum and the UTF-16 surrogate range.
+	if x > max || 0xD800 <= x && x < 0xE000 {
+		l.errorf("escape sequence is an invalid Unicode code point")
+	}
+}
+
+// digitVal maps a hexadecimal digit rune ('0'-'9', 'a'-'f', 'A'-'F') to its
+// numeric value. Any other rune yields 16, which is larger than every legal
+// digit value.
+func digitVal(ch rune) int {
+	if ch >= '0' && ch <= '9' {
+		return int(ch - '0')
+	}
+	if ch >= 'a' && ch <= 'f' {
+		return 10 + int(ch-'a')
+	}
+	if ch >= 'A' && ch <= 'F' {
+		return 10 + int(ch-'A')
+	}
+	// Not a digit in any supported base.
+	return 16
+}
+
 // lexString scans a quoted string. The initial quote has already been seen.
 func lexString(l *lexer) stateFn {
 Loop:
 	for {
 		switch l.next() {
 		case '\\':
-			if r := l.next(); r != eof && r != '\n' {
-				break
-			}
-			fallthrough
+			lexEscape(l)
 		case eof, '\n':
 			return l.errorf("unterminated quoted string")
 		case l.stringOpen:
@ -603,6 +673,21 @@ Loop:
 	return lexStatements
 }

+// lexRawString scans a raw quoted string whose opening quote has already been
+// consumed. Runes are read until the rune stored in l.stringOpen is seen
+// again, at which point an itemString is emitted and lexing resumes with
+// lexStatements. Hitting eof first is reported as an unterminated raw string.
+func lexRawString(l *lexer) stateFn {
+	for {
+		r := l.next()
+		if r == eof {
+			return l.errorf("unterminated raw string")
+		}
+		if r == l.stringOpen {
+			l.emit(itemString)
+			return lexStatements
+		}
+	}
+}
+
 // lexSpace scans a run of space characters. One space has already been seen.
 func lexSpace(l *lexer) stateFn {
 	for isSpace(l.peek()) {
--- a/promql/parse.go
+++ b/promql/parse.go
@ -43,9 +43,9 @@ type ParseErr struct {

 func (e *ParseErr) Error() string {
 	if e.Line == 0 {
-		return fmt.Sprintf("Parse error at char %d: %s", e.Pos, e.Err)
+		return fmt.Sprintf("parse error at char %d: %s", e.Pos, e.Err)
 	}
-	return fmt.Sprintf("Parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err)
+	return fmt.Sprintf("parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err)
 }

 // ParseStmts parses the input and returns the resulting statements or any occurring error.
@ -401,21 +401,21 @@ Loop:
 				p.errorf("summary must not be defined twice")
 			}
 			hasSum = true
-			sum = trimOne(p.expect(itemString, ctx).val)
+			sum = p.unquoteString(p.expect(itemString, ctx).val)

 		case itemDescription:
 			if hasDesc {
 				p.errorf("description must not be defined twice")
 			}
 			hasDesc = true
-			desc = trimOne(p.expect(itemString, ctx).val)
+			desc = p.unquoteString(p.expect(itemString, ctx).val)

 		case itemRunbook:
 			if hasRunbook {
 				p.errorf("runbook must not be defined twice")
 			}
 			hasRunbook = true
-			runbook = trimOne(p.expect(itemString, ctx).val)
+			runbook = p.unquoteString(p.expect(itemString, ctx).val)

 		default:
 			p.backup()
@ -654,8 +654,7 @@ func (p *parser) primaryExpr() Expr {
 		return &NumberLiteral{model.SampleValue(f)}

 	case t.typ == itemString:
-		s := t.val[1 : len(t.val)-1]
-		return &StringLiteral{s}
+		return &StringLiteral{p.unquoteString(t.val)}

 	case t.typ == itemLeftBrace:
 		// Metric selector without metric name.
@ -843,7 +842,7 @@ func (p *parser) labelMatchers(operators ...itemType) metric.LabelMatchers {
 			p.errorf("operator must be one of %q, is %q", operators, op)
 		}

-		val := trimOne(p.expect(itemString, ctx).val)
+		val := p.unquoteString(p.expect(itemString, ctx).val)

 		// Map the item to the respective match type.
 		var matchType metric.MatchType
@ -1104,6 +1103,14 @@ func (p *parser) checkType(node Node) (typ model.ValueType) {
 	return
 }

+// unquoteString passes the quoted string literal s to strutil.Unquote and
+// returns the result. If unquoting fails, the failure is reported through
+// p.errorf together with the offending literal.
+func (p *parser) unquoteString(s string) string {
+	result, unquoteErr := strutil.Unquote(s)
+	if unquoteErr != nil {
+		p.errorf("error unquoting string %q: %s", s, unquoteErr)
+	}
+	return result
+}
+
 func parseDuration(ds string) (time.Duration, error) {
 	dur, err := strutil.StringToDuration(ds)
 	if err != nil {
@ -1114,14 +1121,3 @@ func parseDuration(ds string) (time.Duration, error) {
 	}
 	return dur, nil
 }
-
-// trimOne removes the first and last character from a string.
-func trimOne(s string) string {
-	if len(s) > 0 {
-		s = s[1:]
-	}
-	if len(s) > 0 {
-		s = s[:len(s)-1]
-	}
-	return s
-}
--- a/promql/parse_test.go
+++ b/promql/parse_test.go
@ -1016,6 +1016,54 @@ var testExpr = []struct {
 		fail:   true,
 		errMsg: `no valid expression found`,
 	},
+	// String quoting and escape sequence interpretation tests.
+	{
+		input: `"double-quoted string \" with escaped quote"`,
+		expected: &StringLiteral{
+			Val: "double-quoted string \" with escaped quote",
+		},
+	}, {
+		input: `'single-quoted string \' with escaped quote'`,
+		expected: &StringLiteral{
+			Val: "single-quoted string ' with escaped quote",
+		},
+	}, {
+		input: "`backtick-quoted string`",
+		expected: &StringLiteral{
+			Val: "backtick-quoted string",
+		},
+	}, {
+		input: `"\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺"`,
+		expected: &StringLiteral{
+			Val: "\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺",
+		},
+	}, {
+		input: `'\a\b\f\n\r\t\v\\\' - \xFF\377\u1234\U00010111\U0001011111☺'`,
+		expected: &StringLiteral{
+			Val: "\a\b\f\n\r\t\v\\' - \xFF\377\u1234\U00010111\U0001011111☺",
+		},
+	}, {
+		input: "`" + `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺` + "`",
+		expected: &StringLiteral{
+			Val: `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺`,
+		},
+	}, {
+		input:  "`\\``",
+		fail:   true,
+		errMsg: "could not parse remaining input",
+	}, {
+		input:  `"\`,
+		fail:   true,
+		errMsg: "escape sequence not terminated",
+	}, {
+		input:  `"\c"`,
+		fail:   true,
+		errMsg: "unknown escape sequence U+0063 'c'",
+	}, {
+		input:  `"\x."`,
+		fail:   true,
+		errMsg: "illegal character U+002E '.' in escape sequence",
+	},
 }

 func TestParseExpressions(t *testing.T) {
--- a/util/strutil/quote.go
+++ b/util/strutil/quote.go
@ -28,7 +28,9 @@ var ErrSyntax = errors.New("invalid syntax")
 // NOTE: This function as well as the necessary helper functions below
 // (unquoteChar, contains, unhex) and associated tests have been adapted from
 // the corresponding functions in the "strconv" package of the Go standard
-// library to work for Prometheus-style strings.
+// library to work for Prometheus-style strings. Go's special-casing for single
+// quotes was removed and single quoted strings are now treated the same as
+// double quoted ones.
 func Unquote(s string) (t string, err error) {
 	n := len(s)
 	if n < 2 {
@ -103,7 +105,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
 		return rune(s[0]), false, s[1:], nil
 	}

-	// hard case: c is backslash
+	// Hard case: c is backslash.
 	if len(s) <= 1 {
 		err = ErrSyntax
 		return
@ -151,7 +153,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
 		}
 		s = s[n:]
 		if c == 'x' {
-			// single-byte string, possibly not UTF-8
+			// Single-byte string, possibly not UTF-8.
 			value = v
 			break
 		}
@ -167,7 +169,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
 			err = ErrSyntax
 			return
 		}
-		for j := 0; j < 2; j++ { // one digit already; two more
+		for j := 0; j < 2; j++ { // One digit already; two more.
 			x := rune(s[j]) - '0'
 			if x < 0 || x > 7 {
 				err = ErrSyntax
--- a/util/strutil/quote_test.go
+++ b/util/strutil/quote_test.go
@ -110,7 +110,7 @@ func TestUnquote(t *testing.T) {
 		}
 	}

-	// run the quote tests too, backward
+	// Run the quote tests too, backward.
 	for _, tt := range quotetests {
 		if in, err := Unquote(tt.out); in != tt.in {
 			t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in)
--- a/web/api/legacy/api_test.go
+++ b/web/api/legacy/api_test.go
@ -53,7 +53,7 @@ func TestQuery(t *testing.T) {
 		{
 			queryStr: "",
 			status:   http.StatusOK,
-			bodyRe:   `{"type":"error","value":"Parse error at char 1: no expression found in input","version":1}`,
+			bodyRe:   `{"type":"error","value":"parse error at char 1: no expression found in input","version":1}`,
 		},
 		{
 			queryStr: "expr=1.4",
@ -83,7 +83,7 @@ func TestQuery(t *testing.T) {
 		{
 			queryStr: "expr=(badexpression",
 			status:   http.StatusOK,
-			bodyRe:   `{"type":"error","value":"Parse error at char 15: unclosed left parenthesis","version":1}`,
+			bodyRe:   `{"type":"error","value":"parse error at char 15: unclosed left parenthesis","version":1}`,
 		},
 	}