Check for invalid UTF-8 in lexer strings.

This protects against invalid UTF-8 sneaking
in via label_replace.
This commit is contained in:
Brian Brazil 2017-06-16 15:19:24 +01:00
parent c89f8753f5
commit 6f5d952132
3 changed files with 19 additions and 0 deletions

View file

@ -713,6 +713,8 @@ Loop:
switch l.next() {
case '\\':
lexEscape(l)
case utf8.RuneError:
return l.errorf("invalid UTF-8 rune")
case eof, '\n':
return l.errorf("unterminated quoted string")
case l.stringOpen:
@ -728,6 +730,8 @@ func lexRawString(l *lexer) stateFn {
Loop:
for {
switch l.next() {
case utf8.RuneError:
return l.errorf("invalid UTF-8 rune")
case eof:
return l.errorf("unterminated raw string")
case l.stringOpen:

View file

@ -396,6 +396,13 @@ var tests = []struct {
}, {
input: `]`, fail: true,
},
// Test encoding issues.
{
input: "\"\xff\"", fail: true,
},
{
input: "`\xff`", fail: true,
},
// Test series description.
{
input: `{} _ 1 x .3`,

View file

@ -904,6 +904,10 @@ var testExpr = []struct {
// TODO(fabxc): willingly lexing wrong tokens allows for more precise error
// messages from the parser - consider if this is an option.
errMsg: "unexpected character inside braces: '>'",
}, {
input: "some_metric{a=\"\xff\"}",
fail: true,
errMsg: "parse error at char 15: invalid UTF-8 rune",
}, {
input: `foo{gibberish}`,
fail: true,
@ -1373,6 +1377,10 @@ var testExpr = []struct {
input: "rate(some_metric)",
fail: true,
errMsg: "expected type range vector in call to function \"rate\", got instant vector",
}, {
input: "label_replace(a, `b`, `c\xff`, `d`, `.*`)",
fail: true,
errMsg: "parse error at char 23: invalid UTF-8 rune",
},
// Fuzzing regression tests.
{