textparse: Add fuzzing and fix bug caught

See https://github.com/cznic/golex/issues/11 for info on the bug

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
This commit is contained in:
Goutham Veeramachaneni 2017-07-07 10:29:38 +02:00
parent 24e9deac71
commit b7eddbcd98
9 changed files with 128 additions and 49 deletions

View file

@ -23,22 +23,22 @@ import (
"github.com/prometheus/prometheus/pkg/value"
)
// Lexer start conditions. The active condition is held in l.state (wired up
// via the %yyt directive) and selects which <condition>-prefixed rules apply.
const (
// lstateInit is the initial condition; Lex resets l.state to it on entry.
lstateInit = iota
// lstateName is entered once the leading name token has been matched.
lstateName
// lstateValue is entered after the whitespace following the name, or after
// the closing brace of the label set.
lstateValue
// lstateTimestamp is entered once the sample value has been read.
lstateTimestamp
// lstateLabels is entered after the opening brace and again after each
// label value's closing quote.
lstateLabels
// lstateLName is entered from lstateLabels on an optional comma and
// whitespace.
lstateLName
// lstateLValue is entered after the '=' that follows a label name.
lstateLValue
// lstateLValueIn is entered after the opening quote of a label value.
lstateLValueIn
)
// Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at
// the end of the file.
func (l *lexer) Lex() int {
const (
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
s := lstateInit
l.state = lstateInit
if l.i >= len(l.b) {
return eof
@ -60,7 +60,7 @@ M [a-zA-Z_:]
%yyc c
%yyn c = l.next()
%yyt s
%yyt l.state
%%
@ -69,29 +69,29 @@ M [a-zA-Z_:]
#[^\r\n]*\n l.mstart = l.i
[\r\n \t]+ l.mstart = l.i
{S}({M}|{D})* s = lstateName
{S}({M}|{D})* l.state = lstateName
l.offsets = append(l.offsets, l.i)
l.mend = l.i
<lstateName>([ \t]*)\{ s = lstateLabels
<lstateName>([ \t]*)\{ l.state = lstateLabels
<lstateName>[ \t]+ s = lstateValue
<lstateName>[ \t]+ l.state = lstateValue
l.vstart = l.i
<lstateLabels>[ \t]+
<lstateLabels>,?\} s = lstateValue
<lstateLabels>,?\} l.state = lstateValue
l.mend = l.i
<lstateLabels>(,?[ \t]*) s = lstateLName
<lstateLabels>(,?[ \t]*) l.state = lstateLName
l.offsets = append(l.offsets, l.i)
<lstateLName>{S}({L}|{D})* l.offsets = append(l.offsets, l.i)
<lstateLName>[ \t]*= s = lstateLValue
<lstateLName>[ \t]*= l.state = lstateLValue
<lstateLValue>[ \t]+
<lstateLValue>\" s = lstateLValueIn
<lstateLValue>\" l.state = lstateLValueIn
l.offsets = append(l.offsets, l.i)
<lstateLValueIn>(\\.|[^\\"]|\0)*\" s = lstateLabels
<lstateLValueIn>(\\.|[^\\"])*\" l.state = lstateLabels
if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) {
l.err = fmt.Errorf("Invalid UTF-8 label value.")
return -1
@ -100,7 +100,7 @@ M [a-zA-Z_:]
<lstateValue>[ \t]+ l.vstart = l.i
<lstateValue>(NaN) l.val = math.Float64frombits(value.NormalNaN)
s = lstateTimestamp
l.state = lstateTimestamp
<lstateValue>[^\n \t\r]+ // We don't parse strictly correct floats as the conversion
// repeats the effort anyway.
@ -108,7 +108,7 @@ M [a-zA-Z_:]
if l.err != nil {
return -1
}
s = lstateTimestamp
l.state = lstateTimestamp
<lstateTimestamp>[ \t]+ l.tstart = l.i
<lstateTimestamp>{D}+ ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64)

View file

@ -24,21 +24,22 @@ import (
"github.com/prometheus/prometheus/pkg/value"
)
// Lexer start conditions. Lex switches on l.state at yystate0 to pick the
// matching automaton; a value outside the handled cases panics with
// "invalid start condition".
const (
// lstateInit is the initial condition; Lex resets l.state to it on entry.
lstateInit = iota
// lstateName is set by the rule matching the leading name token.
lstateName
// lstateValue is set after whitespace following the name, or after the
// closing brace of the label set.
lstateValue
// lstateTimestamp is set once the sample value has been read.
lstateTimestamp
// lstateLabels is set after the opening brace and again after each label
// value's closing quote.
lstateLabels
// lstateLName is set from lstateLabels on an optional comma and whitespace.
lstateLName
// lstateLValue is set after the '=' that follows a label name.
lstateLValue
// lstateLValueIn is set after the opening quote of a label value.
lstateLValueIn
)
// Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at
// the end of the file.
func (l *lexer) Lex() int {
const (
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
s := lstateInit
l.state = lstateInit
if l.i >= len(l.b) {
return eof
@ -51,7 +52,7 @@ func (l *lexer) Lex() int {
yystate0:
switch yyt := s; yyt {
switch yyt := l.state; yyt {
default:
panic(fmt.Errorf(`invalid start condition %d`, yyt))
case 0: // start condition: INITIAL
@ -386,22 +387,26 @@ yystate33:
yystart33:
switch {
default:
goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ'
goto yyabort
case c == '"':
goto yystate35
case c == '\\':
goto yystate36
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate34
}
yystate34:
c = l.next()
switch {
default:
goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ'
goto yyabort
case c == '"':
goto yystate35
case c == '\\':
goto yystate36
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate34
}
yystate35:
@ -433,19 +438,19 @@ yyrule3: // [\r\n \t]+
}
yyrule4: // {S}({M}|{D})*
{
s = lstateName
l.state = lstateName
l.offsets = append(l.offsets, l.i)
l.mend = l.i
goto yystate0
}
yyrule5: // ([ \t]*)\{
{
s = lstateLabels
l.state = lstateLabels
goto yystate0
}
yyrule6: // [ \t]+
{
s = lstateValue
l.state = lstateValue
l.vstart = l.i
goto yystate0
}
@ -454,13 +459,13 @@ yyrule7: // [ \t]+
goto yystate0
yyrule8: // ,?\}
{
s = lstateValue
l.state = lstateValue
l.mend = l.i
goto yystate0
}
yyrule9: // (,?[ \t]*)
{
s = lstateLName
l.state = lstateLName
l.offsets = append(l.offsets, l.i)
goto yystate0
}
@ -471,7 +476,7 @@ yyrule10: // {S}({L}|{D})*
}
yyrule11: // [ \t]*=
{
s = lstateLValue
l.state = lstateLValue
goto yystate0
}
yyrule12: // [ \t]+
@ -479,13 +484,13 @@ yyrule12: // [ \t]+
goto yystate0
yyrule13: // \"
{
s = lstateLValueIn
l.state = lstateLValueIn
l.offsets = append(l.offsets, l.i)
goto yystate0
}
yyrule14: // (\\.|[^\\"]|\0)*\"
yyrule14: // (\\.|[^\\"])*\"
{
s = lstateLabels
l.state = lstateLabels
if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) {
l.err = fmt.Errorf("Invalid UTF-8 label value.")
return -1
@ -501,7 +506,7 @@ yyrule15: // [ \t]+
yyrule16: // (NaN)
{
l.val = math.Float64frombits(value.NormalNaN)
s = lstateTimestamp
l.state = lstateTimestamp
goto yystate0
}
yyrule17: // [^\n \t\r]+
@ -512,7 +517,7 @@ yyrule17: // [^\n \t\r]+
if l.err != nil {
return -1
}
s = lstateTimestamp
l.state = lstateTimestamp
goto yystate0
}
yyrule18: // [ \t]+

View file

@ -38,6 +38,8 @@ type lexer struct {
offsets []int
mstart, mend int
nextMstart int
state int
}
const eof = 0
@ -49,6 +51,11 @@ func (l *lexer) next() byte {
return eof
}
c := l.b[l.i]
// Consume null byte when encountered in label-value.
if c == eof && (l.state == lstateLValueIn || l.state == lstateLValue) {
return l.next()
}
return c
}

View file

@ -163,6 +163,10 @@ func TestParseErrors(t *testing.T) {
input: "a true\n",
err: "strconv.ParseFloat: parsing \"true\": invalid syntax",
},
{
input: "something_weird{problem=\"",
err: "no token found",
},
}
for _, c := range cases {
@ -174,6 +178,60 @@ func TestParseErrors(t *testing.T) {
}
}
// TestNullByteHandling checks how the parser treats NUL (\x00) bytes. The
// cases expect a NUL byte inside a quoted label value to parse without error,
// and a NUL byte anywhere else (or an unterminated label value) to fail with
// "no token found".
func TestNullByteHandling(t *testing.T) {
	cases := []struct {
		input string
		err   string // expected error text; empty means parsing must succeed
	}{
		{
			input: "null_byte_metric{a=\"abc\x00\"} 1",
			err:   "",
		},
		{
			input: "a{b=\"\x00ss\"} 1\n",
			err:   "",
		},
		{
			input: "a{b=\"\x00\"} 1\n",
			err:   "",
		},
		{
			input: "a{b=\x00\"ssss\"} 1\n",
			err:   "no token found",
		},
		{
			input: "a{b=\"\x00",
			err:   "no token found",
		},
		{
			input: "a{b\x00=\"hiih\"} 1",
			err:   "no token found",
		},
		{
			input: "a\x00{b=\"ddd\"} 1",
			err:   "no token found",
		},
	}

	for _, c := range cases {
		p := New([]byte(c.input))
		// Drain the parser; only its final error state is checked.
		for p.Next() {
		}

		// Include the offending input in every failure message so a broken
		// case can be identified without re-running under a debugger.
		if c.err == "" {
			require.NoError(t, p.Err(), "input: %q", c.input)
			continue
		}

		require.Error(t, p.Err(), "input: %q", c.input)
		require.Equal(t, c.err, p.Err().Error(), "input: %q", c.input)
	}
}
const (
testdataSampleCount = 410
)

View file

@ -0,0 +1 @@
o { quantile = "1.0", a = "b" } 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05

View file

@ -16,6 +16,8 @@
package promql
import "github.com/prometheus/prometheus/pkg/textparse"
// PromQL parser fuzzing instrumentation for use with
// https://github.com/dvyukov/go-fuzz.
//
@ -48,8 +50,11 @@ const (
// Note that this is not the parser for the text-based exposition-format; that
// lives in github.com/prometheus/client_golang/text.
func FuzzParseMetric(in []byte) int {
_, err := ParseMetric(string(in))
if err == nil {
p := textparse.New(in)
for p.Next() {
}
if p.Err() == nil {
return fuzzInteresting
}