textparse: Add fuzzing and fix bug caught

See https://github.com/cznic/golex/issues/11 for info on the bug

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
This commit is contained in:
Goutham Veeramachaneni 2017-07-07 10:29:38 +02:00
parent 24e9deac71
commit b7eddbcd98
9 changed files with 128 additions and 49 deletions

View file

@ -23,22 +23,22 @@ import (
"github.com/prometheus/prometheus/pkg/value" "github.com/prometheus/prometheus/pkg/value"
) )
const (
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
// Lex is called by the parser generated by "go tool yacc" to obtain each // Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at // token. The method is opened before the matching rules block and closed at
// the end of the file. // the end of the file.
func (l *lexer) Lex() int { func (l *lexer) Lex() int {
const ( l.state = lstateInit
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
s := lstateInit
if l.i >= len(l.b) { if l.i >= len(l.b) {
return eof return eof
@ -60,7 +60,7 @@ M [a-zA-Z_:]
%yyc c %yyc c
%yyn c = l.next() %yyn c = l.next()
%yyt s %yyt l.state
%% %%
@ -69,29 +69,29 @@ M [a-zA-Z_:]
#[^\r\n]*\n l.mstart = l.i #[^\r\n]*\n l.mstart = l.i
[\r\n \t]+ l.mstart = l.i [\r\n \t]+ l.mstart = l.i
{S}({M}|{D})* s = lstateName {S}({M}|{D})* l.state = lstateName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
l.mend = l.i l.mend = l.i
<lstateName>([ \t]*)\{ s = lstateLabels <lstateName>([ \t]*)\{ l.state = lstateLabels
<lstateName>[ \t]+ s = lstateValue <lstateName>[ \t]+ l.state = lstateValue
l.vstart = l.i l.vstart = l.i
<lstateLabels>[ \t]+ <lstateLabels>[ \t]+
<lstateLabels>,?\} s = lstateValue <lstateLabels>,?\} l.state = lstateValue
l.mend = l.i l.mend = l.i
<lstateLabels>(,?[ \t]*) s = lstateLName <lstateLabels>(,?[ \t]*) l.state = lstateLName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
<lstateLName>{S}({L}|{D})* l.offsets = append(l.offsets, l.i) <lstateLName>{S}({L}|{D})* l.offsets = append(l.offsets, l.i)
<lstateLName>[ \t]*= s = lstateLValue <lstateLName>[ \t]*= l.state = lstateLValue
<lstateLValue>[ \t]+ <lstateLValue>[ \t]+
<lstateLValue>\" s = lstateLValueIn <lstateLValue>\" l.state = lstateLValueIn
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
<lstateLValueIn>(\\.|[^\\"]|\0)*\" s = lstateLabels <lstateLValueIn>(\\.|[^\\"])*\" l.state = lstateLabels
if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) { if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) {
l.err = fmt.Errorf("Invalid UTF-8 label value.") l.err = fmt.Errorf("Invalid UTF-8 label value.")
return -1 return -1
@ -100,7 +100,7 @@ M [a-zA-Z_:]
<lstateValue>[ \t]+ l.vstart = l.i <lstateValue>[ \t]+ l.vstart = l.i
<lstateValue>(NaN) l.val = math.Float64frombits(value.NormalNaN) <lstateValue>(NaN) l.val = math.Float64frombits(value.NormalNaN)
s = lstateTimestamp l.state = lstateTimestamp
<lstateValue>[^\n \t\r]+ // We don't parse strictly correct floats as the conversion <lstateValue>[^\n \t\r]+ // We don't parse strictly correct floats as the conversion
// repeats the effort anyway. // repeats the effort anyway.
@ -108,7 +108,7 @@ M [a-zA-Z_:]
if l.err != nil { if l.err != nil {
return -1 return -1
} }
s = lstateTimestamp l.state = lstateTimestamp
<lstateTimestamp>[ \t]+ l.tstart = l.i <lstateTimestamp>[ \t]+ l.tstart = l.i
<lstateTimestamp>{D}+ ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64) <lstateTimestamp>{D}+ ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64)

View file

@ -24,21 +24,22 @@ import (
"github.com/prometheus/prometheus/pkg/value" "github.com/prometheus/prometheus/pkg/value"
) )
const (
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
// Lex is called by the parser generated by "go tool yacc" to obtain each // Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at // token. The method is opened before the matching rules block and closed at
// the end of the file. // the end of the file.
func (l *lexer) Lex() int { func (l *lexer) Lex() int {
const ( l.state = lstateInit
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
s := lstateInit
if l.i >= len(l.b) { if l.i >= len(l.b) {
return eof return eof
@ -51,7 +52,7 @@ func (l *lexer) Lex() int {
yystate0: yystate0:
switch yyt := s; yyt { switch yyt := l.state; yyt {
default: default:
panic(fmt.Errorf(`invalid start condition %d`, yyt)) panic(fmt.Errorf(`invalid start condition %d`, yyt))
case 0: // start condition: INITIAL case 0: // start condition: INITIAL
@ -386,22 +387,26 @@ yystate33:
yystart33: yystart33:
switch { switch {
default: default:
goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' goto yyabort
case c == '"': case c == '"':
goto yystate35 goto yystate35
case c == '\\': case c == '\\':
goto yystate36 goto yystate36
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate34
} }
yystate34: yystate34:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' goto yyabort
case c == '"': case c == '"':
goto yystate35 goto yystate35
case c == '\\': case c == '\\':
goto yystate36 goto yystate36
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate34
} }
yystate35: yystate35:
@ -433,19 +438,19 @@ yyrule3: // [\r\n \t]+
} }
yyrule4: // {S}({M}|{D})* yyrule4: // {S}({M}|{D})*
{ {
s = lstateName l.state = lstateName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
l.mend = l.i l.mend = l.i
goto yystate0 goto yystate0
} }
yyrule5: // ([ \t]*)\{ yyrule5: // ([ \t]*)\{
{ {
s = lstateLabels l.state = lstateLabels
goto yystate0 goto yystate0
} }
yyrule6: // [ \t]+ yyrule6: // [ \t]+
{ {
s = lstateValue l.state = lstateValue
l.vstart = l.i l.vstart = l.i
goto yystate0 goto yystate0
} }
@ -454,13 +459,13 @@ yyrule7: // [ \t]+
goto yystate0 goto yystate0
yyrule8: // ,?\} yyrule8: // ,?\}
{ {
s = lstateValue l.state = lstateValue
l.mend = l.i l.mend = l.i
goto yystate0 goto yystate0
} }
yyrule9: // (,?[ \t]*) yyrule9: // (,?[ \t]*)
{ {
s = lstateLName l.state = lstateLName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
goto yystate0 goto yystate0
} }
@ -471,7 +476,7 @@ yyrule10: // {S}({L}|{D})*
} }
yyrule11: // [ \t]*= yyrule11: // [ \t]*=
{ {
s = lstateLValue l.state = lstateLValue
goto yystate0 goto yystate0
} }
yyrule12: // [ \t]+ yyrule12: // [ \t]+
@ -479,13 +484,13 @@ yyrule12: // [ \t]+
goto yystate0 goto yystate0
yyrule13: // \" yyrule13: // \"
{ {
s = lstateLValueIn l.state = lstateLValueIn
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
goto yystate0 goto yystate0
} }
yyrule14: // (\\.|[^\\"]|\0)*\" yyrule14: // (\\.|[^\\"])*\"
{ {
s = lstateLabels l.state = lstateLabels
if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) { if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) {
l.err = fmt.Errorf("Invalid UTF-8 label value.") l.err = fmt.Errorf("Invalid UTF-8 label value.")
return -1 return -1
@ -501,7 +506,7 @@ yyrule15: // [ \t]+
yyrule16: // (NaN) yyrule16: // (NaN)
{ {
l.val = math.Float64frombits(value.NormalNaN) l.val = math.Float64frombits(value.NormalNaN)
s = lstateTimestamp l.state = lstateTimestamp
goto yystate0 goto yystate0
} }
yyrule17: // [^\n \t\r]+ yyrule17: // [^\n \t\r]+
@ -512,7 +517,7 @@ yyrule17: // [^\n \t\r]+
if l.err != nil { if l.err != nil {
return -1 return -1
} }
s = lstateTimestamp l.state = lstateTimestamp
goto yystate0 goto yystate0
} }
yyrule18: // [ \t]+ yyrule18: // [ \t]+

View file

@ -38,6 +38,8 @@ type lexer struct {
offsets []int offsets []int
mstart, mend int mstart, mend int
nextMstart int nextMstart int
state int
} }
const eof = 0 const eof = 0
@ -49,6 +51,11 @@ func (l *lexer) next() byte {
return eof return eof
} }
c := l.b[l.i] c := l.b[l.i]
// Consume null byte when encountered in label-value.
if c == eof && (l.state == lstateLValueIn || l.state == lstateLValue) {
return l.next()
}
return c return c
} }

View file

@ -163,6 +163,10 @@ func TestParseErrors(t *testing.T) {
input: "a true\n", input: "a true\n",
err: "strconv.ParseFloat: parsing \"true\": invalid syntax", err: "strconv.ParseFloat: parsing \"true\": invalid syntax",
}, },
{
input: "something_weird{problem=\"",
err: "no token found",
},
} }
for _, c := range cases { for _, c := range cases {
@ -174,6 +178,60 @@ func TestParseErrors(t *testing.T) {
} }
} }
func TestNullByteHandling(t *testing.T) {
cases := []struct {
input string
err string
}{
{
input: "null_byte_metric{a=\"abc\x00\"} 1",
err: "",
},
{
input: "a{b=\"\x00ss\"} 1\n",
err: "",
},
{
input: "a{b=\"\x00\"} 1\n",
err: "",
},
{
input: "a{b=\"\x00\"} 1\n",
err: "",
},
{
input: "a{b=\x00\"ssss\"} 1\n",
err: "no token found",
},
{
input: "a{b=\"\x00",
err: "no token found",
},
{
input: "a{b\x00=\"hiih\"} 1",
err: "no token found",
},
{
input: "a\x00{b=\"ddd\"} 1",
err: "no token found",
},
}
for _, c := range cases {
p := New([]byte(c.input))
for p.Next() {
}
if c.err == "" {
require.NoError(t, p.Err())
continue
}
require.Error(t, p.Err())
require.Equal(t, c.err, p.Err().Error())
}
}
const ( const (
testdataSampleCount = 410 testdataSampleCount = 410
) )

View file

@ -0,0 +1 @@
o { quantile = "1.0", a = "b" } 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05

View file

@ -16,6 +16,8 @@
package promql package promql
import "github.com/prometheus/prometheus/pkg/textparse"
// PromQL parser fuzzing instrumentation for use with // PromQL parser fuzzing instrumentation for use with
// https://github.com/dvyukov/go-fuzz. // https://github.com/dvyukov/go-fuzz.
// //
@ -48,8 +50,11 @@ const (
// Note that this is not the parser for the text-based exposition-format; that // Note that this is not the parser for the text-based exposition-format; that
// lives in github.com/prometheus/client_golang/text. // lives in github.com/prometheus/client_golang/text.
func FuzzParseMetric(in []byte) int { func FuzzParseMetric(in []byte) int {
_, err := ParseMetric(string(in)) p := textparse.New(in)
if err == nil { for p.Next() {
}
if p.Err() == nil {
return fuzzInteresting return fuzzInteresting
} }