textparse: Add fuzzing and fix bug caught

See https://github.com/cznic/golex/issues/11 for info on the bug

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
This commit is contained in:
Goutham Veeramachaneni 2017-07-07 10:29:38 +02:00
parent 24e9deac71
commit b7eddbcd98
9 changed files with 128 additions and 49 deletions

View file

@ -23,22 +23,22 @@ import (
"github.com/prometheus/prometheus/pkg/value" "github.com/prometheus/prometheus/pkg/value"
) )
const (
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
// Lex is called by the parser generated by "go tool yacc" to obtain each // Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at // token. The method is opened before the matching rules block and closed at
// the end of the file. // the end of the file.
func (l *lexer) Lex() int { func (l *lexer) Lex() int {
const ( l.state = lstateInit
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
s := lstateInit
if l.i >= len(l.b) { if l.i >= len(l.b) {
return eof return eof
@ -60,7 +60,7 @@ M [a-zA-Z_:]
%yyc c %yyc c
%yyn c = l.next() %yyn c = l.next()
%yyt s %yyt l.state
%% %%
@ -69,29 +69,29 @@ M [a-zA-Z_:]
#[^\r\n]*\n l.mstart = l.i #[^\r\n]*\n l.mstart = l.i
[\r\n \t]+ l.mstart = l.i [\r\n \t]+ l.mstart = l.i
{S}({M}|{D})* s = lstateName {S}({M}|{D})* l.state = lstateName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
l.mend = l.i l.mend = l.i
<lstateName>([ \t]*)\{ s = lstateLabels <lstateName>([ \t]*)\{ l.state = lstateLabels
<lstateName>[ \t]+ s = lstateValue <lstateName>[ \t]+ l.state = lstateValue
l.vstart = l.i l.vstart = l.i
<lstateLabels>[ \t]+ <lstateLabels>[ \t]+
<lstateLabels>,?\} s = lstateValue <lstateLabels>,?\} l.state = lstateValue
l.mend = l.i l.mend = l.i
<lstateLabels>(,?[ \t]*) s = lstateLName <lstateLabels>(,?[ \t]*) l.state = lstateLName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
<lstateLName>{S}({L}|{D})* l.offsets = append(l.offsets, l.i) <lstateLName>{S}({L}|{D})* l.offsets = append(l.offsets, l.i)
<lstateLName>[ \t]*= s = lstateLValue <lstateLName>[ \t]*= l.state = lstateLValue
<lstateLValue>[ \t]+ <lstateLValue>[ \t]+
<lstateLValue>\" s = lstateLValueIn <lstateLValue>\" l.state = lstateLValueIn
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
<lstateLValueIn>(\\.|[^\\"]|\0)*\" s = lstateLabels <lstateLValueIn>(\\.|[^\\"])*\" l.state = lstateLabels
if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) { if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) {
l.err = fmt.Errorf("Invalid UTF-8 label value.") l.err = fmt.Errorf("Invalid UTF-8 label value.")
return -1 return -1
@ -100,7 +100,7 @@ M [a-zA-Z_:]
<lstateValue>[ \t]+ l.vstart = l.i <lstateValue>[ \t]+ l.vstart = l.i
<lstateValue>(NaN) l.val = math.Float64frombits(value.NormalNaN) <lstateValue>(NaN) l.val = math.Float64frombits(value.NormalNaN)
s = lstateTimestamp l.state = lstateTimestamp
<lstateValue>[^\n \t\r]+ // We don't parse strictly correct floats as the conversion <lstateValue>[^\n \t\r]+ // We don't parse strictly correct floats as the conversion
// repeats the effort anyway. // repeats the effort anyway.
@ -108,7 +108,7 @@ M [a-zA-Z_:]
if l.err != nil { if l.err != nil {
return -1 return -1
} }
s = lstateTimestamp l.state = lstateTimestamp
<lstateTimestamp>[ \t]+ l.tstart = l.i <lstateTimestamp>[ \t]+ l.tstart = l.i
<lstateTimestamp>{D}+ ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64) <lstateTimestamp>{D}+ ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64)

View file

@ -24,21 +24,22 @@ import (
"github.com/prometheus/prometheus/pkg/value" "github.com/prometheus/prometheus/pkg/value"
) )
const (
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
// Lex is called by the parser generated by "go tool yacc" to obtain each // Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at // token. The method is opened before the matching rules block and closed at
// the end of the file. // the end of the file.
func (l *lexer) Lex() int { func (l *lexer) Lex() int {
const ( l.state = lstateInit
lstateInit = iota
lstateName
lstateValue
lstateTimestamp
lstateLabels
lstateLName
lstateLValue
lstateLValueIn
)
s := lstateInit
if l.i >= len(l.b) { if l.i >= len(l.b) {
return eof return eof
@ -51,7 +52,7 @@ func (l *lexer) Lex() int {
yystate0: yystate0:
switch yyt := s; yyt { switch yyt := l.state; yyt {
default: default:
panic(fmt.Errorf(`invalid start condition %d`, yyt)) panic(fmt.Errorf(`invalid start condition %d`, yyt))
case 0: // start condition: INITIAL case 0: // start condition: INITIAL
@ -386,22 +387,26 @@ yystate33:
yystart33: yystart33:
switch { switch {
default: default:
goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' goto yyabort
case c == '"': case c == '"':
goto yystate35 goto yystate35
case c == '\\': case c == '\\':
goto yystate36 goto yystate36
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate34
} }
yystate34: yystate34:
c = l.next() c = l.next()
switch { switch {
default: default:
goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' goto yyabort
case c == '"': case c == '"':
goto yystate35 goto yystate35
case c == '\\': case c == '\\':
goto yystate36 goto yystate36
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate34
} }
yystate35: yystate35:
@ -433,19 +438,19 @@ yyrule3: // [\r\n \t]+
} }
yyrule4: // {S}({M}|{D})* yyrule4: // {S}({M}|{D})*
{ {
s = lstateName l.state = lstateName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
l.mend = l.i l.mend = l.i
goto yystate0 goto yystate0
} }
yyrule5: // ([ \t]*)\{ yyrule5: // ([ \t]*)\{
{ {
s = lstateLabels l.state = lstateLabels
goto yystate0 goto yystate0
} }
yyrule6: // [ \t]+ yyrule6: // [ \t]+
{ {
s = lstateValue l.state = lstateValue
l.vstart = l.i l.vstart = l.i
goto yystate0 goto yystate0
} }
@ -454,13 +459,13 @@ yyrule7: // [ \t]+
goto yystate0 goto yystate0
yyrule8: // ,?\} yyrule8: // ,?\}
{ {
s = lstateValue l.state = lstateValue
l.mend = l.i l.mend = l.i
goto yystate0 goto yystate0
} }
yyrule9: // (,?[ \t]*) yyrule9: // (,?[ \t]*)
{ {
s = lstateLName l.state = lstateLName
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
goto yystate0 goto yystate0
} }
@ -471,7 +476,7 @@ yyrule10: // {S}({L}|{D})*
} }
yyrule11: // [ \t]*= yyrule11: // [ \t]*=
{ {
s = lstateLValue l.state = lstateLValue
goto yystate0 goto yystate0
} }
yyrule12: // [ \t]+ yyrule12: // [ \t]+
@ -479,13 +484,13 @@ yyrule12: // [ \t]+
goto yystate0 goto yystate0
yyrule13: // \" yyrule13: // \"
{ {
s = lstateLValueIn l.state = lstateLValueIn
l.offsets = append(l.offsets, l.i) l.offsets = append(l.offsets, l.i)
goto yystate0 goto yystate0
} }
yyrule14: // (\\.|[^\\"]|\0)*\" yyrule14: // (\\.|[^\\"])*\"
{ {
s = lstateLabels l.state = lstateLabels
if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) { if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) {
l.err = fmt.Errorf("Invalid UTF-8 label value.") l.err = fmt.Errorf("Invalid UTF-8 label value.")
return -1 return -1
@ -501,7 +506,7 @@ yyrule15: // [ \t]+
yyrule16: // (NaN) yyrule16: // (NaN)
{ {
l.val = math.Float64frombits(value.NormalNaN) l.val = math.Float64frombits(value.NormalNaN)
s = lstateTimestamp l.state = lstateTimestamp
goto yystate0 goto yystate0
} }
yyrule17: // [^\n \t\r]+ yyrule17: // [^\n \t\r]+
@ -512,7 +517,7 @@ yyrule17: // [^\n \t\r]+
if l.err != nil { if l.err != nil {
return -1 return -1
} }
s = lstateTimestamp l.state = lstateTimestamp
goto yystate0 goto yystate0
} }
yyrule18: // [ \t]+ yyrule18: // [ \t]+

View file

@ -38,6 +38,8 @@ type lexer struct {
offsets []int offsets []int
mstart, mend int mstart, mend int
nextMstart int nextMstart int
state int
} }
const eof = 0 const eof = 0
@ -49,6 +51,11 @@ func (l *lexer) next() byte {
return eof return eof
} }
c := l.b[l.i] c := l.b[l.i]
// Consume null byte when encountered in label-value.
if c == eof && (l.state == lstateLValueIn || l.state == lstateLValue) {
return l.next()
}
return c return c
} }

View file

@ -163,6 +163,10 @@ func TestParseErrors(t *testing.T) {
input: "a true\n", input: "a true\n",
err: "strconv.ParseFloat: parsing \"true\": invalid syntax", err: "strconv.ParseFloat: parsing \"true\": invalid syntax",
}, },
{
input: "something_weird{problem=\"",
err: "no token found",
},
} }
for _, c := range cases { for _, c := range cases {
@ -174,6 +178,60 @@ func TestParseErrors(t *testing.T) {
} }
} }
func TestNullByteHandling(t *testing.T) {
cases := []struct {
input string
err string
}{
{
input: "null_byte_metric{a=\"abc\x00\"} 1",
err: "",
},
{
input: "a{b=\"\x00ss\"} 1\n",
err: "",
},
{
input: "a{b=\"\x00\"} 1\n",
err: "",
},
{
input: "a{b=\"\x00\"} 1\n",
err: "",
},
{
input: "a{b=\x00\"ssss\"} 1\n",
err: "no token found",
},
{
input: "a{b=\"\x00",
err: "no token found",
},
{
input: "a{b\x00=\"hiih\"} 1",
err: "no token found",
},
{
input: "a\x00{b=\"ddd\"} 1",
err: "no token found",
},
}
for _, c := range cases {
p := New([]byte(c.input))
for p.Next() {
}
if c.err == "" {
require.NoError(t, p.Err())
continue
}
require.Error(t, p.Err())
require.Equal(t, c.err, p.Err().Error())
}
}
const ( const (
testdataSampleCount = 410 testdataSampleCount = 410
) )

View file

@ -0,0 +1 @@
o { quantile = "1.0", a = "b" } 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05

View file

@ -0,0 +1 @@
go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05

View file

@ -16,6 +16,8 @@
package promql package promql
import "github.com/prometheus/prometheus/pkg/textparse"
// PromQL parser fuzzing instrumentation for use with // PromQL parser fuzzing instrumentation for use with
// https://github.com/dvyukov/go-fuzz. // https://github.com/dvyukov/go-fuzz.
// //
@ -48,8 +50,11 @@ const (
// Note that this is not the parser for the text-based exposition-format; that // Note that this is not the parser for the text-based exposition-format; that
// lives in github.com/prometheus/client_golang/text. // lives in github.com/prometheus/client_golang/text.
func FuzzParseMetric(in []byte) int { func FuzzParseMetric(in []byte) int {
_, err := ParseMetric(string(in)) p := textparse.New(in)
if err == nil { for p.Next() {
}
if p.Err() == nil {
return fuzzInteresting return fuzzInteresting
} }