diff --git a/pkg/textparse/lex.l b/pkg/textparse/lex.l index c8963640c..c79094dca 100644 --- a/pkg/textparse/lex.l +++ b/pkg/textparse/lex.l @@ -23,22 +23,22 @@ import ( "github.com/prometheus/prometheus/pkg/value" ) +const ( + lstateInit = iota + lstateName + lstateValue + lstateTimestamp + lstateLabels + lstateLName + lstateLValue + lstateLValueIn +) // Lex is called by the parser generated by "go tool yacc" to obtain each // token. The method is opened before the matching rules block and closed at // the end of the file. func (l *lexer) Lex() int { - const ( - lstateInit = iota - lstateName - lstateValue - lstateTimestamp - lstateLabels - lstateLName - lstateLValue - lstateLValueIn - ) - s := lstateInit + l.state = lstateInit if l.i >= len(l.b) { return eof @@ -60,7 +60,7 @@ M [a-zA-Z_:] %yyc c %yyn c = l.next() -%yyt s +%yyt l.state %% @@ -69,29 +69,29 @@ M [a-zA-Z_:] #[^\r\n]*\n l.mstart = l.i [\r\n \t]+ l.mstart = l.i -{S}({M}|{D})* s = lstateName +{S}({M}|{D})* l.state = lstateName l.offsets = append(l.offsets, l.i) l.mend = l.i -([ \t]*)\{ s = lstateLabels +([ \t]*)\{ l.state = lstateLabels -[ \t]+ s = lstateValue +[ \t]+ l.state = lstateValue l.vstart = l.i [ \t]+ -,?\} s = lstateValue +,?\} l.state = lstateValue l.mend = l.i -(,?[ \t]*) s = lstateLName +(,?[ \t]*) l.state = lstateLName l.offsets = append(l.offsets, l.i) {S}({L}|{D})* l.offsets = append(l.offsets, l.i) -[ \t]*= s = lstateLValue +[ \t]*= l.state = lstateLValue [ \t]+ -\" s = lstateLValueIn +\" l.state = lstateLValueIn l.offsets = append(l.offsets, l.i) -(\\.|[^\\"]|\0)*\" s = lstateLabels +(\\.|[^\\"])*\" l.state = lstateLabels if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) { l.err = fmt.Errorf("Invalid UTF-8 label value.") return -1 @@ -100,7 +100,7 @@ M [a-zA-Z_:] [ \t]+ l.vstart = l.i (NaN) l.val = math.Float64frombits(value.NormalNaN) - s = lstateTimestamp + l.state = lstateTimestamp [^\n \t\r]+ // We don't parse strictly correct floats as the conversion // repeats the effort anyway. @@ -108,7 +108,7 @@ M [a-zA-Z_:] if l.err != nil { return -1 } - s = lstateTimestamp + l.state = lstateTimestamp [ \t]+ l.tstart = l.i {D}+ ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64) diff --git a/pkg/textparse/lex.l.go b/pkg/textparse/lex.l.go index cf7978048..00f204faa 100644 --- a/pkg/textparse/lex.l.go +++ b/pkg/textparse/lex.l.go @@ -24,21 +24,22 @@ import ( "github.com/prometheus/prometheus/pkg/value" ) +const ( + lstateInit = iota + lstateName + lstateValue + lstateTimestamp + lstateLabels + lstateLName + lstateLValue + lstateLValueIn +) + // Lex is called by the parser generated by "go tool yacc" to obtain each // token. The method is opened before the matching rules block and closed at // the end of the file. func (l *lexer) Lex() int { - const ( - lstateInit = iota - lstateName - lstateValue - lstateTimestamp - lstateLabels - lstateLName - lstateLValue - lstateLValueIn - ) - s := lstateInit + l.state = lstateInit if l.i >= len(l.b) { return eof @@ -51,7 +52,7 @@ func (l *lexer) Lex() int { yystate0: - switch yyt := s; yyt { + switch yyt := l.state; yyt { default: panic(fmt.Errorf(`invalid start condition %d`, yyt)) case 0: // start condition: INITIAL @@ -386,22 +387,26 @@ yystate33: yystart33: switch { default: - goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' + goto yyabort case c == '"': goto yystate35 case c == '\\': goto yystate36 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate34 } yystate34: c = l.next() switch { default: - goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' + goto yyabort case c == '"': goto yystate35 case c == '\\': goto yystate36 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate34 } yystate35: @@ -433,19 +438,19 @@ yyrule3: // [\r\n \t]+ } yyrule4: // {S}({M}|{D})* { - s = lstateName + l.state = lstateName l.offsets = append(l.offsets, l.i) l.mend = l.i goto yystate0 } yyrule5: // ([ \t]*)\{ { - s = lstateLabels + l.state = lstateLabels goto yystate0 } yyrule6: // [ \t]+ { - s = lstateValue + l.state = lstateValue l.vstart = l.i goto yystate0 } @@ -454,13 +459,13 @@ yyrule7: // [ \t]+ goto yystate0 yyrule8: // ,?\} { - s = lstateValue + l.state = lstateValue l.mend = l.i goto yystate0 } yyrule9: // (,?[ \t]*) { - s = lstateLName + l.state = lstateLName l.offsets = append(l.offsets, l.i) goto yystate0 } @@ -471,7 +476,7 @@ yyrule10: // {S}({L}|{D})* } yyrule11: // [ \t]*= { - s = lstateLValue + l.state = lstateLValue goto yystate0 } yyrule12: // [ \t]+ @@ -479,13 +484,13 @@ yyrule12: // [ \t]+ goto yystate0 yyrule13: // \" { - s = lstateLValueIn + l.state = lstateLValueIn l.offsets = append(l.offsets, l.i) goto yystate0 } -yyrule14: // (\\.|[^\\"]|\0)*\" +yyrule14: // (\\.|[^\\"])*\" { - s = lstateLabels + l.state = lstateLabels if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) { l.err = fmt.Errorf("Invalid UTF-8 label value.") return -1 @@ -501,7 +506,7 @@ yyrule15: // [ \t]+ yyrule16: // (NaN) { l.val = math.Float64frombits(value.NormalNaN) - s = lstateTimestamp + l.state = lstateTimestamp goto yystate0 } yyrule17: // [^\n \t\r]+ @@ -512,7 +517,7 @@ yyrule17: // [^\n \t\r]+ if l.err != nil { return -1 } - s = lstateTimestamp + l.state = lstateTimestamp goto yystate0 } yyrule18: // [ \t]+ diff --git a/pkg/textparse/parse.go b/pkg/textparse/parse.go index 1792cecf2..b9f7ed6a2 100644 --- a/pkg/textparse/parse.go +++ b/pkg/textparse/parse.go @@ -38,6 +38,8 @@ type lexer struct { offsets []int mstart, mend int nextMstart int + + state int } const eof = 0 @@ -49,6 +51,11 @@ func (l *lexer) next() byte { return eof } c := l.b[l.i] + + // Consume null byte when encountered in label-value. + if c == eof && (l.state == lstateLValueIn || l.state == lstateLValue) { + return l.next() + } return c } diff --git a/pkg/textparse/parse_test.go b/pkg/textparse/parse_test.go index b11a9e8a5..b1ef9cca0 100644 --- a/pkg/textparse/parse_test.go +++ b/pkg/textparse/parse_test.go @@ -163,6 +163,10 @@ func TestParseErrors(t *testing.T) { input: "a true\n", err: "strconv.ParseFloat: parsing \"true\": invalid syntax", }, + { + input: "something_weird{problem=\"", + err: "no token found", + }, } for _, c := range cases { @@ -174,6 +178,60 @@ func TestParseErrors(t *testing.T) { } } +func TestNullByteHandling(t *testing.T) { + cases := []struct { + input string + err string + }{ + { + input: "null_byte_metric{a=\"abc\x00\"} 1", + err: "", + }, + { + input: "a{b=\"\x00ss\"} 1\n", + err: "", + }, + { + input: "a{b=\"\x00\"} 1\n", + err: "", + }, + { + input: "a{b=\"\x00\"} 1\n", + err: "", + }, + { + input: "a{b=\x00\"ssss\"} 1\n", + err: "no token found", + }, + { + input: "a{b=\"\x00", + err: "no token found", + }, + { + input: "a{b\x00=\"hiih\"} 1", + err: "no token found", + }, + { + input: "a\x00{b=\"ddd\"} 1", + err: "no token found", + }, + } + + for _, c := range cases { + p := New([]byte(c.input)) + for p.Next() { + } + + if c.err == "" { + require.NoError(t, p.Err()) + continue + } + + require.Error(t, p.Err()) + require.Equal(t, c.err, p.Err().Error()) + } +} + const ( testdataSampleCount = 410 ) diff --git a/promql/fuzz-data/ParseMetric/corpus/982cbe5ad899f03c630b1a21876a206707ea3dc9 b/promql/fuzz-data/ParseMetric/corpus/982cbe5ad899f03c630b1a21876a206707ea3dc9 new file mode 100644 index 000000000..bdbf8f202 --- /dev/null +++ b/promql/fuzz-data/ParseMetric/corpus/982cbe5ad899f03c630b1a21876a206707ea3dc9 @@ -0,0 +1 @@ +o { quantile = "1.0", a = "b" } 8.3835e-05 diff --git a/promql/fuzz-data/ParseMetric/corpus/exposition_formats_5 b/promql/fuzz-data/ParseMetric/corpus/exposition_formats_5 new file mode 100644 index 000000000..bb9f16973 --- /dev/null +++ b/promql/fuzz-data/ParseMetric/corpus/exposition_formats_5 @@ -0,0 +1 @@ +go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 diff --git a/promql/fuzz-data/ParseMetric/corpus/exposition_formats_6 b/promql/fuzz-data/ParseMetric/corpus/exposition_formats_6 new file mode 100644 index 000000000..8b91128ed --- /dev/null +++ b/promql/fuzz-data/ParseMetric/corpus/exposition_formats_6 @@ -0,0 +1 @@ +go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05 diff --git a/promql/fuzz-data/ParseMetric/corpus/exposition_formats_7 b/promql/fuzz-data/ParseMetric/corpus/exposition_formats_7 new file mode 100644 index 000000000..760ea69b5 --- /dev/null +++ b/promql/fuzz-data/ParseMetric/corpus/exposition_formats_7 @@ -0,0 +1 @@ +go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05 diff --git a/promql/fuzz.go b/promql/fuzz.go index e52ccfb25..40ab846dd 100644 --- a/promql/fuzz.go +++ b/promql/fuzz.go @@ -16,6 +16,8 @@ package promql +import "github.com/prometheus/prometheus/pkg/textparse" + // PromQL parser fuzzing instrumentation for use with // https://github.com/dvyukov/go-fuzz. // @@ -48,8 +50,11 @@ const ( // Note that his is not the parser for the text-based exposition-format; that // lives in github.com/prometheus/client_golang/text. func FuzzParseMetric(in []byte) int { - _, err := ParseMetric(string(in)) - if err == nil { + p := textparse.New(in) + for p.Next() { + } + + if p.Err() == nil { return fuzzInteresting }