parser: support underscores

Support underscores as digit separators in number literals, namely
decimals, hexadecimals, and exponentials.

Fixes #12769
Signed-off-by: Pranshu Srivastava <rexagod@gmail.com>

Signed-off-by: Pranshu Srivastava <rexagod@gmail.com>
This commit is contained in:
Pranshu Srivastava 2023-09-10 04:27:20 +05:30
parent b6ef745016
commit 3c9e3ee552
No known key found for this signature in database
GPG key ID: 63938388A4528764
4 changed files with 155 additions and 13 deletions

View file

@ -313,6 +313,11 @@ func (l *Lexer) accept(valid string) bool {
return false
}
// is reports whether the rune returned by l.peek() is a member of the
// valid set.
func (l *Lexer) is(valid string) bool {
	upcoming := l.peek()
	return strings.ContainsRune(valid, upcoming)
}
// acceptRun consumes a run of runes from the valid set.
func (l *Lexer) acceptRun(valid string) {
for strings.ContainsRune(valid, l.next()) {
@ -901,19 +906,78 @@ func acceptRemainingDuration(l *Lexer) bool {
// scanNumber scans numbers of different formats. The scanned Item is
// not necessarily a valid number. This case is caught by the parser.
func (l *Lexer) scanNumber() bool {
digits := "0123456789"
// Modify the digit pattern if the number is hexadecimal.
digitPattern := "0123456789"
// Disallow hexadecimal in series descriptions as the syntax is ambiguous.
if !l.seriesDesc && l.accept("0") && l.accept("xX") {
digits = "0123456789abcdefABCDEF"
if !l.seriesDesc &&
l.accept("0") && l.accept("xX") {
l.accept("_") // eg., 0X_1FFFP-16 == 0.1249847412109375
digitPattern = "0123456789abcdefABCDEF"
}
l.acceptRun(digits)
if l.accept(".") {
l.acceptRun(digits)
}
if l.accept("eE") {
l.accept("+-")
l.acceptRun("0123456789")
const (
// Define dot, exponent, and underscore patterns.
dotPattern = "."
exponentPattern = "eE"
underscorePattern = "_"
// Anti-patterns are rune sets that cannot follow their respective rune.
dotAntiPattern = "_."
exponentAntiPattern = "._eE" // and EOL.
underscoreAntiPattern = "._eE" // and EOL.
)
// All numbers follow the prefix: [.][d][d._eE]*
l.accept(dotPattern)
l.accept(digitPattern)
// [d._eE]* hereon.
dotConsumed := false
exponentConsumed := false
for l.is(digitPattern + dotPattern + underscorePattern + exponentPattern) {
// "." cannot repeat.
if l.is(dotPattern) {
if dotConsumed {
l.accept(dotPattern)
return false
}
}
// "eE" cannot repeat.
if l.is(exponentPattern) {
if exponentConsumed {
l.accept(exponentPattern)
return false
}
}
// Handle dots.
if l.accept(dotPattern) {
dotConsumed = true
if l.accept(dotAntiPattern) {
return false
}
// Fractional hexadecimal literals are not allowed.
if len(digitPattern) > 10 /* 0x[\da-fA-F].[\d]+p[\d] */ {
return false
}
continue
}
// Handle exponents.
if l.accept(exponentPattern) {
exponentConsumed = true
l.accept("+-")
if l.accept(exponentAntiPattern) || l.peek() == eof {
return false
}
continue
}
// Handle underscores.
if l.accept(underscorePattern) {
if l.accept(underscoreAntiPattern) || l.peek() == eof {
return false
}
continue
}
// Handle digits at the end since we already consumed before this loop.
l.acceptRun(digitPattern)
}
// Next thing must not be alphanumeric unless it's the times token
// for series repetitions.
if r := l.peek(); (l.seriesDesc && r == 'x') || !isAlphaNumeric(r) {

View file

@ -132,6 +132,84 @@ var tests = []struct {
}, {
input: "0x123",
expected: []Item{{NUMBER, 0, "0x123"}},
}, {
input: "1..2",
fail: true,
}, {
input: "1.2.",
fail: true,
}, {
input: "00_1_23_4.56_7_8",
expected: []Item{{NUMBER, 0, "00_1_23_4.56_7_8"}},
}, {
input: "00_1_23__4.56_7_8",
fail: true,
}, {
input: "00_1_23_4._56_7_8",
fail: true,
}, {
input: "00_1_23_4_.56_7_8",
fail: true,
}, {
input: "0x1_2_34",
expected: []Item{{NUMBER, 0, "0x1_2_34"}},
}, {
input: "0x1_2__34",
fail: true,
}, {
input: "0x1_2__34.5_6p1", // "0x1.1p1"-based formats are not supported yet.
fail: true,
}, {
input: "0x1_2__34.5_6",
fail: true,
}, {
input: "0x1_2__34.56",
fail: true,
}, {
input: "1_e2",
fail: true,
}, {
input: "1.e2",
expected: []Item{{NUMBER, 0, "1.e2"}},
}, {
input: "1e.2",
fail: true,
}, {
input: "1e+.2",
fail: true,
}, {
input: "1ee2",
fail: true,
}, {
input: "1e+e2",
fail: true,
}, {
input: "1e",
fail: true,
}, {
input: "1e+",
fail: true,
}, {
input: "1e1_2_34",
expected: []Item{{NUMBER, 0, "1e1_2_34"}},
}, {
input: "1e_1_2_34",
fail: true,
}, {
input: "1e1_2__34",
fail: true,
}, {
input: "1e+_1_2_34",
fail: true,
}, {
input: "1e-_1_2_34",
fail: true,
}, {
input: "12_",
fail: true,
}, {
input: "_1_2",
expected: []Item{{IDENTIFIER, 0, "_1_2"}},
},
},
},

View file

@ -513,12 +513,12 @@ var testExpr = []struct {
{
input: "2.5.",
fail: true,
errMsg: "unexpected character: '.'",
errMsg: `1:1: parse error: bad number or duration syntax: "2.5."`,
},
{
input: "100..4",
fail: true,
errMsg: `unexpected number ".4"`,
errMsg: `1:1: parse error: bad number or duration syntax: "100.."`,
},
{
input: "0deadbeef",

View file

@ -261,7 +261,7 @@ NumberLiteral {
LineComment { "#" ![\n]* }
number {
(std.digit+ ("." std.digit*)? | "." std.digit+) (("e" | "E") ("+" | "-")? std.digit+)? |
(std.digit+ (("_")? std.digit)* ("." std.digit+ (("_")? std.digit)*)? | "." std.digit+ (("_")? std.digit)*) (("e" | "E") ("+" | "-")? std.digit+ (("_")? std.digit)*)? |
"0x" (std.digit | $[a-fA-F])+
}
StringLiteral { // TODO: This is for JS, make this work for PromQL.