Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 17 additions & 27 deletions crates/squawk_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,43 +246,22 @@ impl Cursor<'_> {
'b' | 'B' => {
base = Base::Binary;
self.bump();
if !self.eat_decimal_digits() {
let trailing_junk_start = self.pos_within_token();
self.eat_identifier();
return LiteralKind::Int {
base,
empty_int: true,
trailing_junk_start,
};
}
let has_digits = self.eat_decimal_digits();
return self.finish_base_prefixed_int(base, has_digits);
}
// https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L402
'o' | 'O' => {
base = Base::Octal;
self.bump();
if !self.eat_decimal_digits() {
let trailing_junk_start = self.pos_within_token();
self.eat_identifier();
return LiteralKind::Int {
base,
empty_int: true,
trailing_junk_start,
};
}
let has_digits = self.eat_decimal_digits();
return self.finish_base_prefixed_int(base, has_digits);
}
// https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L401
'x' | 'X' => {
base = Base::Hexadecimal;
self.bump();
if !self.eat_hexadecimal_digits() {
let trailing_junk_start = self.pos_within_token();
self.eat_identifier();
return LiteralKind::Int {
base,
empty_int: true,
trailing_junk_start,
};
}
let has_digits = self.eat_hexadecimal_digits();
return self.finish_base_prefixed_int(base, has_digits);
}
// Not a base prefix; consume additional digits.
'0'..='9' | '_' => {
Expand Down Expand Up @@ -466,6 +445,17 @@ impl Cursor<'_> {
has_digits
}

/// Builds the `LiteralKind::Int` for a base-prefixed integer after the
/// caller has consumed the prefix and attempted to eat its digits.
///
/// `has_digits` is the caller's report of whether any digits were eaten.
/// The value of `pos_within_token()` on entry is recorded as
/// `trailing_junk_start`; every following character accepted by
/// `is_ident_cont` is then consumed as part of the same token.
///
/// `empty_int` is set only when there were no digits and nothing was
/// consumed as trailing junk either.
fn finish_base_prefixed_int(&mut self, base: Base, has_digits: bool) -> LiteralKind {
    let junk_offset = self.pos_within_token();
    self.eat_while(is_ident_cont);
    let junk_end = self.pos_within_token();

    // Anything eaten after the digits counts as trailing junk.
    let consumed_junk = junk_end > junk_offset;
    let nothing_after_prefix = !(has_digits || consumed_junk);

    LiteralKind::Int {
        base,
        empty_int: nothing_after_prefix,
        trailing_junk_start: junk_offset,
    }
}

fn eat_hexadecimal_digits(&mut self) -> bool {
let mut has_digits = false;
loop {
Expand Down
96 changes: 88 additions & 8 deletions crates/squawk_parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,8 @@ impl<'a> LexedStr<'a> {
// Some(self.error[err].msg.as_str())
// }

pub fn errors(&self) -> impl Iterator<Item = (ops::Range<u32>, &str)> + '_ {
self.error
.iter()
.map(|it| (it.range.clone(), it.msg.as_str()))
pub fn errors(&self) -> impl Iterator<Item = (&ops::Range<u32>, &str)> + '_ {
self.error.iter().map(|it| (&it.range, it.msg.as_str()))
}

fn push(&mut self, kind: SyntaxKind, offset: usize) {
Expand Down Expand Up @@ -242,14 +240,32 @@ impl<'a> Converter<'a> {
let syntax_kind = match *kind {
squawk_lexer::LiteralKind::Int {
empty_int,
base: _,
base,
trailing_junk_start,
} => {
if empty_int {
err = Some("Missing digits after the integer base prefix".into());
} else if (trailing_junk_start as usize) < token_text.len() {
err = Some("trailing junk after numeric literal".into());
err_range = Some(trailing_junk_start..token_text.len() as u32);
} else {
if matches!(base, squawk_lexer::Base::Binary | squawk_lexer::Base::Octal) {
let prefix_len = 2u32;
let digits = &token_text[prefix_len as usize..trailing_junk_start as usize];
let base = base as u32;
let token_start = self.offset as u32;
for (i, c) in digits.char_indices() {
if c != '_' && c.to_digit(base).is_none() {
let start = token_start + prefix_len + i as u32;
let end = start + c.len_utf8() as u32;
self.res.error.push(LexError {
msg: format!("invalid digit for a base {base} literal"),
range: start..end,
});
}
}
}
if (trailing_junk_start as usize) < token_text.len() {
err = Some("trailing junk after numeric literal".into());
err_range = Some(trailing_junk_start..token_text.len() as u32);
}
}
SyntaxKind::INT_NUMBER
}
Expand Down Expand Up @@ -364,6 +380,70 @@ mod tests {
");
}

// `0xg` has a hexadecimal prefix followed by a character that is not a hex
// digit. The snapshot expects a single "trailing junk after numeric literal"
// diagnostic for this input.
#[test]
fn empty_int_with_trailing_ident_error() {
assert_snapshot!(lex("select 0xg;"), @"
error: trailing junk after numeric literal
╭▸
1 │ select 0xg;
╰╴ ━
");
}

// None of the digits in `0o999` are valid octal digits. The snapshot expects
// one "invalid digit for a base 8 literal" diagnostic per digit, three in
// total.
#[test]
fn invalid_octal_digits_error() {
assert_snapshot!(lex("select 0o999;"), @"
error: invalid digit for a base 8 literal
╭▸
1 │ select 0o999;
╰╴ ━
error: invalid digit for a base 8 literal
╭▸
1 │ select 0o999;
╰╴ ━
error: invalid digit for a base 8 literal
╭▸
1 │ select 0o999;
╰╴ ━
");
}

// None of the digits in `0b234` are valid binary digits. The snapshot expects
// one "invalid digit for a base 2 literal" diagnostic per digit, three in
// total.
#[test]
fn invalid_binary_digits_error() {
assert_snapshot!(lex("select 0b234;"), @"
error: invalid digit for a base 2 literal
╭▸
1 │ select 0b234;
╰╴ ━
error: invalid digit for a base 2 literal
╭▸
1 │ select 0b234;
╰╴ ━
error: invalid digit for a base 2 literal
╭▸
1 │ select 0b234;
╰╴ ━
");
}

// `0o7889` starts with a valid octal digit (`7`) followed by three digits
// that are out of range for base 8 (`8`, `8`, `9`). The snapshot expects three
// "invalid digit for a base 8 literal" diagnostics, matching the number of
// out-of-range digits.
#[test]
fn invalid_octal_digits_after_valid_error() {
assert_snapshot!(lex("select 0o7889;"), @"
error: invalid digit for a base 8 literal
╭▸
1 │ select 0o7889;
╰╴ ━
error: invalid digit for a base 8 literal
╭▸
1 │ select 0o7889;
╰╴ ━
error: invalid digit for a base 8 literal
╭▸
1 │ select 0o7889;
╰╴ ━
");
}

#[test]
fn empty_exponent_error() {
assert_snapshot!(lex("select 1e;"), @"
Expand Down
4 changes: 4 additions & 0 deletions crates/squawk_parser/tests/data/err/select_literal.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ SELECT 0o0x;
SELECT 0x;
SELECT 1x;
SELECT 0x0y;
SELECT 0o999;
SELECT 0o7889;
SELECT 0b234;
SELECT 0b101_010_;
SELECT 100_;
SELECT 100__000;
SELECT _1_000.5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,46 @@ SOURCE_FILE
INT_NUMBER "0x0y"
SEMICOLON ";"
WHITESPACE "\n"
SELECT
SELECT_CLAUSE
SELECT_KW "SELECT"
WHITESPACE " "
TARGET_LIST
TARGET
LITERAL
INT_NUMBER "0o999"
SEMICOLON ";"
WHITESPACE "\n"
SELECT
SELECT_CLAUSE
SELECT_KW "SELECT"
WHITESPACE " "
TARGET_LIST
TARGET
LITERAL
INT_NUMBER "0o7889"
SEMICOLON ";"
WHITESPACE "\n"
SELECT
SELECT_CLAUSE
SELECT_KW "SELECT"
WHITESPACE " "
TARGET_LIST
TARGET
LITERAL
INT_NUMBER "0b234"
SEMICOLON ";"
WHITESPACE "\n"
SELECT
SELECT_CLAUSE
SELECT_KW "SELECT"
WHITESPACE " "
TARGET_LIST
TARGET
LITERAL
INT_NUMBER "0b101_010_"
SEMICOLON ";"
WHITESPACE "\n"
SELECT
SELECT_CLAUSE
SELECT_KW "SELECT"
Expand Down Expand Up @@ -342,39 +382,79 @@ error[syntax-error]: trailing junk after numeric literal
╭▸
18 │ SELECT 0x0y;
╰╴ ━
error[syntax-error]: invalid digit for a base 8 literal
╭▸
19 │ SELECT 0o999;
╰╴ ━
error[syntax-error]: invalid digit for a base 8 literal
╭▸
19 │ SELECT 0o999;
╰╴ ━
error[syntax-error]: invalid digit for a base 8 literal
╭▸
19 │ SELECT 0o999;
╰╴ ━
error[syntax-error]: invalid digit for a base 8 literal
╭▸
20 │ SELECT 0o7889;
╰╴ ━
error[syntax-error]: invalid digit for a base 8 literal
╭▸
20 │ SELECT 0o7889;
╰╴ ━
error[syntax-error]: invalid digit for a base 8 literal
╭▸
20 │ SELECT 0o7889;
╰╴ ━
error[syntax-error]: invalid digit for a base 2 literal
╭▸
21 │ SELECT 0b234;
╰╴ ━
error[syntax-error]: invalid digit for a base 2 literal
╭▸
21 │ SELECT 0b234;
╰╴ ━
error[syntax-error]: invalid digit for a base 2 literal
╭▸
21 │ SELECT 0b234;
╰╴ ━
error[syntax-error]: trailing junk after numeric literal
╭▸
22 │ SELECT 0b101_010_;
╰╴ ━
error[syntax-error]: trailing junk after numeric literal
╭▸
19 │ SELECT 100_;
23 │ SELECT 100_;
╰╴ ━
error[syntax-error]: trailing junk after numeric literal
╭▸
20 │ SELECT 100__000;
24 │ SELECT 100__000;
╰╴ ━━━━━
error[syntax-error]: missing comma
╭▸
21 │ SELECT _1_000.5;
25 │ SELECT _1_000.5;
╰╴ ━
error[syntax-error]: trailing junk after numeric literal
╭▸
22 │ SELECT 1_000_.5;
26 │ SELECT 1_000_.5;
╰╴ ━
error[syntax-error]: missing comma
╭▸
22 │ SELECT 1_000_.5;
26 │ SELECT 1_000_.5;
╰╴ ━
error[syntax-error]: trailing junk after numeric literal
╭▸
23 │ SELECT 1_000._5;
27 │ SELECT 1_000._5;
╰╴ ━━
error[syntax-error]: trailing junk after numeric literal
╭▸
24 │ SELECT 1_000.5_;
28 │ SELECT 1_000.5_;
╰╴ ━
error[syntax-error]: Missing digits after the exponent symbol
╭▸
25 │ SELECT 1_000.5e_1;
29 │ SELECT 1_000.5e_1;
╰╴ ━
error[syntax-error]: trailing junk after positional parameter
╭▸
26 │ SELECT $0_1;
30 │ SELECT $0_1;
╰╴ ━━
Loading