From 3967a72dac06fc2159bd4d94c0032dbb72998103 Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Fri, 22 May 2026 22:04:40 -0400 Subject: [PATCH] parser: warn about empty / invalid params & empty quoted idents --- crates/squawk_parser/src/lexed_str.rs | 28 +- .../tests/data/err/select_literal.sql | 6 + .../snapshots/tests__select_literal_err.snap | 72 +++++ ...ax__test__custom_operators_validation.snap | 250 +++++++++++++++++- crates/squawk_syntax/src/validation.rs | 10 + .../test_data/validation/custom_operators.sql | 5 + crates/xtask/src/sync_pg.rs | 1 + postgres/kwlist.h | 6 +- postgres/regression_suite/constraints.sql | 3 + postgres/regression_suite/create_am.sql | 2 +- 10 files changed, 376 insertions(+), 7 deletions(-) diff --git a/crates/squawk_parser/src/lexed_str.rs b/crates/squawk_parser/src/lexed_str.rs index 6c599ddc8..72ee80383 100644 --- a/crates/squawk_parser/src/lexed_str.rs +++ b/crates/squawk_parser/src/lexed_str.rs @@ -1,6 +1,6 @@ // based on https://github.com/rust-lang/rust-analyzer/blob/d8887c0758bbd2d5f752d5bd405d4491e90e7ed6/crates/parser/src/lexed_str.rs -use std::ops; +use std::{num::IntErrorKind, ops}; use squawk_lexer::tokenize; @@ -120,6 +120,18 @@ struct Converter<'a> { offset: usize, } +fn is_empty_quoted_ident(token_text: &str) -> bool { + let inner = if let Some(stripped) = token_text + .strip_prefix(['u', 'U']) + .and_then(|s| s.strip_prefix('&')) + { + stripped + } else { + token_text + }; + inner == "\"\"" +} + impl<'a> Converter<'a> { fn new(text: &'a str) -> Self { Self { @@ -213,7 +225,17 @@ impl<'a> Converter<'a> { squawk_lexer::TokenKind::PositionalParam { trailing_junk_start, } => { - if (*trailing_junk_start as usize) < token_text.len() { + let digits = &token_text[1..*trailing_junk_start as usize]; + if digits.is_empty() { + err = "missing parameter number"; + err_range = Some(0..1); + } else if digits + .parse::() + .is_err_and(|err| matches!(err.kind(), IntErrorKind::PosOverflow)) + { + err = "parameter number too large"; + err_range = Some(0..*trailing_junk_start); + } else if (*trailing_junk_start as usize) < token_text.len() { err = "trailing junk after positional parameter"; err_range = Some(*trailing_junk_start..token_text.len() as u32); } @@ -222,6 +244,8 @@ impl<'a> Converter<'a> { squawk_lexer::TokenKind::QuotedIdent { terminated } => { if !terminated { err = "Missing trailing \" to terminate the quoted identifier" + } else if is_empty_quoted_ident(token_text) { + err = "empty delimited identifier"; } SyntaxKind::IDENT } diff --git a/crates/squawk_parser/tests/data/err/select_literal.sql b/crates/squawk_parser/tests/data/err/select_literal.sql index 4576b57e8..a663a3825 100644 --- a/crates/squawk_parser/tests/data/err/select_literal.sql +++ b/crates/squawk_parser/tests/data/err/select_literal.sql @@ -28,3 +28,9 @@ SELECT 1_000._5; SELECT 1_000.5_; SELECT 1_000.5e_1; SELECT $0_1; +SELECT $2147483648; +-- empty placeholder +SELECT $; +SELECT $0111111111111111111111111111111111111111111111111111; +SELECT ""; +SELECT U&""; diff --git a/crates/squawk_parser/tests/snapshots/tests__select_literal_err.snap b/crates/squawk_parser/tests/snapshots/tests__select_literal_err.snap index f96d25098..c9cbeb990 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_literal_err.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_literal_err.snap @@ -309,6 +309,58 @@ SOURCE_FILE POSITIONAL_PARAM "$0_1" SEMICOLON ";" WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "SELECT" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + POSITIONAL_PARAM "$2147483648" + SEMICOLON ";" + WHITESPACE "\n" + COMMENT "-- empty placeholder" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "SELECT" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + POSITIONAL_PARAM "$" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "SELECT" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + POSITIONAL_PARAM "$0111111111111111111111111111111111111111111111111111" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "SELECT" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "\"\"" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "SELECT" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "U&\"\"" + SEMICOLON ";" + WHITESPACE "\n" --- error[syntax-error]: trailing junk after numeric literal ╭▸ @@ -458,3 +510,23 @@ error[syntax-error]: trailing junk after positional parameter ╭▸ 30 │ SELECT $0_1; ╰╴ ━━ +error[syntax-error]: parameter number too large + ╭▸ +31 │ SELECT $2147483648; + ╰╴ ━━━━━━━━━━━ +error[syntax-error]: missing parameter number + ╭▸ +33 │ SELECT $; + ╰╴ ━ +error[syntax-error]: parameter number too large + ╭▸ +34 │ SELECT $0111111111111111111111111111111111111111111111111111; + ╰╴ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +error[syntax-error]: empty delimited identifier + ╭▸ +35 │ SELECT ""; + ╰╴ ━━ +error[syntax-error]: empty delimited identifier + ╭▸ +36 │ SELECT U&""; + ╰╴ ━━━━ diff --git a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__custom_operators_validation.snap b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__custom_operators_validation.snap index dd8d33a9e..c756e88be 100644 --- a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__custom_operators_validation.snap +++ b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__custom_operators_validation.snap @@ -2,7 +2,7 @@ source: crates/squawk_syntax/src/test.rs input_file: crates/squawk_syntax/test_data/validation/custom_operators.sql --- -SOURCE_FILE@0..108 +SOURCE_FILE@0..423 COMMENT@0..30 "-- disallowed prefix ..." WHITESPACE@30..31 "\n" SELECT@31..41 @@ -95,6 +95,246 @@ SOURCE_FILE@0..108 IDENT@105..106 "m" SEMICOLON@106..107 ";" WHITESPACE@107..108 "\n" + COMMENT@108..165 "-- operators may not ..." + WHITESPACE@165..166 "\n" + SELECT@166..240 + SELECT_CLAUSE@166..239 + SELECT_KW@166..172 "select" + WHITESPACE@172..173 " " + TARGET_LIST@173..239 + TARGET@173..239 + PREFIX_EXPR@173..239 + CUSTOM_OP@173..237 + PIPE@173..174 "|" + PIPE@174..175 "|" + PIPE@175..176 "|" + PIPE@176..177 "|" + PIPE@177..178 "|" + PIPE@178..179 "|" + PIPE@179..180 "|" + PIPE@180..181 "|" + PIPE@181..182 "|" + PIPE@182..183 "|" + PIPE@183..184 "|" + PIPE@184..185 "|" + PIPE@185..186 "|" + PIPE@186..187 "|" + PIPE@187..188 "|" + PIPE@188..189 "|" + PIPE@189..190 "|" + PIPE@190..191 "|" + PIPE@191..192 "|" + PIPE@192..193 "|" + PIPE@193..194 "|" + PIPE@194..195 "|" + PIPE@195..196 "|" + PIPE@196..197 "|" + PIPE@197..198 "|" + PIPE@198..199 "|" + PIPE@199..200 "|" + PIPE@200..201 "|" + PIPE@201..202 "|" + PIPE@202..203 "|" + PIPE@203..204 "|" + PIPE@204..205 "|" + PIPE@205..206 "|" + PIPE@206..207 "|" + PIPE@207..208 "|" + PIPE@208..209 "|" + PIPE@209..210 "|" + PIPE@210..211 "|" + PIPE@211..212 "|" + PIPE@212..213 "|" + PIPE@213..214 "|" + PIPE@214..215 "|" + PIPE@215..216 "|" + PIPE@216..217 "|" + PIPE@217..218 "|" + PIPE@218..219 "|" + PIPE@219..220 "|" + PIPE@220..221 "|" + PIPE@221..222 "|" + PIPE@222..223 "|" + PIPE@223..224 "|" + PIPE@224..225 "|" + PIPE@225..226 "|" + PIPE@226..227 "|" + PIPE@227..228 "|" + PIPE@228..229 "|" + PIPE@229..230 "|" + PIPE@230..231 "|" + PIPE@231..232 "|" + PIPE@232..233 "|" + PIPE@233..234 "|" + PIPE@234..235 "|" + PIPE@235..236 "|" + PIPE@236..237 "|" + WHITESPACE@237..238 " " + LITERAL@238..239 + INT_NUMBER@238..239 "1" + SEMICOLON@239..240 ";" + WHITESPACE@240..241 "\n" + SELECT@241..317 + SELECT_CLAUSE@241..316 + SELECT_KW@241..247 "select" + WHITESPACE@247..248 " " + TARGET_LIST@248..316 + TARGET@248..316 + BIN_EXPR@248..316 + LITERAL@248..249 + INT_NUMBER@248..249 "1" + WHITESPACE@249..250 " " + CUSTOM_OP@250..314 + PIPE@250..251 "|" + PIPE@251..252 "|" + PIPE@252..253 "|" + PIPE@253..254 "|" + PIPE@254..255 "|" + PIPE@255..256 "|" + PIPE@256..257 "|" + PIPE@257..258 "|" + PIPE@258..259 "|" + PIPE@259..260 "|" + PIPE@260..261 "|" + PIPE@261..262 "|" + PIPE@262..263 "|" + PIPE@263..264 "|" + PIPE@264..265 "|" + PIPE@265..266 "|" + PIPE@266..267 "|" + PIPE@267..268 "|" + PIPE@268..269 "|" + PIPE@269..270 "|" + PIPE@270..271 "|" + PIPE@271..272 "|" + PIPE@272..273 "|" + PIPE@273..274 "|" + PIPE@274..275 "|" + PIPE@275..276 "|" + PIPE@276..277 "|" + PIPE@277..278 "|" + PIPE@278..279 "|" + PIPE@279..280 "|" + PIPE@280..281 "|" + PIPE@281..282 "|" + PIPE@282..283 "|" + PIPE@283..284 "|" + PIPE@284..285 "|" + PIPE@285..286 "|" + PIPE@286..287 "|" + PIPE@287..288 "|" + PIPE@288..289 "|" + PIPE@289..290 "|" + PIPE@290..291 "|" + PIPE@291..292 "|" + PIPE@292..293 "|" + PIPE@293..294 "|" + PIPE@294..295 "|" + PIPE@295..296 "|" + PIPE@296..297 "|" + PIPE@297..298 "|" + PIPE@298..299 "|" + PIPE@299..300 "|" + PIPE@300..301 "|" + PIPE@301..302 "|" + PIPE@302..303 "|" + PIPE@303..304 "|" + PIPE@304..305 "|" + PIPE@305..306 "|" + PIPE@306..307 "|" + PIPE@307..308 "|" + PIPE@308..309 "|" + PIPE@309..310 "|" + PIPE@310..311 "|" + PIPE@311..312 "|" + PIPE@312..313 "|" + PIPE@313..314 "|" + WHITESPACE@314..315 " " + LITERAL@315..316 + INT_NUMBER@315..316 "2" + SEMICOLON@316..317 ";" + WHITESPACE@317..318 "\n" + COMMENT@318..346 "-- 63 chars is still ..." + WHITESPACE@346..347 "\n" + SELECT@347..422 + SELECT_CLAUSE@347..421 + SELECT_KW@347..353 "select" + WHITESPACE@353..354 " " + TARGET_LIST@354..421 + TARGET@354..421 + BIN_EXPR@354..421 + LITERAL@354..355 + INT_NUMBER@354..355 "1" + WHITESPACE@355..356 " " + CUSTOM_OP@356..419 + PLUS@356..357 "+" + PLUS@357..358 "+" + PLUS@358..359 "+" + PLUS@359..360 "+" + PLUS@360..361 "+" + PLUS@361..362 "+" + PLUS@362..363 "+" + PLUS@363..364 "+" + PLUS@364..365 "+" + PLUS@365..366 "+" + PLUS@366..367 "+" + PLUS@367..368 "+" + PLUS@368..369 "+" + PLUS@369..370 "+" + PLUS@370..371 "+" + PLUS@371..372 "+" + PLUS@372..373 "+" + PLUS@373..374 "+" + PLUS@374..375 "+" + PLUS@375..376 "+" + PLUS@376..377 "+" + PLUS@377..378 "+" + PLUS@378..379 "+" + PLUS@379..380 "+" + PLUS@380..381 "+" + PLUS@381..382 "+" + PLUS@382..383 "+" + PLUS@383..384 "+" + PLUS@384..385 "+" + PLUS@385..386 "+" + PLUS@386..387 "+" + PLUS@387..388 "+" + PLUS@388..389 "+" + PLUS@389..390 "+" + PLUS@390..391 "+" + PLUS@391..392 "+" + PLUS@392..393 "+" + PLUS@393..394 "+" + PLUS@394..395 "+" + PLUS@395..396 "+" + PLUS@396..397 "+" + PLUS@397..398 "+" + PLUS@398..399 "+" + PLUS@399..400 "+" + PLUS@400..401 "+" + PLUS@401..402 "+" + PLUS@402..403 "+" + PLUS@403..404 "+" + PLUS@404..405 "+" + PLUS@405..406 "+" + PLUS@406..407 "+" + PLUS@407..408 "+" + PLUS@408..409 "+" + PLUS@409..410 "+" + PLUS@410..411 "+" + PLUS@411..412 "+" + PLUS@412..413 "+" + PLUS@413..414 "+" + PLUS@414..415 "+" + PLUS@415..416 "+" + PLUS@416..417 "+" + PLUS@417..418 "+" + PLUS@418..419 "+" + WHITESPACE@419..420 " " + LITERAL@420..421 + INT_NUMBER@420..421 "2" + SEMICOLON@421..422 ";" + WHITESPACE@422..423 "\n" error[syntax-error]: missing comma ╭▸ @@ -124,3 +364,11 @@ error[syntax-error]: Invalid operator. ╭▸ 8 │ select ^m; ╰╴ ━ +error[syntax-error]: operator too long + ╭▸ +10 │ select |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 1; + ╰╴ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +error[syntax-error]: operator too long + ╭▸ +11 │ select 1 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2; + ╰╴ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ diff --git a/crates/squawk_syntax/src/validation.rs b/crates/squawk_syntax/src/validation.rs index 4481c247b..6aa43ba41 100644 --- a/crates/squawk_syntax/src/validation.rs +++ b/crates/squawk_syntax/src/validation.rs @@ -19,6 +19,7 @@ pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec) { ast::BeginFuncOptionList(it) => validate_begin_func_option_list(it, errors), ast::CreateAggregate(it) => validate_aggregate_params(it.param_list(), errors), ast::CreateTable(it) => validate_create_table(it, errors), + ast::CustomOp(it) => validate_custom_op_length(it, errors), ast::PrefixExpr(it) => validate_prefix_expr(it, errors), ast::ArrayExpr(it) => validate_array_expr(it, errors), ast::DropAggregate(it) => validate_drop_aggregate(it, errors), @@ -586,6 +587,15 @@ fn validate_prefix_expr(prefix_expr: ast::PrefixExpr, acc: &mut Vec validate_custom_op(op, acc); } +// NAMEDATALEN == 64 and idents and operators can be NAMEDATALEN - 1 +const MAX_OPERATOR_LEN: TextSize = TextSize::new(63); +fn validate_custom_op_length(op: ast::CustomOp, acc: &mut Vec) { + let range = op.syntax().text_range(); + if range.len() > MAX_OPERATOR_LEN { + acc.push(SyntaxError::new("operator too long", range)); + } +} + // https://www.postgresql.org/docs/17/sql-createoperator.html fn validate_custom_op(op: ast::CustomOp, acc: &mut Vec) { // TODO: there's more we can validate diff --git a/crates/squawk_syntax/test_data/validation/custom_operators.sql b/crates/squawk_syntax/test_data/validation/custom_operators.sql index eb584ff39..93c114a08 100644 --- a/crates/squawk_syntax/test_data/validation/custom_operators.sql +++ b/crates/squawk_syntax/test_data/validation/custom_operators.sql @@ -6,3 +6,8 @@ select >f; select =g; select %l; select ^m; +-- operators may not exceed NAMEDATALEN-1 (63) characters +select |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 1; +select 1 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2; +-- 63 chars is still allowed +select 1 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2; diff --git a/crates/xtask/src/sync_pg.rs b/crates/xtask/src/sync_pg.rs index 58d61ebdb..9cb7851a2 100644 --- a/crates/xtask/src/sync_pg.rs +++ b/crates/xtask/src/sync_pg.rs @@ -46,6 +46,7 @@ const IGNORED_LINES: &[&str] = &[ "SELECT 0.0e+a;", "PREPARE p1 AS SELECT $1a;", "PREPARE p1 AS SELECT $2147483648;", + r#"CREATE TABLE i_am_a_failure() USING "";"#, "SELECT 0b;", "SELECT 1b;", "SELECT 0b0x;", diff --git a/postgres/kwlist.h b/postgres/kwlist.h index f6061daa7..5ebc18705 100644 --- a/postgres/kwlist.h +++ b/postgres/kwlist.h @@ -1,7 +1,7 @@ // synced from: -// commit: 0392fb900eb89f52988cccd33046443c39c70d1c -// committed at: 2026-05-20T23:23:49+03:00 -// file: https://github.com/postgres/postgres/blob/0392fb900eb89f52988cccd33046443c39c70d1c/src/include/parser/kwlist.h +// commit: 2c4bd2bf5700db98be0602854a8b7fa2c16b5f4a +// committed at: 2026-05-23T09:39:58+09:00 +// file: https://github.com/postgres/postgres/blob/2c4bd2bf5700db98be0602854a8b7fa2c16b5f4a/src/include/parser/kwlist.h // // update via: // cargo xtask sync-pg diff --git a/postgres/regression_suite/constraints.sql b/postgres/regression_suite/constraints.sql index b9169ffda..e5debbd39 100644 --- a/postgres/regression_suite/constraints.sql +++ b/postgres/regression_suite/constraints.sql @@ -757,6 +757,9 @@ DROP TABLE ATACC1, ATACC2, ATACC3; -- NOT NULL NO INHERIT is not possible on partitioned tables CREATE TABLE ATACC1 (a int NOT NULL NO INHERIT) PARTITION BY LIST (a); CREATE TABLE ATACC1 (a int, NOT NULL a NO INHERIT) PARTITION BY LIST (a); +CREATE TABLE ATACC1 (a int, CONSTRAINT a_is_not_null NOT NULL a) PARTITION BY LIST (a); +ALTER TABLE ATACC1 ALTER CONSTRAINT a_is_not_null NO INHERIT; +DROP TABLE ATACC1; -- it's not possible to override a no-inherit constraint with an inheritable one CREATE TABLE ATACC2 (a int, CONSTRAINT a_is_not_null NOT NULL a NO INHERIT); diff --git a/postgres/regression_suite/create_am.sql b/postgres/regression_suite/create_am.sql index 63b8e19a1..84ce6b612 100644 --- a/postgres/regression_suite/create_am.sql +++ b/postgres/regression_suite/create_am.sql @@ -353,7 +353,7 @@ ROLLBACK; -- Third, check that we can neither create a table using a nonexistent -- AM, nor using an index AM -CREATE TABLE i_am_a_failure() USING ""; +-- CREATE TABLE i_am_a_failure() USING ""; CREATE TABLE i_am_a_failure() USING i_do_not_exist_am; CREATE TABLE i_am_a_failure() USING "I do not exist AM"; CREATE TABLE i_am_a_failure() USING "btree";