Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Initial implementation of an experimental documentation generator that
generates Markdown documentation from `.prql` files. (@vanillajonathan,
#4152).
- Add `prqlc lex` command to the CLI (@max-sixty)

**Fixes**:

Expand Down
7 changes: 3 additions & 4 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ mod types;
use chumsky::error::SimpleReason;
use chumsky::{prelude::*, Stream};

use prqlc_ast::error::Reason;
use prqlc_ast::error::{Error, WithErrorInfo};
use prqlc_ast::error::{Error, Reason, WithErrorInfo};
use prqlc_ast::stmt::*;
use prqlc_ast::Span;

use lexer::TokenKind;
use lexer::{Token, TokenVec};
use lexer::Token;
pub use lexer::{TokenKind, TokenVec};
use span::ParserSpan;

/// Build PRQL AST from a PRQL query string.
Expand Down
69 changes: 63 additions & 6 deletions prqlc/prqlc/src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ use std::path::Path;
use std::process::exit;
use std::str::FromStr;

use prqlc::ast;
use prqlc::semantic;
use prqlc::semantic::reporting::{collect_frames, label_references};
use prqlc::semantic::NS_DEFAULT_DB;
use prqlc::{ast, prql_to_tokens};
use prqlc::{ir::pl::Lineage, ir::Span};
use prqlc::{pl_to_prql, pl_to_rq_tree, prql_to_pl, prql_to_pl_tree, rq_to_sql, SourceTree};
use prqlc::{Options, Target};
Expand Down Expand Up @@ -79,6 +79,14 @@ enum Command {
format: Format,
},

/// Lex into Tokens
Lex {
#[command(flatten)]
io_args: IoArgs,
#[arg(value_enum, long, default_value = "yaml")]
format: Format,
},

/// Parse & generate PRQL code back
#[command(name = "fmt")]
Format {
Expand Down Expand Up @@ -288,6 +296,17 @@ impl Command {
Format::Yaml => serde_yaml::to_string(&ast)?.into_bytes(),
}
}
Command::Lex { format, .. } => {
let s = sources.sources.values().exactly_one().or_else(|_| {
// TODO: allow multiple sources
bail!("Currently `lex` only works with a single source, but found multiple sources")
})?;
let tokens = prql_to_tokens(s)?;
match format {
Format::Json => serde_json::to_string_pretty(&tokens)?.into_bytes(),
Format::Yaml => serde_yaml::to_string(&tokens)?.into_bytes(),
}
}
Command::Collect(_) => {
let mut root_module_def = prql_to_pl_tree(sources)?;

Expand Down Expand Up @@ -429,7 +448,7 @@ impl Command {
}
}

_ => unreachable!(),
_ => unreachable!("Other commands shouldn't reach `execute`"),
})
}

Expand All @@ -438,11 +457,10 @@ impl Command {
// `input`, rather than matching on them and grabbing `input` from
// `self`? But possibly if everything moves to `io_args`, then this is
// quite reasonable?
use Command::{
Collect, Debug, Experimental, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
};
use Command::*;
let io_args = match self {
Parse { io_args, .. }
| Lex { io_args, .. }
| Collect(io_args)
| Resolve { io_args, .. }
| SQLCompile { io_args, .. }
Expand Down Expand Up @@ -481,10 +499,11 @@ impl Command {

fn write_output(&mut self, data: &[u8]) -> std::io::Result<()> {
use Command::{
Collect, Debug, Experimental, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
Collect, Debug, Experimental, Lex, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
};
let mut output = match self {
Parse { io_args, .. }
| Lex { io_args, .. }
| Collect(io_args)
| Resolve { io_args, .. }
| SQLCompile { io_args, .. }
Expand Down Expand Up @@ -815,4 +834,42 @@ sort full
column: 2
"###);
}

// In-process check of the `lex` command: runs `Command::execute` with
// `Command::Lex` in YAML format over the query `from x | select y` and
// compares the serialized token list against the inline snapshot below.
#[test]
fn lex() {
let output = Command::execute(
&Command::Lex {
io_args: IoArgs::default(),
format: Format::Yaml,
},
&mut "from x | select y".into(),
"",
)
.unwrap();

// The output bytes are decoded as UTF-8 and trimmed before comparison.
// TODO: terser output; maybe serialize span as `0..4`? Remove the
// `!Ident` complication?
assert_snapshot!(String::from_utf8(output).unwrap().trim(), @r###"
- kind: !Ident from
span:
start: 0
end: 4
- kind: !Ident x
span:
start: 5
end: 6
- kind: !Control '|'
span:
start: 7
end: 8
- kind: !Ident select
span:
start: 9
end: 15
- kind: !Ident y
span:
start: 16
end: 17
"###);
}
}
6 changes: 6 additions & 0 deletions prqlc/prqlc/src/error_message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ impl From<Error> for ErrorMessage {
}
}

impl From<Vec<ErrorMessage>> for ErrorMessages {
fn from(errors: Vec<ErrorMessage>) -> Self {
ErrorMessages { inner: errors }
}
}

#[derive(Debug, Clone, Serialize)]
pub struct ErrorMessages {
pub inner: Vec<ErrorMessage>,
Expand Down
11 changes: 11 additions & 0 deletions prqlc/prqlc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ pub static COMPILER_VERSION: Lazy<Version> =
Lazy::new(|| Version::parse(env!("CARGO_PKG_VERSION")).expect("Invalid prqlc version number"));

use once_cell::sync::Lazy;
use prqlc_parser::TokenVec;
use semver::Version;
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, path::PathBuf, str::FromStr};
Expand Down Expand Up @@ -306,6 +307,16 @@ pub enum DisplayOptions {
#[cfg(doctest)]
pub struct ReadmeDoctests;

/// Lex PRQL source into tokens.
pub fn prql_to_tokens(prql: &str) -> Result<TokenVec, ErrorMessages> {
prqlc_parser::lex_source(prql).map_err(|e| {
e.into_iter()
.map(|e| e.into())
.collect::<Vec<ErrorMessage>>()
.into()
})
}

/// Parse PRQL into a PL AST
// TODO: rename this to `prql_to_pl_simple`
pub fn prql_to_pl(prql: &str) -> Result<ast::ModuleDef, ErrorMessages> {
Expand Down
47 changes: 47 additions & 0 deletions prqlc/prqlc/tests/integration/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ fn help() {

Commands:
parse Parse into PL AST
lex Lex into Tokens
fmt Parse & generate PRQL code back
collect Parse the whole project and collect it into a single PRQL source file
debug Commands for meant for debugging, prone to change
Expand Down Expand Up @@ -471,3 +472,49 @@ fn compile_no_prql_files() {

"###);
}

// End-to-end check of the `prqlc lex` subcommand: the query `from tracks` is
// piped through stdin and the full command result (success flag, exit code,
// stdout, stderr) is compared against inline snapshots.
#[test]
fn lex() {
// No `--format` flag is passed here; the snapshot shows YAML output.
assert_cmd_snapshot!(prqlc_command().args(["lex"]).pass_stdin("from tracks"), @r###"
success: true
exit_code: 0
----- stdout -----
- kind: !Ident from
span:
start: 0
end: 4
- kind: !Ident tracks
span:
start: 5
end: 11

----- stderr -----
"###);

// Same input with `--format=json`; the snapshot shows pretty-printed JSON.
assert_cmd_snapshot!(prqlc_command().args(["lex", "--format=json"]).pass_stdin("from tracks"), @r###"
success: true
exit_code: 0
----- stdout -----
[
{
"kind": {
"Ident": "from"
},
"span": {
"start": 0,
"end": 4
}
},
{
"kind": {
"Ident": "tracks"
},
"span": {
"start": 5,
"end": 11
}
}
]
----- stderr -----
"###);
}
Loading