Commit 42c5b7b

lexer: fix offset calculation for string literals, fixing error reporting
1 parent 328010a commit 42c5b7b
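
What changed: the lexer previously advanced its byte offset by raw.len() for every token, but for string literals raw holds only the contents between the quotes, so reported offsets and end columns undercounted each string by one or two bytes. The token-producing match arms now return an explicit len alongside (raw, kind), and the string arm computes len = raw.len() + 1 + usize::from(is_closed) so the opening quote, and the closing quote when present, are counted. Below is a minimal, self-contained sketch of that computation; string_token_len is a hypothetical helper for illustration, not part of this codebase.

// Sketch only: assumes `raw` holds the string contents without the quotes,
// as in the lexer's '"' arm.
fn string_token_len(raw: &str, is_closed: bool) -> usize {
    // opening quote + contents + closing quote (counted only if the
    // literal was actually terminated)
    raw.len() + 1 + usize::from(is_closed)
}

fn main() {
    assert_eq!(string_token_len("hi", true), 4);  // closed literal: 4 bytes
    assert_eq!(string_token_len("hi", false), 3); // unterminated literal: 3 bytes
    println!("ok");
}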

File tree

2 files changed: +37 -33 lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ winnow = { version = "0.7.4", features = [
     #"unstable-doc", # build docs locally
     #"debug" # debug output/state of parser
 ] }
-miette = { version = "7.5.0", features = ["fancy"] }
+miette = { version = "7.6.0", features = ["fancy"] }
 tracing = "0.1.41"
 tracing-subscriber = "0.3.19"
 lsp-server = "0.7.8"

src/par/lexer.rs

Lines changed: 36 additions & 32 deletions
@@ -203,7 +203,7 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
     let mut idx = 0;
     while let Ok(c) = peek(any::<&str, Error>).parse_next(input) {
         let column = last_newline - input.len(); // starting column
-        let Some((raw, kind)) = (match c {
+        let Some((raw, kind, len)) = (match c {
             '-' => {
                 let (raw, mut kind) = alt((
                     ("->").map(|raw| (raw, TokenKind::ThinArrow)),
@@ -220,7 +220,7 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
                         kind = TokenKind::Unknown;
                     }
                 }
-                Some((raw, kind))
+                Some((raw, kind, raw.len()))
             }
             '0'..='9' | '+' => {
                 let raw = (
@@ -230,9 +230,9 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
                     .take()
                     .parse_next(input)?;
                 if !raw.contains(|c| matches!(c, '0'..='9')) {
-                    Some((raw, TokenKind::Unknown))
+                    Some((raw, TokenKind::Unknown, raw.len()))
                 } else {
-                    Some((raw, TokenKind::Integer))
+                    Some((raw, TokenKind::Integer, raw.len()))
                 }
             }
             '"' => {
@@ -245,13 +245,15 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
                     .parse_next(input)?;
                 let is_closed = opt('"').parse_next(input)?.is_some();
                 let is_valid = unescaper::unescape(raw).is_ok();
+                let len = raw.len() + 1 + usize::from(is_closed);
                 Some((
                     raw,
                     if is_closed && is_valid {
                         TokenKind::String
                     } else {
                         TokenKind::InvalidString
                     },
+                    len,
                 ))
             }
             'a'..='z' | 'A'..='Z' | '_' => {
@@ -295,7 +297,7 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
                         }
                     }
                 };
-                Some((raw, kind))
+                Some((raw, kind, raw.len()))
             }
             '\n' => {
                 let _ = any::<&str, Error>.parse_next(input);
@@ -311,46 +313,47 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
             }
             ':' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Colon))
+                Some((raw, TokenKind::Colon, raw.len()))
             }
             ';' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Semicolon))
+                Some((raw, TokenKind::Semicolon, raw.len()))
             }
             '[' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::LBrack))
+                Some((raw, TokenKind::LBrack, raw.len()))
             }
             ']' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::RBrack))
+                Some((raw, TokenKind::RBrack, raw.len()))
             }
             '(' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::LParen))
+                Some((raw, TokenKind::LParen, raw.len()))
             }
             ')' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::RParen))
+                Some((raw, TokenKind::RParen, raw.len()))
             }
             '{' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::LCurly))
+                Some((raw, TokenKind::LCurly, raw.len()))
             }
             '}' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::RCurly))
+                Some((raw, TokenKind::RCurly, raw.len()))
             }
-            '<' => Some(
-                alt((
+            '<' => {
+                let (raw, kind) = alt((
                     "<>".map(|raw| (raw, TokenKind::Link)),
                     "<".map(|raw| (raw, TokenKind::Lt)),
                 ))
-                .parse_next(input)?,
-            ),
+                .parse_next(input)?;
+                Some((raw, kind, raw.len()))
+            }
             '>' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Gt))
+                Some((raw, TokenKind::Gt, raw.len()))
             }
             '/' => {
                 let (is_comment, raw) = alt((
@@ -366,43 +369,44 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
                     idx += raw.len();
                     None
                 } else {
-                    Some((raw, TokenKind::Slash))
+                    Some((raw, TokenKind::Slash, raw.len()))
                 }
             }
             '@' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::At))
+                Some((raw, TokenKind::At, raw.len()))
             }
             ',' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Comma))
+                Some((raw, TokenKind::Comma, raw.len()))
             }
             '.' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Dot))
+                Some((raw, TokenKind::Dot, raw.len()))
             }
-            '=' => Some(
-                alt((
+            '=' => {
+                let (raw, kind) = alt((
                     ("=>").map(|raw| (raw, TokenKind::FatArrow)),
                     ("=").map(|raw| (raw, TokenKind::Eq)),
                 ))
-                .parse_next(input)?,
-            ),
+                .parse_next(input)?;
+                Some((raw, kind, raw.len()))
+            }
             '!' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Bang))
+                Some((raw, TokenKind::Bang, raw.len()))
             }
             '?' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Quest))
+                Some((raw, TokenKind::Quest, raw.len()))
             }
             '*' => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Star))
+                Some((raw, TokenKind::Star, raw.len()))
             }
             _ => {
                 let raw = any::<&str, Error>.take().parse_next(input)?;
-                Some((raw, TokenKind::Unknown))
+                Some((raw, TokenKind::Unknown, raw.len()))
             }
         }) else {
             continue;
@@ -412,11 +416,11 @@ pub fn lex<'s>(input: &'s str, file: &FileName) -> Vec<Token<'s>> {
             row: row as u32,
             column: column as u32,
         };
-        idx += raw.len();
+        idx += len;
         let end = Point {
             offset: idx.try_into().expect("position too large"),
             row: row as u32,
-            column: (column + raw.len()) as u32,
+            column: (column + len) as u32,
         };
         tokens.push(Token {
             kind,
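
For context, here is a sketch of how the loop tail above now builds a token's span from the explicit per-token length. The Point struct below is a simplified stand-in that only reproduces the fields visible in the diff; token_span is a hypothetical helper, not code from this repository.

#[derive(Debug)]
struct Point {
    offset: u32,
    row: u32,
    column: u32,
}

// Build start/end points for a token that begins at byte `idx`, column
// `column`, and spans `len` bytes (for strings, `len` now includes the quotes).
fn token_span(mut idx: usize, row: usize, column: usize, len: usize) -> (Point, Point) {
    let start = Point {
        offset: idx.try_into().expect("position too large"),
        row: row as u32,
        column: column as u32,
    };
    idx += len; // previously `idx += raw.len()`, which undercounted string literals
    let end = Point {
        offset: idx.try_into().expect("position too large"),
        row: row as u32,
        column: (column + len) as u32,
    };
    (start, end)
}

fn main() {
    // A closed literal "abc" starting at offset 10, column 4:
    // raw = "abc" (3 bytes), so len = 3 + 1 + 1 = 5.
    let (start, end) = token_span(10, 0, 4, 5);
    println!("{start:?} -> {end:?}"); // end offset 15 and column 9, not 13 and 7
}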
