|
1 | 1 | //! Extract links and fragments from markdown documents |
2 | 2 | use std::collections::{HashMap, HashSet}; |
3 | 3 |
|
| 4 | +use log::warn; |
4 | 5 | use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeWithOffset}; |
5 | 6 |
|
6 | 7 | use crate::{ |
| 8 | + ErrorKind, |
7 | 9 | extract::{html::html5gum::extract_html_with_span, plaintext::extract_raw_uri_from_plaintext}, |
8 | 10 | types::uri::raw::{ |
9 | 11 | OffsetSpanProvider, RawUri, RawUriSpan, SourceSpanProvider, SpanProvider as _, |
@@ -90,29 +92,17 @@ pub(crate) fn extract_markdown( |
90 | 92 | return None; |
91 | 93 | } |
92 | 94 |
|
93 | | - // Strip potholes (|) from wikilinks |
94 | | - let mut stripped_dest_url = if has_pothole { |
95 | | - pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())]) |
96 | | - } else { |
97 | | - dest_url.clone() |
98 | | - }; |
99 | | - |
100 | | - // Strip fragments (#) from wikilinks, according to the obsidian spec |
101 | | - // fragments come before potholes |
102 | | - if stripped_dest_url.contains('#') { |
103 | | - stripped_dest_url = pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('#').unwrap_or(dest_url.len())]); |
104 | | - } |
105 | | - |
106 | | - if stripped_dest_url.is_empty() { |
107 | | - None |
108 | | - } else { |
| 95 | + if let Ok(wikilink) = clean_wikilink(&dest_url, has_pothole) { |
109 | 96 | Some(vec![RawUri { |
110 | | - text: stripped_dest_url.to_string(), |
| 97 | + text: wikilink.to_string(), |
111 | 98 | element: Some("a".to_string()), |
112 | 99 | attribute: Some("wikilink".to_string()), |
113 | 100 | // wiki links start with `[[`, so offset the span by `2` |
114 | 101 | span: span.start + 2 |
115 | 102 | }]) |
| 103 | + } else { |
| 104 | + warn!("WARNING: The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments"); |
| 105 | + None |
116 | 106 | } |
117 | 107 | } |
118 | 108 | } |
@@ -287,6 +277,26 @@ pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet<String> { |
287 | 277 | out |
288 | 278 | } |
289 | 279 |
|
| 280 | +fn clean_wikilink(input: &str, has_pothole: bool) -> Result<CowStr<'_>, ErrorKind> { // Reduces a wikilink destination to its bare target by dropping any `|...` (pothole) and `#...` (fragment) part; returns a slice borrowed from `input`, or `ErrorKind::EmptyUrl` when nothing is left.
| 281 | + // Strip the pothole: keep only the text before the first `|`. Only done when the caller sets `has_pothole`; if no `|` is found, the whole input is kept.
| 282 | + let mut stripped_input = if has_pothole {
| 283 | + pulldown_cmark::CowStr::Borrowed(&input[0..input.find('|').unwrap_or(input.len())])
| 284 | + } else {
| 285 | + pulldown_cmark::CowStr::Borrowed(input)
| 286 | + };
| 287 | +
| 288 | + // Strip the fragment: keep only the text before the first `#`. According to the Obsidian spec,
| 289 | + // fragments always come before potholes. Slicing `input` (rather than `stripped_input`) gives the same result, because `stripped_input` is a prefix of `input` and so contains the same first `#`.
| 290 | + if stripped_input.contains('#') {
| 291 | + stripped_input =
| 292 | + pulldown_cmark::CowStr::Borrowed(&input[0..input.find('#').unwrap_or(input.len())]);
| 293 | + }
| 294 | + if stripped_input.is_empty() { // e.g. `|bar` or `#bar`: no target remains to link to
| 295 | + return Err(ErrorKind::EmptyUrl);
| 296 | + }
| 297 | + Ok(stripped_input)
| 298 | +}
| 299 | + |
290 | 300 | #[derive(Default)] |
291 | 301 | struct HeadingIdGenerator { |
292 | 302 | counter: HashMap<String, usize>, |
@@ -327,6 +337,7 @@ mod tests { |
327 | 337 | use crate::types::uri::raw::span; |
328 | 338 |
|
329 | 339 | use super::*; |
| 340 | + use rstest::rstest; |
330 | 341 |
|
331 | 342 | const MD_INPUT: &str = r#" |
332 | 343 | # A Test |
@@ -654,39 +665,53 @@ Shortcut link: [link4] |
654 | 665 | ); |
655 | 666 | } |
656 | 667 | } |
| 668 | + |
657 | 669 | #[test]
658 | | - fn test_remove_wikilink_pothole() {
659 | | - let markdown = r"[[foo|bar]]";
| 670 | + fn test_clean_wikilink() { // A pothole-only, a fragment-only and a fragment+pothole wikilink must each be reduced to the bare target `foo`.
| 671 | + let markdown = r"
| 672 | +[[foo|bar]]
| 673 | +[[foo#bar]]
| 674 | +[[foo#bar|baz]]
| 675 | +";
660 | 676 | let uris = extract_markdown(markdown, true, true);
661 | | - let expected = vec![RawUri {
662 | | - text: "foo".to_string(),
663 | | - element: Some("a".to_string()),
664 | | - attribute: Some("wikilink".to_string()),
665 | | - }];
| 677 | + let expected = vec![ // One entry per wikilink above, in document order. NOTE(review): the `RawUri` built in `extract_markdown` also sets a `span` field; these literals omit it - confirm they match the current struct.
| 678 | + RawUri {
| 679 | + text: "foo".to_string(),
| 680 | + element: Some("a".to_string()),
| 681 | + attribute: Some("wikilink".to_string()),
| 682 | + },
| 683 | + RawUri {
| 684 | + text: "foo".to_string(),
| 685 | + element: Some("a".to_string()),
| 686 | + attribute: Some("wikilink".to_string()),
| 687 | + },
| 688 | + RawUri {
| 689 | + text: "foo".to_string(),
| 690 | + element: Some("a".to_string()),
| 691 | + attribute: Some("wikilink".to_string()),
| 692 | + },
| 693 | + ];
666 | 694 | assert_eq!(uris, expected);
667 | 695 | }
668 | 696 |
|
669 | 697 | #[test]
670 | | - fn test_remove_wikilink_fragment() {
671 | | - let markdown = r"[[foo#bar]]";
| 698 | + fn test_wikilink_extraction_returns_none_on_empty_links() { // Each wikilink below has nothing before its first `|` or `#`, so its target is empty once cleaned.
| 699 | + let markdown = r"
| 700 | +[[|bar]]
| 701 | +[[#bar]]
| 702 | +[[#bar|baz]]
| 703 | +";
| 704 | +
672 | 705 | let uris = extract_markdown(markdown, true, true);
673 | | - let expected = vec![RawUri {
674 | | - text: "foo".to_string(),
675 | | - element: Some("a".to_string()),
676 | | - attribute: Some("wikilink".to_string()),
677 | | - }];
678 | | - assert_eq!(uris, expected);
| 706 | + assert!(uris.is_empty()); // wikilinks with an empty target must not yield any URI
679 | 707 | }
680 | 708 |
|
681 | | - #[test] |
682 | | - fn test_remove_wikilink_potholes_and_fragments() { |
683 | | - let markdown = r"[[foo#bar|baz]]"; |
684 | | - let uris = extract_markdown(markdown, true, true); |
685 | | - let expected = vec![RawUri { |
686 | | - text: "foo".to_string(), |
687 | | - element: Some("a".to_string()), |
688 | | - attribute: Some("wikilink".to_string()), |
689 | | - }]; |
690 | | - assert_eq!(uris, expected); |
| 709 | + #[rstest] // Each case starts with `|` or `#`, so stripping the pothole/fragment leaves an empty target.
| 710 | + #[case("|foo", true)]
| 711 | + #[case("|foo#bar", true)]
| 712 | + #[case("#baz", false)]
| 713 | + fn test_from_str(#[case] input: &str, #[case] has_pothole: bool) { // NOTE(review): despite its name, this test exercises `clean_wikilink`, not a `FromStr` impl.
| 714 | + let result = clean_wikilink(input, has_pothole);
| 715 | + assert!(result.is_err()); // an empty target is reported as an error rather than an empty string
691 | 716 | }
692 | 717 | } |
0 commit comments