Skip to content

Commit 840499d

Browse files
committed
refactor: wikilink cleanup
1 parent e4af5ed commit 840499d

File tree

1 file changed

+67
-42
lines changed

1 file changed

+67
-42
lines changed

lychee-lib/src/extract/markdown.rs

Lines changed: 67 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
//! Extract links and fragments from markdown documents
22
use std::collections::{HashMap, HashSet};
33

4+
use log::warn;
45
use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
56

67
use crate::{
8+
ErrorKind,
79
extract::{html::html5gum::extract_html_with_span, plaintext::extract_raw_uri_from_plaintext},
810
types::uri::raw::{
911
OffsetSpanProvider, RawUri, RawUriSpan, SourceSpanProvider, SpanProvider as _,
@@ -90,29 +92,17 @@ pub(crate) fn extract_markdown(
9092
return None;
9193
}
9294

93-
// Strip potholes (|) from wikilinks
94-
let mut stripped_dest_url = if has_pothole {
95-
pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())])
96-
} else {
97-
dest_url.clone()
98-
};
99-
100-
// Strip fragments (#) from wikilinks, according to the obsidian spec
101-
// fragments come before potholes
102-
if stripped_dest_url.contains('#') {
103-
stripped_dest_url = pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('#').unwrap_or(dest_url.len())]);
104-
}
105-
106-
if stripped_dest_url.is_empty() {
107-
None
108-
} else {
95+
if let Ok(wikilink) = clean_wikilink(&dest_url, has_pothole) {
10996
Some(vec![RawUri {
110-
text: stripped_dest_url.to_string(),
97+
text: wikilink.to_string(),
11198
element: Some("a".to_string()),
11299
attribute: Some("wikilink".to_string()),
113100
// wiki links start with `[[`, so offset the span by `2`
114101
span: span.start + 2
115102
}])
103+
} else {
104+
warn!("WARNING: The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments");
105+
None
116106
}
117107
}
118108
}
@@ -287,6 +277,26 @@ pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet<String> {
287277
out
288278
}
289279

280+
fn clean_wikilink(input: &str, has_pothole: bool) -> Result<CowStr<'_>, ErrorKind> {
281+
// Strip potholes (|) from wikilinks
282+
let mut stripped_input = if has_pothole {
283+
pulldown_cmark::CowStr::Borrowed(&input[0..input.find('|').unwrap_or(input.len())])
284+
} else {
285+
pulldown_cmark::CowStr::Borrowed(input)
286+
};
287+
288+
// Strip fragments (#) from wikilinks, according to the obsidian spec
289+
// fragments always come before potholes
290+
if stripped_input.contains('#') {
291+
stripped_input =
292+
pulldown_cmark::CowStr::Borrowed(&input[0..input.find('#').unwrap_or(input.len())]);
293+
}
294+
if stripped_input.is_empty() {
295+
return Err(ErrorKind::EmptyUrl);
296+
}
297+
Ok(stripped_input)
298+
}
299+
290300
#[derive(Default)]
291301
struct HeadingIdGenerator {
292302
counter: HashMap<String, usize>,
@@ -327,6 +337,7 @@ mod tests {
327337
use crate::types::uri::raw::span;
328338

329339
use super::*;
340+
use rstest::rstest;
330341

331342
const MD_INPUT: &str = r#"
332343
# A Test
@@ -654,39 +665,53 @@ Shortcut link: [link4]
654665
);
655666
}
656667
}
668+
657669
#[test]
658-
fn test_remove_wikilink_pothole() {
659-
let markdown = r"[[foo|bar]]";
670+
fn test_clean_wikilink() {
671+
let markdown = r"
672+
[[foo|bar]]
673+
[[foo#bar]]
674+
[[foo#bar|baz]]
675+
";
660676
let uris = extract_markdown(markdown, true, true);
661-
let expected = vec![RawUri {
662-
text: "foo".to_string(),
663-
element: Some("a".to_string()),
664-
attribute: Some("wikilink".to_string()),
665-
}];
677+
let expected = vec![
678+
RawUri {
679+
text: "foo".to_string(),
680+
element: Some("a".to_string()),
681+
attribute: Some("wikilink".to_string()),
682+
},
683+
RawUri {
684+
text: "foo".to_string(),
685+
element: Some("a".to_string()),
686+
attribute: Some("wikilink".to_string()),
687+
},
688+
RawUri {
689+
text: "foo".to_string(),
690+
element: Some("a".to_string()),
691+
attribute: Some("wikilink".to_string()),
692+
},
693+
];
666694
assert_eq!(uris, expected);
667695
}
668696

669697
#[test]
670-
fn test_remove_wikilink_fragment() {
671-
let markdown = r"[[foo#bar]]";
698+
fn test_wikilink_extraction_returns_none_on_empty_links() {
699+
let markdown = r"
700+
[[|bar]]
701+
[[#bar]]
702+
[[#bar|baz]]
703+
";
704+
672705
let uris = extract_markdown(markdown, true, true);
673-
let expected = vec![RawUri {
674-
text: "foo".to_string(),
675-
element: Some("a".to_string()),
676-
attribute: Some("wikilink".to_string()),
677-
}];
678-
assert_eq!(uris, expected);
706+
assert!(uris.is_empty());
679707
}
680708

681-
#[test]
682-
fn test_remove_wikilink_potholes_and_fragments() {
683-
let markdown = r"[[foo#bar|baz]]";
684-
let uris = extract_markdown(markdown, true, true);
685-
let expected = vec![RawUri {
686-
text: "foo".to_string(),
687-
element: Some("a".to_string()),
688-
attribute: Some("wikilink".to_string()),
689-
}];
690-
assert_eq!(uris, expected);
709+
// Parameterized check that `clean_wikilink` rejects inputs whose link target
// is empty: in every case below the text in front of the first `|` or `#`
// is the empty string, so the call is expected to return an error.
#[rstest]
710+
#[case("|foo", true)]
711+
#[case("|foo#bar", true)]
712+
#[case("#baz", false)]
713+
fn test_from_str(#[case] input: &str, #[case] has_pothole: bool) {
714+
let result = clean_wikilink(input, has_pothole);
715+
// Only the failure itself is asserted, not the concrete error variant.
assert!(result.is_err());
691716
}
692717
}

0 commit comments

Comments
 (0)