From 7484a1ff6cb7e1c9ed49d522fffc315c7bfb2c9e Mon Sep 17 00:00:00 2001
From: xlai89 <62478312+xlai89@users.noreply.github.com>
Date: Sun, 20 Oct 2024 22:56:46 +0200
Subject: [PATCH] Don't check prefix attribute (#1536)

---
 lychee-lib/src/extract/html/html5ever.rs | 16 ++++++++++++++++
 lychee-lib/src/extract/html/html5gum.rs  | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs
index 934ec90dc7..0a1414855b 100644
--- a/lychee-lib/src/extract/html/html5ever.rs
+++ b/lychee-lib/src/extract/html/html5ever.rs
@@ -86,6 +86,12 @@ impl TokenSink for LinkExtractor {
                     }
                 }
 
+                // Check and exclude `prefix` attribute. This attribute is used to define a prefix
+                // for the current element. It is not used to link to a resource.
+                if let Some(_prefix) = attrs.iter().find(|attr| &attr.name.local == "prefix") {
+                    return TokenSinkResult::Continue;
+                }
+
                 for attr in attrs {
                     let urls = LinkExtractor::extract_urls_from_elem_attr(
                         &attr.name.local,
@@ -416,6 +422,16 @@ mod tests {
         assert!(uris.is_empty());
     }
 
+    #[test]
+    fn test_skip_prefix() {
+        let input = r#"
+
+        "#;
+
+        let uris = extract_html(input, false);
+        assert!(uris.is_empty());
+    }
+
     #[test]
     fn test_ignore_text_content_links() {
         let input = r#"
diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs
index be78a3116a..5fb41be69f 100644
--- a/lychee-lib/src/extract/html/html5gum.rs
+++ b/lychee-lib/src/extract/html/html5gum.rs
@@ -178,6 +178,11 @@ impl LinkExtractor {
             return;
         }
 
+        if self.current_attributes.contains_key("prefix") {
+            self.current_attributes.clear();
+            return;
+        }
+
         let new_urls = self
             .extract_urls_from_elem_attr()
             .into_iter()
@@ -613,6 +618,16 @@ mod tests {
         assert!(uris.is_empty());
     }
 
+    #[test]
+    fn test_skip_prefix() {
+        let input = r#"
+
+        "#;
+
+        let uris = extract_html(input, false);
+        assert!(uris.is_empty());
+    }
+
     #[test]
     fn test_ignore_text_content_links() {
         let input = r#"