Skip to content

Commit

Permalink
chore(page): fix page absolute join
Browse files: browse the repository at this point in the history
  • Loading branch information
j-mendez committed Nov 1, 2024
1 parent 9ac1f04 commit 8595e52
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 61 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.11.1"
version = "2.11.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
75 changes: 26 additions & 49 deletions spider/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use crate::Client;
use crate::RelativeSelectors;
use bytes::Bytes;
use hashbrown::HashSet;
use phf::phf_set;
use reqwest::StatusCode;
use tokio::time::Duration;

Expand Down Expand Up @@ -186,48 +185,17 @@ pub fn domain_name(domain: &Url) -> &str {
}
}

static URL_JOIN_SYMBOLS: phf::Set<&'static str> = phf_set! {
"?", "#", "/"
};

/// Convert to absolute path
#[inline]
pub fn convert_abs_path(base: &Url, href: &str) -> Url {
let should_adjust = !base.path().ends_with('/') && !href.is_empty();
let needs_slash = if should_adjust {
match href.chars().next() {
Some(c) => !URL_JOIN_SYMBOLS.contains(&c.to_string()) && !href.starts_with("http"),
_ => false,
match base.join(&href) {
Ok(mut joined) => {
joined.set_fragment(None);
joined
}
} else {
false
};

if needs_slash {
let mut base = base.clone();
let mut path = base.path().to_string();
path.push('/');
base.set_path(&path);
match base.join(&href) {
Ok(mut joined) => {
joined.set_fragment(None);
joined
}
Err(e) => {
log("URL Parse Error: ", e.to_string());
base.clone()
}
}
} else {
match base.join(&href) {
Ok(mut joined) => {
joined.set_fragment(None);
joined
}
Err(e) => {
log("URL Parse Error: ", e.to_string());
base.clone()
}
Err(e) => {
log("URL Parse Error: ", e.to_string());
base.clone()
}
}
}
Expand Down Expand Up @@ -1563,24 +1531,26 @@ async fn test_status_code() {
assert_eq!(page.status_code.as_u16(), 404);
}

#[cfg(all(
not(feature = "decentralized"),
not(feature = "chrome"),
not(feature = "cache")
))]
#[tokio::test]
async fn test_abs_path() {
let client = Client::builder()
.user_agent(TEST_AGENT_NAME)
.build()
.expect("a valid agent");
let link_result = "https://choosealicense.com/";
let page: Page = Page::new(link_result, &client).await;
let page: Page = build(&link_result, Default::default());

assert_eq!(
page.abs_path("?query=keyword").expect("a valid url"),
Url::parse("https://choosealicense.com?query=keyword").expect("a valid url")
);

assert_eq!(
page.abs_path("#query=keyword").expect("a valid url"),
Url::parse("https://choosealicense.com").expect("a valid url")
);

assert_eq!(
page.abs_path("/page").expect("a valid url"),
Url::parse("https://choosealicense.com/page").expect("a valid url")
);

assert_eq!(
page.abs_path("/page?query=keyword").expect("a valid url"),
Url::parse("https://choosealicense.com/page?query=keyword").expect("a valid url")
Expand All @@ -1602,6 +1572,13 @@ async fn test_abs_path() {
page.abs_path("tel://+212 3456").unwrap(),
Url::parse("https://choosealicense.com/").expect("a valid url")
);

let page: Page = build(&format!("{}index.php", link_result), Default::default());

assert_eq!(
page.abs_path("index.html").expect("a valid url"),
Url::parse("https://choosealicense.com/index.html").expect("a valid url")
);
}

#[cfg(all(feature = "time", not(feature = "decentralized")))]
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.11.1"
version = "2.11.2"
rust-version = "1.70"
authors = [
"j-mendez <[email protected]>"
Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.11.1"
version = "2.11.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.11.1"
version = "2.11.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.11.1"
version = "2.11.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.11.1"
version = "2.11.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down

0 comments on commit 8595e52

Please sign in to comment.