diff --git a/Cargo.lock b/Cargo.lock index af1f3ead9..f646152a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3747,7 +3747,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.8.4" +version = "2.8.5" dependencies = [ "ahash", "async-openai", @@ -3781,6 +3781,7 @@ dependencies = [ "phf 0.11.2", "phf_codegen 0.11.2", "quick-xml", + "rand 0.8.5", "regex", "reqwest", "reqwest-middleware", @@ -3804,7 +3805,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.8.4" +version = "2.8.5" dependencies = [ "adblock", "async-tungstenite", @@ -3839,7 +3840,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.8.4" +version = "2.8.5" dependencies = [ "clap", "env_logger", @@ -3863,7 +3864,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.8.4" +version = "2.8.5" dependencies = [ "aho-corasick", "fast_html2md", @@ -3882,7 +3883,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.8.4" +version = "2.8.5" dependencies = [ "indexmap 1.9.3", "spider", @@ -3891,7 +3892,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.8.4" +version = "2.8.5" dependencies = [ "env_logger", "lazy_static", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 3b74840b3..6a4b0ec49 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.8.4" +version = "2.8.5" authors = [ "j-mendez " ] @@ -64,6 +64,7 @@ auto_encoder = { version = "0.1", optional = true } base64 = { version = "0.22", optional = true } string-interner = {version = "0.17", default-features = false, features = ["std", "inline-more", "backends"]} httpdate = { version = "1", optional = true } +rand = { version = "0.8", optional = true } [target.'cfg(all(not(windows), not(target_os = "android"), not(target_env = "musl")))'.dependencies] tikv-jemallocator = { version = "0.6", optional = true } @@ -139,7 +140,7 @@ chrome_intercept = ["chrome"] chrome_headless_new = ["chrome"] cookies = ["reqwest/cookies"] cron = ["dep:async_job", "dep:chrono", "dep:cron", "dep:async-trait"] -smart = ["chrome", "dep:regex", "chrome_intercept"] +smart = ["chrome", "dep:regex", "dep:rand", "chrome_intercept"] encoding = ["dep:auto_encoder"] headers = ["dep:httpdate"] real_browser = [] diff --git a/spider/src/utils/mod.rs b/spider/src/utils/mod.rs index 7b8f2927a..012ea7860 100644 --- a/spider/src/utils/mod.rs +++ b/spider/src/utils/mod.rs @@ -833,6 +833,25 @@ async fn navigate( Ok(()) } +#[cfg(all(feature = "real_browser", feature = "smart"))] +/// generate random mouse movement. +async fn perform_smart_mouse_movement(page: &chromiumoxide::Page) { + use chromiumoxide::layout::Point; + use rand::rngs::SmallRng; + use rand::{Rng, SeedableRng}; + + let mut rng = SmallRng::from_entropy(); + + // we can pass in the browser size once we allow re-adjusting it and real movements. + let random_x = rng.gen_range(0.0..1280.0); + let random_y = rng.gen_range(0.0..720.0); + + let _ = page.move_mouse(Point::new(random_x, random_y)).await; +} + +#[cfg(all(not(feature = "real_browser"), feature = "smart"))] +async fn perform_smart_mouse_movement(_page: &chromiumoxide::Page) {} + #[cfg(feature = "chrome")] /// Perform a network request to a resource extracting all content as text streaming via chrome. pub async fn fetch_page_html_chrome_base( @@ -859,6 +878,10 @@ pub async fn fetch_page_html_chrome_base( } } + if chrome_http_req_res.waf_check { + perform_smart_mouse_movement(&page).await; + } + // we do not need to wait for navigation if content is assigned. The method set_content already handles this. let final_url = if wait_for_navigation && !content { let last_redirect = tokio::time::timeout(tokio::time::Duration::from_secs(15), async { diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index cca0b3511..46cff02e0 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.8.4" +version = "2.8.5" rust-version = "1.70" authors = [ "j-mendez " @@ -48,7 +48,7 @@ reqwest = { version = "0.12", default-features = false } lazy_static = "1.5.0" phf = { version = "0.11", features = ["macros"] } adblock = { version = "0.8", optional = true, default-features = false, features = ["embedded-domain-resolver", "full-regex-handling"] } -rand = "0.8.5" +rand = "0.8" [target.'cfg(windows)'.dependencies] winreg = "0.52" diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index 2ba814856..f6ca28560 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.8.4" +version = "2.8.5" authors = [ "j-mendez " ] diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index 63e08cf9b..5a7de0cde 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.8.4" +version = "2.8.5" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index f6f9d8a18..e1dcfea72 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.8.4" +version = "2.8.5" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 34f4e1719..92a4530de 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.8.4" +version = "2.8.5" authors = [ "j-mendez " ]