Skip to content

Commit

Permalink
chore(chrome): patch logs stealth mode
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Oct 6, 2024
1 parent 5bce711 commit 51814aa
Show file tree
Hide file tree
Showing 12 changed files with 74 additions and 24 deletions.
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion examples/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_examples"
version = "2.6.33"
version = "2.8.6"
authors = [
"j-mendez <[email protected]>",
]
Expand Down
2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.8.5"
version = "2.8.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
16 changes: 8 additions & 8 deletions spider/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.tom

```toml
[dependencies]
spider = "2.0.12"
spider = "2"
```

And then the code:
Expand Down Expand Up @@ -93,7 +93,7 @@ We have the following optional feature flags.

```toml
[dependencies]
spider = { version = "2.0.12", features = ["regex", "ua_generator"] }
spider = { version = "2", features = ["regex", "ua_generator"] }
```

1. `ua_generator`: Enables auto generating a random real User-Agent.
Expand Down Expand Up @@ -139,7 +139,7 @@ Move processing to a worker, drastically increases performance even if worker is

```toml
[dependencies]
spider = { version = "2.0.12", features = ["decentralized"] }
spider = { version = "2", features = ["decentralized"] }
```

```sh
Expand Down Expand Up @@ -170,7 +170,7 @@ Use the subscribe method to get a broadcast channel.

```toml
[dependencies]
spider = { version = "2.0.12", features = ["sync"] }
spider = { version = "2", features = ["sync"] }
```

```rust,no_run
Expand Down Expand Up @@ -201,7 +201,7 @@ Allow regex for blacklisting routes

```toml
[dependencies]
spider = { version = "2.0.12", features = ["regex"] }
spider = { version = "2", features = ["regex"] }
```

```rust,no_run
Expand All @@ -228,7 +228,7 @@ If you are performing large workloads you may need to control the crawler by ena

```toml
[dependencies]
spider = { version = "2.0.12", features = ["control"] }
spider = { version = "2", features = ["control"] }
```

```rust
Expand Down Expand Up @@ -298,7 +298,7 @@ Use cron jobs to run crawls continuously at anytime.

```toml
[dependencies]
spider = { version = "2.0.12", features = ["sync", "cron"] }
spider = { version = "2", features = ["sync", "cron"] }
```

```rust,no_run
Expand Down Expand Up @@ -337,7 +337,7 @@ the feature flag [`chrome_intercept`] to possibly speed up request using Network

```toml
[dependencies]
spider = { version = "2.0.12", features = ["chrome", "chrome_intercept"] }
spider = { version = "2", features = ["chrome", "chrome_intercept"] }
```

You can use `website.crawl_concurrent_raw` to perform a crawl without chromium when needed. Use the feature flag `chrome_headed` to enable headful browser usage if needed to debug.
Expand Down
3 changes: 3 additions & 0 deletions spider/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,9 @@ async fn perform_smart_mouse_movement(page: &chromiumoxide::Page) {
#[cfg(not(feature = "real_browser"))]
/// No-op stand-in for the smart mouse movement routine.
///
/// Compiled whenever the `real_browser` feature is disabled, whether or not
/// the `smart` feature is enabled, so a function with this name and signature
/// exists in every such build. The page argument is ignored and nothing is
/// sent to the browser.
async fn perform_smart_mouse_movement(_page: &chromiumoxide::Page) {}

#[cfg(feature = "chrome")]
/// Perform a network request to a resource extracting all content as text streaming via chrome.
pub async fn fetch_page_html_chrome_base(
Expand Down
47 changes: 47 additions & 0 deletions spider/src/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2463,6 +2463,29 @@ impl Website {
)
.await;

let mut retry_count = shared.5.retry;

while page.should_retry && retry_count > 0 {
if let Some(timeout) = page.get_timeout() {
tokio::time::sleep(timeout).await;
}
page.clone_from(
&Page::new(
&target_url,
&shared.0,
&new_page,
&shared.5.wait_for,
&shared.5.screenshot,
false,
&shared.5.openai_config,
&shared.5.execution_scripts,
&shared.5.automation_scripts,
)
.await,
);
retry_count -= 1;
}

if add_external {
page.set_external(
shared
Expand Down Expand Up @@ -2730,6 +2753,30 @@ impl Website {
)
.await;

let mut retry_count = shared.6.retry;

while page.should_retry && retry_count > 0 {
if let Some(timeout) = page.get_timeout() {
tokio::time::sleep(timeout).await;
}
page.clone_from(
&Page::new(
&target_url,
&shared.0,
&new_page,
&shared.6.wait_for,
&shared.6.screenshot,
false,
&shared.6.openai_config,
&shared.6.execution_scripts,
&shared.6.automation_scripts,
)
.await,
);
retry_count -= 1;
}


match intercept_handle {
Some(h) => {
let _ = h.await;
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.8.5"
version = "2.8.7"
rust-version = "1.70"
authors = [
"j-mendez <[email protected]>"
Expand Down
4 changes: 2 additions & 2 deletions spider_chrome/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,10 @@ impl Page {
Ok(())
}

/// Sets `window.chrome` on frame creation
/// Sets `window.chrome` on frame creation and replaces the `console` logging methods with no-ops.
async fn hide_chrome(&self) -> Result<(), CdpError> {
self.execute(AddScriptToEvaluateOnNewDocumentParams {
source: "window.chrome = { runtime: {} };".to_string(),
source: "window.chrome = { runtime: {} };['log', 'warn', 'error', 'info', 'debug', 'table'].forEach((method) => { console[method] = () => {}; });".to_string(),
world_name: None,
include_command_line_api: None,
run_immediately: None,
Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.8.5"
version = "2.8.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.8.5"
version = "2.8.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.8.5"
version = "2.8.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.8.5"
version = "2.8.7"
authors = [
"j-mendez <[email protected]>"
]
Expand Down

0 comments on commit 51814aa

Please sign in to comment.