diff --git a/Cargo.lock b/Cargo.lock index 3723af020..d3b6bb8ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3917,7 +3917,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.11.2" +version = "2.11.3" dependencies = [ "ahash", "async-openai", @@ -3978,7 +3978,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.11.2" +version = "2.11.3" dependencies = [ "adblock", "async-tungstenite", @@ -4013,7 +4013,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.11.2" +version = "2.11.3" dependencies = [ "clap", "env_logger", @@ -4037,7 +4037,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.11.2" +version = "2.11.3" dependencies = [ "aho-corasick", "fast_html2md", @@ -4059,7 +4059,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.11.2" +version = "2.11.3" dependencies = [ "indexmap 1.9.3", "serde", @@ -4071,7 +4071,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.11.2" +version = "2.11.3" dependencies = [ "env_logger", "lazy_static", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index ba7b007d2..3a0ced0bb 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.11.2" +version = "2.11.3" authors = [ "j-mendez " ] diff --git a/spider/src/features/chrome_common.rs b/spider/src/features/chrome_common.rs index f49c792fe..231da3c0d 100644 --- a/spider/src/features/chrome_common.rs +++ b/spider/src/features/chrome_common.rs @@ -447,6 +447,13 @@ pub enum WebAutomation { Wait(u64), /// Waits for the next navigation event. WaitForNavigation, + /// Waits for an element matching the selector to be present in the DOM. + WaitForDom { + /// The selector of the element to wait for; `body` is used when `None`. + selector: Option<String>, + /// The timeout to wait for, in milliseconds (capped at 60000). + timeout: u32, + }, /// Waits for an element to appear. WaitFor(String), /// Waits for an element to appear and then clicks on it. 
@@ -475,6 +482,25 @@ pub enum WebAutomation { }, } +#[cfg(feature = "chrome")] +/// Generate the source of a JS function `w` that returns a Promise: it resolves once `document.querySelector(selector)` matches (selector defaults to `body` when `None`) and rejects with a timeout error after `timeout` ms. NOTE(review): the selector is inserted verbatim between single quotes, so a selector containing `'` would presumably break the generated script; confirm callers never pass one. +fn generate_wait_for_dom_js_code_with_selector(timeout: u32, selector: Option<&str>) -> String { + // Ensure the timeout doesn't exceed the maximum limit of 60000 milliseconds. + let clamped_timeout = if timeout > 60000 { 60000 } else { timeout }; + let query_selector = selector.unwrap_or("body"); + + format!( + "function w(){{return new Promise((r,j)=>{{ \ + let t={}; \ + let i=setTimeout(()=>{{j(new Error('Timeout: DOM did not update within the allowed time.'))}},t); \ + if(document.querySelector('{}')){{clearTimeout(i);r();}}else{{ \ + const o=new MutationObserver((m,a)=>{{ \ + if(document.querySelector('{}')){{a.disconnect();clearTimeout(i);r();}}}}); \ + o.observe(document,{{childList:true,subtree:true}});}}}});}}", + clamped_timeout, query_selector, query_selector + ) +} + impl WebAutomation { #[cfg(feature = "chrome")] /// Run the web automation step. @@ -497,6 +523,14 @@ impl WebAutomation { WebAutomation::Wait(ms) => { tokio::time::sleep(Duration::from_millis(*ms).min(Duration::from_secs(60))).await; } + WebAutomation::WaitForDom { selector, timeout } => { + let _ = page + .evaluate( + generate_wait_for_dom_js_code_with_selector(*timeout, selector.as_deref()) + .as_str(), + ) + .await; + } WebAutomation::WaitFor(selector) => { wait_for_selector(page, Some(Duration::from_secs(60)), &selector).await; } diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index 69d22c960..ac17892fa 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.11.2" +version = "2.11.3" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index 1f6b15546..f51c16998 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.11.2" +version = "2.11.3" authors = [ 
"j-mendez " ] diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index 7ea91c09a..501c9800e 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.11.2" +version = "2.11.3" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index c0a05a486..ed24a56d3 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.11.2" +version = "2.11.3" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 1c20a74be..7b4e8de91 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.11.2" +version = "2.11.3" authors = [ "j-mendez " ]