Skip to content

Commit

Permalink
perf(smart): add atomic re-render handling
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Nov 7, 2024
1 parent e323e29 commit edb7f4f
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 78 deletions.
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.13.1"
version = "2.13.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
116 changes: 50 additions & 66 deletions spider/src/page.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::borrow::Borrow;

use crate::compact_str::CompactString;

#[cfg(all(feature = "chrome", not(feature = "decentralized")))]
Expand All @@ -13,6 +15,7 @@ use hashbrown::HashSet;
use lol_html::Settings;
use regex::bytes::Regex;
use reqwest::StatusCode;
use tokio::sync::RwLock;
use tokio::time::Duration;

#[cfg(all(feature = "time", not(feature = "decentralized")))]
Expand Down Expand Up @@ -537,14 +540,10 @@ impl Page {
handle_response_bytes_writer, modify_selectors, setup_default_response,
AllowedDomainTypes,
};

let page_response = match client.get(url).send().await {
Ok(res) if res.status().is_success() => {
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel();
let mut senders: Option<(
tokio::sync::mpsc::UnboundedSender<String>,
tokio::sync::mpsc::UnboundedReceiver<String>,
)> = None;
let ssg_path = RwLock::new(String::new());

let base = match Url::parse(url) {
Ok(u) => Some(u),
Expand All @@ -571,8 +570,6 @@ impl Page {
let base_input_domain = &selectors.2; // the domain after redirects
let sub_matcher = &selectors.0;

// let prior_domain = self.domain_parsed.take();

let external_domains_caseless = external_domains_caseless.clone();

let base_links_settings = if r_settings.full_resources {
Expand Down Expand Up @@ -619,21 +616,18 @@ impl Page {
let mut element_content_handlers = vec![base_links_settings];

if r_settings.ssg_build {
let c = tokio::sync::mpsc::unbounded_channel();
let ctx = c.0.clone();

element_content_handlers.push(lol_html::element!("script", move |el| {
element_content_handlers.push(lol_html::element!("script", |el| {
if let Some(source) = el.get_attribute("src") {
if source.starts_with("/_next/static/")
&& source.ends_with("/_ssgManifest.js")
{
let _ = ctx.send(source);
if let Ok(mut writer) = ssg_path.try_write() {
*writer = source.to_string();
}
}
}
Ok(())
}));

senders.replace(c);
}

let settings = lol_html::send::Settings {
Expand Down Expand Up @@ -666,54 +660,45 @@ impl Page {

drop(rx);

if let Some(ctx) = senders {
let mut rtx = ctx.1;
drop(ctx.0);

if r_settings.ssg_build {
if let Some(mut ssg_map) = ssg_map {
let rc = rtx.recv().await;
if r_settings.ssg_build {
if let Some(mut ssg_map) = ssg_map {
let reader = ssg_path.read().await;
let source = reader.borrow();

if let Some(ref url_base) = base {
let build_ssg_path = convert_abs_path(&url_base, &source);
let build_page = Page::new_page(build_ssg_path.as_str(), &client).await;

for cap in SSG_CAPTURE.captures_iter(build_page.get_html_bytes_u8()) {
if let Some(matched) = cap.get(1) {
let href = auto_encode_bytes(&matched.as_bytes())
.replace(r#"\u002F"#, "/");

/// Returns the part of `path` that follows its final `/`.
///
/// When `path` contains no `/`, the whole input is returned unchanged;
/// when it ends with `/`, the result is the empty string.
fn get_last_segment(path: &str) -> &str {
    path.rfind('/').map_or(path, |slash| &path[slash + 1..])
}

if let Some(source) = rc {
if let Some(ref url_base) = base {
let build_ssg_path = convert_abs_path(&url_base, &source);
let build_page =
Page::new_page(build_ssg_path.as_str(), &client).await;
let last_segment = get_last_segment(&href);

for cap in
SSG_CAPTURE.captures_iter(build_page.get_html_bytes_u8())
// Later we could accept a static map of the dynamic SSG routes beforehand, a custom API endpoint to seed them, etc.
if !(last_segment.starts_with("[")
&& last_segment.ends_with("]"))
{
if let Some(matched) = cap.get(1) {
let href = auto_encode_bytes(&matched.as_bytes())
.replace(r#"\u002F"#, "/");

/// Returns the part of `path` that follows its final `/`.
///
/// When `path` contains no `/`, the whole input is returned unchanged;
/// when it ends with `/`, the result is the empty string.
fn get_last_segment(path: &str) -> &str {
    path.rfind('/').map_or(path, |slash| &path[slash + 1..])
}

let last_segment = get_last_segment(&href);

// Later we could accept a static map of the dynamic SSG routes beforehand, a custom API endpoint to seed them, etc.
if !(last_segment.starts_with("[")
&& last_segment.ends_with("]"))
{
push_link(
&base,
&href,
&mut ssg_map,
&selectors.0,
parent_host,
parent_host_scheme,
base_input_domain,
sub_matcher,
&external_domains_caseless,
);
}
}
push_link(
&base,
&href,
&mut ssg_map,
&selectors.0,
parent_host,
parent_host_scheme,
base_input_domain,
sub_matcher,
&external_domains_caseless,
);
}
}
}
Expand Down Expand Up @@ -1506,6 +1491,8 @@ impl Page {
configuration: &crate::configuration::Configuration,
context_id: &Option<chromiumoxide::cdp::browser_protocol::browser::BrowserContextId>,
) -> HashSet<A> {
use std::sync::atomic::{AtomicBool, Ordering};

use auto_encoder::auto_encode_bytes;
use lol_html::{doc_comments, element};

Expand All @@ -1520,9 +1507,7 @@ impl Page {
.await;
} else {
let (tx, rx) = tokio::sync::oneshot::channel();

let (txx, mut rxx) = tokio::sync::mpsc::unbounded_channel();
let (txxx, mut rxxx) = tokio::sync::mpsc::unbounded_channel();

let base_input_domain = &selectors.2;
let parent_frags = &selectors.1; // todo: allow mix match tpt
Expand All @@ -1535,13 +1520,13 @@ impl Page {
let base = self.base.clone();
let base1 = base.clone();

let txxx2 = txxx.clone();
let rerender = AtomicBool::new(false);

let mut static_app = false;

let rewriter_settings = Settings {
element_content_handlers: vec![
element!("script", move |element| {
element!("script", |element| {
if !static_app {
if let Some(src) = element.get_attribute("src") {
if src.starts_with("/") {
Expand All @@ -1563,7 +1548,7 @@ impl Page {
if p.ends_with(".js")
&& JS_FRAMEWORK_ASSETS.contains(&p)
{
let _ = txxx2.send(true);
rerender.swap(true, Ordering::Relaxed);
}
}
}
Expand Down Expand Up @@ -1630,7 +1615,6 @@ impl Page {
let _ = rewriter.end();
}

drop(txxx);
drop(txx);

let mut rewrited_bytes: Vec<u8> = Vec::new();
Expand All @@ -1639,7 +1623,7 @@ impl Page {
rewrited_bytes.extend_from_slice(&c);
}

let mut rerender = rxxx.recv().await.unwrap_or_default();
let mut rerender = rerender.load(Ordering::Relaxed);

if !rerender {
if let Some(_) = DOM_WATCH_METHODS.find(&rewrited_bytes) {
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.13.1"
version = "2.13.2"
rust-version = "1.70"
authors = [
"j-mendez <[email protected]>"
Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.13.1"
version = "2.13.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.13.1"
version = "2.13.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.13.1"
version = "2.13.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.13.1"
version = "2.13.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down

0 comments on commit edb7f4f

Please sign in to comment.