Skip to content

Commit

Permalink
perf(smart): fix js need determination
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Nov 5, 2024
1 parent bc68858 commit 41cacf1
Show file tree
Hide file tree
Showing 12 changed files with 278 additions and 233 deletions.
16 changes: 8 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
-version = "2.11.20"
+version = "2.12.4"
authors = [
"j-mendez <[email protected]>"
]
Expand Down Expand Up @@ -50,7 +50,7 @@ http-cache-reqwest = { version = "0.14.0", optional = true }
const_format = { version = "0.2", optional = true }
async-openai = { version = "0.25", optional = true }
tiktoken-rs = { version = "0.5", optional = true }
-lol_html = { version = "1", optional = true }
+lol_html = { version = "2" }
serde_json = { version = "1", optional = true }
quick-xml = { version = "0.36", features = ["serde", "serialize", "async-tokio"]}
moka = { version = "0.12", features = ["future"], optional = true }
Expand Down Expand Up @@ -147,7 +147,7 @@ smart = ["chrome", "dep:rand", "chrome_intercept"]
encoding = []
headers = ["dep:httpdate"]
real_browser = ["dep:statrs", "dep:rand"]
-openai = ["chrome", "serde", "chrome_intercept", "dep:async-openai", "dep:tiktoken-rs", "dep:lol_html", "dep:serde_json"]
+openai = ["chrome", "serde", "chrome_intercept", "dep:async-openai", "dep:tiktoken-rs", "dep:serde_json"]
openai_slim_fit = []
decentralized_headers = ["dep:const_format", "dep:itertools"]
spoof = ["dep:fastrand"]
Expand Down
4 changes: 1 addition & 3 deletions spider/src/packages/scraper/element_ref/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ impl<'a> ElementRef<'a> {
create_missing_parent: false,
};
let mut buf = Vec::new();
-match serialize(&mut buf, self, opts) {
-_ => (),
-};
+let _ = serialize(&mut buf, self, opts);
// we need to get the initial encoding of the html lang if used.
auto_encoder::auto_encode_bytes(&buf)
}
Expand Down
4 changes: 1 addition & 3 deletions spider/src/packages/scraper/html/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,7 @@ impl Html {
create_missing_parent: false,
};
let mut buf = Vec::new();
-match serialize(&mut buf, self, opts) {
-_ => (),
-};
+let _ = serialize(&mut buf, self, opts);
auto_encoder::auto_encode_bytes(&buf)
}

Expand Down
11 changes: 6 additions & 5 deletions spider/src/packages/scraper/html/tree_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,16 @@ impl TreeSink for Html {

// Detach the given node from its parent.
fn remove_from_parent(&mut self, target: &Self::Handle) {
-self.tree.get_mut(*target).unwrap().detach();
+if let Some(mut p) = self.tree.get_mut(*target) {
+p.detach();
+}
}

// Remove all the children from node and append them to new_parent.
fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
-self.tree
-.get_mut(*new_parent)
-.unwrap()
-.reparent_from_id_append(*node);
+if let Some(mut p) = self.tree.get_mut(*new_parent) {
+p.reparent_from_id_append(*node);
+}
}

// Add each attribute to the given element, if no attribute with that name already exists. The
Expand Down
Loading

0 comments on commit 41cacf1

Please sign in to comment.