From 6cd558d402b8fe80df37d9e29804026a7d44ecde Mon Sep 17 00:00:00 2001 From: j-mendez Date: Fri, 25 Oct 2024 09:30:06 -0400 Subject: [PATCH] chore(transforms): add optional ignore tags --- Cargo.lock | 192 +++++++++++------- spider/Cargo.toml | 2 +- spider_chrome/Cargo.toml | 2 +- spider_cli/Cargo.toml | 2 +- spider_transformations/Cargo.toml | 2 +- .../src/transformation/content.rs | 31 ++- .../src/transformation/mod.rs | 16 +- spider_utils/Cargo.toml | 2 +- spider_worker/Cargo.toml | 2 +- 9 files changed, 166 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 757db48d9..5dd258c05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,9 +114,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" dependencies = [ "anstyle", "anstyle-parse", @@ -129,36 +129,36 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = 
"79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -246,7 +246,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -676,7 +676,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -687,9 +687,9 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "compact_str" @@ -954,7 +954,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -978,7 +978,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -989,7 +989,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -1025,7 +1025,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -1035,7 +1035,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -1048,7 +1048,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -1082,6 +1082,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "dotenv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" + [[package]] name = "dtoa" version = "1.0.9" @@ -1117,9 +1123,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -1133,7 +1139,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -1231,9 +1237,9 @@ dependencies = [ [[package]] name = "fast_html2md" -version = "0.0.15" +version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cd1437cbb04999eb52557dc8c7449cc3c7d7fb3ba879ad367040ee02abe8097" +checksum = "1b2bb7e4e60024aa3c6a35f0badb53ad12b939b6efa7b82f18e8305a3035a783" dependencies = [ "auto_encoder", "html5ever", @@ -1405,7 +1411,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -1707,7 +1713,7 @@ dependencies = [ "markup5ever 0.12.1", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -2179,9 +2185,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "llm_readability" -version = "0.0.10" 
+version = "0.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c9dea2799904be3c0544aefaec93f1b83259534710873e8e9c30ecfdb9a299" +checksum = "6ceb797337daff18c6b2fe710ea090619d75542e3bff8906c95d295ee5d5d1ad" dependencies = [ "auto_encoder", "html5ever", @@ -2323,7 +2329,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -2361,7 +2367,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -2475,7 +2481,7 @@ checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -2646,7 +2652,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -2860,7 +2866,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -2892,29 +2898,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -2984,7 +2990,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" dependencies = [ "once_cell", - "toml_edit", + "toml_edit 0.19.15", ] [[package]] @@ -3298,9 +3304,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -3720,7 +3726,7 @@ checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -3745,6 +3751,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -3894,7 +3909,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.10.21" +version = "2.10.22" dependencies = [ "ahash", "async-openai", @@ -3955,7 +3970,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.10.21" +version = "2.10.22" dependencies = [ "adblock", "async-tungstenite", @@ -3990,7 +4005,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.10.21" +version = "2.10.22" dependencies = [ "clap", "env_logger", @@ -4014,7 +4029,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.10.21" +version = "2.10.22" dependencies 
= [ "aho-corasick", "fast_html2md", @@ -4035,7 +4050,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.10.21" +version = "2.10.22" dependencies = [ "indexmap 1.9.3", "serde", @@ -4047,7 +4062,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.10.21" +version = "2.10.22" dependencies = [ "env_logger", "lazy_static", @@ -4170,7 +4185,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -4213,9 +4228,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.84" +version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a2c4efbc0b0670e3d41f388e3cb936ff364bf681703b4c92ae26ca509966111" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", @@ -4317,7 +4332,7 @@ checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -4447,7 +4462,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -4530,11 +4545,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit 0.22.22", +] + [[package]] name = "toml_datetime" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] [[package]] name = "toml_edit" @@ -4544,7 +4574,20 @@ checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ "indexmap 2.6.0", "toml_datetime", - "winnow", + "winnow 
0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap 2.6.0", + "serde", + "serde_spanned", + "toml_datetime", + "winnow 0.6.20", ] [[package]] @@ -4573,7 +4616,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -4674,13 +4717,15 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "ua_generator" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d4f7fcefca2ec9c0b34043d4cd8c233fb975607a6c51143e765d9f22c652d8" +checksum = "abb6278d04b350875180a99b024830d33d0ba3da9de724835f90a97faf1f0d19" dependencies = [ + "dotenv", "fastrand", "serde", "serde_json", + "toml", "ureq", ] @@ -4896,7 +4941,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", "wasm-bindgen-shared", ] @@ -4930,7 +4975,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4943,9 +4988,9 @@ checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -5072,7 +5117,7 @@ checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -5083,7 +5128,7 
@@ checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] @@ -5273,6 +5318,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winnow" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.50.0" @@ -5340,7 +5394,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.84", + "syn 2.0.85", ] [[package]] diff --git a/spider/Cargo.toml b/spider/Cargo.toml index e1b660566..80512529a 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.10.21" +version = "2.10.22" authors = [ "j-mendez " ] diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index 5f670d557..84a6acaa9 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.10.21" +version = "2.10.22" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index ac4fb2445..5b7510197 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.10.21" +version = "2.10.22" authors = [ "j-mendez " ] diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index 57d47eb08..288dce628 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.10.21" +version = "2.10.22" authors = [ "j-mendez " ] diff --git a/spider_transformations/src/transformation/content.rs b/spider_transformations/src/transformation/content.rs index 2393d091f..2bab6a13a 100644 --- 
a/spider_transformations/src/transformation/content.rs +++ b/spider_transformations/src/transformation/content.rs @@ -109,6 +109,8 @@ pub struct TransformConfig { pub clean_html: bool, /// Filter svgs. pub filter_svg: bool, + /// Main content for the page. Excludes the nav, footer, aside, etc. + pub main_content: bool, } /// Select elements to show or hide using a CSS selector. @@ -318,6 +320,7 @@ pub fn transform_content( c: &TransformConfig, encoding: &Option, selector_config: &Option, + ignore_tags: &Option>, ) -> String { let base_html = get_html_with_selector(res, encoding, selector_config); @@ -337,7 +340,6 @@ pub fn transform_content( Some(u) => u, _ => &EXAMPLE_URL, }, - &None, ) { Ok(product) => product.content, _ => base_html, @@ -366,6 +368,18 @@ pub fn transform_content( tag_factory.insert(String::from("svg"), tag.clone()); } + if c.main_content { + tag_factory.insert(String::from("nav"), tag.clone()); + tag_factory.insert(String::from("footer"), tag.clone()); + tag_factory.insert(String::from("aside"), tag.clone()); + } + + if let Some(ignore) = ignore_tags { + for ignore_tag_name in ignore { + tag_factory.insert(ignore_tag_name.into(), tag.clone()); + } + } + let base_html = if c.clean_html { clean_html(&base_html) } else { @@ -399,6 +413,18 @@ pub fn transform_content( tag_factory.insert(String::from("svg"), tag.clone()); } + if c.main_content { + tag_factory.insert(String::from("nav"), tag.clone()); + tag_factory.insert(String::from("footer"), tag.clone()); + tag_factory.insert(String::from("aside"), tag.clone()); + } + + if let Some(ignore) = ignore_tags { + for ignore_tag_name in ignore { + tag_factory.insert(ignore_tag_name.into(), tag.clone()); + } + } + let base_html = if c.clean_html { clean_html(&base_html) } else { @@ -449,6 +475,7 @@ pub fn transform_content_to_bytes( c: &TransformConfig, encoding: &Option, selector_config: &Option, + ignore_tags: &Option>, ) -> Bytes { if is_binary_file(res.get_html_bytes_u8()) { let b = res.get_bytes(); @@
-458,7 +485,7 @@ pub fn transform_content_to_bytes( Default::default() } } else { - let content = transform_content(res, c, encoding, selector_config); + let content = transform_content(res, c, encoding, selector_config, ignore_tags); let b = content.as_bytes(); Bytes::copy_from_slice(b) } diff --git a/spider_transformations/src/transformation/mod.rs b/spider_transformations/src/transformation/mod.rs index 45c3d05f5..f3d94cd3f 100644 --- a/spider_transformations/src/transformation/mod.rs +++ b/spider_transformations/src/transformation/mod.rs @@ -55,7 +55,7 @@ mod tests { conf.return_format = ReturnFormat::Markdown; - let content = content::transform_content(&page, &conf, &None, &None); + let content = content::transform_content(&page, &conf, &None, &None, &None); assert!( content @@ -65,7 +65,7 @@ mod tests { conf.return_format = ReturnFormat::Html2Text; - let content = content::transform_content(&page, &conf, &None, &None); + let content = content::transform_content(&page, &conf, &None, &None, &None); assert!( content @@ -76,7 +76,7 @@ mod tests { conf.return_format = ReturnFormat::Bytes; conf.readability = true; - let content = content::transform_content(&page, &conf, &None, &None); + let content = content::transform_content(&page, &conf, &None, &None, &None); assert!( content @@ -85,7 +85,7 @@ mod tests { ); conf.return_format = ReturnFormat::XML; - let content = content::transform_content(&page, &conf, &Some("UTF-8".into()), &None); + let content = content::transform_content(&page, &conf, &Some("UTF-8".into()), &None, &None); assert!( content == r#" @@ -112,7 +112,7 @@ mod tests { conf.return_format = ReturnFormat::XML; page_response.content = Some(Bytes::from(markup)); let page = build(url, page_response); - let content = content::transform_content(&page, &conf, &None, &None); + let content = content::transform_content(&page, &conf, &None, &None, &None); assert!( content == r#"Transform Test

Fun is fun

Spider Cloud
The content is ready
"#, @@ -137,7 +137,7 @@ mod tests { select_config.root_selector = Some("pre".into()); - let content = content::transform_content(&page, &conf, &None, &Some(select_config)); + let content = content::transform_content(&page, &conf, &None, &Some(select_config), &None); assert!( content.contains(&"The content is ready"), @@ -162,7 +162,7 @@ mod tests { select_config.exclude_selector = Some("pre".into()); - let content = content::transform_content(&page, &conf, &None, &Some(select_config)); + let content = content::transform_content(&page, &conf, &None, &Some(select_config), &None); assert!( content.contains(&"Transform Test# Fun is fun\n[Spider Cloud](https://spider.cloud)"), @@ -187,7 +187,7 @@ mod tests { let page = build("https://example.com/example.pdf", page_response); - let content = content::transform_content(&page, &conf, &None, &None); + let content = content::transform_content(&page, &conf, &None, &None, &None); assert!(content.is_empty(), "The tranform to markdown is invalid"); } diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index 1fe67acbc..0f7da9023 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.10.21" +version = "2.10.22" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 74f3ee986..b01be0db6 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.10.21" +version = "2.10.22" authors = [ "j-mendez " ]