diff --git a/Cargo.lock b/Cargo.lock index 757dec4a0..5d5bf371d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3365,7 +3365,7 @@ dependencies = [ [[package]] name = "spider" -version = "1.37.5" +version = "1.37.6" dependencies = [ "ahash", "bytes", @@ -3399,7 +3399,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "1.37.5" +version = "1.37.6" dependencies = [ "clap 3.2.25", "env_logger 0.9.3", @@ -3411,7 +3411,7 @@ dependencies = [ [[package]] name = "spider_examples" -version = "1.37.5" +version = "1.37.6" dependencies = [ "convert_case", "env_logger 0.9.3", @@ -3432,7 +3432,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "1.37.5" +version = "1.37.6" dependencies = [ "env_logger 0.10.0", "lazy_static", diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 94028de60..023a3c486 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_examples" -version = "1.37.5" +version = "1.37.6" authors = ["madeindjs ", "j-mendez "] description = "Multithreaded web crawler written in Rust." repository = "https://github.com/spider-rs/spider" @@ -22,7 +22,7 @@ htr = "0.5.27" flexbuffers = "2.0.0" [dependencies.spider] -version = "1.37.5" +version = "1.37.6" path = "../spider" features = ["serde"] diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 8cecfda41..ac982ad4e 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "1.37.5" +version = "1.37.6" authors = ["madeindjs ", "j-mendez "] description = "The fastest web crawler written in Rust." repository = "https://github.com/spider-rs/spider" diff --git a/spider/README.md b/spider/README.md index 11e750235..83c18cb3f 100644 --- a/spider/README.md +++ b/spider/README.md @@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.tom ```toml [dependencies] -spider = "1.37.5" +spider = "1.37.6" ``` And then the code: @@ -87,7 +87,7 @@ We have a couple optional feature flags. Regex blacklisting, jemaloc backend, gl ```toml [dependencies] -spider = { version = "1.37.5", features = ["regex", "ua_generator"] } +spider = { version = "1.37.6", features = ["regex", "ua_generator"] } ``` 1. `ua_generator`: Enables auto generating a random real User-Agent. @@ -109,7 +109,7 @@ Move processing to a worker, drastically increases performance even if worker is ```toml [dependencies] -spider = { version = "1.37.5", features = ["decentralized"] } +spider = { version = "1.37.6", features = ["decentralized"] } ``` ```sh @@ -130,7 +130,7 @@ Use the subscribe method to get a broadcast channel. ```toml [dependencies] -spider = { version = "1.37.5", features = ["sync"] } +spider = { version = "1.37.6", features = ["sync"] } ``` ```rust,no_run @@ -160,7 +160,7 @@ Allow regex for blacklisting routes ```toml [dependencies] -spider = { version = "1.37.5", features = ["regex"] } +spider = { version = "1.37.6", features = ["regex"] } ``` ```rust,no_run @@ -187,7 +187,7 @@ If you are performing large workloads you may need to control the crawler by ena ```toml [dependencies] -spider = { version = "1.37.5", features = ["control"] } +spider = { version = "1.37.6", features = ["control"] } ``` ```rust diff --git a/spider/src/website.rs b/spider/src/website.rs index 734a202a6..fa3b61fc2 100644 --- a/spider/src/website.rs +++ b/spider/src/website.rs @@ -234,6 +234,11 @@ impl Website { &self.links_visited } + /// domain name getter + pub fn get_domain(&self) -> &CaseInsensitiveString { + &self.domain + } + /// crawl delay getter fn get_delay(&self) -> Duration { Duration::from_millis(self.configuration.delay) diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index fc9ca112d..aae5ef77c 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "1.37.5" +version = "1.37.6" authors = ["madeindjs ", "j-mendez "] description = "The fastest web crawler CLI written in Rust." repository = "https://github.com/spider-rs/spider" @@ -26,7 +26,7 @@ quote = "1.0.18" failure_derive = "0.1.8" [dependencies.spider] -version = "1.37.5" +version = "1.37.6" path = "../spider" [[bin]] diff --git a/spider_cli/README.md b/spider_cli/README.md index 0232fb32e..2848e2304 100644 --- a/spider_cli/README.md +++ b/spider_cli/README.md @@ -40,7 +40,7 @@ spider --domain http://localhost:3000 download -t _temp_spider_downloads ``` ```sh -spider_cli 1.37.5 +spider_cli 1.37.6 madeindjs , j-mendez The fastest web crawler CLI written in Rust. diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 9c5260b08..f83fdd442 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "1.37.5" +version = "1.37.6" authors = ["madeindjs ", "j-mendez "] description = "The fastest web crawler CLI written in Rust." repository = "https://github.com/spider-rs/spider" @@ -22,7 +22,7 @@ lazy_static = "1.4.0" env_logger = "0.10.0" [dependencies.spider] -version = "1.37.5" +version = "1.37.6" path = "../spider" features = ["serde", "flexbuffers"]