From cb09258ce7b996b685684b933de86d8780b2182d Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Mon, 11 Mar 2024 16:40:11 +0100 Subject: [PATCH 01/24] Create request chain with basic authentication --- lychee-lib/src/chain/mod.rs | 39 +++++++++++++++++++++++++++++++++++++ lychee-lib/src/client.rs | 31 ++++++++++++++++------------- lychee-lib/src/lib.rs | 1 + 3 files changed, 58 insertions(+), 13 deletions(-) create mode 100644 lychee-lib/src/chain/mod.rs diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs new file mode 100644 index 0000000000..359983dac1 --- /dev/null +++ b/lychee-lib/src/chain/mod.rs @@ -0,0 +1,39 @@ +use headers::authorization::Credentials; +use http::header::AUTHORIZATION; +use reqwest::Request; + +use crate::BasicAuthCredentials; + +pub(crate) type Chain = Vec + Send>>; + +pub(crate) fn traverse(chain: Chain, mut input: T) -> T { + for mut e in chain { + input = e.handle(input) + } + + input +} + +pub(crate) trait Chainable { + fn handle(&mut self, input: T) -> T; +} + +pub(crate) struct BasicAuth { + credentials: BasicAuthCredentials, +} + +impl BasicAuth { + pub(crate) fn new(credentials: BasicAuthCredentials) -> Self { + Self { credentials } + } +} + +impl Chainable for BasicAuth { + fn handle(&mut self, mut request: Request) -> Request { + request.headers_mut().append( + AUTHORIZATION, + self.credentials.to_authorization().0.encode(), + ); + request + } +} diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 801d4e9891..fa469d59a5 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -17,9 +17,8 @@ use std::{collections::HashSet, path::Path, sync::Arc, time::Duration}; #[cfg(all(feature = "email-check", feature = "native-tls"))] use check_if_email_exists::{check_email, CheckEmailInput, Reachable}; -use headers::authorization::Credentials; use http::{ - header::{HeaderMap, HeaderValue, AUTHORIZATION}, + header::{HeaderMap, HeaderValue}, StatusCode, }; use log::{debug, warn}; @@ -31,6 +30,7 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ + chain::{traverse, BasicAuth, Chain}, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, @@ -647,23 +647,28 @@ impl Client { /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). async fn check_default(&self, uri: &Uri, credentials: &Option) -> Status { - let request = match credentials { - Some(credentials) => self - .reqwest_client - .request(self.method.clone(), uri.as_str()) - .header(AUTHORIZATION, credentials.to_authorization().0.encode()) - .build(), - None => self - .reqwest_client - .request(self.method.clone(), uri.as_str()) - .build(), - }; + // todo: middleware + + // todo: create credentials middleware + let request = self + .reqwest_client + .request(self.method.clone(), uri.as_str()) + .build(); let request = match request { Ok(r) => r, Err(e) => return e.into(), }; + let mut chain: Chain = vec![]; + + if let Some(c) = credentials { + chain.push(Box::new(BasicAuth::new(c.clone()))); + } + + let request = traverse(chain, request); + + // todo: quirks middleware let request = self.quirks.apply(request); match self.reqwest_client.execute(request).await { diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index a74b899bc5..dd413868c4 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -54,6 +54,7 @@ mod basic_auth; mod client; /// A pool of clients, to handle concurrent checks pub mod collector; +mod chain; mod quirks; mod retry; mod types; From 3c0747edcca32ba80db69707044b63ad56432694 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Mon, 11 Mar 2024 16:48:59 +0100 Subject: [PATCH 02/24] Test chain --- lychee-lib/src/chain/mod.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 359983dac1..222531488d 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -37,3 +37,24 @@ impl Chainable for BasicAuth { request } } + +mod test { + use super::Chainable; + + struct Add(i64); + + struct Request(i64); + + impl Chainable for Add { + fn handle(&mut self, req: Request) -> Request { + Request(req.0 + self.0) + } + } + + #[test] + fn example_chain() { + let chain: crate::chain::Chain = vec![Box::new(Add(10)), Box::new(Add(-3))]; + let result = crate::chain::traverse(chain, Request(0)); + assert_eq!(result.0, 7); + } +} From bbb47923c5fa0b7f0e8b41c09343808b040919b7 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Tue, 12 Mar 2024 08:49:46 +0100 Subject: [PATCH 03/24] Add quirks to request chain --- lychee-lib/src/client.rs | 5 +---- lychee-lib/src/quirks/mod.rs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index fa469d59a5..66b4266242 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -660,7 +660,7 @@ impl Client { Err(e) => return e.into(), }; - let mut chain: Chain = vec![]; + let mut chain: Chain = vec![Box::new(self.quirks.clone())]; if let Some(c) = credentials { chain.push(Box::new(BasicAuth::new(c.clone()))); @@ -668,9 +668,6 @@ impl Client { let request = traverse(chain, request); - // todo: quirks middleware - let request = self.quirks.apply(request); - match self.reqwest_client.execute(request).await { Ok(ref response) => Status::new(response, self.accepted.clone()), Err(e) => e.into(), diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index 7f30cba854..76ffe8fb76 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -1,3 +1,4 @@ +use crate::chain::Chainable; use header::HeaderValue; use http::header; use once_cell::sync::Lazy; @@ -71,11 +72,11 @@ impl Default for Quirks { } } -impl Quirks { - /// Apply quirks to a given request. Only the first quirk regex pattern +impl Chainable for Quirks { + /// Handle quirks in a given request. Only the first quirk regex pattern /// matching the URL will be applied. The rest will be discarded for /// simplicity reasons. This limitation might be lifted in the future. - pub(crate) fn apply(&self, request: Request) -> Request { + fn handle(&mut self, request: Request) -> Request { for quirk in &self.quirks { if quirk.pattern.is_match(request.url().as_str()) { return (quirk.rewrite)(request); @@ -92,6 +93,8 @@ mod tests { use http::{header, Method}; use reqwest::{Request, Url}; + use crate::chain::Chainable; + use super::Quirks; #[derive(Debug)] @@ -113,7 +116,7 @@ mod tests { fn test_cratesio_request() { let url = Url::parse("https://crates.io/crates/lychee").unwrap(); let request = Request::new(Method::GET, url); - let modified = Quirks::default().apply(request); + let modified = Quirks::default().handle(request); assert_eq!( modified.headers().get(header::ACCEPT).unwrap(), @@ -125,7 +128,7 @@ mod tests { fn test_youtube_video_request() { let url = Url::parse("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").unwrap(); let request = Request::new(Method::GET, url); - let modified = Quirks::default().apply(request); + let modified = Quirks::default().handle(request); let expected_url = Url::parse("https://img.youtube.com/vi/NlKuICiT470/0.jpg").unwrap(); assert_eq!( @@ -138,7 +141,7 @@ mod tests { fn test_youtube_video_shortlink_request() { let url = Url::parse("https://youtu.be/Rvu7N4wyFpk?t=42").unwrap(); let request = Request::new(Method::GET, url); - let modified = Quirks::default().apply(request); + let modified = Quirks::default().handle(request); let expected_url = Url::parse("https://img.youtube.com/vi/Rvu7N4wyFpk/0.jpg").unwrap(); assert_eq!( @@ -151,7 +154,7 @@ mod tests { fn test_non_video_youtube_url_untouched() { let url = Url::parse("https://www.youtube.com/channel/UCaYhcUwRBNscFNUKTjgPFiA").unwrap(); let request = Request::new(Method::GET, url.clone()); - let modified = Quirks::default().apply(request); + let modified = Quirks::default().handle(request); assert_eq!(MockRequest(modified), MockRequest::new(Method::GET, url)); } @@ -160,7 +163,7 @@ mod tests { fn test_no_quirk_applied() { let url = Url::parse("https://endler.dev").unwrap(); let request = Request::new(Method::GET, url.clone()); - let modified = Quirks::default().apply(request); + let modified = Quirks::default().handle(request); assert_eq!(MockRequest(modified), MockRequest::new(Method::GET, url)); } From 95b046a078db1a1432bff405718c94b8cf614026 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Tue, 12 Mar 2024 14:48:38 +0100 Subject: [PATCH 04/24] Pass down request_chain instead of credentials & add test --- lychee-lib/src/chain/mod.rs | 34 ++++++++++++---- lychee-lib/src/client.rs | 80 +++++++++++++++++++++---------------- 2 files changed, 72 insertions(+), 42 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 222531488d..7b9a564432 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,23 +1,38 @@ +use core::fmt::Debug; use headers::authorization::Credentials; use http::header::AUTHORIZATION; use reqwest::Request; use crate::BasicAuthCredentials; -pub(crate) type Chain = Vec + Send>>; +pub(crate) type RequestChain = Chain; -pub(crate) fn traverse(chain: Chain, mut input: T) -> T { - for mut e in chain { - input = e.handle(input) +#[derive(Debug)] +pub struct Chain(Vec + Send>>); + +impl Chain { + pub(crate) fn new(values: Vec + Send>>) -> Self { + Self(values) + } + + pub(crate) fn traverse(&mut self, mut input: T) -> T { + for e in self.0.iter_mut() { + input = e.handle(input) + } + + input } - input + pub(crate) fn push(&mut self, value: Box + Send>) { + self.0.push(value); + } } -pub(crate) trait Chainable { +pub(crate) trait Chainable: Debug { fn handle(&mut self, input: T) -> T; } +#[derive(Debug)] pub(crate) struct BasicAuth { credentials: BasicAuthCredentials, } @@ -41,8 +56,10 @@ impl Chainable for BasicAuth { mod test { use super::Chainable; + #[derive(Debug)] struct Add(i64); + #[derive(Debug)] struct Request(i64); impl Chainable for Add { @@ -53,8 +70,9 @@ mod test { #[test] fn example_chain() { - let chain: crate::chain::Chain = vec![Box::new(Add(10)), Box::new(Add(-3))]; - let result = crate::chain::traverse(chain, Request(0)); + use super::Chain; + let mut chain: Chain = Chain::new(vec![Box::new(Add(10)), Box::new(Add(-3))]); + let result = chain.traverse(Request(0)); assert_eq!(result.0, 7); } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 66b4266242..00c11b647c 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,14 +30,14 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{traverse, BasicAuth, Chain}, + chain::{BasicAuth, Chain, RequestChain}, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, retry::RetryExt, types::uri::github::GithubUri, utils::fragment_checker::FragmentChecker, - BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri, + ErrorKind, Request, Response, Result, Status, Uri, }; #[cfg(all(feature = "email-check", feature = "native-tls"))] @@ -374,8 +374,6 @@ impl ClientBuilder { include_mail: self.include_mail, }; - let quirks = Quirks::default(); - Ok(Client { reqwest_client, github_client, @@ -386,7 +384,6 @@ impl ClientBuilder { method: self.method, accepted: self.accepted, require_https: self.require_https, - quirks, include_fragments: self.include_fragments, fragment_checker: FragmentChecker::new(), }) @@ -433,9 +430,6 @@ pub struct Client { /// This would treat unencrypted links as errors when HTTPS is available. require_https: bool, - /// Override behaviors for certain known issues with special URIs. - quirks: Quirks, - /// Enable the checking of fragments in links. include_fragments: bool, @@ -477,16 +471,23 @@ impl Client { // )); // } - self.remap(uri)?; + self.remap(uri)?; // todo: possible to do in request chain? if self.is_excluded(uri) { return Ok(Response::new(uri.clone(), Status::Excluded, source)); } + let quirks = Quirks::default(); + let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); + + if let Some(c) = credentials { + request_chain.push(Box::new(BasicAuth::new(c.clone()))); + } + let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, - _ => self.check_website(uri, credentials).await?, + _ => self.check_website(uri, &mut request_chain).await?, }; Ok(Response::new(uri.clone(), status, source)) @@ -522,12 +523,12 @@ impl Client { pub async fn check_website( &self, uri: &Uri, - credentials: &Option, + request_chain: &mut RequestChain, ) -> Result { - match self.check_website_inner(uri, credentials).await { + match self.check_website_inner(uri, request_chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self - .check_website_inner(&uri.to_https()?, credentials) + .check_website_inner(&uri.to_https()?, request_chain) .await .is_success() { @@ -552,11 +553,7 @@ impl Client { /// - The URI is invalid. /// - The request failed. /// - The response status code is not accepted. - pub async fn check_website_inner( - &self, - uri: &Uri, - credentials: &Option, - ) -> Status { + pub async fn check_website_inner(&self, uri: &Uri, request_chain: &mut RequestChain) -> Status { // Workaround for upstream reqwest panic if validate_url(&uri.url) { if matches!(uri.scheme(), "http" | "https") { @@ -571,7 +568,7 @@ impl Client { return Status::Unsupported(ErrorKind::InvalidURI(uri.clone())); } - let status = self.retry_request(uri, credentials).await; + let status = self.retry_request(uri, request_chain).await; if status.is_success() { return status; } @@ -593,11 +590,11 @@ impl Client { /// Retry requests up to `max_retries` times /// with an exponential backoff. - async fn retry_request(&self, uri: &Uri, credentials: &Option) -> Status { + async fn retry_request(&self, uri: &Uri, request_chain: &mut RequestChain) -> Status { let mut retries: u64 = 0; let mut wait_time = self.retry_wait_time; - let mut status = self.check_default(uri, credentials).await; + let mut status = self.check_default(uri, request_chain).await; while retries < self.max_retries { if status.is_success() || !status.should_retry() { return status; @@ -605,7 +602,7 @@ impl Client { retries += 1; tokio::time::sleep(wait_time).await; wait_time = wait_time.saturating_mul(2); - status = self.check_default(uri, credentials).await; + status = self.check_default(uri, request_chain).await; } status } @@ -646,10 +643,7 @@ impl Client { } /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). - async fn check_default(&self, uri: &Uri, credentials: &Option) -> Status { - // todo: middleware - - // todo: create credentials middleware + async fn check_default(&self, uri: &Uri, request_chain: &mut RequestChain) -> Status { let request = self .reqwest_client .request(self.method.clone(), uri.as_str()) @@ -660,13 +654,7 @@ impl Client { Err(e) => return e.into(), }; - let mut chain: Chain = vec![Box::new(self.quirks.clone())]; - - if let Some(c) = credentials { - chain.push(Box::new(BasicAuth::new(c.clone()))); - } - - let request = traverse(chain, request); + let request = request_chain.traverse(request); match self.reqwest_client.execute(request).await { Ok(ref response) => Status::new(response, self.accepted.clone()), @@ -773,7 +761,7 @@ mod tests { use wiremock::matchers::path; use super::ClientBuilder; - use crate::{mock_server, test_utils::get_mock_client_response, Uri}; + use crate::{mock_server, test_utils::get_mock_client_response, Request, Uri}; #[tokio::test] async fn test_nonexistent() { @@ -820,6 +808,30 @@ mod tests { assert!(res.status().is_failure()); } + #[tokio::test] + async fn test_basic_auth() { + let mut r = Request::new( + "https://authenticationtest.com/HTTPAuth/" + .try_into() + .unwrap(), + crate::InputSource::Stdin, + None, + None, + None, + ); + + let res = get_mock_client_response(r.clone()).await; + assert_eq!(res.status().code(), Some(401.try_into().unwrap())); + + r.credentials = Some(crate::BasicAuthCredentials { + username: "user".into(), + password: "pass".into(), + }); + + let res = get_mock_client_response(r.clone()).await; + assert!(res.status().is_success()); + } + #[tokio::test] async fn test_non_github() { let mock_server = mock_server!(StatusCode::OK); From e7020864837313349d1bfa6a7d1cca90362fbc6f Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 13 Mar 2024 08:25:01 +0100 Subject: [PATCH 05/24] Introduce early exit in chain --- lychee-lib/src/chain/mod.rs | 76 ++++++++++++++++++++++++------------ lychee-lib/src/client.rs | 12 ++++-- lychee-lib/src/quirks/mod.rs | 29 ++++++++------ 3 files changed, 77 insertions(+), 40 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 7b9a564432..8ea86b960d 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,35 +1,46 @@ +use crate::{BasicAuthCredentials, Response}; use core::fmt::Debug; use headers::authorization::Credentials; use http::header::AUTHORIZATION; use reqwest::Request; -use crate::BasicAuthCredentials; +#[derive(Debug, PartialEq)] +pub(crate) enum ChainResult { + Chained(T), + EarlyExit(R), +} -pub(crate) type RequestChain = Chain; +pub(crate) type RequestChain = Chain; #[derive(Debug)] -pub struct Chain(Vec + Send>>); +pub struct Chain(Vec + Send>>); -impl Chain { - pub(crate) fn new(values: Vec + Send>>) -> Self { +impl Chain { + pub(crate) fn new(values: Vec + Send>>) -> Self { Self(values) } - pub(crate) fn traverse(&mut self, mut input: T) -> T { + pub(crate) fn traverse(&mut self, mut input: T) -> ChainResult { + use ChainResult::*; for e in self.0.iter_mut() { - input = e.handle(input) + match e.handle(input) { + Chained(r) => input = r, + EarlyExit(r) => { + return EarlyExit(r); + } + } } - input + Chained(input) } - pub(crate) fn push(&mut self, value: Box + Send>) { + pub(crate) fn push(&mut self, value: Box + Send>) { self.0.push(value); } } -pub(crate) trait Chainable: Debug { - fn handle(&mut self, input: T) -> T; +pub(crate) trait Chainable: Debug { + fn handle(&mut self, input: T) -> ChainResult; } #[derive(Debug)] @@ -43,36 +54,51 @@ impl BasicAuth { } } -impl Chainable for BasicAuth { - fn handle(&mut self, mut request: Request) -> Request { +impl Chainable for BasicAuth { + fn handle(&mut self, mut request: Request) -> ChainResult { request.headers_mut().append( AUTHORIZATION, self.credentials.to_authorization().0.encode(), ); - request + ChainResult::Chained(request) } } mod test { - use super::Chainable; + use super::{ChainResult, ChainResult::*, Chainable}; #[derive(Debug)] struct Add(i64); - #[derive(Debug)] - struct Request(i64); - - impl Chainable for Add { - fn handle(&mut self, req: Request) -> Request { - Request(req.0 + self.0) + #[derive(Debug, PartialEq, Eq)] + struct Result(i64); + + impl Chainable for Add { + fn handle(&mut self, req: Result) -> ChainResult { + let added = req.0 + self.0; + if added > 100 { + EarlyExit(Result(req.0)) + } else { + Chained(Result(added)) + } } } #[test] - fn example_chain() { + fn simple_chain() { + use super::Chain; + let mut chain: Chain = + Chain::new(vec![Box::new(Add(10)), Box::new(Add(-3))]); + let result = chain.traverse(Result(0)); + assert_eq!(result, Chained(Result(7))); + } + + #[test] + fn early_exit_chain() { use super::Chain; - let mut chain: Chain = Chain::new(vec![Box::new(Add(10)), Box::new(Add(-3))]); - let result = chain.traverse(Request(0)); - assert_eq!(result.0, 7); + let mut chain: Chain = + Chain::new(vec![Box::new(Add(80)), Box::new(Add(30)), Box::new(Add(1))]); + let result = chain.traverse(Result(0)); + assert_eq!(result, EarlyExit(Result(80))); } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 00c11b647c..3e2d3e3cd9 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,6 +30,7 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ + chain::ChainResult::*, chain::{BasicAuth, Chain, RequestChain}, filter::{Excludes, Filter, Includes}, quirks::Quirks, @@ -654,11 +655,14 @@ impl Client { Err(e) => return e.into(), }; - let request = request_chain.traverse(request); + let result = request_chain.traverse(request); - match self.reqwest_client.execute(request).await { - Ok(ref response) => Status::new(response, self.accepted.clone()), - Err(e) => e.into(), + match result { + EarlyExit(r) => r.1.status, + Chained(r) => match self.reqwest_client.execute(r).await { + Ok(ref response) => Status::new(response, self.accepted.clone()), + Err(e) => e.into(), + }, } } diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index 76ffe8fb76..e93d392cb7 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -1,4 +1,7 @@ -use crate::chain::Chainable; +use crate::{ + chain::{ChainResult, Chainable}, + Response, +}; use header::HeaderValue; use http::header; use once_cell::sync::Lazy; @@ -72,11 +75,11 @@ impl Default for Quirks { } } -impl Chainable for Quirks { - /// Handle quirks in a given request. Only the first quirk regex pattern +impl Quirks { + /// Apply quirks to a given request. Only the first quirk regex pattern /// matching the URL will be applied. The rest will be discarded for /// simplicity reasons. This limitation might be lifted in the future. - fn handle(&mut self, request: Request) -> Request { + pub(crate) fn apply(&self, request: Request) -> Request { for quirk in &self.quirks { if quirk.pattern.is_match(request.url().as_str()) { return (quirk.rewrite)(request); @@ -87,14 +90,18 @@ impl Chainable for Quirks { } } +impl Chainable for Quirks { + fn handle(&mut self, input: Request) -> ChainResult { + ChainResult::Chained(self.apply(input)) + } +} + #[cfg(test)] mod tests { use header::HeaderValue; use http::{header, Method}; use reqwest::{Request, Url}; - use crate::chain::Chainable; - use super::Quirks; #[derive(Debug)] @@ -116,7 +123,7 @@ mod tests { fn test_cratesio_request() { let url = Url::parse("https://crates.io/crates/lychee").unwrap(); let request = Request::new(Method::GET, url); - let modified = Quirks::default().handle(request); + let modified = Quirks::default().apply(request); assert_eq!( modified.headers().get(header::ACCEPT).unwrap(), @@ -128,7 +135,7 @@ mod tests { fn test_youtube_video_request() { let url = Url::parse("https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").unwrap(); let request = Request::new(Method::GET, url); - let modified = Quirks::default().handle(request); + let modified = Quirks::default().apply(request); let expected_url = Url::parse("https://img.youtube.com/vi/NlKuICiT470/0.jpg").unwrap(); assert_eq!( @@ -141,7 +148,7 @@ mod tests { fn test_youtube_video_shortlink_request() { let url = Url::parse("https://youtu.be/Rvu7N4wyFpk?t=42").unwrap(); let request = Request::new(Method::GET, url); - let modified = Quirks::default().handle(request); + let modified = Quirks::default().apply(request); let expected_url = Url::parse("https://img.youtube.com/vi/Rvu7N4wyFpk/0.jpg").unwrap(); assert_eq!( @@ -154,7 +161,7 @@ mod tests { fn test_non_video_youtube_url_untouched() { let url = Url::parse("https://www.youtube.com/channel/UCaYhcUwRBNscFNUKTjgPFiA").unwrap(); let request = Request::new(Method::GET, url.clone()); - let modified = Quirks::default().handle(request); + let modified = Quirks::default().apply(request); assert_eq!(MockRequest(modified), MockRequest::new(Method::GET, url)); } @@ -163,7 +170,7 @@ mod tests { fn test_no_quirk_applied() { let url = Url::parse("https://endler.dev").unwrap(); let request = Request::new(Method::GET, url.clone()); - let modified = Quirks::default().handle(request); + let modified = Quirks::default().apply(request); assert_eq!(MockRequest(modified), MockRequest::new(Method::GET, url)); } From d9a3d1d2618f3fdfcd81bafed6392cc0ae581417 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 13 Mar 2024 08:35:19 +0100 Subject: [PATCH 06/24] Implement Chainable directly for BasicAuthCredentials --- lychee-lib/src/chain/mod.rs | 25 +------------------ lychee-lib/src/client.rs | 4 +-- .../src/types/basic_auth/credentials.rs | 17 +++++++++++++ 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 8ea86b960d..97d534f94d 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,7 +1,5 @@ -use crate::{BasicAuthCredentials, Response}; +use crate::Response; use core::fmt::Debug; -use headers::authorization::Credentials; -use http::header::AUTHORIZATION; use reqwest::Request; #[derive(Debug, PartialEq)] @@ -43,27 +41,6 @@ pub(crate) trait Chainable: Debug { fn handle(&mut self, input: T) -> ChainResult; } -#[derive(Debug)] -pub(crate) struct BasicAuth { - credentials: BasicAuthCredentials, -} - -impl BasicAuth { - pub(crate) fn new(credentials: BasicAuthCredentials) -> Self { - Self { credentials } - } -} - -impl Chainable for BasicAuth { - fn handle(&mut self, mut request: Request) -> ChainResult { - request.headers_mut().append( - AUTHORIZATION, - self.credentials.to_authorization().0.encode(), - ); - ChainResult::Chained(request) - } -} - mod test { use super::{ChainResult, ChainResult::*, Chainable}; diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 3e2d3e3cd9..48e4c742e0 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -31,7 +31,7 @@ use typed_builder::TypedBuilder; use crate::{ chain::ChainResult::*, - chain::{BasicAuth, Chain, RequestChain}, + chain::{Chain, RequestChain}, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, @@ -482,7 +482,7 @@ impl Client { let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); if let Some(c) = credentials { - request_chain.push(Box::new(BasicAuth::new(c.clone()))); + request_chain.push(Box::new(c.clone())); } let status = match uri.scheme() { diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index 2452fb973b..bfbc519595 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -1,9 +1,17 @@ use std::str::FromStr; +use headers::authorization::Credentials; use headers::{authorization::Basic, Authorization}; +use http::header::AUTHORIZATION; +use reqwest::Request; use serde::Deserialize; use thiserror::Error; +use crate::{ + chain::{ChainResult, Chainable}, + Response, +}; + #[derive(Copy, Clone, Debug, Error, PartialEq)] pub enum BasicAuthCredentialsParseError { #[error("Invalid basic auth credentials syntax")] @@ -66,3 +74,12 @@ impl BasicAuthCredentials { Authorization::basic(&self.username, &self.password) } } + +impl Chainable for BasicAuthCredentials { + fn handle(&mut self, mut request: Request) -> ChainResult { + request + .headers_mut() + .append(AUTHORIZATION, self.to_authorization().0.encode()); + ChainResult::Chained(request) + } +} From 811a73b6a418524056d13129dd8d11df7c4851da Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 13 Mar 2024 09:45:38 +0100 Subject: [PATCH 07/24] Move chain into check_website function --- lychee-lib/src/chain/mod.rs | 4 +--- lychee-lib/src/client.rs | 28 +++++++++++++--------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 97d534f94d..bbfd0d29aa 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,6 +1,4 @@ -use crate::Response; use core::fmt::Debug; -use reqwest::Request; #[derive(Debug, PartialEq)] pub(crate) enum ChainResult { @@ -8,7 +6,7 @@ pub(crate) enum ChainResult { EarlyExit(R), } -pub(crate) type RequestChain = Chain; +pub(crate) type RequestChain = Chain; #[derive(Debug)] pub struct Chain(Vec + Send>>); diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 48e4c742e0..f561e799dc 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,15 +30,14 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::ChainResult::*, - chain::{Chain, RequestChain}, + chain::{Chain, ChainResult::*, RequestChain}, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, retry::RetryExt, types::uri::github::GithubUri, utils::fragment_checker::FragmentChecker, - ErrorKind, Request, Response, Result, Status, Uri, + BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri, }; #[cfg(all(feature = "email-check", feature = "native-tls"))] @@ -472,23 +471,16 @@ impl Client { // )); // } - self.remap(uri)?; // todo: possible to do in request chain? + self.remap(uri)?; if self.is_excluded(uri) { return Ok(Response::new(uri.clone(), Status::Excluded, source)); } - let quirks = Quirks::default(); - let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); - - if let Some(c) = credentials { - request_chain.push(Box::new(c.clone())); - } - let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, - _ => self.check_website(uri, &mut request_chain).await?, + _ => self.check_website(uri, credentials).await?, }; Ok(Response::new(uri.clone(), status, source)) @@ -524,12 +516,18 @@ impl Client { pub async fn check_website( &self, uri: &Uri, - request_chain: &mut RequestChain, + credentials: &Option, ) -> Result { - match self.check_website_inner(uri, request_chain).await { + let quirks = Quirks::default(); + let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); + if let Some(c) = credentials { + request_chain.push(Box::new(c.clone())); + } + + match self.check_website_inner(uri, &mut request_chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self - .check_website_inner(&uri.to_https()?, request_chain) + .check_website_inner(&uri.to_https()?, &mut request_chain) .await .is_success() { From da39a4f041632788e66922ab60387097b51a914d Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 13 Mar 2024 14:58:50 +0100 Subject: [PATCH 08/24] Update RequestChain & add chain to client --- lychee-lib/src/chain/mod.rs | 14 ++++- lychee-lib/src/client.rs | 55 +++++++++++++++++-- lychee-lib/src/quirks/mod.rs | 6 +- .../src/types/basic_auth/credentials.rs | 10 ++-- 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index bbfd0d29aa..4585a3383f 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,21 +1,33 @@ use core::fmt::Debug; +use crate::Status; + #[derive(Debug, PartialEq)] pub(crate) enum ChainResult { Chained(T), EarlyExit(R), } -pub(crate) type RequestChain = Chain; +pub(crate) type RequestChain = Chain; #[derive(Debug)] pub struct Chain(Vec + Send>>); +impl Default for Chain { + fn default() -> Self { + Self(vec![]) + } +} + impl Chain { pub(crate) fn new(values: Vec + Send>>) -> Self { Self(values) } + pub(crate) fn append(&mut self, other: &mut Chain) { + self.0.append(&mut other.0); + } + pub(crate) fn traverse(&mut self, mut input: T) -> ChainResult { use ChainResult::*; for e in self.0.iter_mut() { diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index f561e799dc..bd21f2ddfa 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -13,7 +13,12 @@ clippy::default_trait_access, clippy::used_underscore_binding )] -use std::{collections::HashSet, path::Path, sync::Arc, time::Duration}; +use std::{ + collections::HashSet, + path::Path, + sync::{Arc, Mutex}, + time::Duration, +}; #[cfg(all(feature = "email-check", feature = "native-tls"))] use check_if_email_exists::{check_email, CheckEmailInput, Reachable}; @@ -30,7 +35,7 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{Chain, ChainResult::*, RequestChain}, + chain::{Chain, ChainResult::{Chained, EarlyExit}, RequestChain}, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, @@ -274,6 +279,8 @@ pub struct ClientBuilder { /// Enable the checking of fragments in links. include_fragments: bool, + + request_chain: Arc>, } impl Default for ClientBuilder { @@ -386,6 +393,7 @@ impl ClientBuilder { require_https: self.require_https, include_fragments: self.include_fragments, fragment_checker: FragmentChecker::new(), + request_chain: self.request_chain, }) } } @@ -435,6 +443,8 @@ pub struct Client { /// Caches Fragments fragment_checker: FragmentChecker, + + request_chain: Arc>, } impl Client { @@ -520,6 +530,8 @@ impl Client { ) -> Result { let quirks = Quirks::default(); let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); + request_chain.append(&mut self.request_chain.lock().unwrap()); + if let Some(c) = credentials { request_chain.push(Box::new(c.clone())); } @@ -656,7 +668,7 @@ impl Client { let result = request_chain.traverse(request); match result { - EarlyExit(r) => r.1.status, + EarlyExit(status) => status, Chained(r) => match self.reqwest_client.execute(r).await { Ok(ref response) => Status::new(response, self.accepted.clone()), Err(e) => e.into(), @@ -754,6 +766,7 @@ where mod tests { use std::{ fs::File, + sync::{Arc, Mutex}, time::{Duration, Instant}, }; @@ -763,7 +776,12 @@ mod tests { use wiremock::matchers::path; use super::ClientBuilder; - use crate::{mock_server, test_utils::get_mock_client_response, Request, Uri}; + use crate::{ + chain::{ChainResult, Chainable, RequestChain}, + mock_server, + test_utils::get_mock_client_response, + Request, Status, Uri, + }; #[tokio::test] async fn test_nonexistent() { @@ -1092,4 +1110,33 @@ mod tests { assert!(res.status().is_unsupported()); } } + + #[tokio::test] + async fn test_chain() { + use reqwest::Request; + use ChainResult::EarlyExit; + + #[derive(Debug)] + struct ExampleHandler(); + + impl Chainable for ExampleHandler { + fn handle(&mut self, _: Request) -> ChainResult { + EarlyExit(Status::Excluded) + } + } + + let chain = Arc::new(Mutex::new(RequestChain::new(vec![Box::new( + ExampleHandler {}, + )]))); + + let client = ClientBuilder::builder() + .request_chain(chain) + .build() + .client() + .unwrap(); + + let result = client.check("http://example.com"); + let res = result.await.unwrap(); + assert_eq!(res.status(), &Status::Excluded); + } } diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index e93d392cb7..fcfb5bfb25 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -1,6 +1,6 @@ use crate::{ chain::{ChainResult, Chainable}, - Response, + Status, }; use header::HeaderValue; use http::header; @@ -90,8 +90,8 @@ impl Quirks { } } -impl Chainable for Quirks { - fn handle(&mut self, input: Request) -> ChainResult { +impl Chainable for Quirks { + fn handle(&mut self, input: Request) -> ChainResult { ChainResult::Chained(self.apply(input)) } } diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index bfbc519595..ed10511344 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -7,10 +7,8 @@ use reqwest::Request; use serde::Deserialize; use thiserror::Error; -use crate::{ - chain::{ChainResult, Chainable}, - Response, -}; +use crate::chain::{ChainResult, Chainable}; +use crate::Status; #[derive(Copy, Clone, Debug, Error, PartialEq)] pub enum BasicAuthCredentialsParseError { @@ -75,8 +73,8 @@ impl BasicAuthCredentials { } } -impl Chainable for BasicAuthCredentials { - fn handle(&mut self, mut request: Request) -> ChainResult { +impl Chainable for BasicAuthCredentials { + fn handle(&mut self, mut request: Request) -> ChainResult { request .headers_mut() .append(AUTHORIZATION, self.to_authorization().0.encode()); From b006a9f2ea367f31fffd69b543b99456c15f5227 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Thu, 14 Mar 2024 14:04:43 +0100 Subject: [PATCH 09/24] Add doc comment --- lychee-lib/src/client.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index bd21f2ddfa..1642f20ef1 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -35,7 +35,11 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{Chain, ChainResult::{Chained, EarlyExit}, RequestChain}, + chain::{ + Chain, + ChainResult::{Chained, EarlyExit}, + RequestChain, + }, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, @@ -280,6 +284,10 @@ pub struct ClientBuilder { /// Enable the checking of fragments in links. include_fragments: bool, + /// Requests run through this chain where each item in the chain + /// can modify the request. A chained item can also decide to exit + /// early and return a status, so that subsequent chain items are + /// skipped and no HTTP request is ever made. request_chain: Arc>, } @@ -1114,14 +1122,13 @@ mod tests { #[tokio::test] async fn test_chain() { use reqwest::Request; - use ChainResult::EarlyExit; #[derive(Debug)] struct ExampleHandler(); - impl Chainable for ExampleHandler { - fn handle(&mut self, _: Request) -> ChainResult { - EarlyExit(Status::Excluded) + impl Chainable for ExampleHandler { + fn handle(&mut self, _: Request) -> ChainResult { + ChainResult::EarlyExit(Status::Excluded) } } From 84dfd00583c6e02381faa753980809b632ccd8db Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Thu, 14 Mar 2024 14:34:19 +0100 Subject: [PATCH 10/24] Small improvements --- lychee-lib/src/client.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 1642f20ef1..4f61016631 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -838,15 +838,9 @@ mod tests { #[tokio::test] async fn test_basic_auth() { - let mut r = Request::new( - "https://authenticationtest.com/HTTPAuth/" - .try_into() - .unwrap(), - crate::InputSource::Stdin, - None, - None, - None, - ); + let mut r: Request = "https://authenticationtest.com/HTTPAuth/" + .try_into() + .unwrap(); let res = get_mock_client_response(r.clone()).await; assert_eq!(res.status().code(), Some(401.try_into().unwrap())); @@ -856,7 +850,7 @@ mod tests { password: "pass".into(), }); - let res = get_mock_client_response(r.clone()).await; + let res = get_mock_client_response(r).await; assert!(res.status().is_success()); } From 9b4224014682cec7e5a5c6f614da5f836831897f Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 15 Mar 2024 12:40:12 +0100 Subject: [PATCH 11/24] Apply suggestions --- lychee-lib/src/chain/mod.rs | 32 +++++++++---------- lychee-lib/src/client.rs | 10 +++--- lychee-lib/src/quirks/mod.rs | 4 +-- .../src/types/basic_auth/credentials.rs | 4 +-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 4585a3383f..6bdee55a48 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -4,8 +4,8 @@ use crate::Status; #[derive(Debug, PartialEq)] pub(crate) enum ChainResult { - Chained(T), - EarlyExit(R), + Next(T), + Done(R), } pub(crate) type RequestChain = Chain; @@ -31,15 +31,15 @@ impl Chain { pub(crate) fn traverse(&mut self, mut input: T) -> ChainResult { use ChainResult::*; for e in self.0.iter_mut() { - match e.handle(input) { - Chained(r) => input = r, - EarlyExit(r) => { - return EarlyExit(r); + match e.chain(input) { + Next(r) => input = r, + Done(r) => { + return Done(r); } } } - Chained(input) + Next(input) } pub(crate) fn push(&mut self, value: Box + Send>) { @@ -48,25 +48,25 @@ impl Chain { } pub(crate) trait Chainable: Debug { - fn handle(&mut self, input: T) -> ChainResult; + fn chain(&mut self, input: T) -> ChainResult; } mod test { use super::{ChainResult, ChainResult::*, Chainable}; #[derive(Debug)] - struct Add(i64); + struct Add(usize); #[derive(Debug, PartialEq, Eq)] - struct Result(i64); + struct Result(usize); impl Chainable for Add { - fn handle(&mut self, req: Result) -> ChainResult { + fn chain(&mut self, req: Result) -> ChainResult { let added = req.0 + self.0; if added > 100 { - EarlyExit(Result(req.0)) + Done(Result(req.0)) } else { - Chained(Result(added)) + Next(Result(added)) } } } @@ -75,9 +75,9 @@ mod test { fn simple_chain() { use super::Chain; let mut chain: Chain = - Chain::new(vec![Box::new(Add(10)), Box::new(Add(-3))]); + Chain::new(vec![Box::new(Add(7)), Box::new(Add(3))]); let result = chain.traverse(Result(0)); - assert_eq!(result, Chained(Result(7))); + assert_eq!(result, Next(Result(10))); } #[test] @@ -86,6 +86,6 @@ mod test { let mut chain: Chain = Chain::new(vec![Box::new(Add(80)), Box::new(Add(30)), Box::new(Add(1))]); let result = chain.traverse(Result(0)); - assert_eq!(result, EarlyExit(Result(80))); + assert_eq!(result, Done(Result(80))); } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 4f61016631..88c1839ccc 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -37,7 +37,7 @@ use typed_builder::TypedBuilder; use crate::{ chain::{ Chain, - ChainResult::{Chained, EarlyExit}, + ChainResult::{Next, Done}, RequestChain, }, filter::{Excludes, Filter, Includes}, @@ -676,8 +676,8 @@ impl Client { let result = request_chain.traverse(request); match result { - EarlyExit(status) => status, - Chained(r) => match self.reqwest_client.execute(r).await { + Done(status) => status, + Next(r) => match self.reqwest_client.execute(r).await { Ok(ref response) => Status::new(response, self.accepted.clone()), Err(e) => e.into(), }, @@ -1121,8 +1121,8 @@ mod tests { struct ExampleHandler(); impl Chainable for ExampleHandler { - fn handle(&mut self, _: Request) -> ChainResult { - ChainResult::EarlyExit(Status::Excluded) + fn chain(&mut self, _: Request) -> ChainResult { + ChainResult::Done(Status::Excluded) } } diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index fcfb5bfb25..0b177d7554 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -91,8 +91,8 @@ impl Quirks { } impl Chainable for Quirks { - fn handle(&mut self, input: Request) -> ChainResult { - ChainResult::Chained(self.apply(input)) + fn chain(&mut self, input: Request) -> ChainResult { + ChainResult::Next(self.apply(input)) } } diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index ed10511344..87c17dacf6 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -74,10 +74,10 @@ impl BasicAuthCredentials { } impl Chainable for BasicAuthCredentials { - fn handle(&mut self, mut request: Request) -> ChainResult { + fn chain(&mut self, mut request: Request) -> ChainResult { request .headers_mut() .append(AUTHORIZATION, self.to_authorization().0.encode()); - ChainResult::Chained(request) + ChainResult::Next(request) } } From 3fb34e7c6623fd96630015db851f36534259a127 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 15 Mar 2024 12:46:36 +0100 Subject: [PATCH 12/24] Apply clippy suggestions --- lychee-lib/src/chain/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 6bdee55a48..7034b1d4aa 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -29,8 +29,8 @@ impl Chain { } pub(crate) fn traverse(&mut self, mut input: T) -> ChainResult { - use ChainResult::*; - for e in self.0.iter_mut() { + use ChainResult::{Done, Next}; + for e in &mut self.0 { match e.chain(input) { Next(r) => input = r, Done(r) => { @@ -52,7 +52,7 @@ pub(crate) trait Chainable: Debug { } mod test { - use super::{ChainResult, ChainResult::*, Chainable}; + use super::{ChainResult, ChainResult::{Done, Next}, Chainable}; #[derive(Debug)] struct Add(usize); From 9f381e3b55cb8ce63bfc2259f38a7d70bd8e4234 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 15 Mar 2024 13:35:23 +0100 Subject: [PATCH 13/24] Move Arc and Mutex inside of Chain struct --- lychee-lib/src/chain/mod.rs | 34 ++++++++++++++++++++++------------ lychee-lib/src/client.rs | 13 ++++++------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 7034b1d4aa..04225be3d8 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,6 +1,6 @@ -use core::fmt::Debug; - use crate::Status; +use core::fmt::Debug; +use std::sync::{Arc, Mutex}; #[derive(Debug, PartialEq)] pub(crate) enum ChainResult { @@ -11,26 +11,33 @@ pub(crate) enum ChainResult { pub(crate) type RequestChain = Chain; #[derive(Debug)] -pub struct Chain(Vec + Send>>); +pub struct Chain(Arc + Send>>>>); impl Default for Chain { fn default() -> Self { - Self(vec![]) + Self(Arc::new(Mutex::new(vec![]))) + } +} + +impl Clone for Chain { + fn clone(&self) -> Self { + Self(self.0.clone()) } } + impl Chain { pub(crate) fn new(values: Vec + Send>>) -> Self { - Self(values) + Self(Arc::new(Mutex::new(values))) } - pub(crate) fn append(&mut self, other: &mut Chain) { - self.0.append(&mut other.0); + pub(crate) fn append(&mut self, other: &Chain) { + self.0.lock().unwrap().append(&mut other.0.lock().unwrap()); } pub(crate) fn traverse(&mut self, mut input: T) -> ChainResult { use ChainResult::{Done, Next}; - for e in &mut self.0 { + for e in self.0.lock().unwrap().iter_mut() { match e.chain(input) { Next(r) => input = r, Done(r) => { @@ -43,7 +50,7 @@ impl Chain { } pub(crate) fn push(&mut self, value: Box + Send>) { - self.0.push(value); + self.0.lock().unwrap().push(value); } } @@ -52,7 +59,11 @@ pub(crate) trait Chainable: Debug { } mod test { - use super::{ChainResult, ChainResult::{Done, Next}, Chainable}; + use super::{ + ChainResult, + ChainResult::{Done, Next}, + Chainable, + }; #[derive(Debug)] struct Add(usize); @@ -74,8 +85,7 @@ mod test { #[test] fn simple_chain() { use super::Chain; - let mut chain: Chain = - Chain::new(vec![Box::new(Add(7)), Box::new(Add(3))]); + let mut chain: Chain = Chain::new(vec![Box::new(Add(7)), Box::new(Add(3))]); let result = chain.traverse(Result(0)); assert_eq!(result, Next(Result(10))); } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 88c1839ccc..e3d6376461 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -16,7 +16,7 @@ use std::{ collections::HashSet, path::Path, - sync::{Arc, Mutex}, + sync::Arc, time::Duration, }; @@ -288,7 +288,7 @@ pub struct ClientBuilder { /// can modify the request. A chained item can also decide to exit /// early and return a status, so that subsequent chain items are /// skipped and no HTTP request is ever made. - request_chain: Arc>, + request_chain: RequestChain, } impl Default for ClientBuilder { @@ -452,7 +452,7 @@ pub struct Client { /// Caches Fragments fragment_checker: FragmentChecker, - request_chain: Arc>, + request_chain: RequestChain, } impl Client { @@ -538,7 +538,7 @@ impl Client { ) -> Result { let quirks = Quirks::default(); let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); - request_chain.append(&mut self.request_chain.lock().unwrap()); + request_chain.append(&self.request_chain); if let Some(c) = credentials { request_chain.push(Box::new(c.clone())); @@ -774,7 +774,6 @@ where mod tests { use std::{ fs::File, - sync::{Arc, Mutex}, time::{Duration, Instant}, }; @@ -1126,9 +1125,9 @@ mod tests { } } - let chain = Arc::new(Mutex::new(RequestChain::new(vec![Box::new( + let chain = RequestChain::new(vec![Box::new( ExampleHandler {}, - )]))); + )]); let client = ClientBuilder::builder() .request_chain(chain) From 1c6c39f0b9187bd36c99434f28236a88efab7219 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 20 Mar 2024 11:50:26 +0100 Subject: [PATCH 14/24] Extract checking functionality & make chain async --- lychee-lib/src/chain/mod.rs | 5 +- lychee-lib/src/checker.rs | 65 +++++++++++++++ lychee-lib/src/client.rs | 80 +++++-------------- lychee-lib/src/lib.rs | 3 +- lychee-lib/src/quirks/mod.rs | 2 +- .../src/types/basic_auth/credentials.rs | 2 +- 6 files changed, 93 insertions(+), 64 deletions(-) create mode 100644 lychee-lib/src/checker.rs diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 04225be3d8..3f0a8f0c1f 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -25,7 +25,6 @@ impl Clone for Chain { } } - impl Chain { pub(crate) fn new(values: Vec + Send>>) -> Self { Self(Arc::new(Mutex::new(values))) @@ -55,7 +54,7 @@ impl Chain { } pub(crate) trait Chainable: Debug { - fn chain(&mut self, input: T) -> ChainResult; + async fn chain(&mut self, input: T) -> ChainResult; } mod test { @@ -72,7 +71,7 @@ mod test { struct Result(usize); impl Chainable for Add { - fn chain(&mut self, req: Result) -> ChainResult { + async fn chain(&mut self, req: Result) -> ChainResult { let added = req.0 + self.0; if added > 100 { Done(Result(req.0)) diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs new file mode 100644 index 0000000000..598884fefb --- /dev/null +++ b/lychee-lib/src/checker.rs @@ -0,0 +1,65 @@ +use crate::{ + chain::{Chain, ChainResult, Chainable}, + retry::RetryExt, + Status, +}; +use http::StatusCode; +use reqwest::Request; +use std::{collections::HashSet, time::Duration}; + +#[derive(Debug)] +pub(crate) struct Checker { + retry_wait_time: Duration, + max_retries: u64, + reqwest_client: reqwest::Client, + accepted: Option>, +} + +impl Checker { + pub(crate) fn new( + retry_wait_time: Duration, + max_retries: u64, + reqwest_client: reqwest::Client, + accepted: Option>, + ) -> Self { + Self { + retry_wait_time, + max_retries, + reqwest_client, + accepted, + } + } + + /// Retry requests up to `max_retries` times + /// with an exponential backoff. + pub(crate) async fn retry_request(&self, request: reqwest::Request) -> Status { + let mut retries: u64 = 0; + let mut wait_time = self.retry_wait_time; + + let mut status = self.check_default(request.try_clone().unwrap()).await; // TODO: try_clone + while retries < self.max_retries { + if status.is_success() || !status.should_retry() { + return status; + } + retries += 1; + tokio::time::sleep(wait_time).await; + wait_time = wait_time.saturating_mul(2); + status = self.check_default(request.try_clone().unwrap()).await; // TODO: try_clone + } + status + } + + /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). + async fn check_default(&self, request: reqwest::Request) -> Status { + match self.reqwest_client.execute(request).await { + Ok(ref response) => Status::new(response, self.accepted.clone()), + Err(e) => e.into(), + } + } +} + +impl Chainable for Checker { + async fn chain(&mut self, input: Request) -> ChainResult { + ChainResult::Done(self.retry_request(input).await) + } +} diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index e3d6376461..1707171468 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -13,12 +13,7 @@ clippy::default_trait_access, clippy::used_underscore_binding )] -use std::{ - collections::HashSet, - path::Path, - sync::Arc, - time::Duration, -}; +use std::{collections::HashSet, path::Path, sync::Arc, time::Duration}; #[cfg(all(feature = "email-check", feature = "native-tls"))] use check_if_email_exists::{check_email, CheckEmailInput, Reachable}; @@ -35,11 +30,8 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{ - Chain, - ChainResult::{Next, Done}, - RequestChain, - }, + chain::{Chain, RequestChain}, + checker::Checker, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, @@ -587,7 +579,23 @@ impl Client { return Status::Unsupported(ErrorKind::InvalidURI(uri.clone())); } - let status = self.retry_request(uri, request_chain).await; + let request = self + .reqwest_client + .request(self.method.clone(), uri.as_str()) + .build(); + + let request = match request { + Ok(r) => r, + Err(e) => return e.into(), + }; + + let checker = Checker::new( + self.retry_wait_time, + self.max_retries, + self.reqwest_client.clone(), + self.accepted.clone(), + ); + let status = checker.retry_request(request).await; if status.is_success() { return status; } @@ -607,25 +615,6 @@ impl Client { status } - /// Retry requests up to `max_retries` times - /// with an exponential backoff. - async fn retry_request(&self, uri: &Uri, request_chain: &mut RequestChain) -> Status { - let mut retries: u64 = 0; - let mut wait_time = self.retry_wait_time; - - let mut status = self.check_default(uri, request_chain).await; - while retries < self.max_retries { - if status.is_success() || !status.should_retry() { - return status; - } - retries += 1; - tokio::time::sleep(wait_time).await; - wait_time = wait_time.saturating_mul(2); - status = self.check_default(uri, request_chain).await; - } - status - } - /// Check a `uri` hosted on `GitHub` via the GitHub API. /// /// # Caveats @@ -661,29 +650,6 @@ impl Client { Status::Ok(StatusCode::OK) } - /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). - async fn check_default(&self, uri: &Uri, request_chain: &mut RequestChain) -> Status { - let request = self - .reqwest_client - .request(self.method.clone(), uri.as_str()) - .build(); - - let request = match request { - Ok(r) => r, - Err(e) => return e.into(), - }; - - let result = request_chain.traverse(request); - - match result { - Done(status) => status, - Next(r) => match self.reqwest_client.execute(r).await { - Ok(ref response) => Status::new(response, self.accepted.clone()), - Err(e) => e.into(), - }, - } - } - /// Check a `file` URI. pub async fn check_file(&self, uri: &Uri) -> Status { let Ok(path) = uri.url.to_file_path() else { @@ -1120,14 +1086,12 @@ mod tests { struct ExampleHandler(); impl Chainable for ExampleHandler { - fn chain(&mut self, _: Request) -> ChainResult { + async fn chain(&mut self, _: Request) -> ChainResult { ChainResult::Done(Status::Excluded) } } - let chain = RequestChain::new(vec![Box::new( - ExampleHandler {}, - )]); + let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]); let client = ClientBuilder::builder() .request_chain(chain) diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index dd413868c4..a425063676 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -51,10 +51,11 @@ doc_comment::doctest!("../../README.md"); mod basic_auth; +mod chain; +mod checker; mod client; /// A pool of clients, to handle concurrent checks pub mod collector; -mod chain; mod quirks; mod retry; mod types; diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index 0b177d7554..bf0e758cb0 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -91,7 +91,7 @@ impl Quirks { } impl Chainable for Quirks { - fn chain(&mut self, input: Request) -> ChainResult { + async fn chain(&mut self, input: Request) -> ChainResult { ChainResult::Next(self.apply(input)) } } diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index 87c17dacf6..9c2ab3edca 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -74,7 +74,7 @@ impl BasicAuthCredentials { } impl Chainable for BasicAuthCredentials { - fn chain(&mut self, mut request: Request) -> ChainResult { + async fn chain(&mut self, mut request: Request) -> ChainResult { request .headers_mut() .append(AUTHORIZATION, self.to_authorization().0.encode()); From 7917d5d66d00ad4e316b0e94a0fcd2c2676fdf60 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Wed, 20 Mar 2024 17:46:17 +0100 Subject: [PATCH 15/24] Use `async_trait` to fix issues with `Chain` type inference --- Cargo.lock | 5 +++-- lychee-lib/Cargo.toml | 1 + lychee-lib/src/chain/mod.rs | 20 +++++++++++-------- lychee-lib/src/checker.rs | 4 +++- lychee-lib/src/client.rs | 3 ++- lychee-lib/src/quirks/mod.rs | 2 ++ .../src/types/basic_auth/credentials.rs | 2 ++ 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4b3a458a06..1ab49f0e15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -435,9 +435,9 @@ checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799" [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "461abc97219de0eaaf81fe3ef974a540158f3d079c2ab200f891f1a2ef201e85" dependencies = [ "proc-macro2", "quote", @@ -2394,6 +2394,7 @@ name = "lychee-lib" version = "0.14.3" dependencies = [ "async-stream", + "async-trait", "cached", "check-if-email-exists", "doc-comment", diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 917266c5d7..0007424e75 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -13,6 +13,7 @@ version.workspace = true [dependencies] async-stream = "0.3.5" +async-trait = "0.1.78" cached = "0.46.1" check-if-email-exists = { version = "0.9.1", optional = true } email_address = "0.2.4" diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 3f0a8f0c1f..573aeb8c11 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,4 +1,5 @@ use crate::Status; +use async_trait::async_trait; use core::fmt::Debug; use std::sync::{Arc, Mutex}; @@ -34,10 +35,10 @@ impl Chain { self.0.lock().unwrap().append(&mut other.0.lock().unwrap()); } - pub(crate) fn traverse(&mut self, mut input: T) -> ChainResult { + pub(crate) async fn traverse(&mut self, mut input: T) -> ChainResult { use ChainResult::{Done, Next}; for e in self.0.lock().unwrap().iter_mut() { - match e.chain(input) { + match e.chain(input).await { Next(r) => input = r, Done(r) => { return Done(r); @@ -53,6 +54,7 @@ impl Chain { } } +#[async_trait] pub(crate) trait Chainable: Debug { async fn chain(&mut self, input: T) -> ChainResult; } @@ -63,6 +65,7 @@ mod test { ChainResult::{Done, Next}, Chainable, }; + use async_trait::async_trait; #[derive(Debug)] struct Add(usize); @@ -70,6 +73,7 @@ mod test { #[derive(Debug, PartialEq, Eq)] struct Result(usize); + #[async_trait] impl Chainable for Add { async fn chain(&mut self, req: Result) -> ChainResult { let added = req.0 + self.0; @@ -81,20 +85,20 @@ mod test { } } - #[test] - fn simple_chain() { + #[tokio::test] + async fn simple_chain() { use super::Chain; let mut chain: Chain = Chain::new(vec![Box::new(Add(7)), Box::new(Add(3))]); - let result = chain.traverse(Result(0)); + let result = chain.traverse(Result(0)).await; assert_eq!(result, Next(Result(10))); } - #[test] - fn early_exit_chain() { + #[tokio::test] + async fn early_exit_chain() { use super::Chain; let mut chain: Chain = Chain::new(vec![Box::new(Add(80)), Box::new(Add(30)), Box::new(Add(1))]); - let result = chain.traverse(Result(0)); + let result = chain.traverse(Result(0)).await; assert_eq!(result, Done(Result(80))); } } diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs index 598884fefb..c6b76d3cec 100644 --- a/lychee-lib/src/checker.rs +++ b/lychee-lib/src/checker.rs @@ -1,8 +1,9 @@ use crate::{ - chain::{Chain, ChainResult, Chainable}, + chain::{ChainResult, Chainable}, retry::RetryExt, Status, }; +use async_trait::async_trait; use http::StatusCode; use reqwest::Request; use std::{collections::HashSet, time::Duration}; @@ -58,6 +59,7 @@ impl Checker { } } +#[async_trait] impl Chainable for Checker { async fn chain(&mut self, input: Request) -> ChainResult { ChainResult::Done(self.retry_request(input).await) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 1707171468..49499a3a37 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -35,7 +35,6 @@ use crate::{ filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, - retry::RetryExt, types::uri::github::GithubUri, utils::fragment_checker::FragmentChecker, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri, @@ -743,6 +742,7 @@ mod tests { time::{Duration, Instant}, }; + use async_trait::async_trait; use http::{header::HeaderMap, StatusCode}; use reqwest::header; use tempfile::tempdir; @@ -1085,6 +1085,7 @@ mod tests { #[derive(Debug)] struct ExampleHandler(); + #[async_trait] impl Chainable for ExampleHandler { async fn chain(&mut self, _: Request) -> ChainResult { ChainResult::Done(Status::Excluded) diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index bf0e758cb0..333b921310 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -2,6 +2,7 @@ use crate::{ chain::{ChainResult, Chainable}, Status, }; +use async_trait::async_trait; use header::HeaderValue; use http::header; use once_cell::sync::Lazy; @@ -90,6 +91,7 @@ impl Quirks { } } +#[async_trait] impl Chainable for Quirks { async fn chain(&mut self, input: Request) -> ChainResult { ChainResult::Next(self.apply(input)) diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index 9c2ab3edca..2bdf19e7b5 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -1,3 +1,4 @@ +use async_trait::async_trait; use std::str::FromStr; use headers::authorization::Credentials; @@ -73,6 +74,7 @@ impl BasicAuthCredentials { } } +#[async_trait] impl Chainable for BasicAuthCredentials { async fn chain(&mut self, mut request: Request) -> ChainResult { request From 31f4494c47d61740aeb7fc99c5257068cb794c12 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 22 Mar 2024 12:39:20 +0100 Subject: [PATCH 16/24] Make checker part of the request chain --- lychee-lib/src/chain/mod.rs | 20 +++++++++---------- lychee-lib/src/checker.rs | 6 +++--- lychee-lib/src/client.rs | 40 ++++++++++++++++++------------------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 573aeb8c11..d303a92784 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,7 +1,8 @@ use crate::Status; use async_trait::async_trait; use core::fmt::Debug; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; +use tokio::sync::Mutex; #[derive(Debug, PartialEq)] pub(crate) enum ChainResult { @@ -11,8 +12,10 @@ pub(crate) enum ChainResult { pub(crate) type RequestChain = Chain; +pub(crate) type InnerChain = Vec + Send>>; + #[derive(Debug)] -pub struct Chain(Arc + Send>>>>); +pub struct Chain(Arc>>); impl Default for Chain { fn default() -> Self { @@ -27,17 +30,13 @@ impl Clone for Chain { } impl Chain { - pub(crate) fn new(values: Vec + Send>>) -> Self { + pub(crate) fn new(values: InnerChain) -> Self { Self(Arc::new(Mutex::new(values))) } - pub(crate) fn append(&mut self, other: &Chain) { - self.0.lock().unwrap().append(&mut other.0.lock().unwrap()); - } - pub(crate) async fn traverse(&mut self, mut input: T) -> ChainResult { use ChainResult::{Done, Next}; - for e in self.0.lock().unwrap().iter_mut() { + for e in self.0.lock().await.iter_mut() { match e.chain(input).await { Next(r) => input = r, Done(r) => { @@ -49,8 +48,9 @@ impl Chain { Next(input) } - pub(crate) fn push(&mut self, value: Box + Send>) { - self.0.lock().unwrap().push(value); + // TODO: probably remove + pub(crate) fn into_inner(self) -> InnerChain { + Arc::try_unwrap(self.0).expect("Arc still has multiple owners").into_inner() } } diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs index c6b76d3cec..c920550d74 100644 --- a/lychee-lib/src/checker.rs +++ b/lychee-lib/src/checker.rs @@ -17,7 +17,7 @@ pub(crate) struct Checker { } impl Checker { - pub(crate) fn new( + pub(crate) const fn new( retry_wait_time: Duration, max_retries: u64, reqwest_client: reqwest::Client, @@ -33,7 +33,7 @@ impl Checker { /// Retry requests up to `max_retries` times /// with an exponential backoff. - pub(crate) async fn retry_request(&self, request: reqwest::Request) -> Status { + pub(crate) async fn retry_request(&self, request: Request) -> Status { let mut retries: u64 = 0; let mut wait_time = self.retry_wait_time; @@ -51,7 +51,7 @@ impl Checker { } /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). - async fn check_default(&self, request: reqwest::Request) -> Status { + async fn check_default(&self, request: Request) -> Status { match self.reqwest_client.execute(request).await { Ok(ref response) => Status::new(response, self.accepted.clone()), Err(e) => e.into(), diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 49499a3a37..cadd7e3463 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,14 +30,13 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{Chain, RequestChain}, + chain::{Chain, ChainResult, RequestChain}, checker::Checker, filter::{Excludes, Filter, Includes}, quirks::Quirks, remap::Remaps, types::uri::github::GithubUri, - utils::fragment_checker::FragmentChecker, - BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri, + utils::fragment_checker::FragmentChecker, ErrorKind, Request, Response, Result, Status, Uri, }; #[cfg(all(feature = "email-check", feature = "native-tls"))] @@ -486,10 +485,20 @@ impl Client { return Ok(Response::new(uri.clone(), Status::Excluded, source)); } + let request_chain: RequestChain = Chain::new(vec![ + Box::::default(), + Box::new(Checker::new( + self.retry_wait_time, + self.max_retries, + self.reqwest_client.clone(), + self.accepted.clone(), + )), + ]); + let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, - _ => self.check_website(uri, credentials).await?, + _ => self.check_website(uri, request_chain).await?, }; Ok(Response::new(uri.clone(), status, source)) @@ -525,16 +534,8 @@ impl Client { pub async fn check_website( &self, uri: &Uri, - credentials: &Option, + mut request_chain: RequestChain, ) -> Result { - let quirks = Quirks::default(); - let mut request_chain: RequestChain = Chain::new(vec![Box::new(quirks)]); - request_chain.append(&self.request_chain); - - if let Some(c) = credentials { - request_chain.push(Box::new(c.clone())); - } - match self.check_website_inner(uri, &mut request_chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self @@ -588,13 +589,12 @@ impl Client { Err(e) => return e.into(), }; - let checker = Checker::new( - self.retry_wait_time, - self.max_retries, - self.reqwest_client.clone(), - self.accepted.clone(), - ); - let status = checker.retry_request(request).await; + let status = match request_chain.traverse(request).await { + // consider as excluded if no chain element has converted it to a done + ChainResult::Next(_) => Status::Excluded, + ChainResult::Done(status) => status, + }; + if status.is_success() { return status; } From ea66ab06ea746129efef9dcea098ea38b2502605 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 3 Apr 2024 13:15:41 +0200 Subject: [PATCH 17/24] Add credentials to chain --- lychee-lib/src/checker.rs | 2 +- lychee-lib/src/client.rs | 7 +++++-- lychee-lib/src/types/basic_auth/credentials.rs | 11 +++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs index c920550d74..9a4d7b921e 100644 --- a/lychee-lib/src/checker.rs +++ b/lychee-lib/src/checker.rs @@ -8,7 +8,7 @@ use http::StatusCode; use reqwest::Request; use std::{collections::HashSet, time::Duration}; -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct Checker { retry_wait_time: Duration, max_retries: u64, diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index cadd7e3463..62e32a73ff 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -36,7 +36,8 @@ use crate::{ quirks::Quirks, remap::Remaps, types::uri::github::GithubUri, - utils::fragment_checker::FragmentChecker, ErrorKind, Request, Response, Result, Status, Uri, + utils::fragment_checker::FragmentChecker, + ErrorKind, Request, Response, Result, Status, Uri, }; #[cfg(all(feature = "email-check", feature = "native-tls"))] @@ -465,7 +466,7 @@ impl Client { { let Request { ref mut uri, - ref credentials, + credentials, source, .. } = request.try_into()?; @@ -486,7 +487,9 @@ impl Client { } let request_chain: RequestChain = Chain::new(vec![ + // TODO: insert self.request_chain Box::::default(), + Box::new(credentials), Box::new(Checker::new( self.retry_wait_time, self.max_retries, diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index 2bdf19e7b5..906357f671 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -75,11 +75,14 @@ impl BasicAuthCredentials { } #[async_trait] -impl Chainable for BasicAuthCredentials { +impl Chainable for Option { async fn chain(&mut self, mut request: Request) -> ChainResult { - request - .headers_mut() - .append(AUTHORIZATION, self.to_authorization().0.encode()); + if let Some(credentials) = self { + request + .headers_mut() + .append(AUTHORIZATION, credentials.to_authorization().0.encode()); + } + ChainResult::Next(request) } } From ca55953d7b4f01acd574e7fd6619e568083f3c19 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 5 Apr 2024 08:37:09 +0200 Subject: [PATCH 18/24] Create ClientRequestChain helper structure to combine multiple chains --- lychee-lib/src/chain/mod.rs | 37 +++++++++++++++++++++++++++++-------- lychee-lib/src/client.rs | 30 +++++++++++------------------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index d303a92784..738b90800c 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -34,7 +34,7 @@ impl Chain { Self(Arc::new(Mutex::new(values))) } - pub(crate) async fn traverse(&mut self, mut input: T) -> ChainResult { + pub(crate) async fn traverse(&self, mut input: T) -> ChainResult { use ChainResult::{Done, Next}; for e in self.0.lock().await.iter_mut() { match e.chain(input).await { @@ -47,11 +47,6 @@ impl Chain { Next(input) } - - // TODO: probably remove - pub(crate) fn into_inner(self) -> InnerChain { - Arc::try_unwrap(self.0).expect("Arc still has multiple owners").into_inner() - } } #[async_trait] @@ -59,6 +54,32 @@ pub(crate) trait Chainable: Debug { async fn chain(&mut self, input: T) -> ChainResult; } +#[derive(Debug)] +pub(crate) struct ClientRequestChain<'a> { + chains: Vec<&'a RequestChain>, +} + +impl<'a> ClientRequestChain<'a> { + pub(crate) fn new(chains: Vec<&'a RequestChain>) -> Self { + Self { chains } + } + + pub(crate) async fn traverse(&self, mut input: reqwest::Request) -> Status { + use ChainResult::{Done, Next}; + for e in &self.chains { + match e.traverse(input).await { + Next(r) => input = r, + Done(r) => { + return r; + } + } + } + + // consider as excluded if no chain element has converted it to a done + Status::Excluded + } +} + mod test { use super::{ ChainResult, @@ -88,7 +109,7 @@ mod test { #[tokio::test] async fn simple_chain() { use super::Chain; - let mut chain: Chain = Chain::new(vec![Box::new(Add(7)), Box::new(Add(3))]); + let chain: Chain = Chain::new(vec![Box::new(Add(7)), Box::new(Add(3))]); let result = chain.traverse(Result(0)).await; assert_eq!(result, Next(Result(10))); } @@ -96,7 +117,7 @@ mod test { #[tokio::test] async fn early_exit_chain() { use super::Chain; - let mut chain: Chain = + let chain: Chain = Chain::new(vec![Box::new(Add(80)), Box::new(Add(30)), Box::new(Add(1))]); let result = chain.traverse(Result(0)).await; assert_eq!(result, Done(Result(80))); diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 62e32a73ff..7b52aecaaa 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,7 +30,7 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{Chain, ChainResult, RequestChain}, + chain::{Chain, ClientRequestChain, RequestChain}, checker::Checker, filter::{Excludes, Filter, Includes}, quirks::Quirks, @@ -279,7 +279,7 @@ pub struct ClientBuilder { /// can modify the request. A chained item can also decide to exit /// early and return a status, so that subsequent chain items are /// skipped and no HTTP request is ever made. - request_chain: RequestChain, + plugin_request_chain: RequestChain, } impl Default for ClientBuilder { @@ -392,7 +392,7 @@ impl ClientBuilder { require_https: self.require_https, include_fragments: self.include_fragments, fragment_checker: FragmentChecker::new(), - request_chain: self.request_chain, + plugin_request_chain: self.plugin_request_chain, }) } } @@ -443,7 +443,7 @@ pub struct Client { /// Caches Fragments fragment_checker: FragmentChecker, - request_chain: RequestChain, + plugin_request_chain: RequestChain, } impl Client { @@ -487,7 +487,6 @@ impl Client { } let request_chain: RequestChain = Chain::new(vec![ - // TODO: insert self.request_chain Box::::default(), Box::new(credentials), Box::new(Checker::new( @@ -534,15 +533,11 @@ impl Client { /// - The request failed. /// - The response status code is not accepted. /// - The URI cannot be converted to HTTPS. - pub async fn check_website( - &self, - uri: &Uri, - mut request_chain: RequestChain, - ) -> Result { - match self.check_website_inner(uri, &mut request_chain).await { + pub async fn check_website(&self, uri: &Uri, request_chain: RequestChain) -> Result { + match self.check_website_inner(uri, &request_chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self - .check_website_inner(&uri.to_https()?, &mut request_chain) + .check_website_inner(&uri.to_https()?, &request_chain) .await .is_success() { @@ -567,7 +562,7 @@ impl Client { /// - The URI is invalid. /// - The request failed. /// - The response status code is not accepted. - pub async fn check_website_inner(&self, uri: &Uri, request_chain: &mut RequestChain) -> Status { + pub async fn check_website_inner(&self, uri: &Uri, request_chain: &RequestChain) -> Status { // Workaround for upstream reqwest panic if validate_url(&uri.url) { if matches!(uri.scheme(), "http" | "https") { @@ -592,11 +587,8 @@ impl Client { Err(e) => return e.into(), }; - let status = match request_chain.traverse(request).await { - // consider as excluded if no chain element has converted it to a done - ChainResult::Next(_) => Status::Excluded, - ChainResult::Done(status) => status, - }; + let chain = ClientRequestChain::new(vec![&self.plugin_request_chain, request_chain]); + let status = chain.traverse(request).await; if status.is_success() { return status; @@ -1098,7 +1090,7 @@ mod tests { let chain = RequestChain::new(vec![Box::new(ExampleHandler {})]); let client = ClientBuilder::builder() - .request_chain(chain) + .plugin_request_chain(chain) .build() .client() .unwrap(); From 61df5c91c460825b986783989e472549ed992179 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 5 Apr 2024 12:10:43 +0200 Subject: [PATCH 19/24] Small tweaks & extract method --- lychee-lib/src/client.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 7b52aecaaa..72727ee49f 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -278,7 +278,7 @@ pub struct ClientBuilder { /// Requests run through this chain where each item in the chain /// can modify the request. A chained item can also decide to exit /// early and return a status, so that subsequent chain items are - /// skipped and no HTTP request is ever made. + /// skipped and the lychee-internal request chain is not activated. plugin_request_chain: RequestChain, } @@ -486,7 +486,7 @@ impl Client { return Ok(Response::new(uri.clone(), Status::Excluded, source)); } - let request_chain: RequestChain = Chain::new(vec![ + let chain: RequestChain = Chain::new(vec![ Box::::default(), Box::new(credentials), Box::new(Checker::new( @@ -500,7 +500,7 @@ impl Client { let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, - _ => self.check_website(uri, request_chain).await?, + _ => self.check_website(uri, chain).await?, }; Ok(Response::new(uri.clone(), status, source)) @@ -533,11 +533,11 @@ impl Client { /// - The request failed. /// - The response status code is not accepted. /// - The URI cannot be converted to HTTPS. - pub async fn check_website(&self, uri: &Uri, request_chain: RequestChain) -> Result { - match self.check_website_inner(uri, &request_chain).await { + pub async fn check_website(&self, uri: &Uri, chain: RequestChain) -> Result { + match self.check_website_inner(uri, &chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self - .check_website_inner(&uri.to_https()?, &request_chain) + .check_website_inner(&uri.to_https()?, &chain) .await .is_success() { @@ -562,7 +562,7 @@ impl Client { /// - The URI is invalid. /// - The request failed. /// - The response status code is not accepted. - pub async fn check_website_inner(&self, uri: &Uri, request_chain: &RequestChain) -> Status { + pub async fn check_website_inner(&self, uri: &Uri, chain: &RequestChain) -> Status { // Workaround for upstream reqwest panic if validate_url(&uri.url) { if matches!(uri.scheme(), "http" | "https") { @@ -587,16 +587,21 @@ impl Client { Err(e) => return e.into(), }; - let chain = ClientRequestChain::new(vec![&self.plugin_request_chain, request_chain]); - let status = chain.traverse(request).await; + let status = ClientRequestChain::new(vec![&self.plugin_request_chain, chain]) + .traverse(request) + .await; + + self.handle_github(status, uri).await + } + // Pull out the heavy machinery in case of a failed normal request. + // This could be a GitHub URL and we ran into the rate limiter. + // TODO: We should first try to parse the URI as GitHub URI first (Lucius, Jan 2023) + async fn handle_github(&self, status: Status, uri: &Uri) -> Status { if status.is_success() { return status; } - // Pull out the heavy machinery in case of a failed normal request. - // This could be a GitHub URL and we ran into the rate limiter. - // TODO: We should first try to parse the URI as GitHub URI first (Lucius, Jan 2023) if let Ok(github_uri) = GithubUri::try_from(uri) { let status = self.check_github(github_uri).await; // Only return Github status in case of success From 7c4834d994eb3d1fa374d21d08fccb63a0b17783 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Thu, 11 Apr 2024 11:33:38 +0200 Subject: [PATCH 20/24] Extract function and add SAFETY note --- lychee-lib/src/checker.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs index 9a4d7b921e..7dfad42051 100644 --- a/lychee-lib/src/checker.rs +++ b/lychee-lib/src/checker.rs @@ -37,7 +37,7 @@ impl Checker { let mut retries: u64 = 0; let mut wait_time = self.retry_wait_time; - let mut status = self.check_default(request.try_clone().unwrap()).await; // TODO: try_clone + let mut status = self.check_default(clone_unwrap(&request)).await; while retries < self.max_retries { if status.is_success() || !status.should_retry() { return status; @@ -45,7 +45,7 @@ impl Checker { retries += 1; tokio::time::sleep(wait_time).await; wait_time = wait_time.saturating_mul(2); - status = self.check_default(request.try_clone().unwrap()).await; // TODO: try_clone + status = self.check_default(clone_unwrap(&request)).await; } status } @@ -59,6 +59,12 @@ impl Checker { } } +/// SAFETY: unwrapping the `try_clone` of `reqwest::Request` is safe because a request only fails to be cloned when `body` of `Request` is a stream +/// and `body` cannot be a stream as long as the `stream` feature is disabled. +fn clone_unwrap(request: &Request) -> Request { + request.try_clone().unwrap() +} + #[async_trait] impl Chainable for Checker { async fn chain(&mut self, input: Request) -> ChainResult { From a4f57cbff01246c7e191b08e8bf8bd1747b6ac3f Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 21 Apr 2024 15:38:01 +0200 Subject: [PATCH 21/24] Add documentation to `chain` module Also make `Chainable` and `ChainResult` public to support external plugins/handlers. --- lychee-lib/src/chain/mod.rs | 111 ++++++++++++++++++++++++++++++++++-- lychee-lib/src/client.rs | 16 +++--- lychee-lib/src/lib.rs | 2 + 3 files changed, 115 insertions(+), 14 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 738b90800c..c99f47b358 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,39 +1,89 @@ +//! [Chain of responsibility pattern][pattern] implementation. +//! +//! lychee is based on a chain of responsibility, where each handler can modify +//! a request and decide if it should be passed to the next element or not. +//! +//! The chain is implemented as a vector of [`Chainable`] handlers. It is +//! traversed by calling [`Chain::traverse`], which will call +//! [`Chainable::chain`] on each handler in the chain. +//! +//! To add external handlers, you can implement the [`Chainable`] trait and add +//! the handler to the chain. +//! +//! [pattern]: https://github.com/lpxxn/rust-design-pattern/blob/master/behavioral/chain_of_responsibility.rs use crate::Status; use async_trait::async_trait; use core::fmt::Debug; use std::sync::Arc; use tokio::sync::Mutex; +/// Result of a handler. +/// +/// This is used to decide if the chain should continue to the next handler or +/// stop and return the result: +/// +/// - If the chain should continue, the handler should return +/// [`ChainResult::Next`]. This will traverse the next handler in the chain. +/// - If the chain should stop, the handler should return [`ChainResult::Done`]. +/// This will stop the chain immediately and return the result of the handler. #[derive(Debug, PartialEq)] -pub(crate) enum ChainResult { +pub enum ChainResult { + /// Continue to the next handler in the chain. Next(T), + /// Stop the chain and return the result. Done(R), } +/// Request chain type +/// +/// This takes a request and returns a status. pub(crate) type RequestChain = Chain; +/// Inner chain type. +/// +/// This holds all handlers, which were chained together. +/// Handlers are traversed in order. +/// +/// Each handler needs to implement the `Chainable` trait and be `Send`, because +/// the chain is traversed concurrently and the handlers can be sent between +/// threads. pub(crate) type InnerChain = Vec + Send>>; +/// The outer chain type. +/// +/// This is a wrapper around the inner chain type and allows for +/// concurrent access to the chain. #[derive(Debug)] pub struct Chain(Arc>>); impl Default for Chain { fn default() -> Self { - Self(Arc::new(Mutex::new(vec![]))) + Self(Arc::new(Mutex::new(InnerChain::default()))) } } impl Clone for Chain { fn clone(&self) -> Self { + // Cloning the chain is a cheap operation, because the inner chain is + // wrapped in an `Arc` and `Mutex`. Self(self.0.clone()) } } impl Chain { + /// Create a new chain from a vector of chainable handlers pub(crate) fn new(values: InnerChain) -> Self { Self(Arc::new(Mutex::new(values))) } + /// Traverse the chain with the given input. + /// + /// This will call `chain` on each handler in the chain and return + /// the result. If a handler returns `ChainResult::Done`, the chain + /// will stop and return. + /// + /// If no handler returns `ChainResult::Done`, the chain will return + /// `ChainResult::Next` with the input. pub(crate) async fn traverse(&self, mut input: T) -> ChainResult { use ChainResult::{Done, Next}; for e in self.0.lock().await.iter_mut() { @@ -49,23 +99,71 @@ impl Chain { } } +/// Chainable trait for implementing request handlers +/// +/// This trait needs to be implemented by all chainable handlers. +/// It is the only requirement to handle requests in lychee. +/// +/// It takes an input request and returns a [`ChainResult`], which can be either +/// [`ChainResult::Next`] to continue to the next handler or +/// [`ChainResult::Done`] to stop the chain. +/// +/// The request can be modified by the handler before it is passed to the next +/// handler. This allows for modifying the request, such as adding headers or +/// changing the URL (e.g. for remapping or filtering). #[async_trait] -pub(crate) trait Chainable: Debug { +pub trait Chainable: Debug { + /// Given an input request, return a [`ChainResult`] to continue or stop the + /// chain. + /// + /// The input request can be modified by the handler before it is passed to + /// the next handler. + /// + /// # Example + /// + /// ``` + /// use lychee_lib::{Chainable, ChainResult, Status}; + /// use reqwest::Request; + /// use async_trait::async_trait; + /// + /// #[derive(Debug)] + /// struct AddHeader; + /// + /// #[async_trait] + /// impl Chainable for AddHeader { + /// async fn chain(&mut self, mut request: Request) -> ChainResult { + /// // You can modify the request however you like here + /// request.headers_mut().append("X-Header", "value".parse().unwrap()); + /// + /// // Pass the request to the next handler + /// ChainResult::Next(request) + /// } + /// } + /// ``` async fn chain(&mut self, input: T) -> ChainResult; } +/// Client request chains +/// +/// This struct holds all request chains. +/// +/// Usually, this is used to hold the default request chain and the external +/// plugin request chain. #[derive(Debug)] -pub(crate) struct ClientRequestChain<'a> { +pub(crate) struct ClientRequestChains<'a> { chains: Vec<&'a RequestChain>, } -impl<'a> ClientRequestChain<'a> { +impl<'a> ClientRequestChains<'a> { + /// Create a new chain of request chains. pub(crate) fn new(chains: Vec<&'a RequestChain>) -> Self { Self { chains } } + /// Traverse all request chains and resolve to a status. pub(crate) async fn traverse(&self, mut input: reqwest::Request) -> Status { use ChainResult::{Done, Next}; + for e in &self.chains { match e.traverse(input).await { Next(r) => input = r, @@ -75,7 +173,8 @@ impl<'a> ClientRequestChain<'a> { } } - // consider as excluded if no chain element has converted it to a done + // Consider the request to be excluded if no chain element has converted + // it to a `ChainResult::Done` Status::Excluded } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 72727ee49f..0bbc422103 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,7 +30,7 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{Chain, ClientRequestChain, RequestChain}, + chain::{Chain, ClientRequestChains, RequestChain}, checker::Checker, filter::{Excludes, Filter, Includes}, quirks::Quirks, @@ -486,7 +486,7 @@ impl Client { return Ok(Response::new(uri.clone(), Status::Excluded, source)); } - let chain: RequestChain = Chain::new(vec![ + let default_chain: RequestChain = Chain::new(vec![ Box::::default(), Box::new(credentials), Box::new(Checker::new( @@ -500,7 +500,7 @@ impl Client { let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, - _ => self.check_website(uri, chain).await?, + _ => self.check_website(uri, default_chain).await?, }; Ok(Response::new(uri.clone(), status, source)) @@ -533,11 +533,11 @@ impl Client { /// - The request failed. /// - The response status code is not accepted. /// - The URI cannot be converted to HTTPS. - pub async fn check_website(&self, uri: &Uri, chain: RequestChain) -> Result { - match self.check_website_inner(uri, &chain).await { + pub async fn check_website(&self, uri: &Uri, default_chain: RequestChain) -> Result { + match self.check_website_inner(uri, &default_chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self - .check_website_inner(&uri.to_https()?, &chain) + .check_website_inner(&uri.to_https()?, &default_chain) .await .is_success() { @@ -562,7 +562,7 @@ impl Client { /// - The URI is invalid. /// - The request failed. /// - The response status code is not accepted. - pub async fn check_website_inner(&self, uri: &Uri, chain: &RequestChain) -> Status { + pub async fn check_website_inner(&self, uri: &Uri, default_chain: &RequestChain) -> Status { // Workaround for upstream reqwest panic if validate_url(&uri.url) { if matches!(uri.scheme(), "http" | "https") { @@ -587,7 +587,7 @@ impl Client { Err(e) => return e.into(), }; - let status = ClientRequestChain::new(vec![&self.plugin_request_chain, chain]) + let status = ClientRequestChains::new(vec![&self.plugin_request_chain, default_chain]) .traverse(request) .await; diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index a425063676..272bd78a7b 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -85,6 +85,8 @@ use openssl_sys as _; // required for vendored-openssl feature #[doc(inline)] pub use crate::{ basic_auth::BasicAuthExtractor, + // Expose the `Chainable` trait to allow defining external handlers (plugins) + chain::{ChainResult, Chainable}, // Constants get exposed so that the CLI can use the same defaults as the library client::{ check, Client, ClientBuilder, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, From 2a5fbcb516adbfe19be07ae4e6b38d1b97c7a530 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 21 Apr 2024 15:52:00 +0200 Subject: [PATCH 22/24] Extend docs around `clone_unwrap` --- lychee-lib/src/chain/mod.rs | 2 +- lychee-lib/src/checker.rs | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index c99f47b358..020b8747e0 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -134,7 +134,7 @@ pub trait Chainable: Debug { /// async fn chain(&mut self, mut request: Request) -> ChainResult { /// // You can modify the request however you like here /// request.headers_mut().append("X-Header", "value".parse().unwrap()); - /// + /// /// // Pass the request to the next handler /// ChainResult::Next(request) /// } diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs index 7dfad42051..695c00f11d 100644 --- a/lychee-lib/src/checker.rs +++ b/lychee-lib/src/checker.rs @@ -59,10 +59,17 @@ impl Checker { } } -/// SAFETY: unwrapping the `try_clone` of `reqwest::Request` is safe because a request only fails to be cloned when `body` of `Request` is a stream -/// and `body` cannot be a stream as long as the `stream` feature is disabled. +/// Clones a `reqwest::Request`. +/// +/// # Safety +/// +/// This panics if the request cannot be cloned. This should only happen if the +/// request body is a `reqwest` stream. We disable the `stream` feature, so the +/// body should never be a stream. +/// +/// See fn clone_unwrap(request: &Request) -> Request { - request.try_clone().unwrap() + request.try_clone().expect("Failed to clone request: body was a stream, which should be impossible with `stream` feature disabled") } #[async_trait] From d99ba5cf6bc6c759aa0bfb74df6056b9015b899c Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Mon, 22 Apr 2024 09:26:12 +0200 Subject: [PATCH 23/24] Adjust documentation --- lychee-lib/src/chain/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 020b8747e0..6935275b4e 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -5,7 +5,7 @@ //! //! The chain is implemented as a vector of [`Chainable`] handlers. It is //! traversed by calling [`Chain::traverse`], which will call -//! [`Chainable::chain`] on each handler in the chain. +//! [`Chainable::chain`] on each handler in the chain consecutively. //! //! To add external handlers, you can implement the [`Chainable`] trait and add //! the handler to the chain. @@ -25,7 +25,7 @@ use tokio::sync::Mutex; /// - If the chain should continue, the handler should return /// [`ChainResult::Next`]. This will traverse the next handler in the chain. /// - If the chain should stop, the handler should return [`ChainResult::Done`]. -/// This will stop the chain immediately and return the result of the handler. +/// All subsequent chain elements are skipped and the result is returned. #[derive(Debug, PartialEq)] pub enum ChainResult { /// Continue to the next handler in the chain. From 50bd88a158a0064bd709c6355391feb911762422 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Mon, 22 Apr 2024 10:59:29 +0200 Subject: [PATCH 24/24] Rename Chainable to Handler --- lychee-lib/src/chain/mod.rs | 22 +++++++++---------- lychee-lib/src/checker.rs | 4 ++-- lychee-lib/src/client.rs | 4 ++-- lychee-lib/src/lib.rs | 4 ++-- lychee-lib/src/quirks/mod.rs | 4 ++-- .../src/types/basic_auth/credentials.rs | 4 ++-- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 6935275b4e..59ea75c864 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -3,11 +3,11 @@ //! lychee is based on a chain of responsibility, where each handler can modify //! a request and decide if it should be passed to the next element or not. //! -//! The chain is implemented as a vector of [`Chainable`] handlers. It is +//! The chain is implemented as a vector of [`Handler`] handlers. It is //! traversed by calling [`Chain::traverse`], which will call -//! [`Chainable::chain`] on each handler in the chain consecutively. +//! [`Handler::chain`] on each handler in the chain consecutively. //! -//! To add external handlers, you can implement the [`Chainable`] trait and add +//! To add external handlers, you can implement the [`Handler`] trait and add //! the handler to the chain. //! //! [pattern]: https://github.com/lpxxn/rust-design-pattern/blob/master/behavioral/chain_of_responsibility.rs @@ -44,10 +44,10 @@ pub(crate) type RequestChain = Chain; /// This holds all handlers, which were chained together. /// Handlers are traversed in order. /// -/// Each handler needs to implement the `Chainable` trait and be `Send`, because +/// Each handler needs to implement the `Handler` trait and be `Send`, because /// the chain is traversed concurrently and the handlers can be sent between /// threads. -pub(crate) type InnerChain = Vec + Send>>; +pub(crate) type InnerChain = Vec + Send>>; /// The outer chain type. /// @@ -99,7 +99,7 @@ impl Chain { } } -/// Chainable trait for implementing request handlers +/// Handler trait for implementing request handlers /// /// This trait needs to be implemented by all chainable handlers. /// It is the only requirement to handle requests in lychee. @@ -112,7 +112,7 @@ impl Chain { /// handler. This allows for modifying the request, such as adding headers or /// changing the URL (e.g. for remapping or filtering). #[async_trait] -pub trait Chainable: Debug { +pub trait Handler: Debug { /// Given an input request, return a [`ChainResult`] to continue or stop the /// chain. /// @@ -122,7 +122,7 @@ pub trait Chainable: Debug { /// # Example /// /// ``` - /// use lychee_lib::{Chainable, ChainResult, Status}; + /// use lychee_lib::{Handler, ChainResult, Status}; /// use reqwest::Request; /// use async_trait::async_trait; /// @@ -130,7 +130,7 @@ pub trait Chainable: Debug { /// struct AddHeader; /// /// #[async_trait] - /// impl Chainable for AddHeader { + /// impl Handler for AddHeader { /// async fn chain(&mut self, mut request: Request) -> ChainResult { /// // You can modify the request however you like here /// request.headers_mut().append("X-Header", "value".parse().unwrap()); @@ -183,7 +183,7 @@ mod test { use super::{ ChainResult, ChainResult::{Done, Next}, - Chainable, + Handler, }; use async_trait::async_trait; @@ -194,7 +194,7 @@ mod test { struct Result(usize); #[async_trait] - impl Chainable for Add { + impl Handler for Add { async fn chain(&mut self, req: Result) -> ChainResult { let added = req.0 + self.0; if added > 100 { diff --git a/lychee-lib/src/checker.rs b/lychee-lib/src/checker.rs index 695c00f11d..ba89c1ce74 100644 --- a/lychee-lib/src/checker.rs +++ b/lychee-lib/src/checker.rs @@ -1,5 +1,5 @@ use crate::{ - chain::{ChainResult, Chainable}, + chain::{ChainResult, Handler}, retry::RetryExt, Status, }; @@ -73,7 +73,7 @@ fn clone_unwrap(request: &Request) -> Request { } #[async_trait] -impl Chainable for Checker { +impl Handler for Checker { async fn chain(&mut self, input: Request) -> ChainResult { ChainResult::Done(self.retry_request(input).await) } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 0bbc422103..15f9397cfb 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -750,7 +750,7 @@ mod tests { use super::ClientBuilder; use crate::{ - chain::{ChainResult, Chainable, RequestChain}, + chain::{ChainResult, Handler, RequestChain}, mock_server, test_utils::get_mock_client_response, Request, Status, Uri, @@ -1086,7 +1086,7 @@ mod tests { struct ExampleHandler(); #[async_trait] - impl Chainable for ExampleHandler { + impl Handler for ExampleHandler { async fn chain(&mut self, _: Request) -> ChainResult { ChainResult::Done(Status::Excluded) } diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index 272bd78a7b..98a0701485 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -85,8 +85,8 @@ use openssl_sys as _; // required for vendored-openssl feature #[doc(inline)] pub use crate::{ basic_auth::BasicAuthExtractor, - // Expose the `Chainable` trait to allow defining external handlers (plugins) - chain::{ChainResult, Chainable}, + // Expose the `Handler` trait to allow defining external handlers (plugins) + chain::{ChainResult, Handler}, // Constants get exposed so that the CLI can use the same defaults as the library client::{ check, Client, ClientBuilder, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, diff --git a/lychee-lib/src/quirks/mod.rs b/lychee-lib/src/quirks/mod.rs index 333b921310..69976fcc4c 100644 --- a/lychee-lib/src/quirks/mod.rs +++ b/lychee-lib/src/quirks/mod.rs @@ -1,5 +1,5 @@ use crate::{ - chain::{ChainResult, Chainable}, + chain::{ChainResult, Handler}, Status, }; use async_trait::async_trait; @@ -92,7 +92,7 @@ impl Quirks { } #[async_trait] -impl Chainable for Quirks { +impl Handler for Quirks { async fn chain(&mut self, input: Request) -> ChainResult { ChainResult::Next(self.apply(input)) } diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index 906357f671..74a4531617 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -8,7 +8,7 @@ use reqwest::Request; use serde::Deserialize; use thiserror::Error; -use crate::chain::{ChainResult, Chainable}; +use crate::chain::{ChainResult, Handler}; use crate::Status; #[derive(Copy, Clone, Debug, Error, PartialEq)] @@ -75,7 +75,7 @@ impl BasicAuthCredentials { } #[async_trait] -impl Chainable for Option { +impl Handler for Option { async fn chain(&mut self, mut request: Request) -> ChainResult { if let Some(credentials) = self { request