diff --git a/README.md b/README.md index 0efc73cf2b..2872fa96fc 100644 --- a/README.md +++ b/README.md @@ -389,6 +389,13 @@ Options: --remap Remap URI matching pattern to different URI + --check-extensions + Test the specified file extensions for URIs when checking files locally. + Multiple extensions can be separated by commas. Extensions will be checked in + order of appearance. + + Example: --check-extensions html,htm,php,asp,aspx,jsp,cgi + --header
Custom request header diff --git a/fixtures/check-extensions/index.html b/fixtures/check-extensions/index.html new file mode 100644 index 0000000000..40e77ff714 --- /dev/null +++ b/fixtures/check-extensions/index.html @@ -0,0 +1,10 @@ + + + + + For Testing pretty URLs + + + other + + diff --git a/fixtures/check-extensions/other.htm b/fixtures/check-extensions/other.htm new file mode 100644 index 0000000000..6dce1543ba --- /dev/null +++ b/fixtures/check-extensions/other.htm @@ -0,0 +1,10 @@ + + + + + For Testing pretty URLs + + + index + + diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index a7f53c2db7..93abef6425 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - .require_https(cfg.require_https) .cookie_jar(cookie_jar.cloned()) .include_fragments(cfg.include_fragments) + .check_extensions(cfg.check_extensions.clone()) .build() .client() .context("Failed to create request client") diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 925023a576..e5bc486704 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -300,6 +300,19 @@ pub(crate) struct Config { #[arg(long)] pub(crate) remap: Vec, + /// Automatically append file extensions to `file://` URIs as needed + #[serde(default)] + #[arg( + long, + value_delimiter = ',', + long_help = "Test the specified file extensions for URIs when checking files locally. +Multiple extensions can be separated by commas. Extensions will be checked in +order of appearance. 
+ +Example: --check-extensions html,htm,php,asp,aspx,jsp,cgi" + )] + pub(crate) check_extensions: Vec, + /// Custom request header #[arg(long)] #[serde(default)] @@ -439,6 +452,7 @@ impl Config { exclude_loopback: false; exclude_mail: false; remap: Vec::::new(); + check_extensions: Vec::::new(); header: Vec::::new(); timeout: DEFAULT_TIMEOUT_SECS; retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS; diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 931783a668..7c476450cc 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1556,4 +1556,17 @@ mod cli { // 3 failures because of missing fragments .stdout(contains("3 Errors")); } + + #[test] + fn test_check_extensions() { + let mut cmd = main_command(); + let input = fixtures_path().join("check-extensions"); + + cmd.arg("--verbose") + .arg("--check-extensions=htm,html") + .arg(input) + .assert() + .success() + .stdout(contains("0 Errors")); + } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 42cfa4b730..495b458336 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -95,6 +95,9 @@ pub struct ClientBuilder { /// make sure rules don't conflict with each other. remaps: Option, + /// Automatically append file extensions to `file://` URIs as needed + check_extensions: Vec, + /// Links matching this set of regular expressions are **always** checked. /// /// This has higher precedence over [`ClientBuilder::excludes`], **but** @@ -384,6 +387,7 @@ impl ClientBuilder { reqwest_client, github_client, remaps: self.remaps, + check_extensions: self.check_extensions, filter, max_retries: self.max_retries, retry_wait_time: self.retry_wait_time, @@ -412,6 +416,9 @@ pub struct Client { /// Optional remapping rules for URIs matching pattern. remaps: Option, + /// Automatically append file extensions to `file://` URIs as needed + check_extensions: Vec, + /// Rules to decided whether each link should be checked or ignored. 
filter: Filter, @@ -654,14 +661,30 @@ impl Client { let Ok(path) = uri.url.to_file_path() else { return ErrorKind::InvalidFilePath(uri.clone()).into(); }; - if !path.exists() { - return ErrorKind::InvalidFilePath(uri.clone()).into(); - } - if self.include_fragments { - self.check_fragment(&path, uri).await + if path.exists() { + if self.include_fragments { + return self.check_fragment(&path, uri).await; + } } else { - Status::Ok(StatusCode::OK) + if path.extension().is_some() { + return ErrorKind::InvalidFilePath(uri.clone()).into(); + } + + // if the path has no file extension, try to append some + let mut path_buf = path.clone(); + for ext in &self.check_extensions { + path_buf.set_extension(ext); + if path_buf.exists() { + if self.include_fragments { + return self.check_fragment(&path_buf, uri).await; + } + return Status::Ok(StatusCode::OK); + } + } + // none of the candidate extensions produced an existing file, so the link is broken + return ErrorKind::InvalidFilePath(uri.clone()).into(); } + Status::Ok(StatusCode::OK) } /// Checks a `file` URI's fragment.