Skip to content

Commit

Permalink
Add a way to handle "pretty URLs", i.e. URIs without .html extension
Browse files Browse the repository at this point in the history
In many circumstances (GitHub Pages, Apache configured with MultiViews,
etc.), web servers process URIs by appending the `.html` file extension
when no file is found at the path specified by the URI but a `.html`
file corresponding to that path _is_ found.

To allow Lychee to use the fast, offline method of checking such files
locally via the `file://` scheme, let's handle this scenario gracefully
by adding the `--check-extensions=html` option.

Note: This new option can take a list of file extensions to use; the
first one for which a corresponding file is found is then used.

Signed-off-by: Johannes Schindelin <[email protected]>
  • Loading branch information
dscho committed May 18, 2024
1 parent 9e031b6 commit 8148e03
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 6 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,13 @@ Options:
--remap <REMAP>
Remap URI matching pattern to different URI
--check-extensions <CHECK_EXTENSIONS>
Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Extensions will be checked in
order of appearance.
Example: --check-extensions html,htm,php,asp,aspx,jsp,cgi
--header <HEADER>
Custom request header
Expand Down
10 changes: 10 additions & 0 deletions fixtures/check-extensions/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>For Testing pretty URLs</title>
</head>
<body>
<a href="other">other</a>
</body>
</html>
10 changes: 10 additions & 0 deletions fixtures/check-extensions/other.htm
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>For Testing pretty URLs</title>
</head>
<body>
<a href="index">index</a>
</body>
</html>
1 change: 1 addition & 0 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
.require_https(cfg.require_https)
.cookie_jar(cookie_jar.cloned())
.include_fragments(cfg.include_fragments)
.check_extensions(cfg.check_extensions.clone())
.build()
.client()
.context("Failed to create request client")
Expand Down
14 changes: 14 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,19 @@ pub(crate) struct Config {
#[arg(long)]
pub(crate) remap: Vec<String>,

/// Automatically append file extensions to `file://` URIs as needed
#[serde(default)]
#[arg(
long,
value_delimiter = ',',
long_help = "Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Extensions will be checked in
order of appearance.
Example: --check-extensions html,htm,php,asp,aspx,jsp,cgi"
)]
pub(crate) check_extensions: Vec<String>,

/// Custom request header
#[arg(long)]
#[serde(default)]
Expand Down Expand Up @@ -439,6 +452,7 @@ impl Config {
exclude_loopback: false;
exclude_mail: false;
remap: Vec::<String>::new();
check_extensions: Vec::<String>::new();
header: Vec::<String>::new();
timeout: DEFAULT_TIMEOUT_SECS;
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
Expand Down
13 changes: 13 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1556,4 +1556,17 @@ mod cli {
// 3 failures because of missing fragments
.stdout(contains("3 Errors"));
}

/// Verifies that extension-less local links resolve when `--check-extensions` is given.
///
/// The `check-extensions` fixture directory contains `index.html` and `other.htm`,
/// which link to each other as `other` and `index`. With `htm,html` supplied as the
/// candidate extensions, the run is expected to succeed and print `0 Errors`.
#[test]
fn test_check_extensions() {
    let mut lychee = main_command();
    let fixture_dir = fixtures_path().join("check-extensions");

    // Build the invocation first, then inspect the outcome separately.
    let outcome = lychee
        .arg("--verbose")
        .arg("--check-extensions=htm,html")
        .arg(fixture_dir)
        .assert();

    outcome.success().stdout(contains("0 Errors"));
}
}
34 changes: 28 additions & 6 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ pub struct ClientBuilder {
/// make sure rules don't conflict with each other.
remaps: Option<Remaps>,

/// Automatically append file extensions to `file://` URIs as needed
check_extensions: Vec<String>,

/// Links matching this set of regular expressions are **always** checked.
///
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
Expand Down Expand Up @@ -384,6 +387,7 @@ impl ClientBuilder {
reqwest_client,
github_client,
remaps: self.remaps,
check_extensions: self.check_extensions,
filter,
max_retries: self.max_retries,
retry_wait_time: self.retry_wait_time,
Expand Down Expand Up @@ -412,6 +416,9 @@ pub struct Client {
/// Optional remapping rules for URIs matching pattern.
remaps: Option<Remaps>,

/// Automatically append file extensions to `file://` URIs as needed
check_extensions: Vec<String>,

/// Rules to decide whether each link should be checked or ignored.
filter: Filter,

Expand Down Expand Up @@ -654,13 +661,28 @@ impl Client {
let Ok(path) = uri.url.to_file_path() else {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
if !path.exists() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
if self.include_fragments {
self.check_fragment(&path, uri).await
if path.exists() {
if self.include_fragments {
return self.check_fragment(&path, uri).await;
}
return Status::Ok(StatusCode::OK)
} else {
Status::Ok(StatusCode::OK)
if path.extension().is_some() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}

// if the path has no file extension, try to append some
let mut path_buf = path.clone();
for ext in &self.check_extensions {
path_buf.set_extension(ext);
if path_buf.exists() {
if self.include_fragments {
return self.check_fragment(&path_buf, uri).await;
}
return Status::Ok(StatusCode::OK)
}
}
ErrorKind::InvalidFilePath(uri.clone()).into()
}
}

Expand Down

0 comments on commit 8148e03

Please sign in to comment.