Skip to content

Commit

Permalink
Introduce fragment checking for links to markdown files. (#1126)
Browse files Browse the repository at this point in the history
- Implemented enhancements to include fragments in file links
- Checked links to markdown files with fragments, generating unique kebab case and heading attributes.
- Made code more idiomatic and added an integration test.
- Updated documentation.
- Fixed issues with heading attributes fragments and ensured proper handling of file errors.
  • Loading branch information
HU90m authored Jul 31, 2023
1 parent 518fb27 commit 8e63693
Show file tree
Hide file tree
Showing 18 changed files with 374 additions and 60 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ Options:
-a, --accept <ACCEPT>
Comma-separated list of accepted status codes for valid links
--include-fragments
Enable the checking of fragments in links
-t, --timeout <TIMEOUT>
Website timeout in seconds from connect to response finished
Expand Down
2 changes: 1 addition & 1 deletion fixtures/TEST.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Check file link
![Logo](../assets/banner.svg)

![Anchors should be ignored](#awesome)
![Fragment only link](#awesome)

Normal link, which should work as expected.
[Wikipedia](https://en.wikipedia.org/wiki/Static_program_analysis)
Expand Down
Empty file added fixtures/fragments/empty_file
Empty file.
42 changes: 42 additions & 0 deletions fixtures/fragments/file1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Fragment Test File 1

This is a test file for the fragment loader.

## Fragment 1

[Link to fragment 2](#fragment-2)

## Fragment 2

[Link to fragment 1 in file2](file2.md#fragment-1)

## Fragment 3

[Link to missing fragment](#missing-fragment)

[Link to missing fragment in file2](file2.md#missing-fragment)

## HTML Fragments

Explicit fragment links are currently not supported.
Therefore we put the test into a code block for now to prevent false positives.

```
<a name="explicit-fragment"></a>
[Link to explicit fragment](#explicit-fragment)
```

## Custom Fragments

[Custom fragment id in file2](file2.md#custom-id)

# Kebab Case Fragment

[Link to kebab-case fragment](#kebab-case-fragment)

[Link to second kebab-case fragment](#kebab-case-fragment-1)

# Kebab Case Fragment

[Link to another file type](empty_file#fragment)
7 changes: 7 additions & 0 deletions fixtures/fragments/file2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Fragment Test File 2

This is a test file for the fragment loader.

### Some other heading with custom id {#custom-id}

#### Fragment 1
1 change: 1 addition & 0 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
.accepted(accepted)
.require_https(cfg.require_https)
.cookie_jar(cookie_jar.cloned())
.include_fragments(cfg.include_fragments)
.build()
.client()
.context("Failed to create request client")
Expand Down
6 changes: 6 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,11 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) accept: Option<HashSet<u16>>,

/// Enable the checking of fragments in links.
#[arg(long)]
#[serde(default)]
pub(crate) include_fragments: bool,

/// Website timeout in seconds from connect to response finished
#[arg(short, long, default_value = &TIMEOUT_STR)]
#[serde(default = "timeout")]
Expand Down Expand Up @@ -426,6 +431,7 @@ impl Config {
output: None;
require_https: false;
cookie_jar: None;
include_fragments: false;
}

if self
Expand Down
38 changes: 32 additions & 6 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ mod cli {
.env_clear()
.assert()
.success()
.stdout(contains("3 Total"))
.stdout(contains("3 OK"));
.stdout(contains("4 Total"))
.stdout(contains("4 OK"));
}

#[test]
Expand Down Expand Up @@ -489,8 +489,8 @@ mod cli {
test_json_output!(
"TEST.md",
MockResponseStats {
total: 11,
successful: 9,
total: 12,
successful: 10,
excludes: 2,
..MockResponseStats::default()
}
Expand Down Expand Up @@ -518,7 +518,7 @@ mod cli {
// Running the command from the command line will print 9 links,
// because the actual `--dump` command filters out the two
// http(s)://example.com links
assert_eq!(output.lines().count(), 11);
assert_eq!(output.lines().count(), 12);
fs::remove_file(outfile)?;
Ok(())
}
Expand All @@ -534,7 +534,7 @@ mod cli {
.arg(".*")
.assert()
.success()
.stdout(contains("11 Excluded"));
.stdout(contains("12 Excluded"));

Ok(())
}
Expand Down Expand Up @@ -1399,4 +1399,30 @@ mod cli {

Ok(())
}

#[test]
fn test_fragments() {
let mut cmd = main_command();
let input = fixtures_path().join("fragments");

cmd.arg("--verbose")
.arg("--include-fragments")
.arg(input)
.assert()
.failure()
.stderr(contains("fixtures/fragments/file1.md#fragment-2"))
.stderr(contains("fixtures/fragments/file2.md#custom-id"))
.stderr(contains("fixtures/fragments/file1.md#missing-fragment"))
.stderr(contains("fixtures/fragments/file2.md#fragment-1"))
.stderr(contains("fixtures/fragments/file1.md#kebab-case-fragment"))
.stderr(contains("fixtures/fragments/file2.md#missing-fragment"))
.stderr(contains("fixtures/fragments/empty_file#fragment"))
.stderr(contains(
"fixtures/fragments/file1.md#kebab-case-fragment-1",
))
.stdout(contains("8 Total"))
.stdout(contains("6 OK"))
// 2 failures because of missing fragments
.stdout(contains("2 Errors"));
}
}
45 changes: 37 additions & 8 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
clippy::default_trait_access,
clippy::used_underscore_binding
)]
use std::{collections::HashSet, sync::Arc, time::Duration};
use std::{collections::HashSet, path::Path, sync::Arc, time::Duration};

#[cfg(all(feature = "email-check", feature = "native-tls"))]
use check_if_email_exists::{check_email, CheckEmailInput, Reachable};
Expand All @@ -22,7 +22,7 @@ use http::{
header::{HeaderMap, HeaderValue, AUTHORIZATION},
StatusCode,
};
use log::debug;
use log::{debug, warn};
use octocrab::Octocrab;
use regex::RegexSet;
use reqwest::{header, redirect, Url};
Expand All @@ -36,6 +36,7 @@ use crate::{
remap::Remaps,
retry::RetryExt,
types::uri::github::GithubUri,
utils::fragment_checker::FragmentChecker,
BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
};

Expand Down Expand Up @@ -270,6 +271,9 @@ pub struct ClientBuilder {
///
/// See https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.cookie_store
cookie_jar: Option<Arc<CookieStoreMutex>>,

/// Enable the checking of fragments in links.
include_fragments: bool,
}

impl Default for ClientBuilder {
Expand Down Expand Up @@ -383,6 +387,8 @@ impl ClientBuilder {
accepted: self.accepted,
require_https: self.require_https,
quirks,
include_fragments: self.include_fragments,
fragment_checker: FragmentChecker::new(),
})
}
}
Expand Down Expand Up @@ -429,6 +435,12 @@ pub struct Client {

/// Override behaviors for certain known issues with special URIs.
quirks: Quirks,

/// Enable the checking of fragments in links.
include_fragments: bool,

/// Caches Fragments
fragment_checker: FragmentChecker,
}

impl Client {
Expand Down Expand Up @@ -472,7 +484,7 @@ impl Client {
}

let status = match uri.scheme() {
_ if uri.is_file() => self.check_file(uri),
_ if uri.is_file() => self.check_file(uri).await,
_ if uri.is_mail() => self.check_mail(uri).await,
_ => self.check_website(uri, credentials).await?,
};
Expand Down Expand Up @@ -659,13 +671,30 @@ impl Client {
}

/// Check a `file` URI.
pub fn check_file(&self, uri: &Uri) -> Status {
if let Ok(path) = uri.url.to_file_path() {
if path.exists() {
return Status::Ok(StatusCode::OK);
pub async fn check_file(&self, uri: &Uri) -> Status {
let Ok(path) = uri.url.to_file_path() else {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
if !path.exists() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
if self.include_fragments {
self.check_fragment(&path, uri).await
} else {
Status::Ok(StatusCode::OK)
}
}

/// Checks a `file` URI's fragment.
pub async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status {
match self.fragment_checker.check(path, &uri.url).await {
Ok(true) => Status::Ok(StatusCode::OK),
Ok(false) => ErrorKind::InvalidFragment(uri.clone()).into(),
Err(err) => {
warn!("Skipping fragment check due to the following error: {err}");
Status::Ok(StatusCode::OK)
}
}
ErrorKind::InvalidFilePath(uri.clone()).into()
}

/// Check a mail address, or equivalently a `mailto` URI.
Expand Down
Loading

0 comments on commit 8e63693

Please sign in to comment.