Skip to content

Commit

Permalink
Adds sanitize_html, a whitelist based HTML sanitizer. (#171)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kapu1178 authored May 30, 2024
1 parent bd5c3af commit 6ef3516
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 0 deletions.
182 changes: 182 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ pathfinding = { version = "4.9", optional = true }
num-integer = { version = "0.1.46", optional = true }
dmi = { version = "0.3.5", optional = true }
tracy_full = { version = "1.7.1", optional = true }
ammonia = { version = "4.0.0", optional = true }

[features]
default = [
Expand All @@ -77,6 +78,7 @@ default = [
"log",
"noise",
"rustls_tls",
"sanitize",
"sql",
"time",
"toml",
Expand All @@ -95,6 +97,7 @@ all = [
"log",
"noise",
"rustls_tls",
"sanitize",
"sql",
"time",
"toml",
Expand All @@ -117,6 +120,7 @@ git = ["gix", "chrono"]
http = ["reqwest", "serde", "serde_json", "once_cell", "jobs"]
json = ["serde", "serde_json"]
log = ["chrono"]
sanitize = ["ammonia", "serde_json"]
sql = ["mysql", "serde", "serde_json", "once_cell", "dashmap", "jobs"]
time = []
toml = ["serde", "serde_json", "toml-dep"]
Expand Down
9 changes: 9 additions & 0 deletions dmsrc/sanitize.dm
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
* Takes in a string and json_encode()'d lists to produce a sanitized string.
* This function operates on whitelists, there is currently no way to blacklist.
* Returns the sanitized string. If sanitizing fails (for example, a whitelist
* that is not valid JSON), the error message is returned instead.
* Args:
* * text: the string to sanitize.
* * attribute_whitelist_json: a json_encode()'d list of HTML attributes to allow in the final string.
* * tag_whitelist_json: a json_encode()'d list of HTML tags to allow in the final string.
*/
#define rustg_sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) RUSTG_CALL(RUST_G, "sanitize_html")(text, attribute_whitelist_json, tag_whitelist_json)
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ pub mod pathfinder;
pub mod redis_pubsub;
#[cfg(feature = "redis_reliablequeue")]
pub mod redis_reliablequeue;
#[cfg(feature = "sanitize")]
pub mod sanitize;
#[cfg(feature = "sql")]
pub mod sql;
#[cfg(feature = "time")]
Expand Down
32 changes: 32 additions & 0 deletions src/sanitize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use crate::error::Result;
use std::collections::HashSet;

// BYOND-facing entry point for the HTML sanitizer. Both outcomes are handed
// back as a string: the sanitized text on success, the error's message on
// failure.
byond_fn!(fn sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) {
    let outcome = seriously_sanitize_html(text, attribute_whitelist_json, tag_whitelist_json);
    Some(outcome.unwrap_or_else(|failure| failure.to_string()))
});

/// Sanitizes `text` with ammonia, keeping only whitelisted tags and attributes.
///
/// * `text` - the HTML to sanitize.
/// * `attribute_whitelist_json` - a JSON array of attribute names allowed on any tag.
/// * `tag_whitelist_json` - a JSON array of tag names allowed in the output.
///
/// `<script>` and `<style>` elements are always removed together with their
/// content, and `rel="noopener"` is managed by the sanitizer for links.
/// Consequently `script`, `style` and `rel` entries in the whitelists are
/// ignored rather than honoured.
///
/// The `byond` URL scheme is accepted in addition to ammonia's default schemes.
///
/// # Errors
///
/// Returns an error if either whitelist is not a JSON array of strings.
fn seriously_sanitize_html(
    text: &str,
    attribute_whitelist_json: &str,
    tag_whitelist_json: &str,
) -> Result<String> {
    // Tags whose content is dropped entirely, not just the surrounding markup.
    const FORBIDDEN_TAGS: [&str; 2] = ["script", "style"];
    // Attribute that ammonia writes itself once `link_rel` is configured.
    const MANAGED_ATTRIBUTE: &str = "rel";

    // Deserialize into owned strings: deserializing straight into `&str`
    // fails for any JSON string that contains an escape sequence, because such
    // a string cannot be borrowed from the input.
    let attribute_names: Vec<String> = serde_json::from_str(attribute_whitelist_json)?;
    let tag_names: Vec<String> = serde_json::from_str(tag_whitelist_json)?;

    // ammonia panics while cleaning if `rel` is whitelisted together with
    // `link_rel`, or if a tag is both whitelisted and listed in
    // `clean_content_tags`. The whitelists come from the caller, so drop the
    // conflicting entries instead of letting a panic escape through the FFI.
    let attribute_whitelist: HashSet<&str> = attribute_names
        .iter()
        .map(String::as_str)
        .filter(|attribute| *attribute != MANAGED_ATTRIBUTE)
        .collect();
    let tag_whitelist: HashSet<&str> = tag_names
        .iter()
        .map(String::as_str)
        .filter(|tag| !FORBIDDEN_TAGS.contains(tag))
        .collect();

    // Start from ammonia's default URL schemes and additionally allow byond:// links.
    let mut url_schemes = ammonia::Builder::default().clone_url_schemes();
    url_schemes.insert("byond");

    let sanitized = ammonia::Builder::empty()
        .clean_content_tags(HashSet::from_iter(FORBIDDEN_TAGS)) // Completely forbid script and style tags.
        .link_rel(Some("noopener")) // https://mathiasbynens.github.io/rel-noopener/
        .url_schemes(url_schemes)
        .generic_attributes(attribute_whitelist)
        .tags(tag_whitelist)
        .clean(text)
        .to_string();

    Ok(sanitized)
}

0 comments on commit 6ef3516

Please sign in to comment.