diff --git a/Cargo.lock b/Cargo.lock index 72eac249..01de9c15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,6 +72,19 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +[[package]] +name = "ammonia" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab99eae5ee58501ab236beb6f20f6ca39be615267b014899c89b2f0bc18a459" +dependencies = [ + "html5ever", + "maplit", + "once_cell", + "tendril", + "url", +] + [[package]] name = "android-tzdata" version = "0.1.1" @@ -887,6 +900,16 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -1535,6 +1558,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "http" version = "1.1.0" @@ -1882,6 +1919,32 @@ dependencies = [ "crc", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "markup5ever" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7940b09815a02810a42b9e1bc41c069880a87de68e9b1dcbe754a3ba3b47c20" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "md-5" version = "0.10.6" @@ -2062,6 +2125,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "noise" version = "0.9.0" @@ -2260,6 +2329,63 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -2323,6 +2449,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-crate" version = "3.1.0" @@ -2636,6 +2768,7 @@ name = "rust-g" version = "3.2.0" dependencies = [ "aho-corasick", + "ammonia", "base64 0.22.0", "chrono", "const-random", @@ -2958,6 +3091,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2998,6 +3137,32 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.10.0" @@ -3078,6 +3243,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -3386,6 +3562,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "uuid" version = "1.8.0" diff --git a/Cargo.toml b/Cargo.toml index 89f1054d..f8dd75bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ pathfinding = { version = "4.9", optional = true } num-integer = { version = "0.1.46", optional = true } dmi = { version = "0.3.5", optional = true } tracy_full = { version = "1.7.1", optional = true } +ammonia = { version = "4.0.0", optional = true } [features] default = [ @@ -77,6 +78,7 @@ default = [ "log", "noise", "rustls_tls", + "sanitize", "sql", "time", "toml", @@ -95,6 +97,7 @@ all = [ "log", "noise", "rustls_tls", + "sanitize", "sql", "time", "toml", @@ -117,6 +120,7 @@ git = ["gix", "chrono"] http = ["reqwest", "serde", "serde_json", "once_cell", "jobs"] json = ["serde", "serde_json"] log = ["chrono"] +sanitize = ["ammonia", "serde_json"] sql = ["mysql", "serde", "serde_json", "once_cell", "dashmap", "jobs"] time = [] toml = ["serde", "serde_json", "toml-dep"] diff --git a/dmsrc/sanitize.dm b/dmsrc/sanitize.dm new file mode 100644 index 00000000..ca42fb7b --- /dev/null +++ b/dmsrc/sanitize.dm @@ -0,0 +1,9 @@ +/* + * Takes in a string and json_encode()"d lists to produce a sanitized string. + * This function operates on whitelists, there is currently no way to blacklist. + * Args: + * * text: the string to sanitize. + * * attribute_whitelist_json: a json_encode()'d list of HTML attributes to allow in the final string. + * * tag_whitelist_json: a json_encode()'d list of HTML tags to allow in the final string. + */ +#define rustg_sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) RUSTG_CALL(RUST_G, "sanitize_html")(text, attribute_whitelist_json, tag_whitelist_json) diff --git a/src/lib.rs b/src/lib.rs index 51b46708..6ca077f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,8 @@ pub mod pathfinder; pub mod redis_pubsub; #[cfg(feature = "redis_reliablequeue")] pub mod redis_reliablequeue; +#[cfg(feature = "sanitize")] +pub mod sanitize; #[cfg(feature = "sql")] pub mod sql; #[cfg(feature = "time")] diff --git a/src/sanitize.rs b/src/sanitize.rs new file mode 100644 index 00000000..21273ae7 --- /dev/null +++ b/src/sanitize.rs @@ -0,0 +1,32 @@ +use crate::error::Result; +use std::collections::HashSet; + +byond_fn!(fn sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) { + match seriously_sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) { + Ok(r) => return Some(r), + Err(e) => return Some(e.to_string()) + } +}); + +fn seriously_sanitize_html( + text: &str, + attribute_whitelist_json: &str, + tag_whitelist_json: &str, +) -> Result { + let attribute_whitelist: HashSet<&str> = serde_json::from_str(attribute_whitelist_json)?; + let tag_whitelist: HashSet<&str> = serde_json::from_str(tag_whitelist_json)?; + + let mut prune_url_schemes = ammonia::Builder::default().clone_url_schemes(); + prune_url_schemes.insert("byond"); + + let sanitized = ammonia::Builder::empty() + .clean_content_tags(HashSet::from_iter(["script", "style"])) // Completely forbid script and style attributes. + .link_rel(Some("noopener")) // https://mathiasbynens.github.io/rel-noopener/ + .url_schemes(prune_url_schemes) + .generic_attributes(attribute_whitelist) + .tags(tag_whitelist) + .clean(text) + .to_string(); + + Ok(sanitized) +}