Skip to content

Commit

Permalink
Partially through messing with how to take decisions
Browse files Browse the repository at this point in the history
  • Loading branch information
faern committed Jul 26, 2024
1 parent d4bda6d commit dde4727
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 29 deletions.
72 changes: 72 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ unic-char-range = "0.9.0"
toml = "0.8.14"
serde = { version = "1.0.203", features = ["derive"] }
walkdir = "2.5.0"
anyhow = "1.0.86"
glob = "0.3.1"
phf = { version = "0.11.2", features = ["macros"] }

[dev-dependencies]
trycmd = "0.15.5"
42 changes: 30 additions & 12 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,42 +60,60 @@ fn unicode_notation_to_char(unicode_notation: &str) -> Result<char, InvalidChara

#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
enum CodeType {
pub enum CodeType {
Comment,
StringLiteral,
Identifiers,
}

#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
enum Language {
pub enum Language {
Rust,
Javascript,
Python,
}

/// Lookup table used by `Language::lookup_code_type` for `Language::Rust`:
/// maps a node-type string to the `CodeType` it should be treated as.
///
/// Both keys currently resolve to `CodeType::Comment`; no key maps to
/// `CodeType::StringLiteral` or `CodeType::Identifiers` yet.
// NOTE(review): the keys are presumably tree-sitter-rust node kind names.
// Recent grammar versions appear to name line comments `line_comment` rather
// than `comment` — confirm against the grammar version in use.
static RUST_CODE_TYPES: phf::Map<&'static str, CodeType> = phf::phf_map! {
"comment" => CodeType::Comment,
"block_comment" => CodeType::Comment,
};

impl Language {
    /// Translates a tree-sitter node type name into the [`CodeType`] it
    /// belongs to for this language.
    ///
    /// Returns `None` when the name has no entry in the language's lookup
    /// table, or when the language has no lookup table at all.
    pub fn lookup_code_type(&self, tree_sitter_code_type: &str) -> Option<CodeType> {
        match self {
            Language::Rust => RUST_CODE_TYPES.get(tree_sitter_code_type).copied(),
            // No lookup table exists for these languages yet. They are listed
            // explicitly (instead of a `_` catch-all) so that adding a new
            // `Language` variant forces a decision here at compile time.
            Language::Javascript | Language::Python => None,
        }
    }
}

#[derive(Debug, Eq, PartialEq, Default, serde::Deserialize)]
struct ConfigRules {
pub struct ConfigRules {
#[serde(default)]
default: RuleSet,
pub default: RuleSet,
#[serde(flatten)]
code_type_rules: HashMap<CodeType, RuleSet>,
pub code_type_rules: HashMap<CodeType, RuleSet>,
}

#[derive(Debug, Eq, PartialEq, serde::Deserialize)]
struct LanguageRules {
pub struct LanguageRules {
// None = Inherit default path globs
// Some([]) = No paths will ever match this language
// Some([...]) = Match every file against these glob patterns.
// Run this language parser if at least one matches.
#[serde(default)]
paths: Vec<String>,
pub paths: Option<Vec<String>>,
#[serde(flatten)]
rules: ConfigRules,
pub rules: ConfigRules,
}

#[derive(Debug, Eq, PartialEq, Default, serde::Deserialize)]
struct Config {
pub struct Config {
#[serde(default)]
global: ConfigRules,
pub global: ConfigRules,
#[serde(default)]
language: HashMap<Language, LanguageRules>,
pub language: HashMap<Language, LanguageRules>,
}

#[cfg(test)]
Expand Down Expand Up @@ -170,7 +188,7 @@ deny = ["Tibetan"]
language: HashMap::from([(
Language::Rust,
LanguageRules {
paths: vec![],
paths: None,
rules: ConfigRules {
default: RuleSet {
allow: vec![
Expand Down
116 changes: 115 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,91 @@
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io;
use std::path::Path;

use anyhow::Context;
use config::CodeType;
use config::Config;
use config::Language;
use miette::{miette, LabeledSpan, NamedSource, Severity};
use rules::Decision;
use rules::RuleSet;
use unic_ucd_name::Name;

mod config;
mod rules;

fn main() {
// Replaces the previous idea of "RuleChain"s.
/// Holds the two configurations that `RuleDispatcher::decision` consults, in
/// priority order, to decide whether a character is allowed.
struct RuleDispatcher {
/// Consulted first; any decision it produces is final.
user_config: Config,
/// Consulted only when `user_config` has no opinion on the character.
default_config: Config,
}

impl RuleDispatcher {
    /// Decides whether `c` is acceptable in `language` code of kind `code_type`.
    ///
    /// The user configuration is asked first, then the default configuration.
    /// If neither has an opinion the character is denied.
    pub fn decision(&self, c: char, language: Language, code_type: Option<CodeType>) -> Decision {
        Self::decision_for_config(&self.user_config, c, language, code_type)
            .or_else(|| Self::decision_for_config(&self.default_config, c, language, code_type))
            .unwrap_or(Decision::Deny)
    }

    /// Asks a single `config` about `c`, returning `None` when none of its
    /// rule sets has an opinion on the character.
    ///
    /// Rule sets are consulted from most to least specific; the first one that
    /// yields a decision wins:
    /// 1. Code type specific ruleset for the specific language
    /// 2. Default ruleset for the specific language
    /// 3. Code type specific ruleset in the global section
    /// 4. Default ruleset in the global section
    fn decision_for_config(
        config: &Config,
        c: char,
        language: Language,
        code_type: Option<CodeType>,
    ) -> Option<Decision> {
        // Rules of the language section, if the config has one for `language`.
        let language_section = config.language.get(&language).map(|entry| &entry.rules);

        // Every rule set that applies, ordered by priority. Entries are `None`
        // when the section or the code type specific rule set is missing.
        let candidates = [
            // 1.
            language_section
                .zip(code_type)
                .and_then(|(section, ct)| section.code_type_rules.get(&ct)),
            // 2.
            language_section.map(|section| &section.default),
            // 3.
            code_type.and_then(|ct| config.global.code_type_rules.get(&ct)),
            // 4.
            Some(&config.global.default),
        ];

        // First rule set with an opinion decides; `None` means this config
        // does not have any opinion on this character.
        candidates
            .iter()
            .flatten()
            .find_map(|rule_set| rule_set.decision(c))
    }
}

fn main() -> anyhow::Result<()> {
let mut args: Vec<String> = env::args().skip(1).collect();
if args.is_empty() {
args = vec![String::from(".")]
}

let _config = get_config()?;

for arg in args {
for entry in walkdir::WalkDir::new(arg) {
match entry {
Expand All @@ -22,6 +95,7 @@ fn main() {
}
}
}
Ok(())
}

fn check_file(path: &Path) {
Expand Down Expand Up @@ -83,3 +157,43 @@ fn detect_language(path: &Path) -> Option<tree_sitter::Language> {
_ => None,
}
}

/// Loads the configuration from `./unicop.toml`.
///
/// Falls back to [`get_default_config`] when the file does not exist.
///
/// # Errors
///
/// Returns an error if the file exists but cannot be read, or if its
/// contents fail to parse as a `Config`.
fn get_config() -> anyhow::Result<Config> {
    // Step 1: obtain the raw file contents, bailing out early on the two
    // outcomes that do not lead to parsing.
    let raw_toml = match std::fs::read_to_string("./unicop.toml") {
        Ok(contents) => contents,
        Err(err) if err.kind() == io::ErrorKind::NotFound => {
            return Ok(get_default_config());
        }
        Err(err) => return Err(err).context("Failed to read config file"),
    };

    // Step 2: parse what was read.
    toml::from_str(&raw_toml).context("Failed to parse config")
}

/// Builds the configuration that `get_config` falls back to when no config
/// file is found.
///
/// Comments and string literals allow all unicode except Bidi characters.
/// The global default rule set is left empty and no language sections are
/// defined, so this config expresses no opinion on any other kind of code.
fn get_default_config() -> Config {
    // Shared shape of the two code type specific rule sets. A closure is used
    // so each map entry gets its own freshly built `RuleSet`.
    let anything_but_bidi = || RuleSet {
        allow: vec![rules::CharacterType::Anything],
        deny: vec![rules::CharacterType::Bidi],
    };

    let mut code_type_rules = HashMap::new();
    code_type_rules.insert(config::CodeType::Comment, anything_but_bidi());
    code_type_rules.insert(config::CodeType::StringLiteral, anything_but_bidi());

    let global = config::ConfigRules {
        default: RuleSet {
            allow: Vec::new(),
            deny: Vec::new(),
        },
        code_type_rules,
    };

    Config {
        global,
        language: HashMap::new(),
    }
}
17 changes: 1 addition & 16 deletions src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,6 @@ pub enum Decision {
Deny,
}

/// An ordered sequence of rule sets that are consulted one after another.
pub struct RuleChain {
    /// Rule sets in priority order; earlier entries are asked first.
    pub rules: Vec<RuleSet>,
}

impl RuleChain {
    /// Returns the decision of the first rule set that has an opinion on `c`.
    ///
    /// If no rule set in the chain has an opinion (including when the chain
    /// is empty), the character is denied.
    pub fn decision(&self, c: char) -> Decision {
        self.rules
            .iter()
            .find_map(|rule_set| rule_set.decision(c))
            .unwrap_or(Decision::Deny)
    }
}

#[derive(Debug, Eq, PartialEq, Default, serde::Deserialize)]
pub struct RuleSet {
#[serde(default)]
Expand All @@ -27,7 +12,7 @@ pub struct RuleSet {
}

impl RuleSet {
fn decision(&self, c: char) -> Option<Decision> {
pub fn decision(&self, c: char) -> Option<Decision> {
let allow_specificity = self
.allow
.iter()
Expand Down

0 comments on commit dde4727

Please sign in to comment.