Skip to content

Commit

Permalink
Merge branch 'hackday2'
Browse files Browse the repository at this point in the history
  • Loading branch information
faern committed Aug 30, 2024
2 parents d4bda6d + c32ac66 commit a4ff26e
Show file tree
Hide file tree
Showing 5 changed files with 352 additions and 51 deletions.
83 changes: 83 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ unic-char-range = "0.9.0"
toml = "0.8.14"
serde = { version = "1.0.203", features = ["derive"] }
walkdir = "2.5.0"
anyhow = "1.0.86"
glob = "0.3.1"
phf = { version = "0.11.2", features = ["macros"] }
tree-sitter-rust = "0.21.2"

[dev-dependencies]
trycmd = "0.15.5"
87 changes: 74 additions & 13 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,42 +60,100 @@ fn unicode_notation_to_char(unicode_notation: &str) -> Result<char, InvalidChara

#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
enum CodeType {
pub enum CodeType {
Comment,
StringLiteral,
Identifiers,
}

#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
enum Language {
pub enum Language {
Rust,
Javascript,
Python,
}

/// Lookup table consulted by `Language::lookup_code_type` for `Language::Rust`.
///
/// Maps a `tree_sitter_code_type` string to the [`CodeType`] it is classified as.
/// Strings that are not listed here have no classification (the lookup yields `None`).
// NOTE(review): the keys are presumably node kind names emitted by the tree-sitter
// Rust grammar — confirm against the grammar version pinned in Cargo.toml.
static RUST_CODE_TYPES: phf::Map<&'static str, CodeType> = phf::phf_map! {
"doc_comment" => CodeType::Comment,
"line_comment" => CodeType::Comment,
"string_content" => CodeType::StringLiteral,
"char_literal" => CodeType::StringLiteral,
};

/// Lookup table consulted by `Language::lookup_code_type` for `Language::Javascript`.
///
/// Maps a `tree_sitter_code_type` string to the [`CodeType`] it is classified as.
/// Strings that are not listed here have no classification (the lookup yields `None`).
// NOTE(review): the keys are presumably node kind names emitted by the tree-sitter
// JavaScript grammar — confirm that each key is a kind the grammar really produces.
static JAVASCRIPT_CODE_TYPES: phf::Map<&'static str, CodeType> = phf::phf_map! {
"comment" => CodeType::Comment,
"block_comment" => CodeType::Comment,
"string_fragment" => CodeType::StringLiteral,
};

/// Lookup table consulted by `Language::lookup_code_type` for `Language::Python`.
///
/// Maps a `tree_sitter_code_type` string to the [`CodeType`] it is classified as.
/// Strings that are not listed here have no classification (the lookup yields `None`).
// NOTE(review): the keys are presumably node kind names emitted by the tree-sitter
// Python grammar — confirm against the grammar version in use.
static PYTHON_CODE_TYPES: phf::Map<&'static str, CodeType> = phf::phf_map! {
"string_content" => CodeType::StringLiteral,
"comment" => CodeType::Comment,
};

impl Language {
    /// Classifies a tree-sitter code type string for this language.
    ///
    /// Looks `tree_sitter_code_type` up in the static table belonging to this
    /// language and returns the matching [`CodeType`], or `None` when the
    /// string has no entry in that table.
    pub fn lookup_code_type(&self, tree_sitter_code_type: &str) -> Option<CodeType> {
        // Pick the per-language table first so the lookup itself is written once.
        let table = match self {
            Language::Rust => &RUST_CODE_TYPES,
            Language::Javascript => &JAVASCRIPT_CODE_TYPES,
            Language::Python => &PYTHON_CODE_TYPES,
        };
        table.get(tree_sitter_code_type).copied()
    }

    /// Returns the tree-sitter grammar used to parse source files of this language.
    pub fn grammar(&self) -> tree_sitter::Language {
        match *self {
            Language::Rust => tree_sitter_rust::language(),
            Language::Javascript => tree_sitter_javascript::language(),
            Language::Python => tree_sitter_python::language(),
        }
    }
}

#[derive(Debug, Eq, PartialEq, Default, serde::Deserialize)]
struct ConfigRules {
pub struct ConfigRules {
#[serde(default)]
default: RuleSet,
pub default: RuleSet,
#[serde(flatten)]
code_type_rules: HashMap<CodeType, RuleSet>,
pub code_type_rules: HashMap<CodeType, RuleSet>,
}

#[derive(Debug, Eq, PartialEq, serde::Deserialize)]
struct LanguageRules {
#[serde(default)]
paths: Vec<String>,
pub struct LanguageRules {
// None = Inherit default path globs
// Some([]) = No paths will ever match this language
// Some([...]) = Match every file against these glob patterns.
// Run this language parser if at least one matches.
#[serde(default, deserialize_with = "deserialize_pattern")]
pub paths: Option<Vec<glob::Pattern>>,
#[serde(flatten)]
rules: ConfigRules,
pub rules: ConfigRules,
}

fn deserialize_pattern<'de, D>(deserializer: D) -> Result<Option<Vec<glob::Pattern>>, D::Error>
where
D: serde::Deserializer<'de>,
{
let s: Option<Vec<String>> = serde::Deserialize::deserialize(deserializer)?;
match s {
None => Ok(None),
Some(v) => {
let res = v
.iter()
.map(|s| glob::Pattern::new(s))
.collect::<Result<Vec<glob::Pattern>, _>>()
.map_err(serde::de::Error::custom)?;
Ok(Some(res))
}
}
}

#[derive(Debug, Eq, PartialEq, Default, serde::Deserialize)]
struct Config {
pub struct Config {
#[serde(default)]
global: ConfigRules,
pub global: ConfigRules,
#[serde(default)]
language: HashMap<Language, LanguageRules>,
pub language: HashMap<Language, LanguageRules>,
}

#[cfg(test)]
Expand Down Expand Up @@ -141,6 +199,9 @@ deny = ["*"]
allow = ["*"]
deny = ["bidi"]
[language.rust]
paths = ["**/*.rs"]
[language.rust.default]
allow = ["Tibetan", "U+9000"]
deny = ["U+5000..U+5004"]
Expand Down Expand Up @@ -170,7 +231,7 @@ deny = ["Tibetan"]
language: HashMap::from([(
Language::Rust,
LanguageRules {
paths: vec![],
paths: Some(vec![glob::Pattern::new("**/*.rs").unwrap()]),
rules: ConfigRules {
default: RuleSet {
allow: vec![
Expand Down
Loading

0 comments on commit a4ff26e

Please sign in to comment.