bee-san · bee-san · Dec 27, 2022 · Dec 27, 2022 · Dec 27, 2022 · May 1, 2023
diff --git a/src/checkers/english.rs b/src/checkers/english.rs
@@ -47,9 +47,10 @@ impl Check for Checker<EnglishChecker> {
             link: self.link,
         };
 
-        // After we've normalised our string, if we find it's a length 0 we don't do anything
+        // After we've normalised our string, if we find its length is less than 2 we don't do anything
         // This can happen if our string is a single puncuation mark, for example.
-        if input.is_empty() {
+        // There are no words of length less than 2 in the dict, so it's not worth checking.
+        if input.len() < 2 {
             return result;
         }
 
@@ -106,6 +107,9 @@ fn normalise_string(input: &str) -> String {
         .to_ascii_lowercase()
         .chars()
         .filter(|x| !x.is_ascii_punctuation())
+        // if digit is base10 filter it out
+        // Our dictionary does not contain numbers, so we want to filter them out
+        .filter(|x| !x.is_ascii_digit())
         .collect()
 }
 
@@ -187,4 +191,16 @@ mod tests {
         let checker = Checker::<EnglishChecker>::new();
         assert!(!checker.check("#").is_identified);
     }
+
+    #[test]
+    fn test_check_fail_single_number() {
+        let checker = Checker::<EnglishChecker>::new();
+        assert!(!checker.check("2").is_identified);
+    }
+
+    #[test]
+    fn test_check_fail_single_letter() {
+        let checker = Checker::<EnglishChecker>::new();
+        assert!(!checker.check("F").is_identified);
+    }
 }
diff --git a/src/storage/dictionaries/clean.py b/src/storage/dictionaries/clean.py
@@ -12,10 +12,13 @@
 f = open(file_name)
 f2 = open("modified.txt", "w")
 for line in f:
+    line = line.strip()
     if len(line) <= MIN_LENGTH:
         continue
     if len(set(line).intersection(PUNC)) != 0:
         continue
+    if not line.isalpha():
+        continue
     if LOWERCASE:
         line = line.lower()
     f2.write(line)