diff --git a/Cargo.lock b/Cargo.lock index 7092fce..1d4e35e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -813,17 +813,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" -[[package]] -name = "unic-ucd-block" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b2a16f2d7ecd25325a1053ca5a66e7fa1b68911a65c5e97f8d2e1b236b6f1d7" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - [[package]] name = "unic-ucd-hangul" version = "0.9.0" @@ -886,8 +875,6 @@ dependencies = [ "tree-sitter-python", "tree-sitter-rust", "trycmd", - "unic-char-range", - "unic-ucd-block", "unic-ucd-name", "walkdir", ] diff --git a/Cargo.toml b/Cargo.toml index 41d3660..b886adc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,8 +22,6 @@ tree-sitter = "0.22.6" tree-sitter-javascript = "0.21.2" tree-sitter-python = "0.21.0" unic-ucd-name = "0.9.0" -unic-ucd-block = "0.9.0" -unic-char-range = "0.9.0" toml = "0.8.14" serde = { version = "1.0.203", features = ["derive"] } walkdir = "2.5.0" diff --git a/hack/genblocks b/hack/genblocks new file mode 100755 index 0000000..cec5c85 --- /dev/null +++ b/hack/genblocks @@ -0,0 +1,38 @@ +#!/usr/bin/env python +import io +import urllib.request +import re + +BLOCKDEF = re.compile( + r"^(?P[0-9A-Fa-f]+)\.\.(?P[0-9A-Fa-f]+); (?P.*)$" +) + + +resp = urllib.request.urlopen("https://www.unicode.org/Public/UNIDATA/Blocks.txt") +blocks = [] +for line in io.TextIOWrapper(resp): + if match := BLOCKDEF.match(line.strip()): + name = match.group("name") + if name in {"Low Surrogates", "High Surrogates", "High Private Use Surrogates"}: + continue + low = match.group("low") + high = match.group("high") + blocks.append((name, low, high)) + + +def constname(blockname): + return blockname.replace(" ", "_").replace("-", "_").upper() + + +print("// Code generated by hack/genblocks. DO NOT EDIT.") +print() +for name, low, high in blocks: + rustrange = f"'\\u{{{low}}}'..='\\u{{{high}}}'" + print(f"pub const {constname(name)}: std::ops::RangeInclusive = {rustrange};") + +print( + """pub static UNICODE_BLOCKS: phf::Map<&'static str, std::ops::RangeInclusive> = phf::phf_map! {""" +) +for name, _, _ in blocks: + print(f' "{name}" => {constname(name)},') +print("};") diff --git a/src/config.rs b/src/config.rs index 92f348f..3d891fb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,15 +35,13 @@ impl FromStr for CharacterType { if s == "*" { return Ok(Self::Anything); } - for block in unic_ucd_block::BlockIter::new() { - if block.name == s { - return Ok(Self::Block(block)); - } + if let Some(range) = crate::unicode_blocks::UNICODE_BLOCKS.get(s) { + return Ok(Self::Block(range)); } if let Some((low, high)) = s.split_once("..") { let low = unicode_notation_to_char(low)?; let high = unicode_notation_to_char(high)?; - return Ok(Self::Range(unic_char_range::CharRange { low, high })); + return Ok(Self::Range(low..=high)); } unicode_notation_to_char(s).map(Self::CodePoint) } @@ -158,9 +156,6 @@ pub struct Config { #[cfg(test)] mod tests { - use unic_char_range::CharRange; - use unic_ucd_block::BlockIter; - use super::*; use crate::rules::*; @@ -212,8 +207,6 @@ deny = ["Tibetan"] ) .unwrap(); - let tibetan_block = BlockIter::new().find(|b| b.name == "Tibetan").unwrap(); - let expected_config = Config { global: ConfigRules { default: RuleSet { @@ -235,19 +228,16 @@ deny = ["Tibetan"] rules: ConfigRules { default: RuleSet { allow: vec![ - CharacterType::Block(tibetan_block), + CharacterType::Block(&crate::unicode_blocks::TIBETAN), CharacterType::CodePoint('\u{9000}'), ], - deny: vec![CharacterType::Range(CharRange { - low: '\u{5000}', - high: '\u{5004}', - })], + deny: vec![CharacterType::Range('\u{5000}'..='\u{5004}')], }, code_type_rules: HashMap::from([( CodeType::StringLiteral, RuleSet { allow: vec![], - deny: vec![CharacterType::Block(tibetan_block)], + deny: vec![CharacterType::Block(&crate::unicode_blocks::TIBETAN)], }, )]), }, diff --git a/src/main.rs b/src/main.rs index bf04322..79105b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,7 @@ use unic_ucd_name::Name; mod config; mod rules; +mod unicode_blocks; // Replaces the previous idea of "RuleChain"s. struct RuleDispatcher { @@ -188,13 +189,10 @@ fn get_user_config() -> anyhow::Result> { /// Comments and string literals allow all unicode except Bidi characters, /// all other kinds of code deny all unicode. fn get_default_config() -> Config { - let ascii = unic_ucd_block::BlockIter::new() - .find(|b| b.name == "Basic Latin") - .unwrap(); Config { global: config::ConfigRules { default: RuleSet { - allow: vec![rules::CharacterType::Block(ascii)], + allow: vec![rules::CharacterType::Block(&unicode_blocks::BASIC_LATIN)], deny: vec![], }, code_type_rules: [ diff --git a/src/rules.rs b/src/rules.rs index 6fb8314..5ba2318 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -45,11 +45,11 @@ pub enum CharacterType { /// Single character (eg. "U+9000") CodePoint(char), /// An inclusive range of characters (eg. "U+1400..U+1409") - Range(unic_char_range::CharRange), + Range(std::ops::RangeInclusive), /// All bidirectional control characters (right to left etc) Bidi, /// Named ranges of characters (eg. "Tibetan", "Box Drawing") - Block(unic_ucd_block::Block), + Block(&'static std::ops::RangeInclusive), /// Any possible character. Anything, } @@ -58,14 +58,14 @@ impl CharacterType { fn matches(&self, c: char) -> bool { match self { Self::CodePoint(rule_char) => *rule_char == c, - Self::Range(range) => range.contains(c), + Self::Range(range) => range.contains(&c), Self::Bidi => [ // List of bidirectional formatting characters from https://en.wikipedia.org/wiki/Trojan_Source '\u{202A}', '\u{202b}', '\u{202c}', '\u{202d}', '\u{202e}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{2069}', ] .contains(&c), - Self::Block(block) => block.range.contains(c), + Self::Block(range) => range.contains(&c), Self::Anything => true, } } @@ -88,7 +88,7 @@ impl PartialEq for CharacterType { (CodePoint(self_c), CodePoint(other_c)) => self_c == other_c, (Range(self_r), Range(other_r)) => self_r == other_r, (Bidi, Bidi) => true, - (Block(self_block), Block(other_block)) => self_block.name == other_block.name, + (Block(self_range), Block(other_range)) => self_range == other_range, (Anything, Anything) => true, _ => false, } diff --git a/src/unicode_blocks.rs b/src/unicode_blocks.rs new file mode 100644 index 0000000..bad9db3 --- /dev/null +++ b/src/unicode_blocks.rs @@ -0,0 +1,654 @@ +// Code generated by hack/genblocks. DO NOT EDIT. + +pub const BASIC_LATIN: std::ops::RangeInclusive = '\u{0000}'..='\u{007F}'; +pub const LATIN_1_SUPPLEMENT: std::ops::RangeInclusive = '\u{0080}'..='\u{00FF}'; +pub const LATIN_EXTENDED_A: std::ops::RangeInclusive = '\u{0100}'..='\u{017F}'; +pub const LATIN_EXTENDED_B: std::ops::RangeInclusive = '\u{0180}'..='\u{024F}'; +pub const IPA_EXTENSIONS: std::ops::RangeInclusive = '\u{0250}'..='\u{02AF}'; +pub const SPACING_MODIFIER_LETTERS: std::ops::RangeInclusive = '\u{02B0}'..='\u{02FF}'; +pub const COMBINING_DIACRITICAL_MARKS: std::ops::RangeInclusive = '\u{0300}'..='\u{036F}'; +pub const GREEK_AND_COPTIC: std::ops::RangeInclusive = '\u{0370}'..='\u{03FF}'; +pub const CYRILLIC: std::ops::RangeInclusive = '\u{0400}'..='\u{04FF}'; +pub const CYRILLIC_SUPPLEMENT: std::ops::RangeInclusive = '\u{0500}'..='\u{052F}'; +pub const ARMENIAN: std::ops::RangeInclusive = '\u{0530}'..='\u{058F}'; +pub const HEBREW: std::ops::RangeInclusive = '\u{0590}'..='\u{05FF}'; +pub const ARABIC: std::ops::RangeInclusive = '\u{0600}'..='\u{06FF}'; +pub const SYRIAC: std::ops::RangeInclusive = '\u{0700}'..='\u{074F}'; +pub const ARABIC_SUPPLEMENT: std::ops::RangeInclusive = '\u{0750}'..='\u{077F}'; +pub const THAANA: std::ops::RangeInclusive = '\u{0780}'..='\u{07BF}'; +pub const NKO: std::ops::RangeInclusive = '\u{07C0}'..='\u{07FF}'; +pub const SAMARITAN: std::ops::RangeInclusive = '\u{0800}'..='\u{083F}'; +pub const MANDAIC: std::ops::RangeInclusive = '\u{0840}'..='\u{085F}'; +pub const SYRIAC_SUPPLEMENT: std::ops::RangeInclusive = '\u{0860}'..='\u{086F}'; +pub const ARABIC_EXTENDED_B: std::ops::RangeInclusive = '\u{0870}'..='\u{089F}'; +pub const ARABIC_EXTENDED_A: std::ops::RangeInclusive = '\u{08A0}'..='\u{08FF}'; +pub const DEVANAGARI: std::ops::RangeInclusive = '\u{0900}'..='\u{097F}'; +pub const BENGALI: std::ops::RangeInclusive = '\u{0980}'..='\u{09FF}'; +pub const GURMUKHI: std::ops::RangeInclusive = '\u{0A00}'..='\u{0A7F}'; +pub const GUJARATI: std::ops::RangeInclusive = '\u{0A80}'..='\u{0AFF}'; +pub const ORIYA: std::ops::RangeInclusive = '\u{0B00}'..='\u{0B7F}'; +pub const TAMIL: std::ops::RangeInclusive = '\u{0B80}'..='\u{0BFF}'; +pub const TELUGU: std::ops::RangeInclusive = '\u{0C00}'..='\u{0C7F}'; +pub const KANNADA: std::ops::RangeInclusive = '\u{0C80}'..='\u{0CFF}'; +pub const MALAYALAM: std::ops::RangeInclusive = '\u{0D00}'..='\u{0D7F}'; +pub const SINHALA: std::ops::RangeInclusive = '\u{0D80}'..='\u{0DFF}'; +pub const THAI: std::ops::RangeInclusive = '\u{0E00}'..='\u{0E7F}'; +pub const LAO: std::ops::RangeInclusive = '\u{0E80}'..='\u{0EFF}'; +pub const TIBETAN: std::ops::RangeInclusive = '\u{0F00}'..='\u{0FFF}'; +pub const MYANMAR: std::ops::RangeInclusive = '\u{1000}'..='\u{109F}'; +pub const GEORGIAN: std::ops::RangeInclusive = '\u{10A0}'..='\u{10FF}'; +pub const HANGUL_JAMO: std::ops::RangeInclusive = '\u{1100}'..='\u{11FF}'; +pub const ETHIOPIC: std::ops::RangeInclusive = '\u{1200}'..='\u{137F}'; +pub const ETHIOPIC_SUPPLEMENT: std::ops::RangeInclusive = '\u{1380}'..='\u{139F}'; +pub const CHEROKEE: std::ops::RangeInclusive = '\u{13A0}'..='\u{13FF}'; +pub const UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS: std::ops::RangeInclusive = '\u{1400}'..='\u{167F}'; +pub const OGHAM: std::ops::RangeInclusive = '\u{1680}'..='\u{169F}'; +pub const RUNIC: std::ops::RangeInclusive = '\u{16A0}'..='\u{16FF}'; +pub const TAGALOG: std::ops::RangeInclusive = '\u{1700}'..='\u{171F}'; +pub const HANUNOO: std::ops::RangeInclusive = '\u{1720}'..='\u{173F}'; +pub const BUHID: std::ops::RangeInclusive = '\u{1740}'..='\u{175F}'; +pub const TAGBANWA: std::ops::RangeInclusive = '\u{1760}'..='\u{177F}'; +pub const KHMER: std::ops::RangeInclusive = '\u{1780}'..='\u{17FF}'; +pub const MONGOLIAN: std::ops::RangeInclusive = '\u{1800}'..='\u{18AF}'; +pub const UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED: std::ops::RangeInclusive = '\u{18B0}'..='\u{18FF}'; +pub const LIMBU: std::ops::RangeInclusive = '\u{1900}'..='\u{194F}'; +pub const TAI_LE: std::ops::RangeInclusive = '\u{1950}'..='\u{197F}'; +pub const NEW_TAI_LUE: std::ops::RangeInclusive = '\u{1980}'..='\u{19DF}'; +pub const KHMER_SYMBOLS: std::ops::RangeInclusive = '\u{19E0}'..='\u{19FF}'; +pub const BUGINESE: std::ops::RangeInclusive = '\u{1A00}'..='\u{1A1F}'; +pub const TAI_THAM: std::ops::RangeInclusive = '\u{1A20}'..='\u{1AAF}'; +pub const COMBINING_DIACRITICAL_MARKS_EXTENDED: std::ops::RangeInclusive = '\u{1AB0}'..='\u{1AFF}'; +pub const BALINESE: std::ops::RangeInclusive = '\u{1B00}'..='\u{1B7F}'; +pub const SUNDANESE: std::ops::RangeInclusive = '\u{1B80}'..='\u{1BBF}'; +pub const BATAK: std::ops::RangeInclusive = '\u{1BC0}'..='\u{1BFF}'; +pub const LEPCHA: std::ops::RangeInclusive = '\u{1C00}'..='\u{1C4F}'; +pub const OL_CHIKI: std::ops::RangeInclusive = '\u{1C50}'..='\u{1C7F}'; +pub const CYRILLIC_EXTENDED_C: std::ops::RangeInclusive = '\u{1C80}'..='\u{1C8F}'; +pub const GEORGIAN_EXTENDED: std::ops::RangeInclusive = '\u{1C90}'..='\u{1CBF}'; +pub const SUNDANESE_SUPPLEMENT: std::ops::RangeInclusive = '\u{1CC0}'..='\u{1CCF}'; +pub const VEDIC_EXTENSIONS: std::ops::RangeInclusive = '\u{1CD0}'..='\u{1CFF}'; +pub const PHONETIC_EXTENSIONS: std::ops::RangeInclusive = '\u{1D00}'..='\u{1D7F}'; +pub const PHONETIC_EXTENSIONS_SUPPLEMENT: std::ops::RangeInclusive = '\u{1D80}'..='\u{1DBF}'; +pub const COMBINING_DIACRITICAL_MARKS_SUPPLEMENT: std::ops::RangeInclusive = '\u{1DC0}'..='\u{1DFF}'; +pub const LATIN_EXTENDED_ADDITIONAL: std::ops::RangeInclusive = '\u{1E00}'..='\u{1EFF}'; +pub const GREEK_EXTENDED: std::ops::RangeInclusive = '\u{1F00}'..='\u{1FFF}'; +pub const GENERAL_PUNCTUATION: std::ops::RangeInclusive = '\u{2000}'..='\u{206F}'; +pub const SUPERSCRIPTS_AND_SUBSCRIPTS: std::ops::RangeInclusive = '\u{2070}'..='\u{209F}'; +pub const CURRENCY_SYMBOLS: std::ops::RangeInclusive = '\u{20A0}'..='\u{20CF}'; +pub const COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS: std::ops::RangeInclusive = '\u{20D0}'..='\u{20FF}'; +pub const LETTERLIKE_SYMBOLS: std::ops::RangeInclusive = '\u{2100}'..='\u{214F}'; +pub const NUMBER_FORMS: std::ops::RangeInclusive = '\u{2150}'..='\u{218F}'; +pub const ARROWS: std::ops::RangeInclusive = '\u{2190}'..='\u{21FF}'; +pub const MATHEMATICAL_OPERATORS: std::ops::RangeInclusive = '\u{2200}'..='\u{22FF}'; +pub const MISCELLANEOUS_TECHNICAL: std::ops::RangeInclusive = '\u{2300}'..='\u{23FF}'; +pub const CONTROL_PICTURES: std::ops::RangeInclusive = '\u{2400}'..='\u{243F}'; +pub const OPTICAL_CHARACTER_RECOGNITION: std::ops::RangeInclusive = '\u{2440}'..='\u{245F}'; +pub const ENCLOSED_ALPHANUMERICS: std::ops::RangeInclusive = '\u{2460}'..='\u{24FF}'; +pub const BOX_DRAWING: std::ops::RangeInclusive = '\u{2500}'..='\u{257F}'; +pub const BLOCK_ELEMENTS: std::ops::RangeInclusive = '\u{2580}'..='\u{259F}'; +pub const GEOMETRIC_SHAPES: std::ops::RangeInclusive = '\u{25A0}'..='\u{25FF}'; +pub const MISCELLANEOUS_SYMBOLS: std::ops::RangeInclusive = '\u{2600}'..='\u{26FF}'; +pub const DINGBATS: std::ops::RangeInclusive = '\u{2700}'..='\u{27BF}'; +pub const MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A: std::ops::RangeInclusive = '\u{27C0}'..='\u{27EF}'; +pub const SUPPLEMENTAL_ARROWS_A: std::ops::RangeInclusive = '\u{27F0}'..='\u{27FF}'; +pub const BRAILLE_PATTERNS: std::ops::RangeInclusive = '\u{2800}'..='\u{28FF}'; +pub const SUPPLEMENTAL_ARROWS_B: std::ops::RangeInclusive = '\u{2900}'..='\u{297F}'; +pub const MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B: std::ops::RangeInclusive = '\u{2980}'..='\u{29FF}'; +pub const SUPPLEMENTAL_MATHEMATICAL_OPERATORS: std::ops::RangeInclusive = '\u{2A00}'..='\u{2AFF}'; +pub const MISCELLANEOUS_SYMBOLS_AND_ARROWS: std::ops::RangeInclusive = '\u{2B00}'..='\u{2BFF}'; +pub const GLAGOLITIC: std::ops::RangeInclusive = '\u{2C00}'..='\u{2C5F}'; +pub const LATIN_EXTENDED_C: std::ops::RangeInclusive = '\u{2C60}'..='\u{2C7F}'; +pub const COPTIC: std::ops::RangeInclusive = '\u{2C80}'..='\u{2CFF}'; +pub const GEORGIAN_SUPPLEMENT: std::ops::RangeInclusive = '\u{2D00}'..='\u{2D2F}'; +pub const TIFINAGH: std::ops::RangeInclusive = '\u{2D30}'..='\u{2D7F}'; +pub const ETHIOPIC_EXTENDED: std::ops::RangeInclusive = '\u{2D80}'..='\u{2DDF}'; +pub const CYRILLIC_EXTENDED_A: std::ops::RangeInclusive = '\u{2DE0}'..='\u{2DFF}'; +pub const SUPPLEMENTAL_PUNCTUATION: std::ops::RangeInclusive = '\u{2E00}'..='\u{2E7F}'; +pub const CJK_RADICALS_SUPPLEMENT: std::ops::RangeInclusive = '\u{2E80}'..='\u{2EFF}'; +pub const KANGXI_RADICALS: std::ops::RangeInclusive = '\u{2F00}'..='\u{2FDF}'; +pub const IDEOGRAPHIC_DESCRIPTION_CHARACTERS: std::ops::RangeInclusive = '\u{2FF0}'..='\u{2FFF}'; +pub const CJK_SYMBOLS_AND_PUNCTUATION: std::ops::RangeInclusive = '\u{3000}'..='\u{303F}'; +pub const HIRAGANA: std::ops::RangeInclusive = '\u{3040}'..='\u{309F}'; +pub const KATAKANA: std::ops::RangeInclusive = '\u{30A0}'..='\u{30FF}'; +pub const BOPOMOFO: std::ops::RangeInclusive = '\u{3100}'..='\u{312F}'; +pub const HANGUL_COMPATIBILITY_JAMO: std::ops::RangeInclusive = '\u{3130}'..='\u{318F}'; +pub const KANBUN: std::ops::RangeInclusive = '\u{3190}'..='\u{319F}'; +pub const BOPOMOFO_EXTENDED: std::ops::RangeInclusive = '\u{31A0}'..='\u{31BF}'; +pub const CJK_STROKES: std::ops::RangeInclusive = '\u{31C0}'..='\u{31EF}'; +pub const KATAKANA_PHONETIC_EXTENSIONS: std::ops::RangeInclusive = '\u{31F0}'..='\u{31FF}'; +pub const ENCLOSED_CJK_LETTERS_AND_MONTHS: std::ops::RangeInclusive = '\u{3200}'..='\u{32FF}'; +pub const CJK_COMPATIBILITY: std::ops::RangeInclusive = '\u{3300}'..='\u{33FF}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: std::ops::RangeInclusive = '\u{3400}'..='\u{4DBF}'; +pub const YIJING_HEXAGRAM_SYMBOLS: std::ops::RangeInclusive = '\u{4DC0}'..='\u{4DFF}'; +pub const CJK_UNIFIED_IDEOGRAPHS: std::ops::RangeInclusive = '\u{4E00}'..='\u{9FFF}'; +pub const YI_SYLLABLES: std::ops::RangeInclusive = '\u{A000}'..='\u{A48F}'; +pub const YI_RADICALS: std::ops::RangeInclusive = '\u{A490}'..='\u{A4CF}'; +pub const LISU: std::ops::RangeInclusive = '\u{A4D0}'..='\u{A4FF}'; +pub const VAI: std::ops::RangeInclusive = '\u{A500}'..='\u{A63F}'; +pub const CYRILLIC_EXTENDED_B: std::ops::RangeInclusive = '\u{A640}'..='\u{A69F}'; +pub const BAMUM: std::ops::RangeInclusive = '\u{A6A0}'..='\u{A6FF}'; +pub const MODIFIER_TONE_LETTERS: std::ops::RangeInclusive = '\u{A700}'..='\u{A71F}'; +pub const LATIN_EXTENDED_D: std::ops::RangeInclusive = '\u{A720}'..='\u{A7FF}'; +pub const SYLOTI_NAGRI: std::ops::RangeInclusive = '\u{A800}'..='\u{A82F}'; +pub const COMMON_INDIC_NUMBER_FORMS: std::ops::RangeInclusive = '\u{A830}'..='\u{A83F}'; +pub const PHAGS_PA: std::ops::RangeInclusive = '\u{A840}'..='\u{A87F}'; +pub const SAURASHTRA: std::ops::RangeInclusive = '\u{A880}'..='\u{A8DF}'; +pub const DEVANAGARI_EXTENDED: std::ops::RangeInclusive = '\u{A8E0}'..='\u{A8FF}'; +pub const KAYAH_LI: std::ops::RangeInclusive = '\u{A900}'..='\u{A92F}'; +pub const REJANG: std::ops::RangeInclusive = '\u{A930}'..='\u{A95F}'; +pub const HANGUL_JAMO_EXTENDED_A: std::ops::RangeInclusive = '\u{A960}'..='\u{A97F}'; +pub const JAVANESE: std::ops::RangeInclusive = '\u{A980}'..='\u{A9DF}'; +pub const MYANMAR_EXTENDED_B: std::ops::RangeInclusive = '\u{A9E0}'..='\u{A9FF}'; +pub const CHAM: std::ops::RangeInclusive = '\u{AA00}'..='\u{AA5F}'; +pub const MYANMAR_EXTENDED_A: std::ops::RangeInclusive = '\u{AA60}'..='\u{AA7F}'; +pub const TAI_VIET: std::ops::RangeInclusive = '\u{AA80}'..='\u{AADF}'; +pub const MEETEI_MAYEK_EXTENSIONS: std::ops::RangeInclusive = '\u{AAE0}'..='\u{AAFF}'; +pub const ETHIOPIC_EXTENDED_A: std::ops::RangeInclusive = '\u{AB00}'..='\u{AB2F}'; +pub const LATIN_EXTENDED_E: std::ops::RangeInclusive = '\u{AB30}'..='\u{AB6F}'; +pub const CHEROKEE_SUPPLEMENT: std::ops::RangeInclusive = '\u{AB70}'..='\u{ABBF}'; +pub const MEETEI_MAYEK: std::ops::RangeInclusive = '\u{ABC0}'..='\u{ABFF}'; +pub const HANGUL_SYLLABLES: std::ops::RangeInclusive = '\u{AC00}'..='\u{D7AF}'; +pub const HANGUL_JAMO_EXTENDED_B: std::ops::RangeInclusive = '\u{D7B0}'..='\u{D7FF}'; +pub const PRIVATE_USE_AREA: std::ops::RangeInclusive = '\u{E000}'..='\u{F8FF}'; +pub const CJK_COMPATIBILITY_IDEOGRAPHS: std::ops::RangeInclusive = '\u{F900}'..='\u{FAFF}'; +pub const ALPHABETIC_PRESENTATION_FORMS: std::ops::RangeInclusive = '\u{FB00}'..='\u{FB4F}'; +pub const ARABIC_PRESENTATION_FORMS_A: std::ops::RangeInclusive = '\u{FB50}'..='\u{FDFF}'; +pub const VARIATION_SELECTORS: std::ops::RangeInclusive = '\u{FE00}'..='\u{FE0F}'; +pub const VERTICAL_FORMS: std::ops::RangeInclusive = '\u{FE10}'..='\u{FE1F}'; +pub const COMBINING_HALF_MARKS: std::ops::RangeInclusive = '\u{FE20}'..='\u{FE2F}'; +pub const CJK_COMPATIBILITY_FORMS: std::ops::RangeInclusive = '\u{FE30}'..='\u{FE4F}'; +pub const SMALL_FORM_VARIANTS: std::ops::RangeInclusive = '\u{FE50}'..='\u{FE6F}'; +pub const ARABIC_PRESENTATION_FORMS_B: std::ops::RangeInclusive = '\u{FE70}'..='\u{FEFF}'; +pub const HALFWIDTH_AND_FULLWIDTH_FORMS: std::ops::RangeInclusive = '\u{FF00}'..='\u{FFEF}'; +pub const SPECIALS: std::ops::RangeInclusive = '\u{FFF0}'..='\u{FFFF}'; +pub const LINEAR_B_SYLLABARY: std::ops::RangeInclusive = '\u{10000}'..='\u{1007F}'; +pub const LINEAR_B_IDEOGRAMS: std::ops::RangeInclusive = '\u{10080}'..='\u{100FF}'; +pub const AEGEAN_NUMBERS: std::ops::RangeInclusive = '\u{10100}'..='\u{1013F}'; +pub const ANCIENT_GREEK_NUMBERS: std::ops::RangeInclusive = '\u{10140}'..='\u{1018F}'; +pub const ANCIENT_SYMBOLS: std::ops::RangeInclusive = '\u{10190}'..='\u{101CF}'; +pub const PHAISTOS_DISC: std::ops::RangeInclusive = '\u{101D0}'..='\u{101FF}'; +pub const LYCIAN: std::ops::RangeInclusive = '\u{10280}'..='\u{1029F}'; +pub const CARIAN: std::ops::RangeInclusive = '\u{102A0}'..='\u{102DF}'; +pub const COPTIC_EPACT_NUMBERS: std::ops::RangeInclusive = '\u{102E0}'..='\u{102FF}'; +pub const OLD_ITALIC: std::ops::RangeInclusive = '\u{10300}'..='\u{1032F}'; +pub const GOTHIC: std::ops::RangeInclusive = '\u{10330}'..='\u{1034F}'; +pub const OLD_PERMIC: std::ops::RangeInclusive = '\u{10350}'..='\u{1037F}'; +pub const UGARITIC: std::ops::RangeInclusive = '\u{10380}'..='\u{1039F}'; +pub const OLD_PERSIAN: std::ops::RangeInclusive = '\u{103A0}'..='\u{103DF}'; +pub const DESERET: std::ops::RangeInclusive = '\u{10400}'..='\u{1044F}'; +pub const SHAVIAN: std::ops::RangeInclusive = '\u{10450}'..='\u{1047F}'; +pub const OSMANYA: std::ops::RangeInclusive = '\u{10480}'..='\u{104AF}'; +pub const OSAGE: std::ops::RangeInclusive = '\u{104B0}'..='\u{104FF}'; +pub const ELBASAN: std::ops::RangeInclusive = '\u{10500}'..='\u{1052F}'; +pub const CAUCASIAN_ALBANIAN: std::ops::RangeInclusive = '\u{10530}'..='\u{1056F}'; +pub const VITHKUQI: std::ops::RangeInclusive = '\u{10570}'..='\u{105BF}'; +pub const LINEAR_A: std::ops::RangeInclusive = '\u{10600}'..='\u{1077F}'; +pub const LATIN_EXTENDED_F: std::ops::RangeInclusive = '\u{10780}'..='\u{107BF}'; +pub const CYPRIOT_SYLLABARY: std::ops::RangeInclusive = '\u{10800}'..='\u{1083F}'; +pub const IMPERIAL_ARAMAIC: std::ops::RangeInclusive = '\u{10840}'..='\u{1085F}'; +pub const PALMYRENE: std::ops::RangeInclusive = '\u{10860}'..='\u{1087F}'; +pub const NABATAEAN: std::ops::RangeInclusive = '\u{10880}'..='\u{108AF}'; +pub const HATRAN: std::ops::RangeInclusive = '\u{108E0}'..='\u{108FF}'; +pub const PHOENICIAN: std::ops::RangeInclusive = '\u{10900}'..='\u{1091F}'; +pub const LYDIAN: std::ops::RangeInclusive = '\u{10920}'..='\u{1093F}'; +pub const MEROITIC_HIEROGLYPHS: std::ops::RangeInclusive = '\u{10980}'..='\u{1099F}'; +pub const MEROITIC_CURSIVE: std::ops::RangeInclusive = '\u{109A0}'..='\u{109FF}'; +pub const KHAROSHTHI: std::ops::RangeInclusive = '\u{10A00}'..='\u{10A5F}'; +pub const OLD_SOUTH_ARABIAN: std::ops::RangeInclusive = '\u{10A60}'..='\u{10A7F}'; +pub const OLD_NORTH_ARABIAN: std::ops::RangeInclusive = '\u{10A80}'..='\u{10A9F}'; +pub const MANICHAEAN: std::ops::RangeInclusive = '\u{10AC0}'..='\u{10AFF}'; +pub const AVESTAN: std::ops::RangeInclusive = '\u{10B00}'..='\u{10B3F}'; +pub const INSCRIPTIONAL_PARTHIAN: std::ops::RangeInclusive = '\u{10B40}'..='\u{10B5F}'; +pub const INSCRIPTIONAL_PAHLAVI: std::ops::RangeInclusive = '\u{10B60}'..='\u{10B7F}'; +pub const PSALTER_PAHLAVI: std::ops::RangeInclusive = '\u{10B80}'..='\u{10BAF}'; +pub const OLD_TURKIC: std::ops::RangeInclusive = '\u{10C00}'..='\u{10C4F}'; +pub const OLD_HUNGARIAN: std::ops::RangeInclusive = '\u{10C80}'..='\u{10CFF}'; +pub const HANIFI_ROHINGYA: std::ops::RangeInclusive = '\u{10D00}'..='\u{10D3F}'; +pub const RUMI_NUMERAL_SYMBOLS: std::ops::RangeInclusive = '\u{10E60}'..='\u{10E7F}'; +pub const YEZIDI: std::ops::RangeInclusive = '\u{10E80}'..='\u{10EBF}'; +pub const ARABIC_EXTENDED_C: std::ops::RangeInclusive = '\u{10EC0}'..='\u{10EFF}'; +pub const OLD_SOGDIAN: std::ops::RangeInclusive = '\u{10F00}'..='\u{10F2F}'; +pub const SOGDIAN: std::ops::RangeInclusive = '\u{10F30}'..='\u{10F6F}'; +pub const OLD_UYGHUR: std::ops::RangeInclusive = '\u{10F70}'..='\u{10FAF}'; +pub const CHORASMIAN: std::ops::RangeInclusive = '\u{10FB0}'..='\u{10FDF}'; +pub const ELYMAIC: std::ops::RangeInclusive = '\u{10FE0}'..='\u{10FFF}'; +pub const BRAHMI: std::ops::RangeInclusive = '\u{11000}'..='\u{1107F}'; +pub const KAITHI: std::ops::RangeInclusive = '\u{11080}'..='\u{110CF}'; +pub const SORA_SOMPENG: std::ops::RangeInclusive = '\u{110D0}'..='\u{110FF}'; +pub const CHAKMA: std::ops::RangeInclusive = '\u{11100}'..='\u{1114F}'; +pub const MAHAJANI: std::ops::RangeInclusive = '\u{11150}'..='\u{1117F}'; +pub const SHARADA: std::ops::RangeInclusive = '\u{11180}'..='\u{111DF}'; +pub const SINHALA_ARCHAIC_NUMBERS: std::ops::RangeInclusive = '\u{111E0}'..='\u{111FF}'; +pub const KHOJKI: std::ops::RangeInclusive = '\u{11200}'..='\u{1124F}'; +pub const MULTANI: std::ops::RangeInclusive = '\u{11280}'..='\u{112AF}'; +pub const KHUDAWADI: std::ops::RangeInclusive = '\u{112B0}'..='\u{112FF}'; +pub const GRANTHA: std::ops::RangeInclusive = '\u{11300}'..='\u{1137F}'; +pub const NEWA: std::ops::RangeInclusive = '\u{11400}'..='\u{1147F}'; +pub const TIRHUTA: std::ops::RangeInclusive = '\u{11480}'..='\u{114DF}'; +pub const SIDDHAM: std::ops::RangeInclusive = '\u{11580}'..='\u{115FF}'; +pub const MODI: std::ops::RangeInclusive = '\u{11600}'..='\u{1165F}'; +pub const MONGOLIAN_SUPPLEMENT: std::ops::RangeInclusive = '\u{11660}'..='\u{1167F}'; +pub const TAKRI: std::ops::RangeInclusive = '\u{11680}'..='\u{116CF}'; +pub const AHOM: std::ops::RangeInclusive = '\u{11700}'..='\u{1174F}'; +pub const DOGRA: std::ops::RangeInclusive = '\u{11800}'..='\u{1184F}'; +pub const WARANG_CITI: std::ops::RangeInclusive = '\u{118A0}'..='\u{118FF}'; +pub const DIVES_AKURU: std::ops::RangeInclusive = '\u{11900}'..='\u{1195F}'; +pub const NANDINAGARI: std::ops::RangeInclusive = '\u{119A0}'..='\u{119FF}'; +pub const ZANABAZAR_SQUARE: std::ops::RangeInclusive = '\u{11A00}'..='\u{11A4F}'; +pub const SOYOMBO: std::ops::RangeInclusive = '\u{11A50}'..='\u{11AAF}'; +pub const UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A: std::ops::RangeInclusive = '\u{11AB0}'..='\u{11ABF}'; +pub const PAU_CIN_HAU: std::ops::RangeInclusive = '\u{11AC0}'..='\u{11AFF}'; +pub const DEVANAGARI_EXTENDED_A: std::ops::RangeInclusive = '\u{11B00}'..='\u{11B5F}'; +pub const BHAIKSUKI: std::ops::RangeInclusive = '\u{11C00}'..='\u{11C6F}'; +pub const MARCHEN: std::ops::RangeInclusive = '\u{11C70}'..='\u{11CBF}'; +pub const MASARAM_GONDI: std::ops::RangeInclusive = '\u{11D00}'..='\u{11D5F}'; +pub const GUNJALA_GONDI: std::ops::RangeInclusive = '\u{11D60}'..='\u{11DAF}'; +pub const MAKASAR: std::ops::RangeInclusive = '\u{11EE0}'..='\u{11EFF}'; +pub const KAWI: std::ops::RangeInclusive = '\u{11F00}'..='\u{11F5F}'; +pub const LISU_SUPPLEMENT: std::ops::RangeInclusive = '\u{11FB0}'..='\u{11FBF}'; +pub const TAMIL_SUPPLEMENT: std::ops::RangeInclusive = '\u{11FC0}'..='\u{11FFF}'; +pub const CUNEIFORM: std::ops::RangeInclusive = '\u{12000}'..='\u{123FF}'; +pub const CUNEIFORM_NUMBERS_AND_PUNCTUATION: std::ops::RangeInclusive = '\u{12400}'..='\u{1247F}'; +pub const EARLY_DYNASTIC_CUNEIFORM: std::ops::RangeInclusive = '\u{12480}'..='\u{1254F}'; +pub const CYPRO_MINOAN: std::ops::RangeInclusive = '\u{12F90}'..='\u{12FFF}'; +pub const EGYPTIAN_HIEROGLYPHS: std::ops::RangeInclusive = '\u{13000}'..='\u{1342F}'; +pub const EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS: std::ops::RangeInclusive = '\u{13430}'..='\u{1345F}'; +pub const ANATOLIAN_HIEROGLYPHS: std::ops::RangeInclusive = '\u{14400}'..='\u{1467F}'; +pub const BAMUM_SUPPLEMENT: std::ops::RangeInclusive = '\u{16800}'..='\u{16A3F}'; +pub const MRO: std::ops::RangeInclusive = '\u{16A40}'..='\u{16A6F}'; +pub const TANGSA: std::ops::RangeInclusive = '\u{16A70}'..='\u{16ACF}'; +pub const BASSA_VAH: std::ops::RangeInclusive = '\u{16AD0}'..='\u{16AFF}'; +pub const PAHAWH_HMONG: std::ops::RangeInclusive = '\u{16B00}'..='\u{16B8F}'; +pub const MEDEFAIDRIN: std::ops::RangeInclusive = '\u{16E40}'..='\u{16E9F}'; +pub const MIAO: std::ops::RangeInclusive = '\u{16F00}'..='\u{16F9F}'; +pub const IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION: std::ops::RangeInclusive = '\u{16FE0}'..='\u{16FFF}'; +pub const TANGUT: std::ops::RangeInclusive = '\u{17000}'..='\u{187FF}'; +pub const TANGUT_COMPONENTS: std::ops::RangeInclusive = '\u{18800}'..='\u{18AFF}'; +pub const KHITAN_SMALL_SCRIPT: std::ops::RangeInclusive = '\u{18B00}'..='\u{18CFF}'; +pub const TANGUT_SUPPLEMENT: std::ops::RangeInclusive = '\u{18D00}'..='\u{18D7F}'; +pub const KANA_EXTENDED_B: std::ops::RangeInclusive = '\u{1AFF0}'..='\u{1AFFF}'; +pub const KANA_SUPPLEMENT: std::ops::RangeInclusive = '\u{1B000}'..='\u{1B0FF}'; +pub const KANA_EXTENDED_A: std::ops::RangeInclusive = '\u{1B100}'..='\u{1B12F}'; +pub const SMALL_KANA_EXTENSION: std::ops::RangeInclusive = '\u{1B130}'..='\u{1B16F}'; +pub const NUSHU: std::ops::RangeInclusive = '\u{1B170}'..='\u{1B2FF}'; +pub const DUPLOYAN: std::ops::RangeInclusive = '\u{1BC00}'..='\u{1BC9F}'; +pub const SHORTHAND_FORMAT_CONTROLS: std::ops::RangeInclusive = '\u{1BCA0}'..='\u{1BCAF}'; +pub const ZNAMENNY_MUSICAL_NOTATION: std::ops::RangeInclusive = '\u{1CF00}'..='\u{1CFCF}'; +pub const BYZANTINE_MUSICAL_SYMBOLS: std::ops::RangeInclusive = '\u{1D000}'..='\u{1D0FF}'; +pub const MUSICAL_SYMBOLS: std::ops::RangeInclusive = '\u{1D100}'..='\u{1D1FF}'; +pub const ANCIENT_GREEK_MUSICAL_NOTATION: std::ops::RangeInclusive = '\u{1D200}'..='\u{1D24F}'; +pub const KAKTOVIK_NUMERALS: std::ops::RangeInclusive = '\u{1D2C0}'..='\u{1D2DF}'; +pub const MAYAN_NUMERALS: std::ops::RangeInclusive = '\u{1D2E0}'..='\u{1D2FF}'; +pub const TAI_XUAN_JING_SYMBOLS: std::ops::RangeInclusive = '\u{1D300}'..='\u{1D35F}'; +pub const COUNTING_ROD_NUMERALS: std::ops::RangeInclusive = '\u{1D360}'..='\u{1D37F}'; +pub const MATHEMATICAL_ALPHANUMERIC_SYMBOLS: std::ops::RangeInclusive = '\u{1D400}'..='\u{1D7FF}'; +pub const SUTTON_SIGNWRITING: std::ops::RangeInclusive = '\u{1D800}'..='\u{1DAAF}'; +pub const LATIN_EXTENDED_G: std::ops::RangeInclusive = '\u{1DF00}'..='\u{1DFFF}'; +pub const GLAGOLITIC_SUPPLEMENT: std::ops::RangeInclusive = '\u{1E000}'..='\u{1E02F}'; +pub const CYRILLIC_EXTENDED_D: std::ops::RangeInclusive = '\u{1E030}'..='\u{1E08F}'; +pub const NYIAKENG_PUACHUE_HMONG: std::ops::RangeInclusive = '\u{1E100}'..='\u{1E14F}'; +pub const TOTO: std::ops::RangeInclusive = '\u{1E290}'..='\u{1E2BF}'; +pub const WANCHO: std::ops::RangeInclusive = '\u{1E2C0}'..='\u{1E2FF}'; +pub const NAG_MUNDARI: std::ops::RangeInclusive = '\u{1E4D0}'..='\u{1E4FF}'; +pub const ETHIOPIC_EXTENDED_B: std::ops::RangeInclusive = '\u{1E7E0}'..='\u{1E7FF}'; +pub const MENDE_KIKAKUI: std::ops::RangeInclusive = '\u{1E800}'..='\u{1E8DF}'; +pub const ADLAM: std::ops::RangeInclusive = '\u{1E900}'..='\u{1E95F}'; +pub const INDIC_SIYAQ_NUMBERS: std::ops::RangeInclusive = '\u{1EC70}'..='\u{1ECBF}'; +pub const OTTOMAN_SIYAQ_NUMBERS: std::ops::RangeInclusive = '\u{1ED00}'..='\u{1ED4F}'; +pub const ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS: std::ops::RangeInclusive = '\u{1EE00}'..='\u{1EEFF}'; +pub const MAHJONG_TILES: std::ops::RangeInclusive = '\u{1F000}'..='\u{1F02F}'; +pub const DOMINO_TILES: std::ops::RangeInclusive = '\u{1F030}'..='\u{1F09F}'; +pub const PLAYING_CARDS: std::ops::RangeInclusive = '\u{1F0A0}'..='\u{1F0FF}'; +pub const ENCLOSED_ALPHANUMERIC_SUPPLEMENT: std::ops::RangeInclusive = '\u{1F100}'..='\u{1F1FF}'; +pub const ENCLOSED_IDEOGRAPHIC_SUPPLEMENT: std::ops::RangeInclusive = '\u{1F200}'..='\u{1F2FF}'; +pub const MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS: std::ops::RangeInclusive = '\u{1F300}'..='\u{1F5FF}'; +pub const EMOTICONS: std::ops::RangeInclusive = '\u{1F600}'..='\u{1F64F}'; +pub const ORNAMENTAL_DINGBATS: std::ops::RangeInclusive = '\u{1F650}'..='\u{1F67F}'; +pub const TRANSPORT_AND_MAP_SYMBOLS: std::ops::RangeInclusive = '\u{1F680}'..='\u{1F6FF}'; +pub const ALCHEMICAL_SYMBOLS: std::ops::RangeInclusive = '\u{1F700}'..='\u{1F77F}'; +pub const GEOMETRIC_SHAPES_EXTENDED: std::ops::RangeInclusive = '\u{1F780}'..='\u{1F7FF}'; +pub const SUPPLEMENTAL_ARROWS_C: std::ops::RangeInclusive = '\u{1F800}'..='\u{1F8FF}'; +pub const SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS: std::ops::RangeInclusive = '\u{1F900}'..='\u{1F9FF}'; +pub const CHESS_SYMBOLS: std::ops::RangeInclusive = '\u{1FA00}'..='\u{1FA6F}'; +pub const SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A: std::ops::RangeInclusive = '\u{1FA70}'..='\u{1FAFF}'; +pub const SYMBOLS_FOR_LEGACY_COMPUTING: std::ops::RangeInclusive = '\u{1FB00}'..='\u{1FBFF}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B: std::ops::RangeInclusive = '\u{20000}'..='\u{2A6DF}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C: std::ops::RangeInclusive = '\u{2A700}'..='\u{2B73F}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D: std::ops::RangeInclusive = '\u{2B740}'..='\u{2B81F}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E: std::ops::RangeInclusive = '\u{2B820}'..='\u{2CEAF}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F: std::ops::RangeInclusive = '\u{2CEB0}'..='\u{2EBEF}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I: std::ops::RangeInclusive = '\u{2EBF0}'..='\u{2EE5F}'; +pub const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT: std::ops::RangeInclusive = '\u{2F800}'..='\u{2FA1F}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G: std::ops::RangeInclusive = '\u{30000}'..='\u{3134F}'; +pub const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H: std::ops::RangeInclusive = '\u{31350}'..='\u{323AF}'; +pub const TAGS: std::ops::RangeInclusive = '\u{E0000}'..='\u{E007F}'; +pub const VARIATION_SELECTORS_SUPPLEMENT: std::ops::RangeInclusive = '\u{E0100}'..='\u{E01EF}'; +pub const SUPPLEMENTARY_PRIVATE_USE_AREA_A: std::ops::RangeInclusive = '\u{F0000}'..='\u{FFFFF}'; +pub const SUPPLEMENTARY_PRIVATE_USE_AREA_B: std::ops::RangeInclusive = '\u{100000}'..='\u{10FFFF}'; +pub static UNICODE_BLOCKS: phf::Map<&'static str, std::ops::RangeInclusive> = phf::phf_map! { + "Basic Latin" => BASIC_LATIN, + "Latin-1 Supplement" => LATIN_1_SUPPLEMENT, + "Latin Extended-A" => LATIN_EXTENDED_A, + "Latin Extended-B" => LATIN_EXTENDED_B, + "IPA Extensions" => IPA_EXTENSIONS, + "Spacing Modifier Letters" => SPACING_MODIFIER_LETTERS, + "Combining Diacritical Marks" => COMBINING_DIACRITICAL_MARKS, + "Greek and Coptic" => GREEK_AND_COPTIC, + "Cyrillic" => CYRILLIC, + "Cyrillic Supplement" => CYRILLIC_SUPPLEMENT, + "Armenian" => ARMENIAN, + "Hebrew" => HEBREW, + "Arabic" => ARABIC, + "Syriac" => SYRIAC, + "Arabic Supplement" => ARABIC_SUPPLEMENT, + "Thaana" => THAANA, + "NKo" => NKO, + "Samaritan" => SAMARITAN, + "Mandaic" => MANDAIC, + "Syriac Supplement" => SYRIAC_SUPPLEMENT, + "Arabic Extended-B" => ARABIC_EXTENDED_B, + "Arabic Extended-A" => ARABIC_EXTENDED_A, + "Devanagari" => DEVANAGARI, + "Bengali" => BENGALI, + "Gurmukhi" => GURMUKHI, + "Gujarati" => GUJARATI, + "Oriya" => ORIYA, + "Tamil" => TAMIL, + "Telugu" => TELUGU, + "Kannada" => KANNADA, + "Malayalam" => MALAYALAM, + "Sinhala" => SINHALA, + "Thai" => THAI, + "Lao" => LAO, + "Tibetan" => TIBETAN, + "Myanmar" => MYANMAR, + "Georgian" => GEORGIAN, + "Hangul Jamo" => HANGUL_JAMO, + "Ethiopic" => ETHIOPIC, + "Ethiopic Supplement" => ETHIOPIC_SUPPLEMENT, + "Cherokee" => CHEROKEE, + "Unified Canadian Aboriginal Syllabics" => UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, + "Ogham" => OGHAM, + "Runic" => RUNIC, + "Tagalog" => TAGALOG, + "Hanunoo" => HANUNOO, + "Buhid" => BUHID, + "Tagbanwa" => TAGBANWA, + "Khmer" => KHMER, + "Mongolian" => MONGOLIAN, + "Unified Canadian Aboriginal Syllabics Extended" => UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, + "Limbu" => LIMBU, + "Tai Le" => TAI_LE, + "New Tai Lue" => NEW_TAI_LUE, + "Khmer Symbols" => KHMER_SYMBOLS, + "Buginese" => BUGINESE, + "Tai Tham" => TAI_THAM, + "Combining Diacritical Marks Extended" => COMBINING_DIACRITICAL_MARKS_EXTENDED, + "Balinese" => BALINESE, + "Sundanese" => SUNDANESE, + "Batak" => BATAK, + "Lepcha" => LEPCHA, + "Ol Chiki" => OL_CHIKI, + "Cyrillic Extended-C" => CYRILLIC_EXTENDED_C, + "Georgian Extended" => GEORGIAN_EXTENDED, + "Sundanese Supplement" => SUNDANESE_SUPPLEMENT, + "Vedic Extensions" => VEDIC_EXTENSIONS, + "Phonetic Extensions" => PHONETIC_EXTENSIONS, + "Phonetic Extensions Supplement" => PHONETIC_EXTENSIONS_SUPPLEMENT, + "Combining Diacritical Marks Supplement" => COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, + "Latin Extended Additional" => LATIN_EXTENDED_ADDITIONAL, + "Greek Extended" => GREEK_EXTENDED, + "General Punctuation" => GENERAL_PUNCTUATION, + "Superscripts and Subscripts" => SUPERSCRIPTS_AND_SUBSCRIPTS, + "Currency Symbols" => CURRENCY_SYMBOLS, + "Combining Diacritical Marks for Symbols" => COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, + "Letterlike Symbols" => LETTERLIKE_SYMBOLS, + "Number Forms" => NUMBER_FORMS, + "Arrows" => ARROWS, + "Mathematical Operators" => MATHEMATICAL_OPERATORS, + "Miscellaneous Technical" => MISCELLANEOUS_TECHNICAL, + "Control Pictures" => CONTROL_PICTURES, + "Optical Character Recognition" => OPTICAL_CHARACTER_RECOGNITION, + "Enclosed Alphanumerics" => ENCLOSED_ALPHANUMERICS, + "Box Drawing" => BOX_DRAWING, + "Block Elements" => BLOCK_ELEMENTS, + "Geometric Shapes" => GEOMETRIC_SHAPES, + "Miscellaneous Symbols" => MISCELLANEOUS_SYMBOLS, + "Dingbats" => DINGBATS, + "Miscellaneous Mathematical Symbols-A" => MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, + "Supplemental Arrows-A" => SUPPLEMENTAL_ARROWS_A, + "Braille Patterns" => BRAILLE_PATTERNS, + "Supplemental Arrows-B" => SUPPLEMENTAL_ARROWS_B, + "Miscellaneous Mathematical Symbols-B" => MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, + "Supplemental Mathematical Operators" => SUPPLEMENTAL_MATHEMATICAL_OPERATORS, + "Miscellaneous Symbols and Arrows" => MISCELLANEOUS_SYMBOLS_AND_ARROWS, + "Glagolitic" => GLAGOLITIC, + "Latin Extended-C" => LATIN_EXTENDED_C, + "Coptic" => COPTIC, + "Georgian Supplement" => GEORGIAN_SUPPLEMENT, + "Tifinagh" => TIFINAGH, + "Ethiopic Extended" => ETHIOPIC_EXTENDED, + "Cyrillic Extended-A" => CYRILLIC_EXTENDED_A, + "Supplemental Punctuation" => SUPPLEMENTAL_PUNCTUATION, + "CJK Radicals Supplement" => CJK_RADICALS_SUPPLEMENT, + "Kangxi Radicals" => KANGXI_RADICALS, + "Ideographic Description Characters" => IDEOGRAPHIC_DESCRIPTION_CHARACTERS, + "CJK Symbols and Punctuation" => CJK_SYMBOLS_AND_PUNCTUATION, + "Hiragana" => HIRAGANA, + "Katakana" => KATAKANA, + "Bopomofo" => BOPOMOFO, + "Hangul Compatibility Jamo" => HANGUL_COMPATIBILITY_JAMO, + "Kanbun" => KANBUN, + "Bopomofo Extended" => BOPOMOFO_EXTENDED, + "CJK Strokes" => CJK_STROKES, + "Katakana Phonetic Extensions" => KATAKANA_PHONETIC_EXTENSIONS, + "Enclosed CJK Letters and Months" => ENCLOSED_CJK_LETTERS_AND_MONTHS, + "CJK Compatibility" => CJK_COMPATIBILITY, + "CJK Unified Ideographs Extension A" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, + "Yijing Hexagram Symbols" => YIJING_HEXAGRAM_SYMBOLS, + "CJK Unified Ideographs" => CJK_UNIFIED_IDEOGRAPHS, + "Yi Syllables" => YI_SYLLABLES, + "Yi Radicals" => YI_RADICALS, + "Lisu" => LISU, + "Vai" => VAI, + "Cyrillic Extended-B" => CYRILLIC_EXTENDED_B, + "Bamum" => BAMUM, + "Modifier Tone Letters" => MODIFIER_TONE_LETTERS, + "Latin Extended-D" => LATIN_EXTENDED_D, + "Syloti Nagri" => SYLOTI_NAGRI, + "Common Indic Number Forms" => COMMON_INDIC_NUMBER_FORMS, + "Phags-pa" => PHAGS_PA, + "Saurashtra" => SAURASHTRA, + "Devanagari Extended" => DEVANAGARI_EXTENDED, + "Kayah Li" => KAYAH_LI, + "Rejang" => REJANG, + "Hangul Jamo Extended-A" => HANGUL_JAMO_EXTENDED_A, + "Javanese" => JAVANESE, + "Myanmar Extended-B" => MYANMAR_EXTENDED_B, + "Cham" => CHAM, + "Myanmar Extended-A" => MYANMAR_EXTENDED_A, + "Tai Viet" => TAI_VIET, + "Meetei Mayek Extensions" => MEETEI_MAYEK_EXTENSIONS, + "Ethiopic Extended-A" => ETHIOPIC_EXTENDED_A, + "Latin Extended-E" => LATIN_EXTENDED_E, + "Cherokee Supplement" => CHEROKEE_SUPPLEMENT, + "Meetei Mayek" => MEETEI_MAYEK, + "Hangul Syllables" => HANGUL_SYLLABLES, + "Hangul Jamo Extended-B" => HANGUL_JAMO_EXTENDED_B, + "Private Use Area" => PRIVATE_USE_AREA, + "CJK Compatibility Ideographs" => CJK_COMPATIBILITY_IDEOGRAPHS, + "Alphabetic Presentation Forms" => ALPHABETIC_PRESENTATION_FORMS, + "Arabic Presentation Forms-A" => ARABIC_PRESENTATION_FORMS_A, + "Variation Selectors" => VARIATION_SELECTORS, + "Vertical Forms" => VERTICAL_FORMS, + "Combining Half Marks" => COMBINING_HALF_MARKS, + "CJK Compatibility Forms" => CJK_COMPATIBILITY_FORMS, + "Small Form Variants" => SMALL_FORM_VARIANTS, + "Arabic Presentation Forms-B" => ARABIC_PRESENTATION_FORMS_B, + "Halfwidth and Fullwidth Forms" => HALFWIDTH_AND_FULLWIDTH_FORMS, + "Specials" => SPECIALS, + "Linear B Syllabary" => LINEAR_B_SYLLABARY, + "Linear B Ideograms" => LINEAR_B_IDEOGRAMS, + "Aegean Numbers" => AEGEAN_NUMBERS, + "Ancient Greek Numbers" => ANCIENT_GREEK_NUMBERS, + "Ancient Symbols" => ANCIENT_SYMBOLS, + "Phaistos Disc" => PHAISTOS_DISC, + "Lycian" => LYCIAN, + "Carian" => CARIAN, + "Coptic Epact Numbers" => COPTIC_EPACT_NUMBERS, + "Old Italic" => OLD_ITALIC, + "Gothic" => GOTHIC, + "Old Permic" => OLD_PERMIC, + "Ugaritic" => UGARITIC, + "Old Persian" => OLD_PERSIAN, + "Deseret" => DESERET, + "Shavian" => SHAVIAN, + "Osmanya" => OSMANYA, + "Osage" => OSAGE, + "Elbasan" => ELBASAN, + "Caucasian Albanian" => CAUCASIAN_ALBANIAN, + "Vithkuqi" => VITHKUQI, + "Linear A" => LINEAR_A, + "Latin Extended-F" => LATIN_EXTENDED_F, + "Cypriot Syllabary" => CYPRIOT_SYLLABARY, + "Imperial Aramaic" => IMPERIAL_ARAMAIC, + "Palmyrene" => PALMYRENE, + "Nabataean" => NABATAEAN, + "Hatran" => HATRAN, + "Phoenician" => PHOENICIAN, + "Lydian" => LYDIAN, + "Meroitic Hieroglyphs" => MEROITIC_HIEROGLYPHS, + "Meroitic Cursive" => MEROITIC_CURSIVE, + "Kharoshthi" => KHAROSHTHI, + "Old South Arabian" => OLD_SOUTH_ARABIAN, + "Old North Arabian" => OLD_NORTH_ARABIAN, + "Manichaean" => MANICHAEAN, + "Avestan" => AVESTAN, + "Inscriptional Parthian" => INSCRIPTIONAL_PARTHIAN, + "Inscriptional Pahlavi" => INSCRIPTIONAL_PAHLAVI, + "Psalter Pahlavi" => PSALTER_PAHLAVI, + "Old Turkic" => OLD_TURKIC, + "Old Hungarian" => OLD_HUNGARIAN, + "Hanifi Rohingya" => HANIFI_ROHINGYA, + "Rumi Numeral Symbols" => RUMI_NUMERAL_SYMBOLS, + "Yezidi" => YEZIDI, + "Arabic Extended-C" => ARABIC_EXTENDED_C, + "Old Sogdian" => OLD_SOGDIAN, + "Sogdian" => SOGDIAN, + "Old Uyghur" => OLD_UYGHUR, + "Chorasmian" => CHORASMIAN, + "Elymaic" => ELYMAIC, + "Brahmi" => BRAHMI, + "Kaithi" => KAITHI, + "Sora Sompeng" => SORA_SOMPENG, + "Chakma" => CHAKMA, + "Mahajani" => MAHAJANI, + "Sharada" => SHARADA, + "Sinhala Archaic Numbers" => SINHALA_ARCHAIC_NUMBERS, + "Khojki" => KHOJKI, + "Multani" => MULTANI, + "Khudawadi" => KHUDAWADI, + "Grantha" => GRANTHA, + "Newa" => NEWA, + "Tirhuta" => TIRHUTA, + "Siddham" => SIDDHAM, + "Modi" => MODI, + "Mongolian Supplement" => MONGOLIAN_SUPPLEMENT, + "Takri" => TAKRI, + "Ahom" => AHOM, + "Dogra" => DOGRA, + "Warang Citi" => WARANG_CITI, + "Dives Akuru" => DIVES_AKURU, + "Nandinagari" => NANDINAGARI, + "Zanabazar Square" => ZANABAZAR_SQUARE, + "Soyombo" => SOYOMBO, + "Unified Canadian Aboriginal Syllabics Extended-A" => UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A, + "Pau Cin Hau" => PAU_CIN_HAU, + "Devanagari Extended-A" => DEVANAGARI_EXTENDED_A, + "Bhaiksuki" => BHAIKSUKI, + "Marchen" => MARCHEN, + "Masaram Gondi" => MASARAM_GONDI, + "Gunjala Gondi" => GUNJALA_GONDI, + "Makasar" => MAKASAR, + "Kawi" => KAWI, + "Lisu Supplement" => LISU_SUPPLEMENT, + "Tamil Supplement" => TAMIL_SUPPLEMENT, + "Cuneiform" => CUNEIFORM, + "Cuneiform Numbers and Punctuation" => CUNEIFORM_NUMBERS_AND_PUNCTUATION, + "Early Dynastic Cuneiform" => EARLY_DYNASTIC_CUNEIFORM, + "Cypro-Minoan" => CYPRO_MINOAN, + "Egyptian Hieroglyphs" => EGYPTIAN_HIEROGLYPHS, + "Egyptian Hieroglyph Format Controls" => EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, + "Anatolian Hieroglyphs" => ANATOLIAN_HIEROGLYPHS, + "Bamum Supplement" => BAMUM_SUPPLEMENT, + "Mro" => MRO, + "Tangsa" => TANGSA, + "Bassa Vah" => BASSA_VAH, + "Pahawh Hmong" => PAHAWH_HMONG, + "Medefaidrin" => MEDEFAIDRIN, + "Miao" => MIAO, + "Ideographic Symbols and Punctuation" => IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, + "Tangut" => TANGUT, + "Tangut Components" => TANGUT_COMPONENTS, + "Khitan Small Script" => KHITAN_SMALL_SCRIPT, + "Tangut Supplement" => TANGUT_SUPPLEMENT, + "Kana Extended-B" => KANA_EXTENDED_B, + "Kana Supplement" => KANA_SUPPLEMENT, + "Kana Extended-A" => KANA_EXTENDED_A, + "Small Kana Extension" => SMALL_KANA_EXTENSION, + "Nushu" => NUSHU, + "Duployan" => DUPLOYAN, + "Shorthand Format Controls" => SHORTHAND_FORMAT_CONTROLS, + "Znamenny Musical Notation" => ZNAMENNY_MUSICAL_NOTATION, + "Byzantine Musical Symbols" => BYZANTINE_MUSICAL_SYMBOLS, + "Musical Symbols" => MUSICAL_SYMBOLS, + "Ancient Greek Musical Notation" => ANCIENT_GREEK_MUSICAL_NOTATION, + "Kaktovik Numerals" => KAKTOVIK_NUMERALS, + "Mayan Numerals" => MAYAN_NUMERALS, + "Tai Xuan Jing Symbols" => TAI_XUAN_JING_SYMBOLS, + "Counting Rod Numerals" => COUNTING_ROD_NUMERALS, + "Mathematical Alphanumeric Symbols" => MATHEMATICAL_ALPHANUMERIC_SYMBOLS, + "Sutton SignWriting" => SUTTON_SIGNWRITING, + "Latin Extended-G" => LATIN_EXTENDED_G, + "Glagolitic Supplement" => GLAGOLITIC_SUPPLEMENT, + "Cyrillic Extended-D" => CYRILLIC_EXTENDED_D, + "Nyiakeng Puachue Hmong" => NYIAKENG_PUACHUE_HMONG, + "Toto" => TOTO, + "Wancho" => WANCHO, + "Nag Mundari" => NAG_MUNDARI, + "Ethiopic Extended-B" => ETHIOPIC_EXTENDED_B, + "Mende Kikakui" => MENDE_KIKAKUI, + "Adlam" => ADLAM, + "Indic Siyaq Numbers" => INDIC_SIYAQ_NUMBERS, + "Ottoman Siyaq Numbers" => OTTOMAN_SIYAQ_NUMBERS, + "Arabic Mathematical Alphabetic Symbols" => ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, + "Mahjong Tiles" => MAHJONG_TILES, + "Domino Tiles" => DOMINO_TILES, + "Playing Cards" => PLAYING_CARDS, + "Enclosed Alphanumeric Supplement" => ENCLOSED_ALPHANUMERIC_SUPPLEMENT, + "Enclosed Ideographic Supplement" => ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, + "Miscellaneous Symbols and Pictographs" => MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, + "Emoticons" => EMOTICONS, + "Ornamental Dingbats" => ORNAMENTAL_DINGBATS, + "Transport and Map Symbols" => TRANSPORT_AND_MAP_SYMBOLS, + "Alchemical Symbols" => ALCHEMICAL_SYMBOLS, + "Geometric Shapes Extended" => GEOMETRIC_SHAPES_EXTENDED, + "Supplemental Arrows-C" => SUPPLEMENTAL_ARROWS_C, + "Supplemental Symbols and Pictographs" => SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, + "Chess Symbols" => CHESS_SYMBOLS, + "Symbols and Pictographs Extended-A" => SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, + "Symbols for Legacy Computing" => SYMBOLS_FOR_LEGACY_COMPUTING, + "CJK Unified Ideographs Extension B" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, + "CJK Unified Ideographs Extension C" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, + "CJK Unified Ideographs Extension D" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, + "CJK Unified Ideographs Extension E" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, + "CJK Unified Ideographs Extension F" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, + "CJK Unified Ideographs Extension I" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I, + "CJK Compatibility Ideographs Supplement" => CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, + "CJK Unified Ideographs Extension G" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, + "CJK Unified Ideographs Extension H" => CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, + "Tags" => TAGS, + "Variation Selectors Supplement" => VARIATION_SELECTORS_SUPPLEMENT, + "Supplementary Private Use Area-A" => SUPPLEMENTARY_PRIVATE_USE_AREA_A, + "Supplementary Private Use Area-B" => SUPPLEMENTARY_PRIVATE_USE_AREA_B, +};