diff --git a/data/ca-ba/g2p/model.crf b/data/ca-ba/g2p/model.crf new file mode 100644 index 0000000..5de981e Binary files /dev/null and b/data/ca-ba/g2p/model.crf differ diff --git a/data/ca-ba/language.yml b/data/ca-ba/language.yml new file mode 100644 index 0000000..39f4528 --- /dev/null +++ b/data/ca-ba/language.yml @@ -0,0 +1,50 @@ +--- + +language: + name: "Balear Catalan" + code: "ca-ba" + phonemes: !env "${config_dir}/phonemes.txt" + keep_stress: true + +lexicon: !env "${config_dir}/lexicon.db" + +g2p: + model: !env "${config_dir}/g2p.fst" + +symbols: + casing: "lower" + number_regex: "^-?\\d+([,.]\\d+)*$" + token_split: "\\s+" + token_join: " " + minor_breaks: + - "," + - ":" + - ";" + - "..." + major_breaks: + - "." + - "?" + - "!" + replace: + "[\\<\\>\\(\\)\\[\\]\"]+": "" + "\\B'": "\"" + "'\\B": "\"" + "’": "'" + "'": "" + "-": "" + "l·l": "l" + punctuations: + - "\"" + - "„" + - "“" + - "”" + - "«" + - "»" + - "," + - ":" + - ";" + - "." + - "?" + - "¿" + - "!" + - "¡" \ No newline at end of file diff --git a/data/ca-ba/lexicon.db b/data/ca-ba/lexicon.db new file mode 100644 index 0000000..c961947 Binary files /dev/null and b/data/ca-ba/lexicon.db differ diff --git a/data/ca-ba/phonemes.txt b/data/ca-ba/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/data/ca-ba/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/data/ca-ce/g2p/model.crf b/data/ca-ce/g2p/model.crf new file mode 100644 index 0000000..1e8f7a8 Binary files 
/dev/null and b/data/ca-ce/g2p/model.crf differ diff --git a/data/ca-ce/language.yml b/data/ca-ce/language.yml new file mode 100644 index 0000000..4da8506 --- /dev/null +++ b/data/ca-ce/language.yml @@ -0,0 +1,50 @@ +--- + +language: + name: "Central Catalan" + code: "ca-ce" + phonemes: !env "${config_dir}/phonemes.txt" + keep_stress: true + +lexicon: !env "${config_dir}/lexicon.db" + +g2p: + model: !env "${config_dir}/g2p.fst" + +symbols: + casing: "lower" + number_regex: "^-?\\d+([,.]\\d+)*$" + token_split: "\\s+" + token_join: " " + minor_breaks: + - "," + - ":" + - ";" + - "..." + major_breaks: + - "." + - "?" + - "!" + replace: + "[\\<\\>\\(\\)\\[\\]\"]+": "" + "\\B'": "\"" + "'\\B": "\"" + "’": "'" + "'": "" + "-": "" + "l·l": "l" + punctuations: + - "\"" + - "„" + - "“" + - "”" + - "«" + - "»" + - "," + - ":" + - ";" + - "." + - "?" + - "¿" + - "!" + - "¡" \ No newline at end of file diff --git a/data/ca-ce/lexicon.db b/data/ca-ce/lexicon.db new file mode 100644 index 0000000..f3fd9df Binary files /dev/null and b/data/ca-ce/lexicon.db differ diff --git a/data/ca-ce/phonemes.txt b/data/ca-ce/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/data/ca-ce/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/data/ca-no/g2p/model.crf b/data/ca-no/g2p/model.crf new file mode 100644 index 0000000..f7cdbdb Binary files /dev/null and b/data/ca-no/g2p/model.crf differ diff --git a/data/ca-no/language.yml b/data/ca-no/language.yml new file 
mode 100644 index 0000000..413ee3b --- /dev/null +++ b/data/ca-no/language.yml @@ -0,0 +1,50 @@ +--- + +language: + name: "Nord-Occidental Catalan" + code: "ca-no" + phonemes: !env "${config_dir}/phonemes.txt" + keep_stress: true + +lexicon: !env "${config_dir}/lexicon.db" + +g2p: + model: !env "${config_dir}/g2p.fst" + +symbols: + casing: "lower" + number_regex: "^-?\\d+([,.]\\d+)*$" + token_split: "\\s+" + token_join: " " + minor_breaks: + - "," + - ":" + - ";" + - "..." + major_breaks: + - "." + - "?" + - "!" + replace: + "[\\<\\>\\(\\)\\[\\]\"]+": "" + "\\B'": "\"" + "'\\B": "\"" + "’": "'" + "'": "" + "-": "" + "l·l": "l" + punctuations: + - "\"" + - "„" + - "“" + - "”" + - "«" + - "»" + - "," + - ":" + - ";" + - "." + - "?" + - "¿" + - "!" + - "¡" \ No newline at end of file diff --git a/data/ca-no/lexicon.db b/data/ca-no/lexicon.db new file mode 100644 index 0000000..6cb9e0e Binary files /dev/null and b/data/ca-no/lexicon.db differ diff --git a/data/ca-no/phonemes.txt b/data/ca-no/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/data/ca-no/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/data/ca-va/g2p/model.crf b/data/ca-va/g2p/model.crf new file mode 100644 index 0000000..417e24c Binary files /dev/null and b/data/ca-va/g2p/model.crf differ diff --git a/data/ca-va/language.yml b/data/ca-va/language.yml new file mode 100644 index 0000000..1d2074b --- /dev/null +++ b/data/ca-va/language.yml @@ -0,0 +1,50 @@ +--- + 
+language: + name: "Valencià Catalan" + code: "ca-va" + phonemes: !env "${config_dir}/phonemes.txt" + keep_stress: true + +lexicon: !env "${config_dir}/lexicon.db" + +g2p: + model: !env "${config_dir}/g2p.fst" + +symbols: + casing: "lower" + number_regex: "^-?\\d+([,.]\\d+)*$" + token_split: "\\s+" + token_join: " " + minor_breaks: + - "," + - ":" + - ";" + - "..." + major_breaks: + - "." + - "?" + - "!" + replace: + "[\\<\\>\\(\\)\\[\\]\"]+": "" + "\\B'": "\"" + "'\\B": "\"" + "’": "'" + "'": "" + "-": "" + "l·l": "l" + punctuations: + - "\"" + - "„" + - "“" + - "”" + - "«" + - "»" + - "," + - ":" + - ";" + - "." + - "?" + - "¿" + - "!" + - "¡" \ No newline at end of file diff --git a/data/ca-va/lexicon.db b/data/ca-va/lexicon.db new file mode 100644 index 0000000..5b6518d Binary files /dev/null and b/data/ca-va/lexicon.db differ diff --git a/data/ca-va/phonemes.txt b/data/ca-va/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/data/ca-va/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/gruut-lang-ca/LANGUAGE b/gruut-lang-ca/LANGUAGE new file mode 100644 index 0000000..d9e4b40 --- /dev/null +++ b/gruut-lang-ca/LANGUAGE @@ -0,0 +1 @@ +ca-ce Catalan diff --git a/gruut-lang-ca/README.md b/gruut-lang-ca/README.md new file mode 100644 index 0000000..6266c01 --- /dev/null +++ b/gruut-lang-ca/README.md @@ -0,0 +1,3 @@ +# gruut Catalan + +Language-specific files for Catalan (ca) in [gruut](https://github.com/rhasspy/gruut) diff --git 
a/gruut-lang-ca/gruut_lang_ca/VERSION b/gruut-lang-ca/gruut_lang_ca/VERSION new file mode 100644 index 0000000..77d6f4c --- /dev/null +++ b/gruut-lang-ca/gruut_lang_ca/VERSION @@ -0,0 +1 @@ +0.0.0 diff --git a/gruut-lang-ca/gruut_lang_ca/__init__.py b/gruut-lang-ca/gruut_lang_ca/__init__.py new file mode 100644 index 0000000..bed0ab8 --- /dev/null +++ b/gruut-lang-ca/gruut_lang_ca/__init__.py @@ -0,0 +1,22 @@ +"""Catalan language resources""" +import os +import typing +from pathlib import Path + +try: + import importlib.resources + + files = importlib.resources.files +except (ImportError, AttributeError): + # Backport for Python < 3.9 + import importlib_resources # type: ignore + + files = importlib_resources.files + +_PACKAGE = "gruut_lang_ca" +_DIR = Path(typing.cast(os.PathLike, files(_PACKAGE))) + + +def get_lang_dir() -> Path: + """Get directory with language resources""" + return _DIR diff --git a/gruut-lang-ca/gruut_lang_ca/g2p/model.crf b/gruut-lang-ca/gruut_lang_ca/g2p/model.crf new file mode 100644 index 0000000..1e8f7a8 Binary files /dev/null and b/gruut-lang-ca/gruut_lang_ca/g2p/model.crf differ diff --git a/gruut-lang-ca/gruut_lang_ca/lexicon.db b/gruut-lang-ca/gruut_lang_ca/lexicon.db new file mode 100644 index 0000000..f3fd9df Binary files /dev/null and b/gruut-lang-ca/gruut_lang_ca/lexicon.db differ diff --git a/gruut-lang-ca/gruut_lang_ca/phonemes.txt b/gruut-lang-ca/gruut_lang_ca/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/gruut-lang-ca/gruut_lang_ca/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ 
s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/gruut-lang-ca/setup.py b/gruut-lang-ca/setup.py new file mode 100644 index 0000000..ce5814b --- /dev/null +++ b/gruut-lang-ca/setup.py @@ -0,0 +1,58 @@ +"""Setup file for gruut_lang_ca""" +from pathlib import Path + +import setuptools + +module_name = "gruut_lang_ca" + +this_dir = Path(__file__).parent +module_dir = this_dir / module_name + +# ----------------------------------------------------------------------------- + +# Load README in as long description +long_description: str = "" +readme_path = this_dir / "README.md" +if readme_path.is_file(): + long_description = readme_path.read_text(encoding="utf-8") + +version_path = module_dir / "VERSION" +with open(version_path, "r", encoding="utf-8") as version_file: + version = version_file.read().strip() + + +# Extra package data files +extra_files = [] +maybe_extra_files = ["pos/model.crf", "pos/postagger.model"] +for maybe_extra_str in maybe_extra_files: + extra_path = module_dir / maybe_extra_str + if extra_path.is_file(): + extra_files.append(maybe_extra_str) + +# ----------------------------------------------------------------------------- + +setuptools.setup( + name=module_name, + description="Catalan language files for gruut tokenizer/phonemizer", + version=version, + author="Michael Hansen", + author_email="mike@rhasspy.org", + url="https://github.com/rhasspy/gruut", + packages=setuptools.find_packages(), + package_data={ + module_name: [ + "VERSION", + "lexicon.db", + "g2p/model.crf", + #"espeak/lexicon.db", + #"espeak/g2p/model.crf", + ] + + extra_files + }, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + ], + long_description=long_description, + long_description_content_type="text/markdown", +) diff --git a/gruut/__main__.py b/gruut/__main__.py index e78d16b..5d907b5 100755 --- a/gruut/__main__.py +++ b/gruut/__main__.py @@ -64,7 +64,6 @@ def main(): 
args.model_prefix = "espeak" # ------------------------------------------------------------------------- - text_processor = TextProcessor( default_lang=args.language, model_prefix=args.model_prefix, ) @@ -132,7 +131,7 @@ def output_sentences(sentences, writer, text_data=None): for sentence in sentences: sentence_dict = dataclasses.asdict(sentence) writer.write(sentence_dict) - + for text, text_data in input_text(lines): try: graph, root = text_processor( @@ -166,8 +165,9 @@ def output_sentences(sentences, writer, text_data=None): punctuations=(not args.no_punctuation), ) ) - + output_sentences(sentences, writer, text_data) + except Exception as e: _LOGGER.exception(text) diff --git a/gruut/const.py b/gruut/const.py index dcc4486..dad195e 100644 --- a/gruut/const.py +++ b/gruut/const.py @@ -16,6 +16,11 @@ # alias -> full language name LANG_ALIASES = { "ar": "ar", + "ca": "ca-ce", + "ca-ce": "ca-ce", + "ca-ba": "ca-ba", + "ca-no": "ca-no", + "ca-va": "ca-va", "cs": "cs-cz", "de": "de-de", "en": "en-us", diff --git a/gruut/lang.py b/gruut/lang.py index cbd54d2..ccfffaa 100644 --- a/gruut/lang.py +++ b/gruut/lang.py @@ -15,7 +15,7 @@ from gruut.text_processor import InterpretAsFormat, TextProcessorSettings from gruut.utils import find_lang_dir, remove_non_word_chars, resolve_lang -_LOGGER = logging.getLogger("gruut") +_LOGGER = logging.getLogger("gruut.lang") # ----------------------------------------------------------------------------- @@ -115,6 +115,10 @@ def get_settings( # Arabic return get_ar_settings(lang_dir, **settings_args) + if lang_only in {"ca-ce", "ca-ba", "ca-no", "ca-va"}: + # Catalan + return get_ca_settings(lang_dir, **settings_args) + if lang_only == "cs-cz": # Czech return get_cs_settings(lang_dir, **settings_args) @@ -828,7 +832,1434 @@ def get_zh_settings(lang_dir=None, **settings_args) -> TextProcessorSettings: # ----------------------------------------------------------------------------- +# Catalan (ca, Catalan) +# 
----------------------------------------------------------------------------- + +# Pre-Process constants +# Same for all accents in this version +VOWEL_CHARS = ['a', 'ä', 'à', 'e', 'ë', 'é', 'è', 'i', 'í', 'ï', 'o', 'ö', 'ó', 'ò', 'u', 'ü', 'ú'] +ACCENTED_VOWEL_CHARS = ['à', 'é', 'è', 'í', 'ó', 'ò', 'ú'] +NUCLITIC_CHARS = ['a', 'à', 'e', 'é', 'è', 'í', 'ï', 'o', 'ó', 'ò', 'ú'] +ACCENT_CHANGES = { + "a" : "à", + "e" : "é", + "i" : "í", + "ï" : "í", + "o" : "ó", + "u" : "ú", + "ü" : "ú", +} +INSEPARABLES = [ + 'bh', 'bl', 'br', 'ch', 'cl', 'cr', 'dh', 'dj', 'dr', 'fh', 'fh', 'fl', 'fr', \ + 'gh', 'gl', 'gr', 'gu', 'gü', 'jh', 'kh', 'kl', 'kr', 'lh', 'll', 'mh', \ + 'nh', 'ny', 'ph', 'pl', 'pr', 'qu', 'qü', 'rh', 'sh', 'th', 'th', 'tr', \ + 'vh', 'wh', 'xh', 'xh', 'yh', 'zh', +] +VOC_IR = ["cuir", "vair"] +EINESGRAM = [ + '-de-', '-en', '-hi', '-ho', '-i', '-i-', '-la', '-les', '-li', '-lo', '-los', '-me', '-ne', '-nos', \ + '-se', '-te', '-us', '-vos', 'a', 'a-', 'al', 'als', 'amb', 'bi-', 'co', 'de', 'de-', 'del', 'dels', \ + 'el', 'els', 'em', 'en', 'ens', 'es', 'et', 'hi', 'ho', 'i', 'i-', 'la', 'les', 'li', 'lo', 'ma', \ + 'me', 'mon', 'na', 'pel', 'pels', 'per', 'que', 're', 'sa', 'se', 'ses', 'si', 'sos', 'sub', \ + 'ta', 'te', 'tes', 'ton', 'un', 'uns', 'us', +] +EXCEP_ACC = { + 'antropologico': 'antropològico', 'arterio': 'artèrio', 'artistico': 'artístico', 'basquet': 'bàsquet', 'cardio': 'càrdio', \ + 'catolico': 'catòlico', 'cientifico': 'científico', 'circum': 'círcum', 'civico': 'cívico', 'democrata': 'demòcrata', \ + 'democratico': 'democràtico', 'dumping': 'dúmping', 'economico': 'econòmico', 'edgar': 'èdgar', 'fenicio': 'fenício', \ + 'filosofico': 'filosòfico', 'fisico': 'físico', 'fisio': 'físio', 'geografico': 'geogràfico', 'hetero': 'hétero', \ + 'higenico': 'higènico', 'higienico': 'higiènico', 'hiper': 'híper', 'historico': 'històrico', 'ibero': 'íbero', \ + 'ideologico': 'ideològico', 'input': 'ínput', 'inter': 'ínter', 'jonatan': 'jònatan', 
'juridico': 'jurídico', 'labio': 'làbio', \ + 'linguo': 'línguo', 'literario': 'literàrio', 'logico': 'lògico', 'magico': 'màgico', 'maniaco': 'maníaco', 'marketing': 'màrketing', \ + 'oxido': 'òxido', 'petroleo': 'petròleo', 'politico': 'político', 'quantum': 'quàntum', 'quimico': 'químico', 'quimio': 'químio', \ + 'radio': 'ràdio', 'romanico': 'romànico', 'simbolico': 'simbòlico', 'socio': 'sòcio', 'super': 'súper', 'tecnico': 'tècnico', \ + 'teorico': 'teòrico', 'tragico': 'tràgico', 'traqueo': 'tràqueo', +} +DIFT_DECR = ["au", "ai", "eu", "ei", "ou", "oi", "iu", "àu", "ui"] +VOC_SOLA = ["a", "e", "i", "o", "u", "ï", "ü"] +VOC_MES_S = ["as", "es", "is", "os", "us", "às", "ès"] +EN_IN = ["en", "in", "àn"] + +# Pre-Process functions and classes + +from collections import deque + +# TODO review all functions, may need refactor +# TODO define depending the dialect +def vocal(carac: str) -> bool: + + return carac in VOWEL_CHARS + +def acaba_en_vocal(prefix: str) -> bool: + darrer = prefix[-1] + return vocal(darrer) + +def post_prefix_ok(resta: str) -> bool: + + mida = len(resta) + primer = resta[0] + segon = '\0' + if mida > 1: + segon = resta[1] + + if primer in "iu": + return True + elif primer in "rs": + if mida > 1 and vocal(segon): + return True + return False + +def nuclitica(carac: str) -> bool: + return carac in NUCLITIC_CHARS + +def gicf_suf(mot: str, pos: int, mots_voc_ir: typing.List[str]) -> bool: + + mida = len(mot) + + if mot[pos:].endswith("isme") and len(mot) - pos == 4: + return True + elif mot[pos:].endswith("ista") and len(mot) - pos == 4: + return True + elif mot[pos:].endswith("ismes") and len(mot) - pos == 5: + return True + elif mot[pos:].endswith("istes") and len(mot) - pos == 5: + return True + + i1 = mot.find("ir") + if i1 == pos and len(mot) - pos == 2: + if mot in mots_voc_ir: + return False + else: + return True + + i1 = mot.find("int") + if i1 == pos and len(mot) - pos == 3: + return True + + i1 = mot.find("iré") + if i1 == pos and 
len(mot) - pos == 3: + return True + + i1 = mot.find("iràs") + if i1 == pos and len(mot) - pos == 4: + return True + + i1 = mot.find("irà") + if i1 == pos and len(mot) - pos == 3: + return True + + i1 = mot.find("irem") + if i1 == pos and len(mot) - pos == 4: + return True + + i1 = mot.find("ireu") + if i1 == pos and len(mot) - pos == 4: + return True + + i1 = mot.find("iran") + if i1 == pos and len(mot) - pos == 4: + return True + + i1 = mot.find("iria") + if i1 == pos and len(mot) - pos == 4: + return True + + i1 = mot.find("iries") + if i1 == pos and len(mot) - pos == 5: + return True + + i1 = mot.find("iríem") + if i1 == pos and len(mot) - pos == 5: + return True + + i1 = mot.find("iríeu") + if i1 == pos and len(mot) - pos == 5: + return True + + i1 = mot.find("irien") + if i1 == pos and len(mot) - pos == 5: + return True + + return False + + +class Sillaba: + + def __init__(self, sil: str): + + self.text_ = sil + self.tonica_ = False + self.grafnuc_ = -1 + self.fonnuc_ = -1 + self.fons_ = deque() + + def grafnuc(self, nuc: int): + self.grafnuc_ = nuc + + def get_grafnuc(self) -> int: + return self.grafnuc_ + + def get_text(self) -> str: + return self.text_ + + def get_text_at_index(self, idx: int) -> str: + return self.text_[idx] + + def sizetext(self) -> int: + return len(self.text_) + + def tonica(self) -> bool: + self.tonica_ = True + + def asktonica(self) -> bool: + return self.tonica_ + + def es_sil_tonica(self) -> bool: + + if self.tonica_: + return "sí" + else: + return "no" + + def numfons(self) -> int: + return len(self.fons_) + + def allofon(self, fonidx: int) -> str: + return self.fons_[fonidx] + + def allofons(self) -> deque: + return self.fons_ + + def push_back(self, fon: str): + self.fons_.append(fon) + + def push_front(self, fon: str): + self.fons_.insert(0, fon) + + def pop_front(self): + self.fons_.popleft() + + def pop_back(self): + self.fons_.pop() + + def empty(self) -> bool: + return len(self.fons_) == 0 + + def fonnuc(self, fnuc: int): + 
self.fonnuc_ = fnuc + + def get_fonnuc(self) -> int: + return self.fonnuc_ + + +class Part: + + def __init__(self, tros: str): + self.text_ = tros + self.transsil_ = deque() # It will be a deque structure with Sillaba instances as elements + + def push_back(self, sil: Sillaba): + self.transsil_.append(sil) + + def pop_back(self): + self.transsil_.pop() + + def pop_front(self): + self.transsil_.popleft() + + def empty(self) -> bool: + return len(self.transsil_) == 0 + + def size(self) -> int: + return len(self.transsil_) + + def tonica(self, silidx: int) -> bool: + # self.transsil_[silidx] is an Sillaba instance, which has the attribute tonica_ + return self.transsil_[silidx].tonica_ + + def idxgrafnucli(self, silidx: int) -> int: + # self.transsil_[silidx] is an Sillaba instance, which has the attribute grafnuc_ + return self.transsil_[silidx].grafnuc_ + + def grafnucli(self, silidx: int) -> str: + # self.transsil_[silidx] is an Sillaba instance, which has an attributes text_ and grafnuc_ + return self.transsil_[silidx].text_[self.transsil_[silidx].grafnuc_] + + def sil(self, silnum: int) -> Sillaba: + return self.transsil_[silnum] + + def sils(self) -> deque: + return self.transsil_ + + def text(self) -> str: + return self.text_ + + def textinici(self, silindex: int, charindex: int) -> str: + + # Gives the text of the previous syllables, and from the syllable silindex to charindex not included + + mot = "" + for i in range(silindex): + mot += self.transsil_[i].text_ + if charindex: + mot += self.transsil_[silindex].text_[:charindex] + return mot + + def textfinal(self, silindex: int, charindex: int) -> str: + + # Gives the text starting from the syllable silindex and the character charindex (included) and up to the end of the word + + mot = self.transsil_[silindex].text_[charindex:] + for i in range(silindex + 1, len(self.transsil_)): + mot += self.transsil_[i].text_ + return mot + + def textsilini(self, silindex: int, charindex: int) -> str: + + # gives the text 
of the syllable silindex, from the beginning to the character charindex not included + return self.transsil_[silindex].text_[:charindex] + + def textsilfinal(self, silindex: int, charindex: int) -> str: + + # Gives the text of the syllable silindex, from charindex inclusive to the end + return self.transsil_[silindex].text_[charindex:] + + def charidxsilini(self, silindex: int) -> int: + + car = self.transsil_[silindex].text_[0] + if car == "'" or car == '-': + return 1 + else: + return 0 + + def charidxsilfinal(self, silindex: int) -> int: + + siltxt = self.transsil_[silindex].text_ + car = siltxt[-1] + if car == "'" or car == '-': + return len(siltxt) - 2 + else: + return len(siltxt) - 1 + + +class MotNuclis: + + def __init__(self, mot: str, es_adverbi: bool): + + self.adverbi_ = es_adverbi + self.el_mot = mot + self.pos_nuclis = [] + + self.load_insep() + + def load_insep(self): + + # Set self.insep_ and self.mots_voc_ir_ + + self.insep_ = INSEPARABLES + self.mots_voc_ir_ = VOC_IR + + def troba_nuclis_mot(self): + + mida = len(self.el_mot) + adjectiu = "" + + if self.adverbi_: + adjectiu = self.el_mot[0:mida - 4] + self.el_mot = adjectiu + mida = len(self.el_mot) + + gr = 0 + while gr < mida: + + car = self.el_mot[gr] + + if nuclitica(car): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif car == 'i': + # gicf o sufix + if gicf_suf(self.el_mot, gr, self.mots_voc_ir_): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + else: + abans = self.el_mot[0:gr] + premida = len(abans) + + if (premida == 0) or (premida == 1 and abans == "h"): + # casos iode o hiena, i, hi + if gr == mida - 1: + # i, hi + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + elif vocal(self.el_mot[gr+1]): + # hiena iode + gr = gr + 1 + continue + # hissar, ira + else: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif (premida == 1) and (abans == "u"): + + if gr == mida - 1 or self.el_mot[gr + 1] == 'x': + gr = gr + 1 + continue + else: + 
self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif (premida == 2) and (abans == "hu"): + + if gr == mida - 1: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + if self.el_mot[gr + 1] == 'x': + gr = gr + 1 + continue + else: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif self.el_mot[gr - 1] == 'u': + # tres vocals seguides vocal+u+i, la u es consonant i la "i" es nucli + if (premida > 1) and vocal(self.el_mot[gr - 2]): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + elif (premida > 1) and (self.el_mot[gr - 2] == 'q' or self.el_mot[gr - 2] == 'g'): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + else: + # ui tot sol + gr = gr + 1 + continue + + elif self.el_mot[gr - 1] == 'ü': + + if (premida > 1) and (self.el_mot[gr - 2] == 'q' or self.el_mot[gr - 2] == 'g'): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + else: + # üi no precedit de g,q + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif vocal(self.el_mot[gr - 1]): + # vocal + i, la i no es nucli + gr = gr + 1 + continue + + else: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif car == 'u': + + abans = self.el_mot[0:gr] + premida = len(abans) + + if (premida == 0) or (premida == 1 and abans == "h"): + # casos uadi o hu+vocal, u, hu + if gr == mida - 1: + # u, hu + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + elif self.el_mot == "ui" or self.el_mot == "uix": + # potser se n'han d'afegir mes + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + elif (pos := self.el_mot.find("ix")) != -1 and pos == gr + 1: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + elif vocal(self.el_mot[gr+1]): + # uadi hu+vocal + gr = gr + 1 + continue + else: + # huns, una + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif (premida == 1) and (abans == "i"): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif self.el_mot[gr - 1] == 'i': + # tres vocals seguides vocal+i+u, la i es consonant i la "u" es nucli + if premida > 
2: + boci = self.el_mot[gr - 3 : gr - 1] + + if boci == "gu" or boci == "qu": + gr = gr + 1 + continue + + elif vocal(self.el_mot[gr - 2]): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + else: + gr = gr + 1 + continue + + elif premida == 2: + if vocal(self.el_mot[gr - 2]): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + else: + gr = gr + 1 + continue + else: + gr = gr + 1 + continue + + elif self.el_mot[gr - 1] == 'g' or self.el_mot[gr - 1] == 'q': + if gr == mida - 1: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif vocal(self.el_mot[gr + 1]): + gr = gr + 1 + continue + + else: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif self.el_mot[gr - 1] == 'ü': + if (premida > 1) and (self.el_mot[gr - 2] == 'q' or self.el_mot[gr - 2] == 'g'): + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + else: + # üu no precedit de g,q + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif vocal(self.el_mot[gr - 1]): + # vocal + u, la u no es nucli + gr = gr + 1 + continue + + else: + # tancara l'else de quan no es sufix + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + elif car == 'ü': + + pos = 0 + + if (pos := self.el_mot.find("argü")) != -1: + if pos + 3 == gr: + self.pos_nuclis.append(gr) + self.pos_nuclis.append(gr + 1) + gr += 1 + gr = gr + 1 + continue + else: + gr = gr + 1 + continue + elif gr > 0: + if self.el_mot[gr - 1] == 'g' or self.el_mot[gr - 1] == 'q': + gr = gr + 1 + continue + else: + self.pos_nuclis.append(gr) + gr = gr + 1 + continue + + else: + gr = gr + 1 + continue + + if self.adverbi_: + self.el_mot += "ment" + mida = len(self.el_mot) + self.pos_nuclis.append(mida - 3) + + def inseparable(self, tros: str) -> bool: + return tros in self.insep_ + + def separa_sillabes(self, vec_sil: typing.List[str], els_nuclis: typing.List[int]) -> typing.Tuple[typing.List[str], typing.List[int]]: + + fronteres = [] + + if len(self.pos_nuclis) == 1: + + vec_sil.append(self.el_mot) + 
els_nuclis.append(self.pos_nuclis[0]) + + return vec_sil, els_nuclis + + # Set the fronteres vector + for i in range(len(self.pos_nuclis) - 1): + + longi = self.pos_nuclis[i + 1] - self.pos_nuclis[i] - 1 + tros = self.el_mot[self.pos_nuclis[i] + 1: self.pos_nuclis[i] + 1 + longi] + + # vocals contigues + if longi == 0: + fronteres.append(self.pos_nuclis[i]) + + elif longi == 1: + fronteres.append(self.pos_nuclis[i]) + + elif longi == 2: + if self.inseparable(self.el_mot[self.pos_nuclis[i] + 1: self.pos_nuclis[i] + 1 + 2]): + fronteres.append(self.pos_nuclis[i]) + elif self.el_mot[self.pos_nuclis[i] + 2] == 'h': + fronteres.append(self.pos_nuclis[i]) + else: + fronteres.append(self.pos_nuclis[i] + 1) + + elif longi == 3: + if self.inseparable(self.el_mot[self.pos_nuclis[i] + 2: self.pos_nuclis[i] + 2 + 2]): + if self.el_mot[self.pos_nuclis[i] + 1] == '-': + fronteres.append(self.pos_nuclis[i]) + else: + fronteres.append(self.pos_nuclis[i] + 1) + else: + if self.el_mot[self.pos_nuclis[i] + 3] == '-': + fronteres.append(self.pos_nuclis[i] + 1) + else: + fronteres.append(self.pos_nuclis[i] + 2) + + elif longi == 4: + pos = 0 + + if (pos := tros.find("s")) != -1: + fronteres.append(self.pos_nuclis[i] + pos + 1) + else: + fronteres.append(self.pos_nuclis[i] + 2) + + elif longi == 5: + fronteres.append(self.pos_nuclis[i] + 3) + + else: + _LOGGER.debug(f"No puc separar en sillabes el mot {self.el_mot}, cluster massa gran, de longitud {longi}") + exit(1) + + numsil = len(fronteres) + for i in range(numsil): + if i == 0: + if fronteres[i] != 0: + esta_sil = self.el_mot[0:fronteres[i] + 1] + vec_sil.append(esta_sil) + else: + esta_sil = self.el_mot[0] + vec_sil.append(esta_sil) + else: + esta_sil = self.el_mot[fronteres[i - 1] + 1 : fronteres[i] + 1] + vec_sil.append(esta_sil) + + esta_sil = self.el_mot[fronteres[numsil - 1] + 1:] + vec_sil.append(esta_sil) + + els_nuclis.append(self.pos_nuclis[0]) + longitud = len(vec_sil[0]) + + for i in range(1, len(self.pos_nuclis)): + 
            # Tail of MotNuclis.separa_sillabes (the start of this method is
            # above this chunk): convert each absolute nucleus position into an
            # offset inside its own syllable.
            this_nucli = self.pos_nuclis[i] - longitud
            els_nuclis.append(this_nucli)
            longitud += len(vec_sil[i])

        return vec_sil, els_nuclis

    def empty(self) -> bool:
        """Return True when no vowel nucleus was found in the word."""
        return len(self.pos_nuclis) == 0

    def mot(self) -> str:
        """Return the word this object was built from."""
        return self.el_mot

    def nucli(self, i: int) -> typing.Union[int, None]:
        """Return the i-th nucleus position, or None when i is out of range."""
        if 0 <= i < len(self.pos_nuclis):
            return self.pos_nuclis[i]
        return None

    def size(self) -> int:
        """Return the number of nuclei (one per syllable)."""
        return len(self.pos_nuclis)

    def nuclis(self) -> typing.List[int]:
        """Return all nucleus positions."""
        return self.pos_nuclis


class Transcripcio:
    """Syllabifies a Catalan word and places its tonic (stressed) syllable.

    The word is split on known prefixes, each part is syllabified with
    MotNuclis/Sillaba, the tonic syllable is located (written accent,
    monosyllable rules, adverbs in "-ment", or the twelve-terminations
    paroxytone rule), and finally stress_word() returns the word rewritten
    with an explicit written accent on the tonic vowel.
    """

    def __init__(self, mot: str):

        # Original (unnormalized) word.
        self.motorig_ = mot

        # Tonic and atonic prefix inventories (empty unless loaded elsewhere).
        self.prefixos_ = []
        self.pref_atons = []
        # Words that must NOT be split after a given prefix.
        self.excepcions_prefs = {}
        # Nouns ending in "-ment" that are not adverbs.
        self.excepcions_gen = set()
        # Grammatical tool words (atonic function words).
        self.einesgram_ = set()
        # Accentuation exceptions: word -> replacement spelling.
        self.excep_acc = {}
        # Word parts after prefix segmentation.
        self.trossos_ = []
        # One Part (list of Sillaba) per element of trossos_.
        self.transpart_ = []

        self.carrega_einesgram()
        self.carrega_exc_accent()

    def carrega_einesgram(self):
        # Load the grammatical tool-word set from the module constant.
        self.einesgram_ = EINESGRAM

    def carrega_exc_accent(self):
        # Load the accentuation exceptions from the module constant.
        self.excep_acc = EXCEP_ACC

    def normalize_word(self, word: str) -> str:
        """Normalize a word for processing (currently just lowercasing)."""

        word = word.lower()

        return word

    def segmenta(self, mot: str, final: typing.List[str]) -> typing.List[str]:
        """Recursively split `mot` on known prefixes, appending parts to `final`.

        A word is never split when the prefix covers the whole word, when only
        a hyphen follows the prefix, or when the word is listed as an
        exception for that prefix.
        """

        no_te_prefix = True
        for prefix in self.prefixos_:
            lon = len(prefix)
            pos = mot.find(prefix)
            if pos != -1 and pos == 0:
                no_te_prefix = False

                if lon == len(mot):
                    final.append(mot)
                    return final
                elif lon == len(mot) - 1 and mot[lon] == '-':
                    final.append(mot)
                    return final
                else:
                    # If there are no exceptions, split after the prefix.
                    if prefix not in self.excepcions_prefs:
                        final.append(prefix)
                        resta = mot[lon:]
                        self.segmenta(resta, final)
                        return final
                    # If there are exceptions, check the word is not one of them.
                    else:
                        if mot not in self.excepcions_prefs[prefix]:
                            final.append(prefix)
                            resta = mot[lon:]
                            self.segmenta(resta, final)
                            return final
                        else:
                            final.append(mot)
                            return final

        for prefix in self.pref_atons:
            lon = len(prefix)
            pos = mot.find(prefix)
            if pos != -1 and pos == 0:
                no_te_prefix = False

                if lon == len(mot):
                    final.append(mot)
                    return final
                elif lon == len(mot) - 1 and mot[lon] == '-':
                    final.append(mot)
                    return final
                else:
                    # An atonic prefix only triggers a split when:
                    # - the prefix ends in a vowel AND the remainder starts
                    #   with i, u, r+vowel or s+vowel (post_prefix_ok), or
                    # - the prefix ends in a consonant;
                    # and, in both cases, the word is not a listed exception.
                    if acaba_en_vocal(prefix):
                        resta = mot[lon:]
                        if post_prefix_ok(resta):
                            if prefix not in self.excepcions_prefs:
                                final.append(prefix)
                                self.segmenta(resta, final)
                                return final
                            else:
                                if mot not in self.excepcions_prefs[prefix]:
                                    final.append(prefix)
                                    self.segmenta(resta, final)
                                    return final
                                else:
                                    final.append(mot)
                                    return final
                        else:
                            final.append(mot)
                            return final
                    # Prefix ends in a consonant.
                    else:
                        if prefix not in self.excepcions_prefs:
                            final.append(prefix)
                            queda = mot[lon:]
                            self.segmenta(queda, final)
                            return final
                        else:
                            if mot not in self.excepcions_prefs[prefix]:
                                final.append(prefix)
                                queda = mot[lon:]
                                self.segmenta(queda, final)
                                return final
                            else:
                                final.append(mot)
                                return final

        if no_te_prefix:
            final.append(mot)
            return final

    def tracta_prefixos(self, inici: typing.List[str], final: typing.List[str]) -> typing.List[str]:
        """Segment every word of `inici` on its prefixes into `final`.

        A word is split after a recognized prefix unless the word is in the
        exception list for that prefix, or a hyphen follows the prefix.
        """

        for mot in inici:
            final = self.segmenta(mot, final)

        return final

    def parteix_mot(self):
        """Split the normalized word into parts and create one Part per piece."""

        parts = [self.motnorm_]

        self.trossos_ = self.tracta_prefixos(parts, self.trossos_)

        for tros in self.trossos_:
            partmot = Part(tros)
            self.transpart_.append(partmot)

    def no_es_nom_ment(self, mot: str) -> bool:
        """Return True when `mot` is NOT a noun ending in "-ment" (i.e. it may be an adverb)."""

        if mot not in self.excepcions_gen:
            return True
        else:
            return False

    def es_adverbi(self, mot: str) -> bool:
        """Return True when `mot` is an adverb in "-ment" (ends in "ment" and is not an exception noun)."""

        pos = 0
        tros = "ment"
        pos = mot.rfind(tros)
        if pos != -1:
            if pos == len(mot) - len(tros):
                if self.no_es_nom_ment(mot):
                    return True
                else:
                    return False
            else:
                return False
        else:
            return False

    def es_exc_accent(self, mot: str) -> str:
        """Return the replacement spelling for accentuation-exception words, else `mot` unchanged."""

        if mot in self.excep_acc:
            mot = self.excep_acc[mot]

        return mot

    def troba_nuclis_mot(self):
        """Syllabify every word part, filling transpart_ with Sillaba objects."""

        for i in range(len(self.trossos_)):

            self.trossos_[i] = self.es_exc_accent(self.trossos_[i])

            # Determine whether it is an adverb and pass that on to MotNuclis.
            is_adverb = self.es_adverbi(self.trossos_[i])

            mot_amb_nuclis = MotNuclis(
                mot = self.trossos_[i],
                es_adverbi = is_adverb,
            )

            mot_amb_nuclis.troba_nuclis_mot()

            sillabes, nuclis = [], []
            if not mot_amb_nuclis.empty():
                sillabes, nuclis = mot_amb_nuclis.separa_sillabes(sillabes, nuclis)
                for sil in range(len(sillabes)):
                    sillab = Sillaba(sillabes[sil])
                    sillab.grafnuc(nuclis[sil])
                    self.transpart_[i].push_back(sillab)
            else:
                # Part without any vowel nucleus: keep it as a single syllable.
                sillab = Sillaba(self.trossos_[i])
                self.transpart_[i].push_back(sillab)

    def dotze_term(self, pnum: int) -> bool:
        """Return True when part `pnum` is paroxytone (plana).

        True either because the last syllable matches one of the twelve
        terminations (vowel, vowel+s, -en/-in) or because it ends in a
        decreasing diphthong whose first vowel is not the nucleus.
        """

        dift_decr = DIFT_DECR
        voc_sola = VOC_SOLA
        voc_mes_s = VOC_MES_S
        en_in = EN_IN

        numsil = self.transpart_[pnum].size()
        darsil = self.transpart_[pnum].transsil_[numsil - 1].get_text()
        darsil = darsil.lower()  # Case-insensitive comparison.

        mida = len(darsil)

        # Last syllable of length 2 or more.
        if mida >= 2:
            last_dos = darsil[-2:]

            # Decreasing diphthong (includes gui, qui):
            # - diphthong whose first vowel IS the nucleus -> oxytone (aguda)
            # - diphthong that is not the nucleus (e.g. "preui") -> paroxytone
            for dift in dift_decr:
                es_dift_decr = last_dos == dift
                if es_dift_decr and (self.transpart_[pnum].transsil_[numsil - 1].grafnuc_ == mida - 2):
                    return False
                elif es_dift_decr:
                    return True

            # Ends in a single vowel.
            last_voc = darsil[-1:]
            if last_voc in voc_sola:
                return True

            # Last syllable ends in "s" (length still 2+): same diphthong
            # test one position earlier, then the vowel+s terminations.
            if darsil[-1:] == 's':
                if mida >= 3:
                    last_dos = darsil[-3:-1]
                    for dift in dift_decr:
                        es_dift_decr = last_dos == dift
                        if es_dift_decr and (self.transpart_[pnum].transsil_[numsil - 1].grafnuc_ == mida - 3):
                            return False
                        elif es_dift_decr:
                            return True

                last_dos = darsil[-2:]
                if last_dos in voc_mes_s:
                    return True

            # Terminations -en / -in.
            last_dos = darsil[-2:]
            if last_dos in en_in:
                return True

        # Single-character syllable that is itself a vowel.
        # NOTE(review): in the mangled original this check appears twice; the
        # second occurrence is reconstructed here at this indent so a
        # one-letter final syllable is also covered — confirm against upstream.
        last_voc = darsil[-1:]
        if last_voc in voc_sola:
            return True

        return False

    def accentua_mot(self, pnum: int):
        """Mark the tonic syllable of part `pnum` using the terminations rule."""

        numsil = self.transpart_[pnum].size()

        if self.dotze_term(pnum):
            # Ends in vowel, vowel+s, -en or -in: paroxytone (plana) ->
            # stress the penultimate syllable.
            self.transpart_[pnum].transsil_[numsil - 2].tonica()
        else:
            # Otherwise oxytone (aguda) -> stress the last syllable.
            self.transpart_[pnum].transsil_[numsil - 1].tonica()

    def einagram(self, mot: str) -> bool:
        """Return True when `mot` is a grammatical tool word (atonic)."""

        if mot not in self.einesgram_:
            return False
        else:
            return True

    def troba_accent_tonic_mot(self):
        """Locate and mark the tonic syllable of every word part."""

        vocaccent = ACCENTED_VOWEL_CHARS

        for pnum in range(len(self.trossos_)):

            if not self.transpart_[pnum]:
                # Particle without a vowel: nothing to stress.
                continue

            numsil = self.transpart_[pnum].size()
            accent_grafic = False
            # Scan syllables looking for a written accent.
            for snum in range(numsil):
                sillaba = self.transpart_[pnum].transsil_[snum].get_text()
                pos = 0
                if any(accented_vowel in sillaba for accented_vowel in vocaccent):

                    last_sil = self.transpart_[pnum].transsil_[numsil - 1].get_text()
                    accent_grafic = True

                    if last_sil == "ment":
                        # Adverbs in -ment keep the accent of the base word
                        # plus a stress on the "-ment" itself.
                        self.transpart_[pnum].transsil_[snum].tonica()
                        self.transpart_[pnum].transsil_[numsil - 1].tonica()
                    else:
                        self.transpart_[pnum].transsil_[snum].tonica()

                    break

            if not accent_grafic:

                # No written accent:
                # - a monosyllable is tonic unless it is a grammatical tool
                #   word (tonic because it is a one-syllable lexematic
                #   morpheme);
                # - with more than one syllable, study the termination,
                #   discarding a possible final hyphen first;
                # - prefixes of one or two syllables only take secondary
                #   stress when they are tonic and really act as prefixes.

                if numsil == 1:
                    sillaba = self.transpart_[pnum].transsil_[0].get_text()
                    if (self.transpart_[pnum].transsil_[0].grafnuc_ == -1):
                        # Word part without a nucleus.
                        continue
                    elif self.einagram(sillaba):
                        # Atonic tool word: leave unstressed.
                        continue
                    else:
                        # We used to check for tonic prefix vs lexeme; no
                        # longer needed.
                        self.transpart_[pnum].transsil_[0].tonica()
                else:
                    # Not a monosyllable.

                    last_sil = self.transpart_[pnum].transsil_[numsil - 1].get_text()
                    # Not a reference: we want to keep the value of last_sil.

                    if last_sil == "ment":
                        # Tonic prefixes need no special treatment here.
                        if self.no_es_nom_ment(self.trossos_[pnum]) and self.no_es_nom_ment(self.motnorm_):
                            if numsil - 1 > 1:
                                self.transpart_[pnum].pop_back()  # Remove the last syllable ("ment")
                                self.accentua_mot(pnum)  # Accentuate the remaining syllables
                                darsil = Sillaba(last_sil)  # Re-create the "ment" syllable
                                self.transpart_[pnum].push_back(darsil)  # Re-append it and make it tonic
                                self.transpart_[pnum].transsil_[numsil - 1].tonica()
                                self.transpart_[pnum].transsil_[numsil - 1].grafnuc_ = 1
                                # grafnuc_ 1 is the "e" of "ment".
                            else:
                                self.transpart_[pnum].transsil_[0].tonica()
                                self.transpart_[pnum].transsil_[numsil - 1].tonica()
                    else:
                        # NOTE(review): this else is reconstructed as attaching
                        # to `last_sil == "ment"`; nouns in -ment would then
                        # receive no stress here — confirm against upstream.
                        self.accentua_mot(pnum)

    def sillaba_accentua_mot(self):
        """Run the full pipeline: split, syllabify, then locate the tonic syllable."""

        self.parteix_mot()
        self.troba_nuclis_mot()
        self.troba_accent_tonic_mot()

    def stress_tonic(self) -> str:
        """Return the word rewritten with a written accent on each tonic nucleus.

        Parts that already contain an accented vowel are copied verbatim.
        Unaccented tonic "e"/"o" receive è/ó-ò heuristically by syllable
        position; other vowels use the ACCENT_CHANGES map.
        """

        accent_changes = ACCENT_CHANGES

        all_vowels = VOWEL_CHARS
        accented_vowels = ACCENTED_VOWEL_CHARS
        unaccented_vowels = list(set(all_vowels) - set(accented_vowels))

        original_word = ""
        stressed_word = ""

        for i in range(len(self.transpart_)):

            word = self.transpart_[i].text_

            if any(ext in word for ext in accented_vowels):
                # Already carries a written accent: keep as-is.
                stressed_word = stressed_word + word
            else:
                for j in range(self.transpart_[i].size()):
                    sil = self.transpart_[i].transsil_[j]
                    sillaba_text = sil.get_text()
                    idxgrafnucli = sil.get_grafnuc()
                    graf_nucli = sil.get_text_at_index(idxgrafnucli)
                    is_tonic = sil.es_sil_tonica()

                    if is_tonic == "sí":
                        sillaba_list = list(sillaba_text)
                        if sillaba_list[idxgrafnucli] in unaccented_vowels:
                            if sillaba_list[idxgrafnucli] == "e":
                                if j == self.transpart_[i].size() - 1:
                                    # Oxytone: "è" is almost always correct.
                                    sillaba_list[idxgrafnucli] = "è"
                                elif j == self.transpart_[i].size() - 2:
                                    # Paroxytone: "è" is almost always correct.
                                    sillaba_list[idxgrafnucli] = "è"
                                else:
                                    # Proparoxytone: "è" is almost always correct.
                                    sillaba_list[idxgrafnucli] = "è"
                            elif sillaba_list[idxgrafnucli] == "o":
                                if j == self.transpart_[i].size() - 1:
                                    # Oxytone: "ó" is almost always correct.
                                    sillaba_list[idxgrafnucli] = "ó"
                                elif j == self.transpart_[i].size() - 2:
                                    # Paroxytone: "ò" is almost always correct.
                                    sillaba_list[idxgrafnucli] = "ò"
                                else:
                                    # Proparoxytone: "ò" is almost always correct.
                                    sillaba_list[idxgrafnucli] = "ò"
                            else:
                                sillaba_list[idxgrafnucli] = accent_changes[sillaba_list[idxgrafnucli]]

                        sillaba_text = "".join(sillaba_list)

                    stressed_word = stressed_word + sillaba_text

            original_word = original_word + word

        return stressed_word

    def stress_word(self) -> str:
        """Normalize, syllabify and accentuate the word; return the stressed spelling."""

        self.motnorm_ = self.normalize_word(self.motorig_)

        self.sillaba_accentua_mot()

        self.stressed_word = self.stress_tonic()

        return self.stressed_word


class CatalanPreProcessText:
    """Pre-processes text"""

    # The preprocessing is the same for all accents in this version (variable lang is not used)
class CatalanPreProcessText:
    """Pre-processes text before phonemization.

    Splits the input on break/punctuation characters, and for every token
    that is neither a break nor already in the lexicon, rewrites it with an
    explicit written accent via Transcripcio so that G2P stresses it
    correctly.
    """

    def __init__(self, lookup_phonemes, settings_values: dict, lang: str):
        # lookup_phonemes: callable returning phonemes or None when the token
        # is not in the lexicon.
        # settings_values: dict with the break/punctuation sets used to split.
        # lang: accent code; unused in this version (same preprocessing for
        # all accents).
        self.lookup_phonemes = lookup_phonemes
        self.settings_values = settings_values
        self.lang = lang

    def __call__(self, text: str) -> str:
        """Return `text` with out-of-lexicon tokens rewritten with explicit stress."""

        # All separators we must split on while keeping them in the output.
        breaks = [" "]
        breaks = breaks + list(self.settings_values["major_breaks"])
        breaks = breaks + list(self.settings_values["minor_breaks"])
        breaks = breaks + list(self.settings_values["word_breaks"])
        breaks = breaks + list(self.settings_values["begin_punctuations"])
        breaks = breaks + list(self.settings_values["end_punctuations"])

        tokens = [text.strip()]
        for char_break in breaks:
            # BUGFIX: the previous pattern f"(\{char_break})" only escaped the
            # first character, so multi-character breaks such as "..." became
            # "\." plus two wildcards. re.escape handles every break token,
            # whatever its length.
            pattern = f"({re.escape(char_break)})"
            tokens = [re.split(pattern, item) for item in tokens]
            # Flatten and drop the empty strings re.split produces.
            tokens = [item for sublist in tokens for item in sublist if item != ""]

        preprocessed_tokens = []
        for token in tokens:

            try:
                if token in breaks:
                    # Separators pass through untouched.
                    processed_token = token
                else:
                    is_in_lexicon = self.lookup_phonemes(token) is not None
                    if is_in_lexicon:
                        # Lexicon entries already carry stress information.
                        processed_token = token
                    else:
                        tr = Transcripcio(token)
                        processed_token = tr.stress_word()
            except Exception:
                # Best-effort: keep the original token when stressing fails.
                processed_token = token
                _LOGGER.debug(f"Unable to stress token {token}.")

            preprocessed_tokens.append(processed_token)

        processed_text = "".join(preprocessed_tokens)

        _LOGGER.debug(f"{text} preprocessed obtaining: {processed_text}")

        return processed_text


# Post-Process constants
# Only defined for "ca", "ca-ce" accent.
+# For the rest of accents, not post-processing is done + +PHONEME_VOWELS = ["'a", "'ɛ", "'ɔ", "'e", "'i", "'o", "'u", "ə", "i", "u"] +PHONEME_STRESSED_VOWELS = ["'a", "'ɛ", "'ɔ", "'e", "'i", "'o", "'u"] +PHONEME_HIGH_VOWELS = ["i", "u", "'i", "'u"] +PHONEME_NEUTRAL_VOWELS = ["ə"] + +# Post-Process functions and classes + +from gruut.text_processor import DATA_PROP, WordNode, BreakWordNode, BreakNode, PunctuationWordNode +from gruut.utils import sliding_window + +def identify_lang(nodes: typing.List[typing.Union[WordNode, BreakWordNode, BreakNode, PunctuationWordNode]]) -> str: + + from gruut.text_processor import WordNode + + try: + for node in nodes: + if isinstance(node, WordNode): + lang = node.lang + break + except: + lang = "ca" + + return lang + +def phoneme_is_vowel(phoneme: str) -> bool: + return phoneme in PHONEME_VOWELS + +def phoneme_is_stressed_vowel(phoneme: str) -> bool: + return phoneme in PHONEME_STRESSED_VOWELS + +def phoneme_is_unstressed_vowel(phoneme: str) -> bool: + return phoneme_is_vowel(phoneme) and not phoneme_is_stressed_vowel(phoneme) + +def phoneme_is_high_vowel(phoneme: str) -> bool: + return phoneme in PHONEME_HIGH_VOWELS + +def phoneme_is_high_stressed_vowel(phoneme: str) -> bool: + return phoneme_is_high_vowel(phoneme) and phoneme_is_stressed_vowel(phoneme) +def phoneme_is_high_unstressed_vowel(phoneme: str) -> bool: + return phoneme_is_high_vowel(phoneme) and phoneme_is_unstressed_vowel(phoneme) + +def phoneme_is_neutral_vowel(phoneme: str) -> bool: + return phoneme in PHONEME_NEUTRAL_VOWELS + +def fusion_if_needed(node_1: WordNode, node_2: WordNode, lang: str): + + if lang in ["ca", "ca-ce"]: + if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: + return + else: + + last_phoneme_word_1 = node_1.phonemes[-1] + first_phoneme_word_2 = node_2.phonemes[0] + + # Case 1: high unstressed vowel + stressed vowel of the same timbre + if phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and 
phoneme_is_high_stressed_vowel(first_phoneme_word_2) \ + and last_phoneme_word_1 == first_phoneme_word_2.replace("'", ""): + # Case [i] + [i'] = [i'] or [u] + [u'] = [u'] + node_1.phonemes.pop() + _LOGGER.debug(f"FUSION CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 2: high unstressed vowel + high unstressed vowel of the same timbre + elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_high_unstressed_vowel(first_phoneme_word_2) \ + and last_phoneme_word_1 == first_phoneme_word_2: + # Case [i] + [i] = [i] or [u] + [u] = [u] + node_1.phonemes.pop() + _LOGGER.debug(f"FUSION CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 3: neutral vowel + neutral vowel (except if any of the vowels is the proposition "a") + elif phoneme_is_neutral_vowel(last_phoneme_word_1) and phoneme_is_neutral_vowel(first_phoneme_word_2) \ + and node_1.text != "a" and node_2.text != "a": + node_1.phonemes.pop() + _LOGGER.debug(f"FUSION CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + else: + pass + +def elision_if_needed(node_1: WordNode, node_2: WordNode, lang: str): + + if lang in ["ca", "ca-ce"]: + + if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: + return + else: + + last_phoneme_word_1 = node_1.phonemes[-1] + first_phoneme_word_2 = node_2.phonemes[0] + + # Case 1: stressed vowel ['a], ['ɛ], ['e], ['o] or ['ɔ] + neutral vowel (except if any of the vowels is the proposition "a") + if (phoneme_is_stressed_vowel(last_phoneme_word_1) and not phoneme_is_high_vowel(last_phoneme_word_1)) \ + and (phoneme_is_neutral_vowel(first_phoneme_word_2) and node_2.text != "a"): + node_2.phonemes.pop(0) + _LOGGER.debug(f"ELISION CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 2: neutral vowel + stressed vowel ['a], ['ɛ], ['e], ['o] or ['ɔ] + elif phoneme_is_neutral_vowel(last_phoneme_word_1) \ + and 
(phoneme_is_stressed_vowel(first_phoneme_word_2) and not phoneme_is_high_vowel(first_phoneme_word_2)): + node_1.phonemes.pop() + _LOGGER.debug(f"ELISION CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + else: + pass + +def diphthong_if_needed(node_1: WordNode, node_2: WordNode, lang: str): + + if lang in ["ca", "ca-ce"]: + + if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: + return + else: + + last_phoneme_word_1 = node_1.phonemes[-1] + first_phoneme_word_2 = node_2.phonemes[0] + + # Case 1: stressed vowel + high unstressed vowel + if (phoneme_is_stressed_vowel(last_phoneme_word_1) and not phoneme_is_high_vowel(last_phoneme_word_1)) \ + and phoneme_is_high_unstressed_vowel(first_phoneme_word_2): + if first_phoneme_word_2 == "i": + # Case [stressed vowel] + [i] = [stressed vowel + j], stressed vowel not 'i or 'u + node_2.phonemes[0] = "j" + _LOGGER.debug(f"DIPTHONG CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + elif first_phoneme_word_2 == "u": + # Case [stressed vowel] + [u] = [stressed vowel + uw], stressed vowel not 'i or 'u + node_2.phonemes[0] = "uw" + _LOGGER.debug(f"DIPTHONG CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 2: high unstressed vowel + stressed vowel + elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_stressed_vowel(first_phoneme_word_2): + if last_phoneme_word_1 == "i" and first_phoneme_word_2 not in ["'i"] and node_1.text in ["hi", "ho", "i"]: + # Case [i] + [stressed] = [y + stressed vowel], i only from "hi", "ho" or "i" + node_1.phonemes[-1] = "y" + _LOGGER.debug(f"DIPTHONG CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + elif last_phoneme_word_1 == "u" and first_phoneme_word_2 not in ["'u"] and node_1.text in ["hi", "ho", "i"]: + # Case [u] + [stressed] = [u + stressed vowel], i only from "hi", "ho" or "i" + pass + + # Case 3: unstressed vowel + high unstressed vowel + elif 
phoneme_is_neutral_vowel(last_phoneme_word_1) and phoneme_is_high_unstressed_vowel(first_phoneme_word_2): + if first_phoneme_word_2 == "i": + # Case [neutral vowel] + [i] = [neutral vowel + j] + node_2.phonemes[0] = "j" + _LOGGER.debug(f"DIPTHONG CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + elif first_phoneme_word_2 == "u": + # Case [neutral vowel] + [u] = [neutral vowel + uw] + node_2.phonemes[0] = "uw" + _LOGGER.debug(f"DIPTHONG CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 4: unstressed vowel + high unstressed vowel + elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_neutral_vowel(first_phoneme_word_2): + pass + else: + pass + +def ca_post_process_sentence( + graph: GraphType, sent_node: SentenceNode, settings: TextProcessorSettings +): + + # Create a list of relevant nodes + nodes = [] + for dfs_node in nx.dfs_preorder_nodes(graph, sent_node.node): + + node = graph.nodes[dfs_node][DATA_PROP] + + if not graph.out_degree(dfs_node) == 0: + # Only leave + continue + + node = graph.nodes[dfs_node][DATA_PROP] + if isinstance(node, WordNode): + nodes.append(typing.cast(WordNode, node)) + if isinstance(node, BreakWordNode): + nodes.append(typing.cast(BreakWordNode, node)) + if isinstance(node, BreakNode): + nodes.append(typing.cast(BreakNode, node)) + if isinstance(node, PunctuationWordNode): + nodes.append(typing.cast(PunctuationWordNode, node)) + + lang = identify_lang(nodes) + + # HACK + # Training corpora includes an invalid sequence of phonemes: l ʎ l + # We fix that here, in the next iteration will be properly solved + phonemes_to_fix = "l ʎ l" + fixed_phonemes = "l l" + for node in nodes: + + if node is None: + continue + + if isinstance(node, WordNode): + if not (node.text and node.phonemes): + continue + phonemes_text = " ".join(node.phonemes) + if phonemes_to_fix in phonemes_text: + phonemes_text = phonemes_text.replace(phonemes_to_fix, fixed_phonemes) + 
node.phonemes = phonemes_text.split(" ") + _LOGGER.debug(f"FIX: phoneme sequence '{phonemes_to_fix}' fixed at {node.text}. Fixed transcription: {node.phonemes}") + + # Create a list of contiguous word nodes + contiguous_word_nodes = [] + for node_1, node_2 in sliding_window(nodes, 2): + + if node_1 is None or node_2 is None: + continue + + if isinstance(node_1, WordNode) and isinstance(node_2, WordNode): + if not (node_1.text and node_1.phonemes and node_2.text and node_2.phonemes): + continue + contiguous_word_nodes.append([node_1, node_2]) + + for (node_1, node_2) in contiguous_word_nodes: + + diphthong_if_needed(node_1, node_2, lang) + fusion_if_needed(node_1, node_2, lang) + elision_if_needed(node_1, node_2, lang) + + +# Settings + +def get_ca_settings(lang_dir=None, **settings_args) -> TextProcessorSettings: + + """Create settings for Catalan""" + + try: + lang = str(lang_dir).split("/")[-1] + main_lang, lang_version = lang.split("-") + lang = f"{main_lang.lower()}-{lang_version.upper()}" + except: + lang = "ca" + + lookup_phonemes = settings_args["lookup_phonemes"] + + settings_values = { + "major_breaks": {".", "?", "!"}, + "minor_breaks": {",", ";", ":", "..."}, + "word_breaks": {"_"}, + "begin_punctuations": {'"', "“", "«", "[", "(", "<", "¡", "¿"}, + "end_punctuations": {'"', "”", "»", "]", ")", ">", "!", "?"}, + "default_currency": "EUR", + "default_date_format": InterpretAsFormat.DATE_DMY, + "replacements": [ + ("’", "'"), # normalize apostrophe + ("'", ""), # remove orthographic apostrophe + ("-", ""), + ("l·l", "l"), + ], + } + + settings_args = { + **settings_values, + "pre_process_text": CatalanPreProcessText(lookup_phonemes, settings_values, lang), + "post_process_sentence": ca_post_process_sentence, + **settings_args, + } + + return TextProcessorSettings(lang="ca", **settings_args) + +# ----------------------------------------------------------------------------- class DelayedGraphemesToPhonemes: """Grapheme to phoneme guesser that loads on first 
use""" @@ -897,4 +2328,4 @@ def __call__( self.phonemizer = SqlitePhonemizer(db_conn=db_conn, **self.phonemizer_args) assert self.phonemizer is not None - return self.phonemizer(word, role=role, do_transforms=do_transforms) + return self.phonemizer(word, role=role, do_transforms=do_transforms) \ No newline at end of file diff --git a/setup.py b/setup.py index b24952b..78c4439 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ # Create language-specific extras for lang in [ "ar", + "ca", "cs", "de", "es",