Skip to content

Commit

Permalink
Merge pull request #43 from fedecosta/master
Browse files Browse the repository at this point in the history
Adding Catalan (4 accents)
  • Loading branch information
synesthesiam authored Jul 3, 2024
2 parents 2da911c + ce86ad5 commit e677b33
Show file tree
Hide file tree
Showing 28 changed files with 1,947 additions and 5 deletions.
Binary file added data/ca-ba/g2p/model.crf
Binary file not shown.
50 changes: 50 additions & 0 deletions data/ca-ba/language.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---

# Language settings for Balear Catalan (ca-ba).
# All paths are resolved relative to ${config_dir} through the custom !env tag
# (presumably expanded by the loader — confirm against the consumer).

language:
  name: "Balear Catalan"
  code: "ca-ba"
  phonemes: !env "${config_dir}/phonemes.txt"
  keep_stress: true

lexicon: !env "${config_dir}/lexicon.db"

g2p:
  # NOTE(review): the commit that adds this file ships g2p/model.crf, not
  # g2p.fst — confirm which model file the loader actually expects.
  model: !env "${config_dir}/g2p.fst"

symbols:
  casing: "lower"
  # Integers and decimals with either "," or "." as separator, optional sign.
  number_regex: "^-?\\d+([,.]\\d+)*$"
  token_split: "\\s+"
  token_join: " "
  minor_breaks:
    - ","
    - ":"
    - ";"
    - "..."
  major_breaks:
    - "."
    - "?"
    - "!"
  # Regex pattern -> replacement. Later rules see the output of earlier ones
  # if the consumer applies them in order (assumed — verify).
  replace:
    # Drop brackets, angle brackets, parentheses and double quotes.
    "[\\<\\>\\(\\)\\[\\]\"]+": ""
    # Apostrophes at a non-word boundary are treated as quotation marks.
    "\\B'": "\""
    "'\\B": "\""
    # NOTE(review): this key was an empty string, which as a pattern matches
    # at every position; restored as the typographic apostrophe — confirm.
    "’": "'"
    "'": ""
    "-": ""
    # Geminate l (ela geminada) is collapsed to a single l.
    "l·l": "l"
  punctuations:
    - "\""
    # NOTE(review): the next three entries were empty strings; restored as
    # typographic double quotes — confirm the exact characters.
    - "„"
    - "“"
    - "”"
    - "«"
    - "»"
    - ","
    - ":"
    - ";"
    - "."
    - "?"
    - "¿"
    - "!"
    - "¡"
Binary file added data/ca-ba/lexicon.db
Binary file not shown.
44 changes: 44 additions & 0 deletions data/ca-ba/phonemes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan phonemes

p [p]ala
b [b]ala
t [t]ela
d [d]onar
k [k]ala
ɡ [g]ala
m [m]ala
ɲ a[ny]
β aca[b]a
ð ca[d]a
ɣ ama[g]ar
f [f]als
v a[f]ganès
s [s]ala
z ca[s]a
ʃ [x]oc
ʒ mà[g]ic
tʃ co[tx]e
dʒ me[tg]e
l [l]íquid
ʎ [ll]amp
r ca[rr]o
ɾ ca[r]a
w ve[u]en
uw ca[u]re
j ca[i]re
y [i]a[i]a
n [n]ena
ŋ pi[n]güí
ts po[ts]er
dz do[tz]e

# Vowels
i r[i]c
e c[e]c
ɛ s[e]c
a s[a]c
ɔ f[o]c
o s[ó]c
u s[u]c
ə [a]mor
Binary file added data/ca-ce/g2p/model.crf
Binary file not shown.
50 changes: 50 additions & 0 deletions data/ca-ce/language.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---

# Language settings for Central Catalan (ca-ce).
# All paths are resolved relative to ${config_dir} through the custom !env tag
# (presumably expanded by the loader — confirm against the consumer).

language:
  name: "Central Catalan"
  code: "ca-ce"
  phonemes: !env "${config_dir}/phonemes.txt"
  keep_stress: true

lexicon: !env "${config_dir}/lexicon.db"

g2p:
  # NOTE(review): the commit that adds this file ships g2p/model.crf, not
  # g2p.fst — confirm which model file the loader actually expects.
  model: !env "${config_dir}/g2p.fst"

symbols:
  casing: "lower"
  # Integers and decimals with either "," or "." as separator, optional sign.
  number_regex: "^-?\\d+([,.]\\d+)*$"
  token_split: "\\s+"
  token_join: " "
  minor_breaks:
    - ","
    - ":"
    - ";"
    - "..."
  major_breaks:
    - "."
    - "?"
    - "!"
  # Regex pattern -> replacement. Later rules see the output of earlier ones
  # if the consumer applies them in order (assumed — verify).
  replace:
    # Drop brackets, angle brackets, parentheses and double quotes.
    "[\\<\\>\\(\\)\\[\\]\"]+": ""
    # Apostrophes at a non-word boundary are treated as quotation marks.
    "\\B'": "\""
    "'\\B": "\""
    # NOTE(review): this key was an empty string, which as a pattern matches
    # at every position; restored as the typographic apostrophe — confirm.
    "’": "'"
    "'": ""
    "-": ""
    # Geminate l (ela geminada) is collapsed to a single l.
    "l·l": "l"
  punctuations:
    - "\""
    # NOTE(review): the next three entries were empty strings; restored as
    # typographic double quotes — confirm the exact characters.
    - "„"
    - "“"
    - "”"
    - "«"
    - "»"
    - ","
    - ":"
    - ";"
    - "."
    - "?"
    - "¿"
    - "!"
    - "¡"
Binary file added data/ca-ce/lexicon.db
Binary file not shown.
44 changes: 44 additions & 0 deletions data/ca-ce/phonemes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan phonemes

p [p]ala
b [b]ala
t [t]ela
d [d]onar
k [k]ala
ɡ [g]ala
m [m]ala
ɲ a[ny]
β aca[b]a
ð ca[d]a
ɣ ama[g]ar
f [f]als
v a[f]ganès
s [s]ala
z ca[s]a
ʃ [x]oc
ʒ mà[g]ic
tʃ co[tx]e
dʒ me[tg]e
l [l]íquid
ʎ [ll]amp
r ca[rr]o
ɾ ca[r]a
w ve[u]en
uw ca[u]re
j ca[i]re
y [i]a[i]a
n [n]ena
ŋ pi[n]güí
ts po[ts]er
dz do[tz]e

# Vowels
i r[i]c
e c[e]c
ɛ s[e]c
a s[a]c
ɔ f[o]c
o s[ó]c
u s[u]c
ə [a]mor
Binary file added data/ca-no/g2p/model.crf
Binary file not shown.
50 changes: 50 additions & 0 deletions data/ca-no/language.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---

# Language settings for Nord-Occidental Catalan (ca-no).
# All paths are resolved relative to ${config_dir} through the custom !env tag
# (presumably expanded by the loader — confirm against the consumer).

language:
  name: "Nord-Occidental Catalan"
  code: "ca-no"
  phonemes: !env "${config_dir}/phonemes.txt"
  keep_stress: true

lexicon: !env "${config_dir}/lexicon.db"

g2p:
  # NOTE(review): the commit that adds this file ships g2p/model.crf, not
  # g2p.fst — confirm which model file the loader actually expects.
  model: !env "${config_dir}/g2p.fst"

symbols:
  casing: "lower"
  # Integers and decimals with either "," or "." as separator, optional sign.
  number_regex: "^-?\\d+([,.]\\d+)*$"
  token_split: "\\s+"
  token_join: " "
  minor_breaks:
    - ","
    - ":"
    - ";"
    - "..."
  major_breaks:
    - "."
    - "?"
    - "!"
  # Regex pattern -> replacement. Later rules see the output of earlier ones
  # if the consumer applies them in order (assumed — verify).
  replace:
    # Drop brackets, angle brackets, parentheses and double quotes.
    "[\\<\\>\\(\\)\\[\\]\"]+": ""
    # Apostrophes at a non-word boundary are treated as quotation marks.
    "\\B'": "\""
    "'\\B": "\""
    # NOTE(review): this key was an empty string, which as a pattern matches
    # at every position; restored as the typographic apostrophe — confirm.
    "’": "'"
    "'": ""
    "-": ""
    # Geminate l (ela geminada) is collapsed to a single l.
    "l·l": "l"
  punctuations:
    - "\""
    # NOTE(review): the next three entries were empty strings; restored as
    # typographic double quotes — confirm the exact characters.
    - "„"
    - "“"
    - "”"
    - "«"
    - "»"
    - ","
    - ":"
    - ";"
    - "."
    - "?"
    - "¿"
    - "!"
    - "¡"
Binary file added data/ca-no/lexicon.db
Binary file not shown.
44 changes: 44 additions & 0 deletions data/ca-no/phonemes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan phonemes

p [p]ala
b [b]ala
t [t]ela
d [d]onar
k [k]ala
ɡ [g]ala
m [m]ala
ɲ a[ny]
β aca[b]a
ð ca[d]a
ɣ ama[g]ar
f [f]als
v a[f]ganès
s [s]ala
z ca[s]a
ʃ [x]oc
ʒ mà[g]ic
tʃ co[tx]e
dʒ me[tg]e
l [l]íquid
ʎ [ll]amp
r ca[rr]o
ɾ ca[r]a
w ve[u]en
uw ca[u]re
j ca[i]re
y [i]a[i]a
n [n]ena
ŋ pi[n]güí
ts po[ts]er
dz do[tz]e

# Vowels
i r[i]c
e c[e]c
ɛ s[e]c
a s[a]c
ɔ f[o]c
o s[ó]c
u s[u]c
ə [a]mor
Binary file added data/ca-va/g2p/model.crf
Binary file not shown.
50 changes: 50 additions & 0 deletions data/ca-va/language.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---

# Language settings for Valencià Catalan (ca-va).
# All paths are resolved relative to ${config_dir} through the custom !env tag
# (presumably expanded by the loader — confirm against the consumer).

language:
  name: "Valencià Catalan"
  code: "ca-va"
  phonemes: !env "${config_dir}/phonemes.txt"
  keep_stress: true

lexicon: !env "${config_dir}/lexicon.db"

g2p:
  # NOTE(review): the commit that adds this file ships g2p/model.crf, not
  # g2p.fst — confirm which model file the loader actually expects.
  model: !env "${config_dir}/g2p.fst"

symbols:
  casing: "lower"
  # Integers and decimals with either "," or "." as separator, optional sign.
  number_regex: "^-?\\d+([,.]\\d+)*$"
  token_split: "\\s+"
  token_join: " "
  minor_breaks:
    - ","
    - ":"
    - ";"
    - "..."
  major_breaks:
    - "."
    - "?"
    - "!"
  # Regex pattern -> replacement. Later rules see the output of earlier ones
  # if the consumer applies them in order (assumed — verify).
  replace:
    # Drop brackets, angle brackets, parentheses and double quotes.
    "[\\<\\>\\(\\)\\[\\]\"]+": ""
    # Apostrophes at a non-word boundary are treated as quotation marks.
    "\\B'": "\""
    "'\\B": "\""
    # NOTE(review): this key was an empty string, which as a pattern matches
    # at every position; restored as the typographic apostrophe — confirm.
    "’": "'"
    "'": ""
    "-": ""
    # Geminate l (ela geminada) is collapsed to a single l.
    "l·l": "l"
  punctuations:
    - "\""
    # NOTE(review): the next three entries were empty strings; restored as
    # typographic double quotes — confirm the exact characters.
    - "„"
    - "“"
    - "”"
    - "«"
    - "»"
    - ","
    - ":"
    - ";"
    - "."
    - "?"
    - "¿"
    - "!"
    - "¡"
Binary file added data/ca-va/lexicon.db
Binary file not shown.
44 changes: 44 additions & 0 deletions data/ca-va/phonemes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# https://en.wikipedia.org/wiki/Catalan_phonology
# Catalan phonemes

p [p]ala
b [b]ala
t [t]ela
d [d]onar
k [k]ala
ɡ [g]ala
m [m]ala
ɲ a[ny]
β aca[b]a
ð ca[d]a
ɣ ama[g]ar
f [f]als
v a[f]ganès
s [s]ala
z ca[s]a
ʃ [x]oc
ʒ mà[g]ic
tʃ co[tx]e
dʒ me[tg]e
l [l]íquid
ʎ [ll]amp
r ca[rr]o
ɾ ca[r]a
w ve[u]en
uw ca[u]re
j ca[i]re
y [i]a[i]a
n [n]ena
ŋ pi[n]güí
ts po[ts]er
dz do[tz]e

# Vowels
i r[i]c
e c[e]c
ɛ s[e]c
a s[a]c
ɔ f[o]c
o s[ó]c
u s[u]c
ə [a]mor
1 change: 1 addition & 0 deletions gruut-lang-ca/LANGUAGE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ca-ce Catalan
3 changes: 3 additions & 0 deletions gruut-lang-ca/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# gruut Catalan

Language-specific files for Catalan (ca) in [gruut](https://github.com/rhasspy/gruut)
1 change: 1 addition & 0 deletions gruut-lang-ca/gruut_lang_ca/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.0.0
Loading

0 comments on commit e677b33

Please sign in to comment.