Skip to content
This repository has been archived by the owner on May 12, 2024. It is now read-only.

Commit

Permalink
commas and spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Oct 30, 2023
1 parent 4cb2449 commit 5298b6d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 6 deletions.
4 changes: 2 additions & 2 deletions baln/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from multiprocessing import Process, freeze_support

VERSION="0.3.48"
NOTES="multi-lingual UD"
VERSION="0.3.49"
NOTES="comma and spaces"

#################### OPTIONS ################################

Expand Down
25 changes: 22 additions & 3 deletions baln/ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,16 @@ def handler(word):
def handler__PRON(word):
# get the features
feats = parse_feats(word)
person = str(feats.get("Person", 1))

if person == "0":
person = '4'

# parse
return (handler(word)+"-"+
feats.get("PronType", "Int")+"-"+
feats.get("Case", "Acc").replace(",", "")+"-"+
feats.get("Number", "S")[0]+str(feats.get("Person", 1)))
feats.get("Number", "S")[0]+person)

def handler__DET(word):
# get the features
Expand All @@ -138,6 +143,9 @@ def handler__ADJ(word):
case = feats.get("Case", "").replace(",", "")
number = feats.get("Number", "S")[0]
person = str(feats.get("Person", 1))
if person == "0":
person = '4'

return handler(word)+stringify_feats(deg, case, number, person)

def handler__NOUN(word):
Expand Down Expand Up @@ -175,7 +183,11 @@ def handler__VERB(word):
# append tense
aspect = feats.get("Aspect", "")
mood = feats.get("Mood", "")
person = feats.get("Person", "")
person = str(feats.get("Person", ""))

if person == "0":
person = '4'

tense = feats.get("Tense", "")
polarity = feats.get("Polarity", "")
polite = feats.get("Polite", "")
Expand Down Expand Up @@ -326,14 +338,21 @@ def parse_sentence(sentence, delimiter=".", special_forms=[], lang="$nospecial$"
actual_indicies.append(root) # TODO janky but if anybody refers to a skipped
# word they are root now.
# normal parsing
elif mor_word or "xbxxx" in word.text.strip():
elif mor_word or word.text.strip() in ["xbxxx", '‡', '„']:
if word.text.strip() == '‡':
mor_word = "cm|begin"
elif word.text.strip() == '„':
mor_word = "cm|end"


# special forms: recall the special form marker is xbxxx
if "xbxxx" in word.text.strip():
form = special_forms.pop(0)
mor.append(f"x|{form.strip()}")
special_form_ids.append(word.id)
else:
mor.append(mor_word)

# +1 because we are 1-indexed
# and .head is also 1-indexed already
deprel = word.deprel.upper()
Expand Down
2 changes: 1 addition & 1 deletion meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "batchalign" %}
{% set version = "0.3.48" %}
{% set version = "0.3.49" %}

package:
name: {{ name }}
Expand Down

0 comments on commit 5298b6d

Please sign in to comment.