Skip to content

Commit

Permalink
improve unicode decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
ilude committed Apr 22, 2024
1 parent f1df184 commit a4c92b3
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion app/models/utils.py
Original file line number Diff line number Diff line change
Expand Up
@@ -54,7 +54,10 @@ def from_int(x: Any) -> int:
return x

def normalize_text(text: str) -> str:
- return re.sub(r'\s+|\n|\r', ' ', unidecode.unidecode(text)).strip()
+ text = unidecode.unidecode(text)
+ text = re.sub(r'“|”', '"', text)
+ text = re.sub(r'’|‘', "'", text)
+ return re.sub(r'\s+|\n|\r', ' ',text).strip()

def calculate_sha1_hash(value: str) -> str:
sha1 = hashlib.sha1()
Expand Down

0 comments on commit a4c92b3

Please sign in to comment.