From 11b3a396be4a2b70b72549fc9da51ef0a4f07aa4 Mon Sep 17 00:00:00 2001
From: thatDudo <thatdudo@proton.me>
Date: Mon, 13 Nov 2023 15:44:51 +0100
Subject: [PATCH 1/5] Wrap hyphenator exceptions (Closes #500)

---
 manga_translator/rendering/text_render.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py
index b0c13b1e..1278adbb 100644
--- a/manga_translator/rendering/text_render.py
+++ b/manga_translator/rendering/text_render.py
@@ -352,7 +352,10 @@ def select_hyphenator(lang: str):
                 break
         else:
             return None
-    return Hyphenator(lang)
+    try:
+        return Hyphenator(lang)
+    except Exception:
+        return None
 
 # @functools.lru_cache(maxsize = 1024, typed = True)
 def get_char_offset_x(font_size: int, cdpt: str):
@@ -403,8 +406,11 @@ def calc_horizontal(font_size: int, text: str, max_width: int, max_height: int,
     hyphenator = select_hyphenator(language)
     for i, word in enumerate(words):
         new_syls = []
-        if hyphenator:
-            new_syls = hyphenator.syllables(word)
+        if hyphenator and len(word) <= 100:
+            try:
+                new_syls = hyphenator.syllables(word)
+            except Exception:
+                new_syls = []
         if len(new_syls) == 0:
             if len(word) <= 3:
                 new_syls = [word]

From 7d39909124b88003873e54e63d06b852fb244376 Mon Sep 17 00:00:00 2001
From: thatDudo <thatdudo@proton.me>
Date: Thu, 16 Nov 2023 21:11:18 +0100
Subject: [PATCH 2/5] Fix waifu2x

---
 manga_translator/translators/chatgpt.py | 7 +++++--
 manga_translator/upscaling/waifu2x.py   | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py
index aa0fed26..ffca1349 100644
--- a/manga_translator/translators/chatgpt.py
+++ b/manga_translator/translators/chatgpt.py
@@ -1,6 +1,9 @@
 import re
-import openai
-import openai.error
+try:
+    import openai
+    import openai.error
+except ImportError:
+    openai = None
 import asyncio
 import time
 from typing import List, Dict
diff --git a/manga_translator/upscaling/waifu2x.py b/manga_translator/upscaling/waifu2x.py
index 8ea7849e..5c654b3c 100644
--- a/manga_translator/upscaling/waifu2x.py
+++ b/manga_translator/upscaling/waifu2x.py
@@ -51,7 +51,7 @@
 # https://github.com/nihui/waifu2x-ncnn-vulkan
 class Waifu2xUpscaler(OfflineUpscaler): # ~2GB of vram
     _MODEL_MAPPING = model_mapping
-    _VALID_UPSCALE_RATIOS = [1, 2, 4, 8, 16, 32]
+    _VALID_UPSCALE_RATIOS = [2, 4, 8, 16, 32]
 
     def __init__(self, *args, **kwargs):
         os.makedirs(self.model_dir, exist_ok=True)

From 5cec578cebd7b68294aab95faa27b5bbd80ef7d0 Mon Sep 17 00:00:00 2001
From: thatDudo <thatdudo@proton.me>
Date: Fri, 17 Nov 2023 22:56:09 +0100
Subject: [PATCH 3/5] Set correct openai version (0.28) in requirements.txt

Pin openai to 0.28 for now. Before moving to a newer openai release, we
need to update the google translator to use the newer httpx version that
openai relies on.
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 61818977..8b213d0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,7 +29,7 @@ websockets
 protobuf
 ctranslate2
 colorama
-openai
+openai==0.28
 open_clip_torch
 safetensors
 pandas

From 45001cc6eb7a02479b80deb7a0df37d8a2a27f42 Mon Sep 17 00:00:00 2001
From: thatDudo <thatdudo@proton.me>
Date: Sun, 19 Nov 2023 22:30:27 +0100
Subject: [PATCH 4/5] Filter after merge and skip text that's already in the
 target language

---
 README.md                              |  7 ++--
 README_CN.md                           |  7 ++--
 manga_translator/args.py               |  1 +
 manga_translator/manga_translator.py   | 48 ++++++++++++++------------
 manga_translator/rendering/__init__.py |  2 +-
 manga_translator/utils/generic.py      |  2 +-
 manga_translator/utils/textblock.py    | 21 ++++++-----
 7 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 9bca7646..539af5e7 100644
--- a/README.md
+++ b/README.md
@@ -398,7 +398,7 @@ THA: Thai
 --detector {default,ctd,craft,none}          Text detector used for creating a text mask from an
                                              image, DO NOT use craft for manga, it's not designed
                                              for it
---ocr {48px,32px,48px_ctc}                   Optical character recognition (OCR) model to use
+--ocr {32px,48px,48px_ctc}                   Optical character recognition (OCR) model to use
 --inpainter {default,lama_large,lama_mpe,sd,none,original}
                                              Inpainting model to use
 --upscaler {waifu2x,esrgan,4xultrasharp}     Upscaler to use. --upscale-ratio has to be set for it
@@ -431,10 +431,11 @@ THA: Thai
 --box-threshold BOX_THRESHOLD                Threshold for bbox generation
 --text-threshold TEXT_THRESHOLD              Threshold for text detection
 --min-text-length MIN_TEXT_LENGTH            Minimum text length of a text region
+--no-text-lang-skip                          Dont skip text that is seemingly already in the target
+                                             language.
 --inpainting-size INPAINTING_SIZE            Size of image used for inpainting (too large will
                                              result in OOM)
---inpainting-precision INPAINTING_PRECISION  Inpainting precision for lama, 
-                                             use bf16 while you can.
+--inpainting-precision {fp32,fp16,bf16}      Inpainting precision for lama, use bf16 while you can.
 --colorization-size COLORIZATION_SIZE        Size of image used for colorization. Set to -1 to use
                                              full image size
 --denoise-sigma DENOISE_SIGMA                Used by colorizer and affects color strength, range
diff --git a/README_CN.md b/README_CN.md
index 204582a5..e5a48551 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -130,7 +130,7 @@ THA: Thai
 --detector {default,ctd,craft,none}          Text detector used for creating a text mask from an
                                              image, DO NOT use craft for manga, it's not designed
                                              for it
---ocr {48px,32px,48px_ctc}                   Optical character recognition (OCR) model to use
+--ocr {32px,48px,48px_ctc}                   Optical character recognition (OCR) model to use
 --inpainter {default,lama_large,lama_mpe,sd,none,original}
                                              Inpainting model to use
 --upscaler {waifu2x,esrgan,4xultrasharp}     Upscaler to use. --upscale-ratio has to be set for it
@@ -163,10 +163,11 @@ THA: Thai
 --box-threshold BOX_THRESHOLD                Threshold for bbox generation
 --text-threshold TEXT_THRESHOLD              Threshold for text detection
 --min-text-length MIN_TEXT_LENGTH            Minimum text length of a text region
+--no-text-lang-skip                          Dont skip text that is seemingly already in the target
+                                             language.
 --inpainting-size INPAINTING_SIZE            Size of image used for inpainting (too large will
                                              result in OOM)
---inpainting-precision INPAINTING_PRECISION  Inpainting precision for lama, 
-                                             use bf16 while you can.
+--inpainting-precision {fp32,fp16,bf16}      Inpainting precision for lama, use bf16 while you can.
 --colorization-size COLORIZATION_SIZE        Size of image used for colorization. Set to -1 to use
                                              full image size
 --denoise-sigma DENOISE_SIGMA                Used by colorizer and affects color strength, range
diff --git a/manga_translator/args.py b/manga_translator/args.py
index fe757ad6..e7841218 100644
--- a/manga_translator/args.py
+++ b/manga_translator/args.py
@@ -125,6 +125,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str:
 parser.add_argument('--box-threshold', default=0.7, type=float, help='Threshold for bbox generation')
 parser.add_argument('--text-threshold', default=0.5, type=float, help='Threshold for text detection')
 parser.add_argument('--min-text-length', default=0, type=int, help='Minimum text length of a text region')
+parser.add_argument('--no-text-lang-skip', action='store_true', help='Dont skip text that is seemingly already in the target language.')
 parser.add_argument('--inpainting-size', default=2048, type=int, help='Size of image used for inpainting (too large will result in OOM)')
 parser.add_argument('--inpainting-precision', default='fp32', type=str, help='Inpainting precision for lama, use bf16 while you can.', choices=['fp32', 'fp16', 'bf16'])
 parser.add_argument('--colorization-size', default=576, type=int, help='Size of image used for colorization. Set to -1 to use full image size')
diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py
index 420a0f00..b81a9d1d 100644
--- a/manga_translator/manga_translator.py
+++ b/manga_translator/manga_translator.py
@@ -5,7 +5,7 @@
 import cv2
 from aiohttp.web_middlewares import middleware
 from omegaconf import OmegaConf
-import py3langid as langid
+import langcodes
 import requests
 import os
 import re
@@ -469,15 +469,9 @@ async def _run_detection(self, ctx: Context):
     async def _run_ocr(self, ctx: Context):
         textlines = await dispatch_ocr(ctx.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose)
 
-        # Filter out regions by original text
         new_textlines = []
         for textline in textlines:
-            text = textline.text
-            if (ctx.filter_text and re.search(ctx.filter_text, text)) \
-                    or not is_valuable_text(text):
-                if text.strip():
-                    logger.info(f'Filtered out: {text}')
-            else:
+            if textline.text.strip():
                 if ctx.font_color_fg:
                     textline.fg_r, textline.fg_g, textline.fg_b = ctx.font_color_fg
                 if ctx.font_color_bg:
@@ -488,12 +482,19 @@ async def _run_ocr(self, ctx: Context):
     async def _run_textline_merge(self, ctx: Context):
         text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0],
                                                      verbose=self.verbose)
-        text_regions = [region for region in text_regions if len(''.join(region.text)) >= ctx.min_text_length]
-
+        new_text_regions = []
         for region in text_regions:
-            if ctx.font_color_fg or ctx.font_color_bg:
-                if ctx.font_color_bg:
-                    region.adjust_bg_color = False
+            if (len(region.text) < ctx.min_text_length  # shorter than --min-text-length
+                    or not is_valuable_text(region.text)
+                    or (not ctx.no_text_lang_skip and langcodes.tag_distance(region.source_lang, ctx.target_lang) == 0)):  # seemingly already in target language
+                if region.text.strip():
+                    logger.info(f'Filtered out: {region.text}')
+            else:
+                if ctx.font_color_fg or ctx.font_color_bg:
+                    if ctx.font_color_bg:
+                        region.adjust_bg_color = False
+                new_text_regions.append(region)
+        text_regions = new_text_regions
 
         # Sort ctd (comic text detector) regions left to right. Otherwise right to left.
         # Sorting will improve text translation quality.
@@ -501,10 +502,11 @@ async def _run_textline_merge(self, ctx: Context):
         return text_regions
 
     async def _run_text_translation(self, ctx: Context):
-        translated_sentences = await dispatch_translation(ctx.translator,
-                                                          [region.get_text() for region in ctx.text_regions],
-                                                          ctx.use_mtpe,
-                                                          ctx, 'cpu' if self._cuda_limited_memory else self.device)
+        translated_sentences = \
+            await dispatch_translation(ctx.translator,
+                                       [region.text for region in ctx.text_regions],
+                                       ctx.use_mtpe,
+                                       ctx, 'cpu' if self._cuda_limited_memory else self.device)
 
         for region, translation in zip(ctx.text_regions, translated_sentences):
             if ctx.uppercase:
@@ -521,8 +523,8 @@ async def _run_text_translation(self, ctx: Context):
         for region in ctx.text_regions:
             # TODO: Maybe print reasons for filtering
             if not ctx.translator == 'none' and (region.translation.isnumeric() \
-                                                 or ctx.filter_text and re.search(ctx.filter_text, region.translation)
-                                                 or not ctx.translator == 'original' and region.get_text().lower().strip() == region.translation.lower().strip()):
+                    or ctx.filter_text and re.search(ctx.filter_text, region.translation)
+                    or not ctx.translator == 'original' and region.text.lower().strip() == region.translation.lower().strip()):
                 if region.translation.strip():
                     logger.info(f'Filtered out: {region.translation}')
             else:
@@ -618,7 +620,7 @@ def identify_colors(fg_rgb: List[int]):
 
             s += f'\n-- {i + 1} --\n'
             s += f'color: #{color_id}: {color_name} (fg, bg: {rgb2hex(*fore)} {rgb2hex(*back)})\n'
-            s += f'text:  {region.get_text()}\n'
+            s += f'text:  {region.text}\n'
             s += f'trans: {region.translation}\n'
             for line in region.lines:
                 s += f'coords: {list(line.ravel())}\n'
@@ -743,7 +745,7 @@ async def _run_text_translation(self, ctx: Context):
             requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={
                 'task_id': self._task_id,
                 'nonce': self.nonce,
-                'texts': [r.get_text() for r in text_regions],
+                'texts': [r.text for r in text_regions],
                 'translations': [r.translation for r in text_regions],
             }, timeout=20)
 
@@ -1225,7 +1227,7 @@ def format_translate(self, ctx: Context, return_image: bool):
                 trans = {key: value[i] for key, value in ctx['translations'].items()}
             else:
                 trans = {}
-            trans["originalText"] = text_regions[i].get_text()
+            trans["originalText"] = text_regions[i].text
             if inpaint is not None:
                 overlay = inpaint[minY:maxY, minX:maxX]
 
@@ -1248,7 +1250,7 @@ def format_translate(self, ctx: Context, return_image: bool):
                     'fg': color1.tolist(),
                     'bg': color2.tolist()
                 },
-                'language': langid.classify(text_regions[i].get_text())[0],
+                'language': text_regions[i].source_lang,
                 'background': background
             })
         if return_image and ctx.img_colorized is not None:
diff --git a/manga_translator/rendering/__init__.py b/manga_translator/rendering/__init__.py
index d088db33..111ef0dd 100644
--- a/manga_translator/rendering/__init__.py
+++ b/manga_translator/rendering/__init__.py
@@ -41,7 +41,7 @@ def resize_regions_to_font_size(img: np.ndarray, text_regions: List[TextBlock],
 
     dst_points_list = []
     for region in text_regions:
-        char_count_orig = len(region.get_text())
+        char_count_orig = len(region.text)
         char_count_trans = len(region.translation.strip())
         if char_count_trans > char_count_orig:
             # More characters were added, have to reduce fontsize to fit allotted area
diff --git a/manga_translator/utils/generic.py b/manga_translator/utils/generic.py
index be0253b2..a86c668b 100644
--- a/manga_translator/utils/generic.py
+++ b/manga_translator/utils/generic.py
@@ -118,7 +118,7 @@ def is_punctuation(ch):
 
 def is_valuable_char(ch):
     # return re.search(r'[^\d\W]', ch)
-    return not is_punctuation(ch) and not is_control(ch) and not is_whitespace(ch) and not ch.isnumeric()
+    return not is_punctuation(ch) and not is_control(ch) and not is_whitespace(ch) and not ch.isdigit()
 
 def is_valuable_text(text):
     for ch in text:
diff --git a/manga_translator/utils/textblock.py b/manga_translator/utils/textblock.py
index 06227815..3f5b1018 100644
--- a/manga_translator/utils/textblock.py
+++ b/manga_translator/utils/textblock.py
@@ -5,6 +5,7 @@
 from functools import cached_property
 import copy
 import re
+import py3langid as langid
 
 from .generic import color_difference, is_right_to_left_char, is_valuable_char
 # from ..detection.ctd_utils.utils.imgproc_utils import union_area, xywh2xyxypoly
@@ -41,7 +42,7 @@ class TextBlock(object):
     Object that stores a block of text made up of textlines.
     """
     def __init__(self, lines: List,
-                 text: List[str] = None,
+                 texts: List[str] = None,
                  language: str = 'unknown',
                  font_size: float = -1,
                  angle: int = 0,
@@ -60,6 +61,7 @@ def __init__(self, lines: List,
                  _bounding_rect: List = None,
                  default_stroke_width = 0.2,
                  font_weight = 50,
+                 source_lang: str = "",
                  target_lang: str = "",
                  opacity: float = 1.,
                  shadow_radius: float = 0.,
@@ -75,7 +77,8 @@ def __init__(self, lines: List,
         self.angle = angle
         self._direction = direction
 
-        self.text = text if text is not None else []
+        self.texts = texts if texts is not None else []
+        self.text = ' '.join(self.texts)  # join the normalized list so texts=None yields '' instead of raising TypeError
         self.prob = prob
 
         self.translation = translation
@@ -92,6 +95,7 @@ def __init__(self, lines: List,
         self.line_spacing = line_spacing
         self.letter_spacing = letter_spacing
         self._alignment = alignment
+        self._source_lang = source_lang
         self.target_lang = target_lang
 
         self._bounding_rect = _bounding_rect
@@ -235,10 +239,11 @@ def get_transformed_region(self, img: np.ndarray, line_idx: int, textheight: int
                 region = cv2.resize(region, (maxwidth, h))
         return region
 
-    def get_text(self):
-        if isinstance(self.text, str):
-            return self.text
-        return ' '.join(self.text).strip()
+    @property
+    def source_lang(self):
+        if not self._source_lang:
+            self._source_lang = langid.classify(self.text)[0]
+        return self._source_lang
 
     def get_translation_for_rendering(self):
         text = self.translation
@@ -275,7 +280,7 @@ def is_bulleted_list(self):
         A determining factor of whether we should be sticking to the strict per textline
         text distribution when rendering.
         """
-        if len(self.text) <= 1:
+        if len(self.texts) <= 1:
             return False
 
         bullet_regexes = [
@@ -284,7 +289,7 @@ def is_bulleted_list(self):
             r'[QA]:', # Q: ... A: ...
         ]
         bullet_type_idx = -1
-        for line_text in self.text:
+        for line_text in self.texts:
             for i, breg in enumerate(bullet_regexes):
                 if re.search(r'(?:[\n]|^)((?:' + breg + r')[\s]*)', line_text):
                     if bullet_type_idx >= 0 and bullet_type_idx != i:

From 05680656292636acb6a04b9b7822dba5e8c3048b Mon Sep 17 00:00:00 2001
From: thatDudo <thatdudo@proton.me>
Date: Mon, 20 Nov 2023 12:21:00 +0100
Subject: [PATCH 5/5] Split chatgpt translations by \n if the tag split fails,
 or repeat the request

---
 manga_translator/translators/chatgpt.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py
index ffca1349..417b1651 100644
--- a/manga_translator/translators/chatgpt.py
+++ b/manga_translator/translators/chatgpt.py
@@ -90,7 +90,7 @@ def temperature(self) -> float:
     def top_p(self) -> float:
         return self._config_get('top_p', default=1)
 
-    def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]:
+    def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]):
         prompt = ''
 
         if self._INCLUDE_TEMPLATE:
@@ -110,7 +110,7 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]) ->
             if self._MAX_TOKENS * 2 and len(''.join(queries[i+1:])) > self._MAX_TOKENS:
                 if self._RETURN_PROMPT:
                     prompt += '\n<|1|>'
-                yield prompt.lstrip()
+                yield prompt.lstrip(), i+1-i_offset
                 prompt = self.prompt_template.format(to_lang=to_lang)
                 # Restart counting at 1
                 i_offset = i + 1
@@ -118,7 +118,7 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]) ->
         if self._RETURN_PROMPT:
             prompt += '\n<|1|>'
 
-        yield prompt.lstrip()
+        yield prompt.lstrip(), len(queries)-i_offset
 
     def _format_prompt_log(self, to_lang: str, prompt: str) -> str:
         return prompt
@@ -127,7 +127,7 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) ->
         translations = []
         self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')
 
-        for prompt in self._assemble_prompts(from_lang, to_lang, queries):
+        for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries):
             self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt))
 
             ratelimit_attempt = 0
@@ -165,10 +165,21 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) ->
                     await asyncio.sleep(1)
 
             self.logger.debug('-- GPT Response --\n' + response)
+
             new_translations = re.split(r'<\|\d+\|>', response)
             # When there is only one query chatgpt likes to exclude the <|1|>
             if not new_translations[0].strip():
                 new_translations = new_translations[1:]
+
+            if len(new_translations) <= 1 and query_size > 1:
+                # Try splitting by newlines instead
+                new_translations = re.split(r'\n', response)
+
+            if len(new_translations) != query_size:
+                # super method will repeat translation as per self._INVALID_REPEAT_COUNT
+                translations = []
+                break
+
             translations.extend([t.strip() for t in new_translations])
 
         self.logger.debug(translations)