mindee · felixdittrich92 · Aug 8, 2024
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.1-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -154,6 +154,8 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
 pip install "python-doctr[tf]"
 # for PyTorch
 pip install "python-doctr[torch]"
+# optional dependencies for visualization, html, and contrib modules can be installed as follows:
+pip install "python-doctr[torch,viz,html,contib]"
 ```
 
 For MacBooks with M1 chip, you will need some additional packages or specific versions:
@@ -342,14 +344,13 @@ Your API should now be running locally on your port 8002. Access your automatica
 ```python
 import requests
 
-headers = {"accept": "application/json"}
 params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
 
 with open('/path/to/your/doc.jpg', 'rb') as f:
     files = [  # application/pdf, image/jpeg, image/png supported
         ("files", ("doc.jpg", f.read(), "image/jpeg")),
     ]
-print(requests.post("http://localhost:8080/ocr", headers=headers, params=params, files=files).json())
+print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
 ```
 
 ### Example notebooks

diff --git a/api/app/schemas.py b/api/app/schemas.py
@@ -25,7 +25,7 @@ class KIEIn(BaseModel):
 
 class OCRIn(KIEIn, BaseModel):
     resolve_lines: bool = Field(default=True, examples=[True])
-    resolve_blocks: bool = Field(default=True, examples=[True])
+    resolve_blocks: bool = Field(default=False, examples=[False])
     paragraph_break: float = Field(default=0.0035, examples=[0.0035])
 
 

diff --git a/docs/source/using_doctr/using_models.rst b/docs/source/using_doctr/using_models.rst
@@ -311,7 +311,7 @@ Additionally, you can change the batch size of the underlying detection and reco
 To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`:
 
 * `resolve_lines`: whether words should be automatically grouped into lines (default: True)
-* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True)
+* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: False)
 * `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035)
 
 For example to disable the automatic grouping of lines into blocks:

diff --git a/doctr/models/builder.py b/doctr/models/builder.py
@@ -31,7 +31,7 @@ class DocumentBuilder(NestedObject):
     def __init__(
         self,
         resolve_lines: bool = True,
-        resolve_blocks: bool = True,
+        resolve_blocks: bool = False,
         paragraph_break: float = 0.035,
         export_as_straight_boxes: bool = False,
     ) -> None:

diff --git a/doctr/utils/fonts.py b/doctr/utils/fonts.py
@@ -5,14 +5,16 @@
 
 import logging
 import platform
-from typing import Optional
+from typing import Optional, Union
 
 from PIL import ImageFont
 
 __all__ = ["get_font"]
 
 
-def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont:
+def get_font(
+    font_family: Optional[str] = None, font_size: int = 13
+) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
     """Resolves a compatible ImageFont for the system
 
     Args:
@@ -28,14 +30,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFon
     if font_family is None:
         try:
             font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
-        except OSError:
-            font = ImageFont.load_default()
+        except OSError:  # pragma: no cover
+            font = ImageFont.load_default()  # type: ignore[assignment]
             logging.warning(
                 "unable to load recommended font family. Loading default PIL font,"
                 "font size issues may be expected."
                 "To prevent this, it is recommended to specify the value of 'font_family'."
             )
-    else:
+    else:  # pragma: no cover
         font = ImageFont.truetype(font_family, font_size)
 
     return font
diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
@@ -60,6 +60,8 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     if assume_straight_pages:
@@ -103,6 +105,8 @@ def test_trained_ocr_predictor(mock_payslip):
         assume_straight_pages=True,
         straighten_pages=True,
         preserve_aspect_ratio=False,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     out = predictor(doc)
@@ -131,6 +135,8 @@ def test_trained_ocr_predictor(mock_payslip):
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
     # test hooks
     predictor.add_hook(_DummyCallback())
@@ -178,6 +184,8 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     if assume_straight_pages:
@@ -221,6 +229,8 @@ def test_trained_kie_predictor(mock_payslip):
         assume_straight_pages=True,
         straighten_pages=True,
         preserve_aspect_ratio=False,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
     # test hooks
     predictor.add_hook(_DummyCallback())
@@ -252,6 +262,8 @@ def test_trained_kie_predictor(mock_payslip):
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     out = predictor(doc)

diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py
@@ -57,6 +57,8 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     if assume_straight_pages:
@@ -102,6 +104,8 @@ def test_trained_ocr_predictor(mock_payslip):
         assume_straight_pages=True,
         straighten_pages=True,
         preserve_aspect_ratio=False,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
     # test hooks
     predictor.add_hook(_DummyCallback())
@@ -132,6 +136,8 @@ def test_trained_ocr_predictor(mock_payslip):
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     out = predictor(doc)
@@ -174,6 +180,8 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
         straighten_pages=straighten_pages,
         detect_orientation=True,
         detect_language=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     if assume_straight_pages:
@@ -219,6 +227,8 @@ def test_trained_kie_predictor(mock_payslip):
         assume_straight_pages=True,
         straighten_pages=True,
         preserve_aspect_ratio=False,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
     # test hooks
     predictor.add_hook(_DummyCallback())
@@ -250,6 +260,8 @@ def test_trained_kie_predictor(mock_payslip):
         straighten_pages=True,
         preserve_aspect_ratio=True,
         symmetric_pad=True,
+        resolve_blocks=True,
+        resolve_lines=True,
     )
 
     out = predictor(doc)