From b7c669995a5ce53e8ffadade8d317cda343a369d Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 8 Aug 2024 13:43:21 +0200 Subject: [PATCH] update readme & fix mypy & change resolve blocks default --- README.md | 7 ++++--- api/app/schemas.py | 2 +- docs/source/using_doctr/using_models.rst | 2 +- doctr/models/builder.py | 2 +- doctr/utils/fonts.py | 12 +++++++----- tests/pytorch/test_models_zoo_pt.py | 12 ++++++++++++ tests/tensorflow/test_models_zoo_tf.py | 12 ++++++++++++ 7 files changed, 38 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d82ef8344b..2fc92971ff 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@

-[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.1-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) +[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) 
[![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch** @@ -154,6 +154,8 @@ We try to keep framework-specific dependencies to a minimum. You can install fra pip install "python-doctr[tf]" # for PyTorch pip install "python-doctr[torch]" +# optional dependencies for visualization, html, and contrib modules can be installed as follows: +pip install "python-doctr[torch,viz,html,contrib]" ``` For MacBooks with M1 chip, you will need some additional packages or specific versions: @@ -342,14 +344,13 @@ Your API should now be running locally on your port 8002. 
Access your automatica ```python import requests -headers = {"accept": "application/json"} params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"} with open('/path/to/your/doc.jpg', 'rb') as f: files = [ # application/pdf, image/jpeg, image/png supported ("files", ("doc.jpg", f.read(), "image/jpeg")), ] -print(requests.post("http://localhost:8080/ocr", headers=headers, params=params, files=files).json()) +print(requests.post("http://localhost:8080/ocr", params=params, files=files).json()) ``` ### Example notebooks diff --git a/api/app/schemas.py b/api/app/schemas.py index 4f0c0d8d4f..6f4085a294 100644 --- a/api/app/schemas.py +++ b/api/app/schemas.py @@ -25,7 +25,7 @@ class KIEIn(BaseModel): class OCRIn(KIEIn, BaseModel): resolve_lines: bool = Field(default=True, examples=[True]) - resolve_blocks: bool = Field(default=True, examples=[True]) + resolve_blocks: bool = Field(default=False, examples=[False]) paragraph_break: float = Field(default=0.0035, examples=[0.0035]) diff --git a/docs/source/using_doctr/using_models.rst b/docs/source/using_doctr/using_models.rst index 3318711f45..0524169afa 100644 --- a/docs/source/using_doctr/using_models.rst +++ b/docs/source/using_doctr/using_models.rst @@ -311,7 +311,7 @@ Additionally, you can change the batch size of the underlying detection and reco To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: * `resolve_lines`: whether words should be automatically grouped into lines (default: True) -* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: False) * `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) For example to disable the automatic grouping of lines into blocks: diff --git a/doctr/models/builder.py b/doctr/models/builder.py index 
e6ec53be28..4773404dec 100644 --- a/doctr/models/builder.py +++ b/doctr/models/builder.py @@ -31,7 +31,7 @@ class DocumentBuilder(NestedObject): def __init__( self, resolve_lines: bool = True, - resolve_blocks: bool = True, + resolve_blocks: bool = False, paragraph_break: float = 0.035, export_as_straight_boxes: bool = False, ) -> None: diff --git a/doctr/utils/fonts.py b/doctr/utils/fonts.py index a6f0772f68..ecd3e377db 100644 --- a/doctr/utils/fonts.py +++ b/doctr/utils/fonts.py @@ -5,14 +5,16 @@ import logging import platform -from typing import Optional +from typing import Optional, Union from PIL import ImageFont __all__ = ["get_font"] -def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont: +def get_font( + font_family: Optional[str] = None, font_size: int = 13 +) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]: """Resolves a compatible ImageFont for the system Args: @@ -28,14 +30,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFon if font_family is None: try: font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size) - except OSError: - font = ImageFont.load_default() + except OSError: # pragma: no cover + font = ImageFont.load_default() # type: ignore[assignment] logging.warning( "unable to load recommended font family. Loading default PIL font," "font size issues may be expected." "To prevent this, it is recommended to specify the value of 'font_family'." 
) - else: + else: # pragma: no cover font = ImageFont.truetype(font_family, font_size) return font diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py index 76136e6d84..0cac9724ee 100644 --- a/tests/pytorch/test_models_zoo_pt.py +++ b/tests/pytorch/test_models_zoo_pt.py @@ -60,6 +60,8 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa straighten_pages=straighten_pages, detect_orientation=True, detect_language=True, + resolve_blocks=True, + resolve_lines=True, ) if assume_straight_pages: @@ -103,6 +105,8 @@ def test_trained_ocr_predictor(mock_payslip): assume_straight_pages=True, straighten_pages=True, preserve_aspect_ratio=False, + resolve_blocks=True, + resolve_lines=True, ) out = predictor(doc) @@ -131,6 +135,8 @@ def test_trained_ocr_predictor(mock_payslip): straighten_pages=True, preserve_aspect_ratio=True, symmetric_pad=True, + resolve_blocks=True, + resolve_lines=True, ) # test hooks predictor.add_hook(_DummyCallback()) @@ -178,6 +184,8 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa straighten_pages=straighten_pages, detect_orientation=True, detect_language=True, + resolve_blocks=True, + resolve_lines=True, ) if assume_straight_pages: @@ -221,6 +229,8 @@ def test_trained_kie_predictor(mock_payslip): assume_straight_pages=True, straighten_pages=True, preserve_aspect_ratio=False, + resolve_blocks=True, + resolve_lines=True, ) # test hooks predictor.add_hook(_DummyCallback()) @@ -252,6 +262,8 @@ def test_trained_kie_predictor(mock_payslip): straighten_pages=True, preserve_aspect_ratio=True, symmetric_pad=True, + resolve_blocks=True, + resolve_lines=True, ) out = predictor(doc) diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py index b612d72db1..f20cb21f5c 100644 --- a/tests/tensorflow/test_models_zoo_tf.py +++ b/tests/tensorflow/test_models_zoo_tf.py @@ -57,6 +57,8 @@ def test_ocrpredictor(mock_pdf, mock_vocab, 
assume_straight_pages, straighten_pa straighten_pages=straighten_pages, detect_orientation=True, detect_language=True, + resolve_blocks=True, + resolve_lines=True, ) if assume_straight_pages: @@ -102,6 +104,8 @@ def test_trained_ocr_predictor(mock_payslip): assume_straight_pages=True, straighten_pages=True, preserve_aspect_ratio=False, + resolve_blocks=True, + resolve_lines=True, ) # test hooks predictor.add_hook(_DummyCallback()) @@ -132,6 +136,8 @@ def test_trained_ocr_predictor(mock_payslip): straighten_pages=True, preserve_aspect_ratio=True, symmetric_pad=True, + resolve_blocks=True, + resolve_lines=True, ) out = predictor(doc) @@ -174,6 +180,8 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa straighten_pages=straighten_pages, detect_orientation=True, detect_language=True, + resolve_blocks=True, + resolve_lines=True, ) if assume_straight_pages: @@ -219,6 +227,8 @@ def test_trained_kie_predictor(mock_payslip): assume_straight_pages=True, straighten_pages=True, preserve_aspect_ratio=False, + resolve_blocks=True, + resolve_lines=True, ) # test hooks predictor.add_hook(_DummyCallback()) @@ -250,6 +260,8 @@ def test_trained_kie_predictor(mock_payslip): straighten_pages=True, preserve_aspect_ratio=True, symmetric_pad=True, + resolve_blocks=True, + resolve_lines=True, ) out = predictor(doc)