diff --git a/.conda/meta.yaml b/.conda/meta.yaml index 8ff2e0a9ad..473bfed23e 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -1,7 +1,8 @@ {% set pyproject = load_file_data('../pyproject.toml', from_recipe_dir=True) %} {% set project = pyproject.get('project') %} {% set urls = pyproject.get('project', {}).get('urls') %} -{% set version = environ.get('BUILD_VERSION', '0.9.0a0') %} +{% set version = environ.get('BUILD_VERSION', '0.8.1a0') %} + package: name: {{ project.get('name') }} version: {{ version }} @@ -11,12 +12,11 @@ source: url: ../dist/{{ project.get('name') }}-{{ version }}.tar.gz build: - noarch: python script: python setup.py install --single-version-externally-managed --record=record.txt requirements: host: - - python>=3.8, <4.0 + - python>=3.8, <3.12 - setuptools run: @@ -40,9 +40,18 @@ requirements: - unidecode >=1.0.0 - tqdm >=4.30.0 +test: + requires: + - pip + - pytorch >=2.0.0, <3.0.0 + - torchvision + + imports: + - doctr + about: home: {{ urls.get('repository') }} - license: Apache 2.0 + license: Apache-2.0 license_file: {{ project.get('license', {}).get('file') }} summary: {{ project.get('description') | replace(":", " -")}} doc_url: {{ urls.get('documentation') }} diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 53f4643be2..e74b9bb698 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -58,7 +58,7 @@ jobs: with: auto-update-conda: true python-version: 3.8 - channels: pypdfium2-team,bblanchon,conda-forge + channels: pypdfium2-team,bblanchon,defaults,conda-forge channel-priority: strict - name: Install dependencies shell: bash -el {0} @@ -69,4 +69,4 @@ jobs: python setup.py sdist mkdir conda-dist conda build .conda/ --output-folder conda-dist - conda-verify conda-dist/noarch/*tar.bz2 --ignore=C1115 + conda-verify conda-dist/linux-64/*tar.bz2 --ignore=C1115 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 858fafe887..efc024fd21 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -74,7 +74,7 @@ jobs: with: auto-update-conda: true python-version: 3.8 - channels: pypdfium2-team,bblanchon,conda-forge + channels: pypdfium2-team,bblanchon,defaults,conda-forge channel-priority: strict - name: Install dependencies shell: bash -el {0} @@ -92,8 +92,8 @@ jobs: python setup.py sdist mkdir conda-dist conda build .conda/ --output-folder conda-dist - conda-verify conda-dist/noarch/*tar.bz2 --ignore=C1115 - anaconda upload conda-dist/noarch/*tar.bz2 + conda-verify conda-dist/linux-64/*tar.bz2 --ignore=C1115 + anaconda upload conda-dist/linux-64/*tar.bz2 conda-check: if: "!github.event.release.prerelease" @@ -108,4 +108,6 @@ jobs: shell: bash -el {0} run: | conda config --set channel_priority strict - conda install -c techMindee -c pypdfium2-team -c bblanchon -c conda-forge python-doctr + conda install pytorch torchvision torchaudio cpuonly -c pytorch + conda install -c techMindee -c pypdfium2-team -c bblanchon -c defaults -c conda-forge python-doctr + python -c "import doctr; print(doctr.__version__)" diff --git a/README.md b/README.md index 3c92c827c5..057109c656 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
- +
[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) @@ -12,7 +12,7 @@ What you can expect from this repository: - efficient ways to parse textual information (localize and identify each word) from your documents - guidance on how to integrate this in your current architecture -![OCR_example](docs/images/ocr.png) +![OCR_example](https://github.com/mindee/doctr/raw/main/docs/images/ocr.png) ## Quick Tour @@ -78,7 +78,7 @@ To interpret your model's predictions, you can visualize them interactively as f result.show() ``` -![Visualization sample](docs/images/doctr_example_script.gif) +![Visualization sample](https://github.com/mindee/doctr/raw/main/docs/images/doctr_example_script.gif) Or even rebuild the original document from its predictions: @@ -89,7 +89,7 @@ synthetic_pages = result.synthesize() plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show() ``` -![Synthesis sample](docs/images/synthesized_sample.png) +![Synthesis sample](https://github.com/mindee/doctr/raw/main/docs/images/synthesized_sample.png) The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`). To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure): @@ -128,7 +128,7 @@ The KIE predictor results per page are in a dictionary format with each key repr ### If you are looking for support from the Mindee team -[![Bad OCR test detection image asking the developer if they need help](docs/images/doctr-need-help.png)](https://mindee.com/product/doctr) +[![Bad OCR test detection image asking the developer if they need help](https://github.com/mindee/doctr/raw/main/docs/images/doctr-need-help.png)](https://mindee.com/product/doctr) ## Installation @@ -217,7 +217,7 @@ The full package documentation is available [here](https://mindee.github.io/doct A minimal demo app is provided for you to play with our end-to-end OCR models! -![Demo app](docs/images/demo_update.png) +![Demo app](https://github.com/mindee/doctr/raw/main/docs/images/demo_update.png) #### Live demo @@ -257,11 +257,11 @@ USE_TORCH=1 streamlit run demo/app.py Instead of having your demo actually running Python, you would prefer to run everything in your web browser? Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started! -![TFJS demo](docs/images/demo_illustration_mini.png) +![TFJS demo](https://github.com/mindee/doctr/raw/main/docs/images/demo_illustration_mini.png) ### Docker container -[We offers Docker container support for easy testing and deployment](https://github.com/mindee/doctr/packages). +[We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr). #### Using GPU with docTR Docker Images @@ -377,8 +377,8 @@ If you wish to cite this project, feel free to use this [BibTeX](http://www.bibt If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way? -You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](CONTRIBUTING.md)) for you to easily do so! +You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so! ## License -Distributed under the Apache 2.0 License. See [`LICENSE`](LICENSE) for more information. +Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information. diff --git a/api/pyproject.toml b/api/pyproject.toml index a101c4a436..a3c4134d00 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api" [tool.poetry] name = "doctr-api" -version = "0.9.0a0" +version = "0.8.1a0" description = "Backend template for your OCR API with docTR" authors = ["Mindee