Skip to content

Commit

Permalink
Merge pull request #1 from explosion/pre-release-fixes
Browse files Browse the repository at this point in the history
add unit tests in ci
  • Loading branch information
koaning authored Oct 5, 2023
2 parents b6a31a3 + dad0cc4 commit e27e796
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 5 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Lints the package with ruff and runs the pytest suite on every pull request
# and on every push to main.
name: Unit Tests

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  setup:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML never parses a version such as 3.10 as the float 3.1.
          python-version: "3.9"
          cache: "pip" # caching pip dependencies

      # Prodigy is cloned over SSH into ./prodigy using the key stored in the
      # GHA_PRODIGY_READ secret, then installed from that local checkout.
      - name: Check out Prodigy
        uses: actions/checkout@v3
        with:
          repository: explosion/prodigy
          ref: v1.14.0
          path: ./prodigy
          ssh-key: ${{ secrets.GHA_PRODIGY_READ }}

      - name: Install prodigy
        run: |
          ls -la
          pip install ./prodigy
      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e .
          pip install ruff pytest
      # `always()` keeps lint and tests independent: each one still runs (and
      # reports) when an earlier step has failed.
      - name: Run ruff
        if: always()
        shell: bash
        # ruff is installed unpinned, so use the explicit `check` subcommand;
        # newer releases no longer accept bare `ruff <paths>`.
        run: python -m ruff check prodigy_pdf tests

      - name: Run pytest
        if: always()
        shell: bash
        run: python -m pytest tests
14 changes: 9 additions & 5 deletions prodigy_pdf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List
from typing import List
import base64
from io import BytesIO
from pathlib import Path
Expand All @@ -10,14 +10,16 @@
from prodigy.util import msg

def page_to_image(page: pdfium.PdfPage) -> str:
    """Turns a PdfPage into a base64 image for Prodigy.

    The page is rendered to a PIL image, encoded as JPEG in memory and
    returned as a ``data:`` URI string with the bytes base64-encoded.
    """
    pil_image = page.render().to_pil()
    # The context manager releases the in-memory buffer once the bytes are read.
    with BytesIO() as buffered:
        pil_image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue())
    # The payload is JPEG, so the URI must declare image/jpeg rather than image/png.
    return f"data:image/jpeg;base64,{img_str.decode('utf-8')}"


def generate_pdf_pages(pdf_paths: List[Path]) -> Dict:
def generate_pdf_pages(pdf_paths: List[Path]):
"""Generate dictionaries that contain an image for each page in the PDF"""
for pdf_path in pdf_paths:
pdf = pdfium.PdfDocument(pdf_path)
n_pages = len(pdf)
Expand All @@ -30,6 +32,7 @@ def generate_pdf_pages(pdf_paths: List[Path]) -> Dict:
"pdf": pdf_path.parts[-1],
}
})
pdf.close()


@recipe(
Expand All @@ -47,6 +50,7 @@ def pdf_image_manual(
labels:str,
remove_base64:bool=False
) -> ControllerComponentsDict:
"""Turns pdfs into images in order to annotate them."""
# Read in stream as a list for progress bar.
if not pdf_folder.exists():
msg.fail(f"Folder `{pdf_folder}` does not exist.", exits=True)
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.ruff]
line-length = 120
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ zip_safe = true
python_requires = >=3.8
install_requires =
pypdfium2==4.20.0
Pillow==9.4.0

[options.entry_points]
prodigy_recipes =
Expand Down

0 comments on commit e27e796

Please sign in to comment.