Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add unit tests in ci #1

Merged
merged 8 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# CI workflow: lints the package with ruff and runs the pytest suite.
# Triggered on every pull request and on pushes to the main branch.
name: Unit Tests

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  setup:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version a string; an unquoted value is
          # parsed as a float (e.g. 3.10 would silently become 3.1).
          python-version: "3.9"
          cache: "pip" # caching pip dependencies

      # Prodigy is checked out from its own repository using a read-only
      # SSH key stored in the repository secrets.
      - name: Check out Prodigy
        uses: actions/checkout@v3
        with:
          repository: explosion/prodigy
          ref: v1.14.0
          path: ./prodigy
          ssh-key: ${{ secrets.GHA_PRODIGY_READ }}

      - name: Install prodigy
        run: |
          ls -la
          # Upgrade pip first so every install below uses the same, current pip.
          python -m pip install --upgrade pip
          pip install ./prodigy

      - name: Install dependencies
        run: |
          pip install -e .
          pip install ruff pytest

      # `if: always()` lets the lint and test steps both report, even when an
      # earlier step has failed.
      - name: Run ruff
        if: always()
        shell: bash
        # Explicit `check` subcommand: ruff is installed unpinned and newer
        # releases no longer accept paths without a subcommand.
        run: python -m ruff check prodigy_pdf tests

      - name: Run pytest
        if: always()
        shell: bash
        run: python -m pytest tests
14 changes: 9 additions & 5 deletions prodigy_pdf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List
from typing import List
import base64
from io import BytesIO
from pathlib import Path
Expand All @@ -10,14 +10,16 @@
from prodigy.util import msg

def page_to_image(page: pdfium.PdfPage) -> str:
    """Turn a PdfPage into a base64-encoded image data URI for Prodigy.

    The page is rendered, converted to a PIL image and serialised as JPEG
    into an in-memory buffer; the buffer contents are then base64-encoded.

    Args:
        page: The PDF page to render.

    Returns:
        A ``data:image/jpeg;base64,...`` URI containing the rendered page.
    """
    pil_image = page.render().to_pil()
    # The context manager releases the in-memory buffer as soon as the
    # encoded bytes have been extracted.
    with BytesIO() as buffered:
        pil_image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue())
    # The payload is JPEG, so the declared media type must be image/jpeg
    # (it was previously mislabelled as image/png).
    return f"data:image/jpeg;base64,{img_str.decode('utf-8')}"


def generate_pdf_pages(pdf_paths: List[Path]) -> Dict:
def generate_pdf_pages(pdf_paths: List[Path]):
"""Generate dictionaries that contain an image for each page in the PDF"""
for pdf_path in pdf_paths:
pdf = pdfium.PdfDocument(pdf_path)
n_pages = len(pdf)
Expand All @@ -30,6 +32,7 @@ def generate_pdf_pages(pdf_paths: List[Path]) -> Dict:
"pdf": pdf_path.parts[-1],
}
})
pdf.close()


@recipe(
Expand All @@ -47,6 +50,7 @@ def pdf_image_manual(
labels:str,
remove_base64:bool=False
) -> ControllerComponentsDict:
"""Turns pdfs into images in order to annotate them."""
# Read in stream as a list for progress bar.
if not pdf_folder.exists():
msg.fail(f"Folder `{pdf_folder}` does not exist.", exits=True)
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.ruff]
line-length = 120
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ zip_safe = true
python_requires = >=3.8
install_requires =
pypdfium2==4.20.0
Pillow==9.4.0

[options.entry_points]
prodigy_recipes =
Expand Down