Skip to content

Commit

Permalink
update with included config
Browse files Browse the repository at this point in the history
  • Loading branch information
felixdittrich92 committed Apr 10, 2024
1 parent 73e3faa commit f0412ec
Show file tree
Hide file tree
Showing 13 changed files with 416 additions and 130 deletions.
91 changes: 73 additions & 18 deletions api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,22 @@ should yield
```json
[
{
"name": "invitation.png",
"boxes": [
[0.50390625, 0.712890625, 0.5185546875, 0.720703125],
[0.4716796875, 0.712890625, 0.48828125, 0.720703125]
"name": "117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg",
"geometries": [
[
0.724609375,
0.1787109375,
0.7900390625,
0.2080078125
],
[
0.6748046875,
0.1796875,
0.7314453125,
0.20703125
]
},
]
}
]
```

Expand All @@ -73,9 +83,10 @@ should yield
```json
[
{
"name": "invitation.png",
"value": "invite"
},
"name": "117133599-c073fa00-ada4-11eb-831b-412de4d28341.jpeg",
"value": "invite",
"confidence": 1.0
}
]
```

Expand All @@ -98,17 +109,61 @@ should yield
```json
[
{
"name": "hello_world.jpg",
"items": [
"name": "117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg",
"orientation": {
"value": 0,
"confidence": null
},
"language": {
"value": null,
"confidence": null
},
"items": [
{
"value": "Hello",
"box": [0.005859375, 0.003312938981562763, 0.0205078125, 0.0332854340430202]
},
{
"value": "world!",
"box": [0.005859375, 0.003312938981562763, 0.0205078125, 0.0332854340430202]
},
],
"blocks": [
{
"geometry": [
0.7471996155154171,
0.1787109375,
0.9101580212741838,
0.2080078125
],
"lines": [
{
"geometry": [
0.7471996155154171,
0.1787109375,
0.9101580212741838,
0.2080078125
],
"words": [
{
"value": "Hello",
"geometry": [
0.7471996155154171,
0.1796875,
0.8272978149561669,
0.20703125
],
"confidence": 1.0
},
{
"value": "world!",
"geometry": [
0.8176307908857315,
0.1787109375,
0.9101580212741838,
0.2080078125
],
"confidence": 1.0
}
]
}
]
}
]
}
]
}
]
```
40 changes: 19 additions & 21 deletions api/app/routes/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,31 @@

from typing import List

from fastapi import APIRouter, File, HTTPException, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import DetectionOut
from app.vision import det_predictor
from app.schemas import DetectionIn, DetectionOut
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor
from doctr.file_utils import CLASS_NAME
from doctr.io import DocumentFile

router = APIRouter()


@router.post("/", response_model=List[DetectionOut], status_code=status.HTTP_200_OK, summary="Perform text detection")
async def text_detection(files: List[UploadFile] = [File(...)]):
async def text_detection(request: DetectionIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 19 in api/app/routes/detection.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/detection.py#L19

Dangerous default value [] as argument
"""Runs docTR text detection model to analyze the input image"""
boxes: List[DetectionOut] = []
for file in files:
mime_type = file.content_type
if mime_type in ["image/jpeg", "image/png"]:
content = DocumentFile.from_images([await file.read()])
elif mime_type == "application/pdf":
content = DocumentFile.from_pdf(await file.read())
else:
raise HTTPException(status_code=400, detail=f"Unsupported file format for detection endpoint: {mime_type}")

boxes.append(
DetectionOut(
name=file.filename or "", boxes=[box.tolist() for box in det_predictor(content)[0][CLASS_NAME][:, :-1]]
)
try:
predictor = init_predictor(request)
content, filenames = await get_documents(files)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

return [
DetectionOut(
name=filename,
geometries=[
geom[:-1].tolist() if len(geom) == 5 else resolve_geometry(geom.tolist()) for geom in doc[CLASS_NAME]
],
)

return boxes
for doc, filename in zip(predictor(content), filenames)
]
62 changes: 32 additions & 30 deletions api/app/routes/kie.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,47 @@

from typing import List

from fastapi import APIRouter, File, HTTPException, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import KIEElement, KIEOut
from app.vision import kie_predictor
from doctr.io import DocumentFile
from app.schemas import KIEElement, KIEIn, KIEOut
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor

router = APIRouter()


@router.post("/", response_model=List[KIEOut], status_code=status.HTTP_200_OK, summary="Perform KIE")
async def perform_kie(files: List[UploadFile] = [File(...)]):
async def perform_kie(request: KIEIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 18 in api/app/routes/kie.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/kie.py#L18

Dangerous default value [] as argument
"""Runs docTR KIE model to analyze the input image"""
results: List[KIEOut] = []
for file in files:
mime_type = file.content_type
if mime_type in ["image/jpeg", "image/png"]:
content = DocumentFile.from_images([await file.read()])
elif mime_type == "application/pdf":
content = DocumentFile.from_pdf(await file.read())
else:
raise HTTPException(status_code=400, detail=f"Unsupported file format for KIE endpoint: {mime_type}")

out = kie_predictor(content)

for page in out.pages:
results.append(
KIEOut(
name=file.filename or "",
predictions=[
KIEElement(
class_name=class_name,
items=[
dict(value=prediction.value, box=(*prediction.geometry[0], *prediction.geometry[1]))
for prediction in page.predictions[class_name]
],
try:
predictor = init_predictor(request)
content, filenames = await get_documents(files)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

out = predictor(content)

results = [
KIEOut(
name=filenames[i],
orientation=page.orientation,
language=page.language,
predictions=[
KIEElement(
class_name=class_name,
items=[
dict(
value=prediction.value,
geometry=resolve_geometry(prediction.geometry),
confidence=round(prediction.confidence, 2),
)
for class_name in page.predictions.keys()
for prediction in page.predictions[class_name]
],
)
)
for class_name in page.predictions.keys()
],
)
for i, page in enumerate(out.pages)
]

return results
70 changes: 44 additions & 26 deletions api/app/routes/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,58 @@

from typing import List

from fastapi import APIRouter, File, HTTPException, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import OCROut
from app.vision import predictor
from doctr.io import DocumentFile
from app.schemas import OCRBlock, OCRIn, OCRLine, OCROut, OCRPage, OCRWord
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor

router = APIRouter()


@router.post("/", response_model=List[OCROut], status_code=status.HTTP_200_OK, summary="Perform OCR")
async def perform_ocr(files: List[UploadFile] = [File(...)]):
async def perform_ocr(request: OCRIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 18 in api/app/routes/ocr.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/ocr.py#L18

Dangerous default value [] as argument
"""Runs docTR OCR model to analyze the input image"""
results: List[OCROut] = []
for file in files:
mime_type = file.content_type
if mime_type in ["image/jpeg", "image/png"]:
content = DocumentFile.from_images([await file.read()])
elif mime_type == "application/pdf":
content = DocumentFile.from_pdf(await file.read())
else:
raise HTTPException(status_code=400, detail=f"Unsupported file format for OCR endpoint: {mime_type}")

out = predictor(content)
for page in out.pages:
results.append(
OCROut(
name=file.filename or "",
items=[
dict(value=word.value, box=(*word.geometry[0], *word.geometry[1]))
try:
# generator object to list
content, filenames = await get_documents(files)
predictor = init_predictor(request)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))

out = predictor(content)

results = [
OCROut(
name=filenames[i],
orientation=page.orientation,
language=page.language,
items=[
OCRPage(
blocks=[
OCRBlock(
geometry=resolve_geometry(block.geometry),
lines=[
OCRLine(
geometry=resolve_geometry(line.geometry),
words=[
OCRWord(
value=word.value,
geometry=resolve_geometry(word.geometry),
confidence=round(word.confidence, 2),
)
for word in line.words
],
)
for line in block.lines
],
)
for block in page.blocks
for line in block.lines
for word in line.words
],
]
)
)
],
)
for i, page in enumerate(out.pages)
]

return results
32 changes: 14 additions & 18 deletions api/app/routes/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,26 @@

from typing import List

from fastapi import APIRouter, File, HTTPException, UploadFile, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import RecognitionOut
from app.vision import reco_predictor
from doctr.io import DocumentFile
from app.schemas import RecognitionIn, RecognitionOut
from app.utils import get_documents
from app.vision import init_predictor

router = APIRouter()


@router.post(
"/", response_model=List[RecognitionOut], status_code=status.HTTP_200_OK, summary="Perform text recognition"
)
async def text_recognition(files: List[UploadFile] = [File(...)]):
async def text_recognition(request: RecognitionIn = Depends(), files: List[UploadFile] = [File(...)]):

Check warning on line 20 in api/app/routes/recognition.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

api/app/routes/recognition.py#L20

Dangerous default value [] as argument
"""Runs docTR text recognition model to analyze the input image"""
words: List[RecognitionOut] = []
for file in files:
mime_type = file.content_type
if mime_type in ["image/jpeg", "image/png"]:
content = DocumentFile.from_images([await file.read()])
else:
raise HTTPException(
status_code=400, detail=f"Unsupported file format for recognition endpoint: {mime_type}"
)

words.append(RecognitionOut(name=file.filename or "", value=reco_predictor(content)[0][0]))

return words
try:
predictor = init_predictor(request)
content, filenames = await get_documents(files)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
return [
RecognitionOut(name=filename, value=res[0], confidence=round(res[1], 2))
for res, filename in zip(predictor(content), filenames)
]
Loading

0 comments on commit f0412ec

Please sign in to comment.