Skip to content

Commit

Permalink
chore: run make tidy on api files (#253)
Browse files Browse the repository at this point in the history
These files were excluded from linting when they were autogenerated. We need
to tidy them; otherwise, we'll be blocked by our internal deploy
pipeline.

Note that this should replace the current 0.0.47 candidate.
  • Loading branch information
awalker4 authored Sep 25, 2023
1 parent 2ace6f5 commit a20e01c
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ check: check-src check-tests check-version
## check-src: runs linters (source only, no tests)
.PHONY: check-src
check-src:
black --line-length 100 ${PACKAGE_NAME} --check --exclude ${PACKAGE_NAME}/api
black --line-length 100 ${PACKAGE_NAME} --check
flake8 ${PACKAGE_NAME}
mypy ${PACKAGE_NAME} --ignore-missing-imports --install-types --non-interactive --implicit-optional

Expand All @@ -114,7 +114,7 @@ check-tests:
## tidy: run black
.PHONY: tidy
tidy:
black --line-length 100 ${PACKAGE_NAME} --exclude ${PACKAGE_NAME}/api
black --line-length 100 ${PACKAGE_NAME}
black --line-length 100 test_${PIPELINE_PACKAGE} scripts/smoketest.py

## check-scripts: run shellcheck
Expand Down
11 changes: 3 additions & 8 deletions prepline_general/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@
async def http_error_handler(request: Request, e: HTTPException):
logger.error(e.detail)

return JSONResponse(
status_code=e.status_code,
content={"detail": e.detail}
)
return JSONResponse(status_code=e.status_code, content={"detail": e.detail})


# Note(austin) - Convert any other errors to HTTPException
Expand All @@ -48,10 +45,7 @@ async def error_handler(request: Request, e: Exception):

logger.error(trace)

error = HTTPException(
status_code=500,
detail=str(e)
)
error = HTTPException(status_code=500, detail=str(e))

return await http_error_handler(request, error)

Expand Down Expand Up @@ -85,6 +79,7 @@ def filter(self, record: logging.LogRecord) -> bool:
logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())
logging.getLogger("uvicorn.access").addFilter(MetricsCheckFilter())


@app.get("/healthcheck", status_code=status.HTTP_200_OK, include_in_schema=False)
def healthcheck(request: Request):
return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"}
33 changes: 20 additions & 13 deletions prepline_general/api/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,11 @@ def pipeline_api(
# This will raise if the file is encrypted
pdf.metadata
except pypdf.errors.EmptyFileError:
raise HTTPException(
status_code=400, detail=f"File does not appear to be a valid PDF"
)
raise HTTPException(status_code=400, detail="File does not appear to be a valid PDF")
except pypdf.errors.FileNotDecryptedError:
raise HTTPException(
status_code=400,
detail=f"File is encrypted. Please decrypt it with password.",
detail="File is encrypted. Please decrypt it with password.",
)

strategy = (m_strategy[0] if len(m_strategy) else "auto").lower()
Expand Down Expand Up @@ -332,19 +330,30 @@ def pipeline_api(
m_skip_infer_table_types[0] if len(m_skip_infer_table_types) else ["pdf", "jpg", "png"]
)

chunking_strategy = (m_chunking_strategy[0].lower() if len(m_chunking_strategy) else None)
chunking_strategy = m_chunking_strategy[0].lower() if len(m_chunking_strategy) else None
chunk_strategies = ["by_title"]
if chunking_strategy and (chunking_strategy not in chunk_strategies):
raise HTTPException(
status_code=400, detail=f"Invalid chunking strategy: {chunking_strategy}. Must be one of {chunk_strategies}"
status_code=400,
detail=f"Invalid chunking strategy: {chunking_strategy}. Must be one of {chunk_strategies}",
)

multipage_sections_str = (m_multipage_sections[0] if len(m_multipage_sections) else "false").lower()

multipage_sections_str = (
m_multipage_sections[0] if len(m_multipage_sections) else "false"
).lower()
multipage_sections = multipage_sections_str == "true"

combine_under_n_chars = (int(m_combine_under_n_chars[0]) if m_combine_under_n_chars and m_combine_under_n_chars[0].isdigit() else 500)
combine_under_n_chars = (
int(m_combine_under_n_chars[0])
if m_combine_under_n_chars and m_combine_under_n_chars[0].isdigit()
else 500
)

new_after_n_chars = (int(m_new_after_n_chars[0]) if m_new_after_n_chars and m_new_after_n_chars[0].isdigit() else 1500)
new_after_n_chars = (
int(m_new_after_n_chars[0])
if m_new_after_n_chars and m_new_after_n_chars[0].isdigit()
else 1500
)

try:
logger.debug(
Expand Down Expand Up @@ -477,9 +486,7 @@ def get_validated_mimetype(file):
if content_type not in allowed_mimetypes:
raise HTTPException(
status_code=400,
detail=(
f"File type {content_type} is not supported."
),
detail=(f"File type {content_type} is not supported."),
)

return content_type
Expand Down

0 comments on commit a20e01c

Please sign in to comment.