diff --git a/Makefile b/Makefile index 77a2a15c..a1b21468 100644 --- a/Makefile +++ b/Makefile @@ -102,7 +102,7 @@ check: check-src check-tests check-version ## check-src: runs linters (source only, no tests) .PHONY: check-src check-src: - black --line-length 100 ${PACKAGE_NAME} --check --exclude ${PACKAGE_NAME}/api + black --line-length 100 ${PACKAGE_NAME} --check flake8 ${PACKAGE_NAME} mypy ${PACKAGE_NAME} --ignore-missing-imports --install-types --non-interactive --implicit-optional @@ -114,7 +114,7 @@ check-tests: ## tidy: run black .PHONY: tidy tidy: - black --line-length 100 ${PACKAGE_NAME} --exclude ${PACKAGE_NAME}/api + black --line-length 100 ${PACKAGE_NAME} black --line-length 100 test_${PIPELINE_PACKAGE} scripts/smoketest.py ## check-scripts: run shellcheck diff --git a/prepline_general/api/app.py b/prepline_general/api/app.py index 9220f4ea..fab5381d 100644 --- a/prepline_general/api/app.py +++ b/prepline_general/api/app.py @@ -29,10 +29,7 @@ async def http_error_handler(request: Request, e: HTTPException): logger.error(e.detail) - return JSONResponse( - status_code=e.status_code, - content={"detail": e.detail} - ) + return JSONResponse(status_code=e.status_code, content={"detail": e.detail}) # Note(austin) - Convert any other errors to HTTPException @@ -48,10 +45,7 @@ async def error_handler(request: Request, e: Exception): logger.error(trace) - error = HTTPException( - status_code=500, - detail=str(e) - ) + error = HTTPException(status_code=500, detail=str(e)) return await http_error_handler(request, error) @@ -85,6 +79,7 @@ def filter(self, record: logging.LogRecord) -> bool: logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter()) logging.getLogger("uvicorn.access").addFilter(MetricsCheckFilter()) + @app.get("/healthcheck", status_code=status.HTTP_200_OK, include_in_schema=False) def healthcheck(request: Request): return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} diff --git a/prepline_general/api/general.py b/prepline_general/api/general.py index 58d809da..aa614cf9 100644 --- a/prepline_general/api/general.py +++ b/prepline_general/api/general.py @@ -277,13 +277,11 @@ def pipeline_api( # This will raise if the file is encrypted pdf.metadata except pypdf.errors.EmptyFileError: - raise HTTPException( - status_code=400, detail=f"File does not appear to be a valid PDF" - ) + raise HTTPException(status_code=400, detail="File does not appear to be a valid PDF") except pypdf.errors.FileNotDecryptedError: raise HTTPException( status_code=400, - detail=f"File is encrypted. Please decrypt it with password.", + detail="File is encrypted. Please decrypt it with password.", ) strategy = (m_strategy[0] if len(m_strategy) else "auto").lower() @@ -332,19 +330,30 @@ def pipeline_api( m_skip_infer_table_types[0] if len(m_skip_infer_table_types) else ["pdf", "jpg", "png"] ) - chunking_strategy = (m_chunking_strategy[0].lower() if len(m_chunking_strategy) else None) + chunking_strategy = m_chunking_strategy[0].lower() if len(m_chunking_strategy) else None chunk_strategies = ["by_title"] if chunking_strategy and (chunking_strategy not in chunk_strategies): raise HTTPException( - status_code=400, detail=f"Invalid chunking strategy: {chunking_strategy}. Must be one of {chunk_strategies}" + status_code=400, + detail=f"Invalid chunking strategy: {chunking_strategy}. Must be one of {chunk_strategies}", ) - - multipage_sections_str = (m_multipage_sections[0] if len(m_multipage_sections) else "false").lower() + + multipage_sections_str = ( + m_multipage_sections[0] if len(m_multipage_sections) else "false" + ).lower() multipage_sections = multipage_sections_str == "true" - combine_under_n_chars = (int(m_combine_under_n_chars[0]) if m_combine_under_n_chars and m_combine_under_n_chars[0].isdigit() else 500) + combine_under_n_chars = ( + int(m_combine_under_n_chars[0]) + if m_combine_under_n_chars and m_combine_under_n_chars[0].isdigit() + else 500 + ) - new_after_n_chars = (int(m_new_after_n_chars[0]) if m_new_after_n_chars and m_new_after_n_chars[0].isdigit() else 1500) + new_after_n_chars = ( + int(m_new_after_n_chars[0]) + if m_new_after_n_chars and m_new_after_n_chars[0].isdigit() + else 1500 + ) try: logger.debug( @@ -477,9 +486,7 @@ def get_validated_mimetype(file): if content_type not in allowed_mimetypes: raise HTTPException( status_code=400, - detail=( - f"File type {content_type} is not supported." - ), + detail=(f"File type {content_type} is not supported."), ) return content_type