From b190fca7caf609d76915eb76d30802fb5de05d36 Mon Sep 17 00:00:00 2001 From: Nazanin Donyapour Date: Thu, 5 Sep 2024 17:07:05 -0400 Subject: [PATCH] address comments --- .../Dockerfile | 10 +-- .../build-docker.sh | 2 +- .../environment.yml | 14 ---- ... extract_pdbids_drugbank_xsdata_0@1@0.cwl} | 20 +---- .../out.txt | 3 - .../pyproject.toml | 31 ++++--- .../__main__.py | 84 ++++++++++--------- .../test_extract_pdbids_drugbank_xsdata.py | 37 +++++++- 8 files changed, 107 insertions(+), 94 deletions(-) delete mode 100644 utils/extract-pdbids-drugbank-xsdata-plugin/environment.yml rename utils/extract-pdbids-drugbank-xsdata-plugin/{extract_pdbids_drugbank_xsdata.cwl => extract_pdbids_drugbank_xsdata_0@1@0.cwl} (87%) delete mode 100644 utils/extract-pdbids-drugbank-xsdata-plugin/out.txt diff --git a/utils/extract-pdbids-drugbank-xsdata-plugin/Dockerfile b/utils/extract-pdbids-drugbank-xsdata-plugin/Dockerfile index b463afa..ed71855 100644 --- a/utils/extract-pdbids-drugbank-xsdata-plugin/Dockerfile +++ b/utils/extract-pdbids-drugbank-xsdata-plugin/Dockerfile @@ -14,14 +14,12 @@ COPY README.md ${EXEC_DIR} COPY CHANGELOG.md ${EXEC_DIR} # Install needed packages here -# errors installing rdkit from poetry so using conda -COPY environment.yml ${EXEC_DIR} -RUN mamba env create -f ${EXEC_DIR}/environment.yml -RUN echo "source activate project_env" > ~/.bashrc -ENV PATH /opt/conda/envs/env/bin:$PATH COPY src ${EXEC_DIR}/src +ADD Dockerfile . + RUN pip3 install ${EXEC_DIR} --no-cache-dir +#RUN pip3 install -i https://test.pypi.org/simple/ drugbank-schemas -CMD ["--help"] +#CMD ["--help"] diff --git a/utils/extract-pdbids-drugbank-xsdata-plugin/build-docker.sh b/utils/extract-pdbids-drugbank-xsdata-plugin/build-docker.sh index b867d77..69f52ad 100755 --- a/utils/extract-pdbids-drugbank-xsdata-plugin/build-docker.sh +++ b/utils/extract-pdbids-drugbank-xsdata-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$(", "Brandon Walker "] readme = "README.md" packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] -python = ">=3.9,<3.12" +python = ">=3.9,<3.13" typer = "^0.7.0" -cwltool = "3.1.20240404144621" +sophios = "0.1.4" +pandas = "2.2.2" +rdkit = "2024.3.5" + +# Specifying xsdata-pydantic with extra options +xsdata-pydantic = { version = "*", extras = ["cli", "lxml", "soap"] } + + +# Specifying additional index and package +drugbank-schemas = { version = "0.1.4", source = "test-pypi"} + +# Ensure the correct priority for the additional index +[[tool.poetry.source]] +name = "test-pypi" +url = "https://test.pypi.org/simple/" +priority = "explicit" [tool.poetry.group.dev.dependencies] bump2version = "^1.0.1" @@ -19,18 +34,10 @@ pre-commit = "^3.2.1" black = "^23.3.0" mypy = "^1.1.1" ruff = "^0.3.0" -drugbank_schemas = { version = "0.1.4", source = "testpypi" } - -[[tool.poetry.source]] -name = "testpypi" -url = "https://test.pypi.org/simple/" -secondary = true [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.pytest.ini_options] -pythonpath = [ - "." -] +pythonpath = ["."] diff --git a/utils/extract-pdbids-drugbank-xsdata-plugin/src/polus/mm/utils/extract_pdbids_drugbank_xsdata/__main__.py b/utils/extract-pdbids-drugbank-xsdata-plugin/src/polus/mm/utils/extract_pdbids_drugbank_xsdata/__main__.py index 2f8d6d6..aebed4d 100644 --- a/utils/extract-pdbids-drugbank-xsdata-plugin/src/polus/mm/utils/extract_pdbids_drugbank_xsdata/__main__.py +++ b/utils/extract-pdbids-drugbank-xsdata-plugin/src/polus/mm/utils/extract_pdbids_drugbank_xsdata/__main__.py @@ -1,10 +1,10 @@ """Package entrypoint for the extract_pdbids_drugbank_xsdata package.""" # Base packages +import argparse import logging from os import environ -import typer from polus.mm.utils.extract_pdbids_drugbank_xsdata.extract_pdbids_drugbank_xsdata import ( # noqa: E501 extract_pdbids_drugbank_xsdata, ) @@ -17,52 +17,60 @@ logger = logging.getLogger("polus.mm.utils.extract_pdbids_drugbank_xsdata.") logger.setLevel(POLUS_LOG) -app = typer.Typer(help="extract_pdbids_drugbank_xsdata.") + +def main(args: argparse.Namespace) -> None: + """extract_pdbids_drugbank_xsdata.""" + logger.info(f"drugbank_xml_file_path: {args.drugbank_xml_file_path}") + logger.info(f"smiles: {args.smiles}") + logger.info(f"inchi: {args.inchi}") + logger.info(f"inchi_keys: {args.inchi_keys}") + logger.info(f"output_txt_path: {args.output_txt_path}") + + extract_pdbids_drugbank_xsdata( + drugbank_xml_file_path=args.drugbank_xml_file_path, + smiles=args.smiles, + inchi=args.inchi, + inchi_keys=args.inchi_keys, + output_txt_path=args.output_txt_path, + ) -@app.command() -def main( - drugbank_xml_file_path: str = typer.Option( - ..., +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="extract_pdbids_drugbank_xsdata.") + + parser.add_argument( "--drugbank_xml_file_path", + type=str, + required=True, help="Path to the Drugbank xml file", - ), - smiles: list[str] = typer.Option( - ..., + ) + parser.add_argument( "--smiles", + type=str, + nargs="+", + required=True, help="List of input SMILES, Type string[], File type input", - ), - inchi: list[str] = typer.Option( - ..., + ) + parser.add_argument( "--inchi", - help="List of input SMILES, Type string[], File type input", - ), - inchi_keys: list[str] = typer.Option( - ..., + type=str, + nargs="+", + required=True, + help="List of input InChI, Type string[], File type input", + ) + parser.add_argument( "--inchi_keys", - help="List of input SMILES, Type string[], File type input", - ), - output_txt_path: str = typer.Option( - ..., + type=str, + nargs="+", + required=True, + help="List of input InChI keys, Type string[], File type input", + ) + parser.add_argument( "--output_txt_path", + type=str, + required=True, help="Path to the text dataset file, Type string, File type output", - ), -) -> None: - """extract_pdbids_drugbank_xsdata.""" - logger.info(f"drugbank_xml_file_path: {drugbank_xml_file_path}") - logger.info(f"smiles: {smiles}") - logger.info(f"inchi: {inchi}") - logger.info(f"inchi_keys: {inchi_keys}") - logger.info(f"output_txt_path: {output_txt_path}") - - extract_pdbids_drugbank_xsdata( - drugbank_xml_file_path=drugbank_xml_file_path, - smiles=smiles, - inchi=inchi, - inchi_keys=inchi_keys, - output_txt_path=output_txt_path, ) - -if __name__ == "__main__": - app() + args = parser.parse_args() + main(args) diff --git a/utils/extract-pdbids-drugbank-xsdata-plugin/tests/test_extract_pdbids_drugbank_xsdata.py b/utils/extract-pdbids-drugbank-xsdata-plugin/tests/test_extract_pdbids_drugbank_xsdata.py index 97e5a18..85db341 100644 --- a/utils/extract-pdbids-drugbank-xsdata-plugin/tests/test_extract_pdbids_drugbank_xsdata.py +++ b/utils/extract-pdbids-drugbank-xsdata-plugin/tests/test_extract_pdbids_drugbank_xsdata.py @@ -4,6 +4,8 @@ from polus.mm.utils.extract_pdbids_drugbank_xsdata.extract_pdbids_drugbank_xsdata import ( # noqa: E501 extract_pdbids_drugbank_xsdata, ) +from sophios.api.pythonapi import Step +from sophios.api.pythonapi import Workflow def test_extract_pdbids_drugbank_xsdata() -> None: @@ -15,6 +17,37 @@ def test_extract_pdbids_drugbank_xsdata() -> None: input_xml_path = str(Path(__file__).resolve().parent / Path(input_xml_path)) - extract_pdbids_drugbank_xsdata(input_xml_path, [], inchi, [], "out.txt") + extract_pdbids_drugbank_xsdata(input_xml_path, [], inchi, [], "output.txt") - assert Path("out.txt").exists() + assert Path("output.txt").exists() + + +def test_extract_pdbids_drugbank_xsdata_cwl() -> None: + """Test extract_pdbids_drugbank_xsdata CWL.""" + cwl_file = Path("extract_pdbids_drugbank_xsdata_0@1@0.cwl") + + # Create the step for the CWL file + extract_pdbids_drugbank_xsdata_step = Step(clt_path=cwl_file) + + input_xml_path = "drugbank_10_fake_records_5.1.10.xml" + input_xml_path = str(Path(__file__).resolve().parent / Path(input_xml_path)) + + inchi = ["InChI3491", "InChI8564", "InChI7556"] + + extract_pdbids_drugbank_xsdata_step.drugbank_xml_file_path = input_xml_path + extract_pdbids_drugbank_xsdata_step.inchi = inchi + extract_pdbids_drugbank_xsdata_step.output_txt_path = "output_cwl.txt" + + # Define the workflow with the step + steps = [extract_pdbids_drugbank_xsdata_step] + filename = "extract_pdbids_drugbank_xsdata" + workflow = Workflow(steps, filename) + + # Run the workflow + workflow.run() + + # Check for the existence of the output file + outdir = Path("outdir") + assert any( + file.name == "output_cwl.txt" for file in outdir.rglob("*") + ), "The file output_cwl.txt was not found."