diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/.bumpversion.cfg b/utils/docking/diffdock/rank-diffdock-poses-tool/.bumpversion.cfg new file mode 100644 index 00000000..b5c21796 --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/.bumpversion.cfg @@ -0,0 +1,29 @@ +[bumpversion] +current_version = 0.1.0 +commit = False +tag = False +parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:plugin.json] + +[bumpversion:file:src/polus/mm/utils/rank_diffdock_poses/__init__.py] diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/.dockerignore b/utils/docking/diffdock/rank-diffdock-poses-tool/.dockerignore new file mode 100644 index 00000000..7c603f81 --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/.dockerignore @@ -0,0 +1,4 @@ +.venv +out +tests +__pycache__ diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/.gitattributes b/utils/docking/diffdock/rank-diffdock-poses-tool/.gitattributes new file mode 100644 index 00000000..07fedc1e --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/.gitattributes @@ -0,0 +1,5 @@ +*.pdb filter=lfs diff=lfs merge=lfs -text +*.pdbqt filter=lfs diff=lfs merge=lfs -text +*.mol2 filter=lfs diff=lfs merge=lfs -text +*.xlsx filter=lfs diff=lfs merge=lfs -text +*.sdf filter=lfs diff=lfs merge=lfs -text diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/.gitignore b/utils/docking/diffdock/rank-diffdock-poses-tool/.gitignore new file mode 100644 index 00000000..c04bc49f --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/.gitignore @@ -0,0 +1 @@ 
+poetry.lock diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/CHANGELOG.md b/utils/docking/diffdock/rank-diffdock-poses-tool/CHANGELOG.md new file mode 100644 index 00000000..b67793f7 --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 0.1.0 + +Initial release. diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/Dockerfile b/utils/docking/diffdock/rank-diffdock-poses-tool/Dockerfile new file mode 100644 index 00000000..24abd6a2 --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/Dockerfile @@ -0,0 +1,22 @@ +# docker build -f Dockerfile -t polusai/rank-diffdock-poses-tool . +FROM python + +ENV EXEC_DIR="/opt/executables" +ENV POLUS_LOG="INFO" +RUN mkdir -p ${EXEC_DIR} + + +# Work directory defined in the base container +# WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY CHANGELOG.md ${EXEC_DIR} + +COPY src ${EXEC_DIR}/src +ADD Dockerfile . 
+ +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +CMD ["--help"] diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/README.md b/utils/docking/diffdock/rank-diffdock-poses-tool/README.md new file mode 100644 index 00000000..5ebdbe7b --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/README.md @@ -0,0 +1,14 @@ +# rank_diffdock_poses (0.1.0) + +DiffDock Diffusion pose ranking + +## Options + +This plugin takes 3 input arguments and 1 output argument: + +| Name | Description | I/O | Type | Default | +|---------------|-------------------------|--------|--------|---------| +| diffdock_poses | | Input | File[] | File[] | +| top_n_confident | | Input | float | float | +| top_percent_confidence | | Input | float | float | +| output_poses | | Output | File[] | File[] | diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/VERSION b/utils/docking/diffdock/rank-diffdock-poses-tool/VERSION new file mode 100644 index 00000000..6e8bf73a --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/build-docker.sh b/utils/docking/diffdock/rank-diffdock-poses-tool/build-docker.sh new file mode 100755 index 00000000..0cb372cc --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$("] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = ">=3.9,<3.12.5" +sophios = "0.1.1" + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +pytest = "^7.4" +pytest-sugar = "^0.9.6" +pre-commit = "^3.2.1" +black = "^23.3.0" +mypy = "^1.1.1" +ruff = "^0.0.270" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." 
+] diff --git a/utils/docking/diffdock/rank-diffdock-poses-tool/rank_diffdock_poses_0@1@0.cwl b/utils/docking/diffdock/rank-diffdock-poses-tool/rank_diffdock_poses_0@1@0.cwl new file mode 100644 index 00000000..17e81c0c --- /dev/null +++ b/utils/docking/diffdock/rank-diffdock-poses-tool/rank_diffdock_poses_0@1@0.cwl @@ -0,0 +1,76 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 + +class: CommandLineTool + +label: DiffDock Diffusion pose ranking + +doc: |- + DiffDock Diffusion pose ranking + +baseCommand: ["python", "-m", "polus.mm.utils.rank_diffdock_poses"] + +hints: + DockerRequirement: + dockerPull: polusai/rank-diffdock-poses-tool@sha256:ff03d4c4a4a908ea0b00ee79de05e633b5edf9e5067d71f62251cc034a6fee62 + +requirements: + InlineJavascriptRequirement: {} + +inputs: + + diffdock_poses: + label: diffdock poses + type: + type: array + items: File + inputBinding: + prefix: --diffdock_poses + format: edam:format_3814 + + top_n_confident: + type: float + label: top n most confident poses to keep + inputBinding: + prefix: --top_n_confident + # set default to essentially keep all poses + default: 1000 + + top_percent_confidence: + type: float + label: top percent of most confident poses to keep + inputBinding: + prefix: --top_percent_confidence + # set default to keep all poses + default: 100 + +outputs: + + output_poses: + type: File[] + label: top ranked poses + outputBinding: + glob: ranked_poses.txt # This determines what binds to self[0] + loadContents: true + outputEval: | + ${ + // file looks like + // file_index + const lines = self[0].contents.split("\n").filter(line => line.trim() !== ''); + const lst = []; + for (var i = 0; i < lines.length; i++) { + var splitLine = lines[i].split(" "); + // now find the File from inputs.diffdock_poses + var mol_idx = parseInt(splitLine[0]); + var file = inputs.diffdock_poses[mol_idx]; + lst.push(file); + } + return lst; + } + format: edam:format_3814 + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- 
"""Rank DiffDock poses by the confidence score embedded in their file names.

Flattened listing of the Python sources added by this patch:
``__init__.py`` (version), ``rank_diffdock_poses.py`` (ranking logic) and
``__main__.py`` (command-line entry point).
"""

import argparse
import logging
import re
from os import environ
from pathlib import Path

# --- src/polus/mm/utils/rank_diffdock_poses/__init__.py ---------------------

__version__ = "0.1.0"

# --- src/polus/mm/utils/rank_diffdock_poses/rank_diffdock_poses.py ----------

# Applied to the base name of a pose file. Whatever sits between
# "_confidence" and the literal ".sdf" is captured and handed to float().
_CONFIDENCE_RE = re.compile(r"rank[0-9]+_confidence(.+)\.sdf")


def rank_diffdock_poses(
    diffdock_poses: list[Path],
    top_n_confident: int,
    top_percent_confidence: float,
) -> None:
    """Write the indices of the most confident poses to ``ranked_poses.txt``.

    Poses are ordered from highest to lowest confidence. The list is first
    cut to the ``top_n_confident`` best poses and then to the top
    ``top_percent_confidence`` percent of what is left. To use only one of
    the two filters, pass a very large ``top_n_confident`` or a
    ``top_percent_confidence`` of 100.

    The output file is created in the current working directory and holds
    one line per kept pose: the pose's zero-based position in
    ``diffdock_poses`` followed by a single space.

    Args:
        diffdock_poses: Pose files whose names carry the confidence score
            (see ``parse_confidence``).
        top_n_confident: Maximum number of poses to keep.
        top_percent_confidence: Percentage (0-100) of the remaining poses to
            keep; the resulting count is truncated towards zero.

    Returns:
        None

    Raises:
        ValueError: If a file name does not contain a parsable confidence.
    """
    confidences = [parse_confidence(pose) for pose in diffdock_poses]

    # Rank positions rather than building a {confidence: pose} mapping: two
    # poses with the same confidence must both survive. sorted() is stable,
    # also with reverse=True, so ties keep their input order.
    ranked_indices = sorted(
        range(len(diffdock_poses)),
        key=lambda index: confidences[index],
        reverse=True,
    )

    # First filter: absolute number of poses.
    kept = ranked_indices[:top_n_confident]

    # Second filter: percentage of the remaining poses. Multiplying before
    # dividing avoids the rounding loss of `percent * 0.01 * n`
    # (e.g. 29 * 0.01 * 100 evaluates to 28.999... and truncates to 28).
    keep_count = int(top_percent_confidence * len(kept) / 100)
    kept = kept[:keep_count]

    with Path("ranked_poses.txt").open("w", encoding="utf-8") as file:
        file.writelines(f"{index} \n" for index in kept)


def parse_confidence(file_name: Path) -> float:
    """Return the confidence score encoded in a pose file name.

    File names must follow the format 'rankX_confidenceY.sdf', where X is a
    positive integer and Y is a float. This format is the default for
    DiffDock outputs. Only the final path component is inspected, so the
    names of parent directories cannot influence the result.

    Args:
        file_name: Path (or plain string) of an output pose.

    Returns:
        The confidence value of the pose.

    Raises:
        ValueError: If the name does not follow the expected format or the
            captured text is not a valid float.
    """
    match = _CONFIDENCE_RE.search(Path(file_name).name)
    if match is None:
        msg = f"Cannot parse a confidence score from file name: {file_name}"
        raise ValueError(msg)
    return float(match.group(1))


# --- src/polus/mm/utils/rank_diffdock_poses/__main__.py ---------------------
# NOTE: in the packaged layout `rank_diffdock_poses` is imported here from the
# sibling module; in this flattened listing it is defined above.

logging.basicConfig(
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO"))
logger = logging.getLogger("polus.mm.utils.rank_diffdock_poses")
logger.setLevel(POLUS_LOG)


def _whole_number(text: str) -> int:
    """Convert a command-line value such as '1000' or '1000.0' to an int.

    The CWL description of this tool declares the numeric inputs as floats,
    so a count may be spelled with a trailing '.0'; values with a real
    fractional part are rejected.
    """
    try:
        number = float(text)
    except ValueError as err:
        raise argparse.ArgumentTypeError(f"invalid number: {text!r}") from err
    if not number.is_integer():
        raise argparse.ArgumentTypeError(
            f"expected a whole number, got {text!r}",
        )
    return int(number)


def main() -> None:
    """Parse the command line and rank the given DiffDock poses."""
    parser = argparse.ArgumentParser(description="rank_diffdock_poses.")
    parser.add_argument(
        "--diffdock_poses",
        nargs="+",
        type=Path,
        required=True,
        help="List of diffdock pose paths.",
    )
    parser.add_argument(
        "--top_n_confident",
        type=_whole_number,
        required=True,
        help="Top N confident poses.",
    )
    parser.add_argument(
        "--top_percent_confidence",
        type=float,
        required=True,
        help="Top percent confidence threshold.",
    )

    args = parser.parse_args()

    logger.info("diffdock_poses: %s", args.diffdock_poses)
    logger.info("top_n_confident: %s", args.top_n_confident)
    logger.info("top_percent_confidence: %s", args.top_percent_confidence)

    rank_diffdock_poses(
        diffdock_poses=args.diffdock_poses,
        top_n_confident=args.top_n_confident,
        top_percent_confidence=args.top_percent_confidence,
    )


if __name__ == "__main__":
    main()
"""Tests for rank_diffdock_poses."""
from pathlib import Path

import pytest
from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import rank_diffdock_poses
from sophios.api.pythonapi import Step
from sophios.api.pythonapi import Workflow

# Directory holding this test module and the sample .sdf poses.
TESTS_DIR = Path(__file__).resolve().parent

# Deliberately not in confidence order, so the ranking has work to do.
POSE_NAMES = [
    "rank2_confidence0.35.sdf",
    "rank3_confidence0.34.sdf",
    "rank1_confidence0.36.sdf",
]


def test_rank_diffdock_poses(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Poses are written out as input indices, most confident first."""
    # rank_diffdock_poses writes ranked_poses.txt into the current working
    # directory; run inside a temporary directory so nothing is left behind.
    monkeypatch.chdir(tmp_path)

    diffdock_poses = [Path(name) for name in POSE_NAMES]
    top_n_confident = 1000
    top_percent_confidence = 100
    rank_diffdock_poses(diffdock_poses, top_n_confident, top_percent_confidence)

    ranked_file = Path("ranked_poses.txt")
    assert ranked_file.exists()

    with ranked_file.open(encoding="utf-8") as file:
        lines = [line.strip() for line in file]

    # Confidences are 0.35, 0.34, 0.36 at indices 0, 1, 2, so the expected
    # order from most to least confident is index 2, then 0, then 1.
    assert lines == ["2", "0", "1"]


def test_rank_diffdock_poses_cwl() -> None:
    """Run the CWL tool description through a one-step workflow."""
    # Remove a ranked_poses.txt left in the working directory by an
    # earlier run before the workflow starts.
    stale_output = Path("ranked_poses.txt")
    if stale_output.exists():
        stale_output.unlink()

    cwl_file = TESTS_DIR.parent / "rank_diffdock_poses_0@1@0.cwl"

    rank_diffdock_poses_step = Step(clt_path=cwl_file)
    rank_diffdock_poses_step.diffdock_poses = [
        str(TESTS_DIR / name) for name in POSE_NAMES
    ]
    rank_diffdock_poses_step.top_n_confident = 1000
    rank_diffdock_poses_step.top_percent_confidence = 100

    steps = [rank_diffdock_poses_step]
    filename = "rank_diffdock_poses"
    workflow = Workflow(steps, filename)

    workflow.run()