-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Brandon Duane Walker
authored and
Brandon Duane Walker
committed
May 30, 2024
1 parent
8556c48
commit 09c365c
Showing
20 changed files
with
431 additions
and
0 deletions.
There are no files selected for viewing
29 changes: 29 additions & 0 deletions
29
utils/docking/diffdock/rank-diffdock-poses-tool/.bumpversion.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/rank_diffdock_poses/__init__.py] |
4 changes: 4 additions & 0 deletions
4
utils/docking/diffdock/rank-diffdock-poses-tool/.dockerignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
5 changes: 5 additions & 0 deletions
5
utils/docking/diffdock/rank-diffdock-poses-tool/.gitattributes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
*.pdb filter=lfs diff=lfs merge=lfs -text | ||
*.pdbqt filter=lfs diff=lfs merge=lfs -text | ||
*.mol2 filter=lfs diff=lfs merge=lfs -text | ||
*.xlsx filter=lfs diff=lfs merge=lfs -text | ||
*.sdf filter=lfs diff=lfs merge=lfs -text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
22 changes: 22 additions & 0 deletions
22
utils/docking/diffdock/rank-diffdock-poses-tool/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# docker build -f Dockerfile -t mrbrandonwalker/rank_diffdock_poses_tool . | ||
FROM python | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
|
||
# Work directory defined in the base container | ||
# WORKDIR ${EXEC_DIR} | ||
|
||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
|
||
COPY src ${EXEC_DIR}/src | ||
ADD Dockerfile . | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir | ||
|
||
CMD ["--help"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# rank_diffdock_poses (0.1.0) | ||
|
||
DiffDock Diffusion pose ranking | ||
|
||
## Options | ||
|
||
This plugin takes 3 input arguments and 1 output argument: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
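| diffdock_poses | DiffDock pose files to rank (named `rankX_confidenceY.sdf`) | Input | File[] | N/A | | ||
| top_n_confident | Top n most confident poses to keep | Input | float | 1000 | | ||
| top_percent_confidence | Top percent of most confident poses to keep | Input | float | 100 | | ||
| output_poses | Top ranked poses | Output | File[] | N/A | |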
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
4 changes: 4 additions & 0 deletions
4
utils/docking/diffdock/rank-diffdock-poses-tool/build-docker.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
version=$(<VERSION) | ||
docker build . -t polusai/rank-diffdock-poses-tool:${version} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
specVersion: "0.1.0" | ||
name: rank_diffdock_poses | ||
version: 0.1.0 | ||
container: rank-diffdock-poses-tool | ||
entrypoint: | ||
title: rank_diffdock_poses | ||
description: DiffDock Diffusion pose ranking | ||
author: Brandon Walker | ||
contact: [email protected] | ||
repository: | ||
documentation: | ||
citation: | ||
|
||
inputs: | ||
- name: diffdock_poses | ||
required: true | ||
description: | ||
type: File[] | ||
format: | ||
uri: edam:format_3814 | ||
- name: top_n_confident | ||
required: true | ||
description: | ||
type: float | ||
defaultValue: 1000 | ||
- name: top_percent_confidence | ||
required: true | ||
description: | ||
type: float | ||
defaultValue: 100 | ||
outputs: | ||
- name: output_poses | ||
required: true | ||
description: | ||
type: File[] | ||
format: | ||
uri: edam:format_3814 | ||
ui: | ||
- key: inputs.diffdock_poses | ||
title: "diffdock_poses: " | ||
description: "" | ||
type: File[] | ||
- key: inputs.top_n_confident | ||
title: "top_n_confident: " | ||
description: "" | ||
type: float | ||
- key: inputs.top_percent_confidence | ||
title: "top_percent_confidence: " | ||
description: "" | ||
type: float |
31 changes: 31 additions & 0 deletions
31
utils/docking/diffdock/rank-diffdock-poses-tool/pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[tool.poetry] | ||
name = "polus-mm-utils-rank-diffdock-poses" | ||
version = "0.1.0" | ||
description = "DiffDock Diffusion pose ranking" | ||
authors = ["Data Scientist <[email protected]>"] | ||
readme = "README.md" | ||
packages = [{include = "polus", from = "src"}] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.9,<3.12.4" | ||
typer = "^0.7.0" | ||
cwl-utils = "0.33" | ||
cwltool = "3.1.20240404144621" | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
bump2version = "^1.0.1" | ||
pytest = "^7.4" | ||
pytest-sugar = "^0.9.6" | ||
pre-commit = "^3.2.1" | ||
black = "^23.3.0" | ||
mypy = "^1.1.1" | ||
ruff = "^0.0.270" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
pythonpath = [ | ||
"." | ||
] |
76 changes: 76 additions & 0 deletions
76
utils/docking/diffdock/rank-diffdock-poses-tool/rank_diffdock_poses.cwl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/usr/bin/env cwl-runner | ||
cwlVersion: v1.0 | ||
|
||
class: CommandLineTool | ||
|
||
label: DiffDock Diffusion pose ranking | ||
|
||
doc: |- | ||
DiffDock Diffusion pose ranking | ||
|
||
baseCommand: ["python", "-m", "polus.mm.utils.rank_diffdock_poses"] | ||
|
||
hints: | ||
DockerRequirement: | ||
dockerPull: mrbrandonwalker/rank_diffdock_poses_tool | ||
|
||
requirements: | ||
InlineJavascriptRequirement: {} | ||
|
||
inputs: | ||
|
||
diffdock_poses: | ||
label: diffdock poses | ||
type: | ||
type: array | ||
items: File | ||
inputBinding: | ||
prefix: --diffdock_poses | ||
format: edam:format_3814 | ||
|
||
top_n_confident: | ||
type: float | ||
label: top n most confident poses to keep | ||
inputBinding: | ||
prefix: --top_n_confident | ||
# set default to essentially keep all poses | ||
default: 1000 | ||
|
||
top_percent_confidence: | ||
type: float | ||
label: top percent of most confident poses to keep | ||
inputBinding: | ||
prefix: --top_percent_confidence | ||
# set default to keep all poses | ||
default: 100 | ||
|
||
outputs: | ||
|
||
output_poses: | ||
type: File[] | ||
label: top ranked poses | ||
outputBinding: | ||
glob: ranked_poses.txt # This determines what binds to self[0] | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
// file looks like | ||
// file_index | ||
const lines = self[0].contents.split("\n").filter(line => line.trim() !== ''); | ||
const lst = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
var splitLine = lines[i].split(" "); | ||
// now find the File from inputs.diffdock_poses | ||
var mol_idx = parseInt(splitLine[0]); | ||
var file = inputs.diffdock_poses[mol_idx]; | ||
lst.push(file); | ||
} | ||
return lst; | ||
} | ||
format: edam:format_3814 | ||
|
||
$namespaces: | ||
edam: https://edamontology.org/ | ||
|
||
$schemas: | ||
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl |
7 changes: 7 additions & 0 deletions
7
...king/diffdock/rank-diffdock-poses-tool/src/polus/mm/utils/rank_diffdock_poses/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""rank_diffdock_poses.""" | ||
|
||
__version__ = "0.1.0" | ||
|
||
from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import ( # noqa # pylint: disable=unused-import | ||
rank_diffdock_poses, | ||
) |
53 changes: 53 additions & 0 deletions
53
...king/diffdock/rank-diffdock-poses-tool/src/polus/mm/utils/rank_diffdock_poses/__main__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
"""Package entrypoint for the rank_diffdock_poses package.""" | ||
|
||
# Base packages | ||
import logging | ||
from os import environ | ||
from pathlib import Path | ||
|
||
import typer | ||
from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import rank_diffdock_poses | ||
|
||
# Configure the root handler/format once for the command-line entrypoint.
logging.basicConfig(
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)
# POLUS_LOG names a standard logging level (e.g. "DEBUG"). The lookup is
# case-insensitive, and anything that does not resolve to a numeric level
# falls back to INFO instead of crashing at import time.
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO").upper(), None)
if not isinstance(POLUS_LOG, int):
    POLUS_LOG = logging.INFO
# No trailing dot: the logger name is exactly the package's dotted path.
logger = logging.getLogger("polus.mm.utils.rank_diffdock_poses")
logger.setLevel(POLUS_LOG)
|
||
app = typer.Typer(help="rank_diffdock_poses.")


@app.command()
def main(
    diffdock_poses: list[Path] = typer.Option(
        ...,
        "--diffdock_poses",
        help="DiffDock pose files to rank (named 'rankX_confidenceY.sdf').",
    ),
    top_n_confident: float = typer.Option(
        ...,
        "--top_n_confident",
        help="Keep at most this many of the most confident poses.",
    ),
    top_percent_confidence: float = typer.Option(
        ...,
        "--top_percent_confidence",
        help="Percentage of the top-n poses to keep (100 keeps all of them).",
    ),
) -> None:
    """Rank DiffDock poses by confidence from the command line.

    Logs the received options and delegates to ``rank_diffdock_poses``.

    Args:
        diffdock_poses: Pose files produced by DiffDock.
        top_n_confident: Maximum number of most-confident poses to keep.
            Parsed as a float because the CWL/ICT interfaces declare this
            input as ``float`` (so ``1000.0`` must be accepted); it is
            truncated to an integer before use.
        top_percent_confidence: Percentage of the top-n poses to keep; may be
            fractional (e.g. ``12.5``).
    """
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info("diffdock_poses: %s", diffdock_poses)
    logger.info("top_n_confident: %s", top_n_confident)
    logger.info("top_percent_confidence: %s", top_percent_confidence)

    rank_diffdock_poses(
        diffdock_poses=diffdock_poses,
        # The ranking routine uses this value as a slice bound, which must be
        # an integer.
        top_n_confident=int(top_n_confident),
        top_percent_confidence=top_percent_confidence,
    )
|
||
|
||
if __name__ == "__main__": | ||
app() |
57 changes: 57 additions & 0 deletions
57
...ck/rank-diffdock-poses-tool/src/polus/mm/utils/rank_diffdock_poses/rank_diffdock_poses.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
"""This module ranks the poses from a DiffDock output based on confidence scores.""" | ||
import re | ||
from pathlib import Path | ||
|
||
|
||
def rank_diffdock_poses(
    diffdock_poses: list[Path],
    top_n_confident: int,
    top_percent_confidence: float,
) -> None:
    """Rank DiffDock poses by confidence and record which ones to keep.

    The confidence of each pose is parsed from its filename. Poses are sorted
    from most to least confident, cut to the ``top_n_confident`` best ones,
    and then cut again to ``top_percent_confidence`` percent of that
    selection. The positions (indices into ``diffdock_poses``) of the kept
    poses are written, best first and one per line, to ``ranked_poses.txt``
    in the current working directory.

    To filter only by percentage, pass a trivially high ``top_n_confident``;
    to filter only by count, pass ``top_percent_confidence=100``.

    Args:
        diffdock_poses: List of poses from DiffDock output.
        top_n_confident: Number of poses to keep based on confidence score.
            Negative values are treated as zero.
        top_percent_confidence: Percentage of poses to keep based on
            confidence score. Negative values are treated as zero.

    Returns:
        None
    """
    confidences = [parse_confidence(pose) for pose in diffdock_poses]

    # Rank input positions instead of building a confidence -> pose mapping:
    # several poses may share the same confidence, and a dict keyed by
    # confidence would silently drop all but one of them. sorted() is stable,
    # so tied poses keep their input order.
    ranked_indices = sorted(
        range(len(diffdock_poses)),
        key=confidences.__getitem__,
        reverse=True,
    )

    # First filter by absolute count. Clamp at zero so a negative value is
    # not interpreted as "drop that many from the end" by the slice.
    kept = ranked_indices[: max(int(top_n_confident), 0)]

    # Next filter by percentage of the remaining poses. Multiply before
    # dividing so that exact cases are not truncated by float error
    # (29 * 0.01 * 100 evaluates to 28.999..., which int() turns into 28).
    num_poses = max(int(top_percent_confidence * len(kept) / 100), 0)
    kept = kept[:num_poses]

    # One "<index> " per line; the trailing space is part of the format that
    # downstream consumers split on.
    path = Path("ranked_poses.txt")
    with path.open("w", encoding="utf-8") as file:
        file.writelines(f"{index} \n" for index in kept)
|
||
|
||
def parse_confidence(file_name: Path) -> float:
    """Return the confidence score encoded in a pose filename.

    The final path component must contain ``rankX_confidenceY.sdf``, where X
    is a positive integer and Y is a float (for example
    ``rank1_confidence0.36.sdf``).

    Args:
        file_name (Path): The filename of the output pose.

    Returns:
        float: The confidence value from the pose filename.

    Raises:
        ValueError: If the filename does not follow the expected pattern or
            the confidence part is not a valid float.
    """
    # Only look at the final path component so that directory names cannot
    # leak into the captured value, and escape the dot so that only a literal
    # ".sdf" extension is accepted.
    name = Path(file_name).name
    match = re.search(r"rank[0-9]+_confidence(.*?)\.sdf", name)
    if match is None:
        msg = (
            f"Cannot parse confidence from '{name}': expected a name like "
            "'rankX_confidenceY.sdf'"
        )
        raise ValueError(msg)
    return float(match.group(1))
1 change: 1 addition & 0 deletions
1
utils/docking/diffdock/rank-diffdock-poses-tool/tests/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Tests for rank_diffdock_poses.""" |
3 changes: 3 additions & 0 deletions
3
utils/docking/diffdock/rank-diffdock-poses-tool/tests/rank1_confidence0.36.sdf
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
utils/docking/diffdock/rank-diffdock-poses-tool/tests/rank2_confidence0.35.sdf
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
utils/docking/diffdock/rank-diffdock-poses-tool/tests/rank3_confidence0.34.sdf
Git LFS file not shown
Oops, something went wrong.