Skip to content

Commit

Permalink
rank diffdock poses
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon Duane Walker authored and Brandon Duane Walker committed May 30, 2024
1 parent 8556c48 commit 09c365c
Show file tree
Hide file tree
Showing 20 changed files with 431 additions and 0 deletions.
29 changes: 29 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/rank_diffdock_poses/__init__.py]
4 changes: 4 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*.pdb filter=lfs diff=lfs merge=lfs -text
*.pdbqt filter=lfs diff=lfs merge=lfs -text
*.mol2 filter=lfs diff=lfs merge=lfs -text
*.xlsx filter=lfs diff=lfs merge=lfs -text
*.sdf filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
22 changes: 22 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# docker build -f Dockerfile -t mrbrandonwalker/rank_diffdock_poses_tool .
# Pin the interpreter line: pyproject.toml requires python >=3.9,<3.12.4, so the
# floating `python` tag makes `pip install` fail as soon as the upstream image
# moves to a newer release.
FROM python:3.11

ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

# Project metadata and sources required by `pip install` below.
COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}

COPY src ${EXEC_DIR}/src
# Keep a copy of the build recipe inside the image.
ADD Dockerfile .

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# No ENTRYPOINT is defined, so CMD has to be a complete command; a bare
# ["--help"] would make `docker run <image>` try to execute "--help" itself.
# Callers that pass their own command (e.g. `python -m ...`) simply override it.
CMD ["python3", "-m", "polus.mm.utils.rank_diffdock_poses", "--help"]
14 changes: 14 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# rank_diffdock_poses (0.1.0)

DiffDock Diffusion pose ranking

## Options

This plugin takes 3 input arguments and 1 output argument:

| Name                   | Description                                 | I/O    | Type   | Default |
|------------------------|---------------------------------------------|--------|--------|---------|
| diffdock_poses         | DiffDock output poses to rank               | Input  | File[] | N/A     |
| top_n_confident        | Top n most confident poses to keep          | Input  | float  | 1000    |
| top_percent_confidence | Top percent of most confident poses to keep | Input  | float  | 100     |
| output_poses           | Top ranked poses                            | Output | File[] | N/A     |
1 change: 1 addition & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the Docker image for this tool, tagged with the version stored in
# ./VERSION. Run it from the directory that contains VERSION and the Dockerfile.

# Stop on the first error, on unset variables and on failed pipeline stages so
# a missing VERSION file cannot lead to a build with a malformed tag.
set -euo pipefail

version=$(<VERSION)
# An empty VERSION file would otherwise produce the invalid tag "name:".
: "${version:?VERSION file is empty}"
docker build . -t "polusai/rank-diffdock-poses-tool:${version}"
50 changes: 50 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
specVersion: "0.1.0"
name: rank_diffdock_poses
version: 0.1.0
container: rank-diffdock-poses-tool
entrypoint:
title: rank_diffdock_poses
description: DiffDock Diffusion pose ranking
author: Brandon Walker
contact: [email protected]
repository:
documentation:
citation:

inputs:
- name: diffdock_poses
required: true
description:
type: File[]
format:
uri: edam:format_3814
- name: top_n_confident
required: true
description:
type: float
defaultValue: 1000
- name: top_percent_confidence
required: true
description:
type: float
defaultValue: 100
outputs:
- name: output_poses
required: true
description:
type: File[]
format:
uri: edam:format_3814
ui:
- key: inputs.diffdock_poses
title: "diffdock_poses: "
description: ""
type: File[]
- key: inputs.top_n_confident
title: "top_n_confident: "
description: ""
type: float
- key: inputs.top_percent_confidence
title: "top_percent_confidence: "
description: ""
type: float
31 changes: 31 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[tool.poetry]
name = "polus-mm-utils-rank-diffdock-poses"
version = "0.1.0"
description = "DiffDock Diffusion pose ranking"
authors = ["Data Scientist <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.12.4"
typer = "^0.7.0"
cwl-utils = "0.33"
cwltool = "3.1.20240404144621"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = [
"."
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

# Command-line tool wrapper around the polus.mm.utils.rank_diffdock_poses
# Python module (see baseCommand). Each input below is passed to the module
# using the prefix given in its inputBinding.
class: CommandLineTool

label: DiffDock Diffusion pose ranking

doc: |-
DiffDock Diffusion pose ranking

baseCommand: ["python", "-m", "polus.mm.utils.rank_diffdock_poses"]

hints:
DockerRequirement:
dockerPull: mrbrandonwalker/rank_diffdock_poses_tool

requirements:
InlineJavascriptRequirement: {}

inputs:

diffdock_poses:
label: diffdock poses
type:
type: array
items: File
inputBinding:
prefix: --diffdock_poses
format: edam:format_3814

# NOTE(review): both numeric inputs are declared as float here, while the
# Python entry point declares --top_n_confident and --top_percent_confidence
# as int. Confirm that non-integer values are never supplied.
top_n_confident:
type: float
label: top n most confident poses to keep
inputBinding:
prefix: --top_n_confident
# set default to essentially keep all poses
default: 1000

top_percent_confidence:
type: float
label: top percent of most confident poses to keep
inputBinding:
prefix: --top_percent_confidence
# set default to keep all poses
default: 100

# The Python module writes ranked_poses.txt with one line per kept pose; the
# first space-separated token of each line is an index into
# inputs.diffdock_poses. The outputEval expression below reads that file and
# returns the corresponding input File objects in the order they are listed.
outputs:

output_poses:
type: File[]
label: top ranked poses
outputBinding:
glob: ranked_poses.txt # This determines what binds to self[0]
# loadContents makes the text of ranked_poses.txt available to outputEval
# as self[0].contents.
loadContents: true
outputEval: |
${
// file looks like
// file_index
const lines = self[0].contents.split("\n").filter(line => line.trim() !== '');
const lst = [];
for (var i = 0; i < lines.length; i++) {
var splitLine = lines[i].split(" ");
// now find the File from inputs.diffdock_poses
var mol_idx = parseInt(splitLine[0]);
var file = inputs.diffdock_poses[mol_idx];
lst.push(file);
}
return lst;
}
format: edam:format_3814

$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""rank_diffdock_poses."""

__version__ = "0.1.0"

from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import ( # noqa # pylint: disable=unused-import
rank_diffdock_poses,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Package entrypoint for the rank_diffdock_poses package."""

# Base packages
import logging
from os import environ
from pathlib import Path

import typer
from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import rank_diffdock_poses

# Root logging layout used by the CLI: timestamp, logger name, level, message.
logging.basicConfig(
    datefmt="%d-%b-%y %H:%M:%S",
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
)
# Verbosity is taken from the POLUS_LOG environment variable, which must name
# an attribute of the logging module (e.g. "DEBUG"); INFO is used when unset.
POLUS_LOG = getattr(
    logging,
    environ.get("POLUS_LOG", "INFO"),
)
logger = logging.getLogger("polus.mm.utils.rank_diffdock_poses.")
logger.setLevel(POLUS_LOG)

app = typer.Typer(help="rank_diffdock_poses.")


@app.command()
def main(
    diffdock_poses: list[Path] = typer.Option(
        ...,
        "--diffdock_poses",
        help=(
            "DiffDock output pose file named like rankX_confidenceY.sdf; "
            "repeat the option once per pose."
        ),
    ),
    top_n_confident: int = typer.Option(
        ...,
        "--top_n_confident",
        help="Keep at most this many of the most confident poses.",
    ),
    top_percent_confidence: int = typer.Option(
        ...,
        "--top_percent_confidence",
        help=(
            "Percentage of the poses remaining after --top_n_confident to keep "
            "(100 keeps all of them)."
        ),
    ),
) -> None:
    """Rank DiffDock poses by confidence and write the kept indices to ranked_poses.txt."""
    # Lazy %-style arguments render the same text as the previous f-strings
    # but skip the formatting work when INFO records are filtered out.
    logger.info("diffdock_poses: %s", diffdock_poses)
    logger.info("top_n_confident: %s", top_n_confident)
    logger.info("top_percent_confidence: %s", top_percent_confidence)

    # All ranking logic lives in the library function; this wrapper only
    # parses and logs the command-line arguments.
    rank_diffdock_poses(
        diffdock_poses=diffdock_poses,
        top_n_confident=top_n_confident,
        top_percent_confidence=top_percent_confidence,
    )


if __name__ == "__main__":
app()
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""This module ranks the poses from a DiffDock output based on confidence scores."""
import re
from pathlib import Path


def rank_diffdock_poses(
    diffdock_poses: list[Path],
    top_n_confident: int,
    top_percent_confidence: int,
) -> None:
    """Rank DiffDock poses by confidence and record the indices of the best ones.

    The confidence of each pose is read from its filename with
    ``parse_confidence``. Poses are ordered from highest to lowest confidence
    (poses with equal confidence keep their input order), then filtered twice:
    first to the ``top_n_confident`` best, then to ``top_percent_confidence``
    percent of what is left. The result is written to ``ranked_poses.txt`` in
    the current working directory, one line per kept pose, each holding the
    pose's position in ``diffdock_poses`` followed by a space.

    To filter by percentage only, pass a trivially large ``top_n_confident``;
    to filter by count only, pass ``top_percent_confidence=100``.

    Args:
        diffdock_poses: List of poses from DiffDock output
        top_n_confident: Number of poses to keep based on confidence score
        top_percent_confidence: Percentage of poses to keep based on confidence score
    Returns:
        None
    Raises:
        ValueError: If ``top_n_confident`` or ``top_percent_confidence`` is
            negative. Errors raised by ``parse_confidence`` for an unparsable
            filename are propagated unchanged.
    """
    # A negative bound would slice from the wrong end and silently drop poses.
    if top_n_confident < 0:
        msg = f"top_n_confident must be non-negative, got {top_n_confident}"
        raise ValueError(msg)
    if top_percent_confidence < 0:
        msg = (
            "top_percent_confidence must be non-negative, "
            f"got {top_percent_confidence}"
        )
        raise ValueError(msg)

    confidences = [parse_confidence(name) for name in diffdock_poses]

    # Rank input positions instead of building confidence->pose and
    # path->index dictionaries: those collapse poses that share a confidence
    # value or a path, silently losing entries. sorted() is stable, so ties
    # keep their original relative order.
    ranked_indices = sorted(
        range(len(confidences)),
        key=confidences.__getitem__,
        reverse=True,
    )

    # First filter by absolute count.
    kept = ranked_indices[:top_n_confident]
    # Next filter by percentage. Multiply before dividing so that integer
    # inputs stay exact (29 * 0.01 * 100 evaluates to 28.999... and truncates
    # to 28, whereas 29 * 100 // 100 is exactly 29).
    num_poses = int(top_percent_confidence * len(kept) // 100)
    kept = kept[:num_poses]

    # Write to ranked_filename
    path = Path("ranked_poses.txt")
    with path.open("w", encoding="utf-8") as file:
        file.writelines(f"{index} \n" for index in kept)


def parse_confidence(file_name: Path) -> float:
    """Return the confidence score encoded in a pose filename.

    The final path component must contain ``rankX_confidenceY.sdf``, where X is
    a positive integer and Y is a float (for example
    ``rank2_confidence-1.23.sdf``). This format is the default for DiffDock
    outputs. Only the file name is inspected, so directory names that happen
    to resemble a pose name cannot affect the result.

    Args:
        file_name (Path): The filename of output pose
    Returns:
        float: The confidence value from pose
    Raises:
        ValueError: If the file name does not follow the expected pattern or
            the captured confidence is not a valid float.
    """
    name = Path(file_name).name
    # Raw string with an escaped dot so that only a literal ".sdf" matches.
    match = re.search(r"rank[0-9]+_confidence(.*)\.sdf", name)
    if match is None:
        msg = (
            f"Cannot parse confidence from {name!r}: "
            "expected a name like 'rankX_confidenceY.sdf'"
        )
        raise ValueError(msg)
    return float(match.group(1))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for rank_diffdock_poses."""
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Loading

0 comments on commit 09c365c

Please sign in to comment.