Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rank diffdock poses #122

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{dev}
{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = _
first_value = dev
values =
dev
_

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

[bumpversion:file:src/polus/mm/utils/rank_diffdock_poses/__init__.py]
4 changes: 4 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*.pdb filter=lfs diff=lfs merge=lfs -text
*.pdbqt filter=lfs diff=lfs merge=lfs -text
*.mol2 filter=lfs diff=lfs merge=lfs -text
*.xlsx filter=lfs diff=lfs merge=lfs -text
*.sdf filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
22 changes: 22 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# docker build -f Dockerfile -t polusai/rank-diffdock-poses-tool .

# Pin the interpreter. pyproject.toml declares python ">=3.9,<3.12.5", so an
# unpinned `python` tag (i.e. latest) resolves to a release outside that range
# and `pip3 install` below refuses to install the package.
FROM python:3.11

ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

# Files the package build needs (pyproject.toml names README.md as its readme).
COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}

COPY src ${EXEC_DIR}/src
ADD Dockerfile .

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# NOTE(review): no ENTRYPOINT is set, so this CMD is only useful when the
# caller supplies the executable itself (the CWL tool invokes
# `python -m polus.mm.utils.rank_diffdock_poses` explicitly) - confirm intent.
CMD ["--help"]
14 changes: 14 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# rank_diffdock_poses (0.1.0)

DiffDock Diffusion pose ranking

## Options

This plugin takes 3 input arguments and 1 output argument:

| Name | Description | I/O | Type | Default |
|------------------------|---------------------------------------------|--------|--------|---------|
| diffdock_poses | DiffDock poses to rank | Input | File[] | |
| top_n_confident | Top n most confident poses to keep | Input | float | 1000 |
| top_percent_confidence | Top percent of most confident poses to keep | Input | float | 100 |
| output_poses | Top ranked poses | Output | File[] | |
1 change: 1 addition & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

version=$(<VERSION)
docker build . -t polusai/rank-diffdock-poses-tool:${version}
50 changes: 50 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
specVersion: "0.1.0"
name: rank_diffdock_poses
version: 0.1.0
container: rank-diffdock-poses-tool
entrypoint:
title: rank_diffdock_poses
description: DiffDock Diffusion pose ranking
author: Brandon Walker
contact: [email protected]
repository:
documentation:
citation:

inputs:
- name: diffdock_poses
required: true
description:
type: File[]
format:
uri: edam:format_3814
- name: top_n_confident
required: true
description:
type: float
defaultValue: 1000
- name: top_percent_confidence
required: true
description:
type: float
defaultValue: 100
outputs:
- name: output_poses
required: true
description:
type: File[]
format:
uri: edam:format_3814
ui:
- key: inputs.diffdock_poses
title: "diffdock_poses: "
description: ""
type: File[]
- key: inputs.top_n_confident
title: "top_n_confident: "
description: ""
type: float
- key: inputs.top_percent_confidence
title: "top_percent_confidence: "
description: ""
type: float
29 changes: 29 additions & 0 deletions utils/docking/diffdock/rank-diffdock-poses-tool/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[tool.poetry]
name = "polus-mm-utils-rank-diffdock-poses"
version = "0.1.0"
description = "DiffDock Diffusion pose ranking"
authors = ["Data Scientist <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.12.5"
sophios = "0.1.1"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
pytest = "^7.4"
pytest-sugar = "^0.9.6"
pre-commit = "^3.2.1"
black = "^23.3.0"
mypy = "^1.1.1"
ruff = "^0.0.270"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = [
"."
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: DiffDock Diffusion pose ranking

doc: |-
DiffDock Diffusion pose ranking

baseCommand: ["python", "-m", "polus.mm.utils.rank_diffdock_poses"]

hints:
DockerRequirement:
dockerPull: polusai/rank-diffdock-poses-tool@sha256:ff03d4c4a4a908ea0b00ee79de05e633b5edf9e5067d71f62251cc034a6fee62

requirements:
InlineJavascriptRequirement: {}

inputs:

diffdock_poses:
label: diffdock poses
type:
type: array
items: File
inputBinding:
prefix: --diffdock_poses
format: edam:format_3814

top_n_confident:
type: float
label: top n most confident poses to keep
inputBinding:
prefix: --top_n_confident
# set default to essentially keep all poses
default: 1000

top_percent_confidence:
type: float
label: top percent of most confident poses to keep
inputBinding:
prefix: --top_percent_confidence
# set default to keep all poses
default: 100

outputs:

output_poses:
type: File[]
label: top ranked poses
outputBinding:
glob: ranked_poses.txt # This determines what binds to self[0]
loadContents: true
outputEval: |
${
// file looks like
// file_index
const lines = self[0].contents.split("\n").filter(line => line.trim() !== '');
const lst = [];
for (var i = 0; i < lines.length; i++) {
var splitLine = lines[i].split(" ");
// now find the File from inputs.diffdock_poses
var mol_idx = parseInt(splitLine[0]);
var file = inputs.diffdock_poses[mol_idx];
lst.push(file);
}
return lst;
}
format: edam:format_3814

$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""rank_diffdock_poses."""

__version__ = "0.1.0"

from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import ( # noqa # pylint: disable=unused-import
rank_diffdock_poses,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Package entrypoint for the rank_diffdock_poses package."""

# Base packages
import argparse
import logging
from os import environ
from pathlib import Path

from polus.mm.utils.rank_diffdock_poses.rank_diffdock_poses import rank_diffdock_poses

# Configure the root handler once so every message carries a timestamp, the
# logger name and the level.
logging.basicConfig(
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)

# Resolve the level named by the POLUS_LOG environment variable (default INFO).
# An unknown or non-level name falls back to INFO instead of raising
# AttributeError while the module is being imported.
_level_name = environ.get("POLUS_LOG", "INFO")
POLUS_LOG = getattr(logging, _level_name.upper(), None)
if not isinstance(POLUS_LOG, int):
    POLUS_LOG = logging.INFO

# NOTE(review): the logger name ends with a trailing dot; kept as-is because it
# appears in emitted log lines - confirm whether it is intentional.
logger = logging.getLogger("polus.mm.utils.rank_diffdock_poses.")
logger.setLevel(POLUS_LOG)


def main() -> None:
    """Parse the command line and rank the given DiffDock poses.

    Reads ``--diffdock_poses``, ``--top_n_confident`` and
    ``--top_percent_confidence`` from ``sys.argv``, logs them, and forwards
    them to ``rank_diffdock_poses``.
    """

    def _whole_number(text: str) -> int:
        """Parse ``text`` as an integer, accepting float spellings like ``1000.0``."""
        try:
            number = float(text)
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"expected a whole number, got {text!r}",
            ) from None
        # is_integer() is False for fractions as well as for inf and nan.
        if not number.is_integer():
            raise argparse.ArgumentTypeError(
                f"expected a whole number, got {text!r}",
            )
        return int(number)

    parser = argparse.ArgumentParser(description="rank_diffdock_poses.")
    parser.add_argument(
        "--diffdock_poses",
        nargs="+",
        type=Path,
        required=True,
        help="List of diffdock pose paths.",
    )
    # The CWL tool description declares both numeric inputs as floats, so the
    # runner may render them as e.g. "1000.0"; a plain ``type=int`` rejects that.
    parser.add_argument(
        "--top_n_confident",
        type=_whole_number,
        required=True,
        help="Top N confident poses.",
    )
    parser.add_argument(
        "--top_percent_confidence",
        type=float,
        required=True,
        help="Top percent confidence threshold.",
    )

    args = parser.parse_args()

    logger.info(f"diffdock_poses: {args.diffdock_poses}")
    logger.info(f"top_n_confident: {args.top_n_confident}")
    logger.info(f"top_percent_confidence: {args.top_percent_confidence}")

    rank_diffdock_poses(
        diffdock_poses=args.diffdock_poses,
        top_n_confident=args.top_n_confident,
        top_percent_confidence=args.top_percent_confidence,
    )


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""This module ranks the poses from a DiffDock output based on confidence scores."""
import re
from pathlib import Path


def rank_diffdock_poses(
    diffdock_poses: list[Path],
    top_n_confident: int,
    top_percent_confidence: int,
) -> None:
    """Rank DiffDock poses by confidence and record which ones to keep.

    Poses are ordered from most to least confident (the confidence is read
    from each file name by ``parse_confidence``), cut down to the
    ``top_n_confident`` best, and then cut down again to the top
    ``top_percent_confidence`` percent of what remains. The positions of the
    surviving poses within ``diffdock_poses`` are written to
    ``ranked_poses.txt`` in the current working directory, one index per line,
    most confident first.

    To filter on only one criterion, pass a trivially large
    ``top_n_confident`` or a ``top_percent_confidence`` of 100.

    Args:
        diffdock_poses: List of poses from DiffDock output
        top_n_confident: Number of poses to keep based on confidence score
        top_percent_confidence: Percentage of poses to keep based on confidence score
    Returns:
        None
    """
    confidences = [parse_confidence(pose) for pose in diffdock_poses]

    # Rank input positions instead of building a {confidence: pose} mapping:
    # poses that share a confidence value would overwrite each other in a dict
    # and silently vanish from the ranking. The sort is stable, so equally
    # confident poses keep their input order.
    ranked_indices = sorted(
        range(len(diffdock_poses)),
        key=confidences.__getitem__,
        reverse=True,
    )

    # First filter by the absolute number of poses. int() lets a whole-valued
    # float be used as a slice bound.
    ranked_indices = ranked_indices[: int(top_n_confident)]

    # Next filter by percentage. Multiply before dividing so that exact results
    # are not lost to float round-off (29 * 0.01 * 100 truncates to 28).
    num_poses = int(top_percent_confidence * len(ranked_indices) / 100)
    ranked_indices = ranked_indices[:num_poses]

    # One index per line; the trailing space before the newline is part of the
    # existing output format.
    path = Path("ranked_poses.txt")
    with path.open("w", encoding="utf-8") as file:
        file.writelines(f"{index} \n" for index in ranked_indices)


def parse_confidence(file_name: Path) -> float:
    """Return the confidence score encoded in a DiffDock pose file name.

    The final path component must contain 'rankX_confidenceY.sdf', where X is
    a positive integer and Y is a float, e.g. 'rank1_confidence-0.53.sdf'.
    Only the file name is inspected, so directory names never influence the
    result.

    Args:
        file_name (Path): The filename of output pose

    Returns:
        float: The confidence value from pose

    Raises:
        ValueError: If the file name does not follow the expected pattern or
            the confidence part is not a valid float.
    """
    base_name = Path(file_name).name
    match = re.search(r"rank[0-9]+_confidence(.*)\.sdf", base_name)
    if match is None:
        raise ValueError(
            f"Cannot parse confidence from {base_name!r}: "
            "expected a name like 'rankX_confidenceY.sdf'",
        )
    return float(match.group(1))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for rank_diffdock_poses."""
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Loading
Loading