Skip to content

Commit

Permalink
feat:support bias (#131)
Browse files Browse the repository at this point in the history
  • Loading branch information
dp-yuanyn authored May 15, 2024
1 parent 6c4c72d commit fef9c34
Show file tree
Hide file tree
Showing 7 changed files with 2,650 additions and 7 deletions.
65 changes: 61 additions & 4 deletions unidock_tools/src/unidock_tools/application/unidock_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
"local_only": False,
"seed": 181129,
"debug": False,
"bias_file": None,
"multi_bias": False,
"multi_bias_file": None,
"multi_bias_index": None,
}


Expand All @@ -57,6 +61,8 @@ def __init__(self,
size_x: float = 22.5,
size_y: float = 22.5,
size_z: float = 22.5,
bias_file: Optional[Path] = None,
multi_bias_files: List[Path] = [],
workdir: Path = Path("docking_pipeline"),
):
"""
Expand Down Expand Up @@ -87,6 +93,8 @@ def __init__(self,
self.receptor = receptor
self.mols = sum([read_ligand(ligand) for ligand in ligands], [])
self.mols = [PropertyMol(mol) for mol in self.mols]
self.bias_file = bias_file
self.multi_bias_files_dict = {f.stem: f for f in multi_bias_files}
self.center_x = center_x
self.center_y = center_y
self.center_z = center_z
Expand Down Expand Up @@ -127,11 +135,20 @@ def prepare_topology_sdf(self, mol_list: List[Chem.Mol], savedir: Path) -> List[
return prepared_sdf_list

@time_logger
def init_docking_data(self, input_dir: Path, batch_size: int = 20):
def init_docking_data(self, input_dir: Path, multi_bias: bool = False, batch_size: int = 20):
real_batch_size = math.ceil(len(self.mols) / math.ceil(len(self.mols) / batch_size))
batched_mol_list = [self.mols[i:i+real_batch_size] for i in range(0, len(self.mols), real_batch_size)]
for one_batch_mol_list in batched_mol_list:
input_list = self.prepare_topology_sdf(one_batch_mol_list, input_dir)
if multi_bias:
for input_file in input_list:
filename = input_file.stem
logging.info(filename)
logging.info(self.multi_bias_files_dict)
if filename in self.multi_bias_files_dict:
shutil.copyfile(self.multi_bias_files_dict[filename], os.path.join(input_dir, f"{filename}.bpf"))
else:
logging.warning(f"Cannot find bias file in multi-bias mode for {filename}")
yield input_list

@time_logger
Expand All @@ -148,6 +165,7 @@ def docking(self,
batch_size: int = 1200,
score_only: bool = False,
local_only: bool = False,
multi_bias: bool = False,
score_name: str = "docking_score",
docking_dir_name : str = "docking_dir",
topn: int = 10,
Expand All @@ -157,6 +175,7 @@ def docking(self,
for ligand_list in self.init_docking_data(
input_dir=input_dir,
batch_size=batch_size,
multi_bias=multi_bias,
):
# Run docking
ligands, scores_list = run_unidock(
Expand All @@ -165,8 +184,8 @@ def docking(self,
size_x=self.size_x, size_y=self.size_y, size_z=self.size_z,
scoring=scoring_function, num_modes=num_modes,
search_mode=search_mode, exhaustiveness=exhaustiveness, max_step=max_step,
seed=seed, refine_step=refine_step, energy_range=energy_range,
score_only=score_only, local_only=local_only,
seed=seed, refine_step=refine_step, energy_range=energy_range, bias_file=self.bias_file,
score_only=score_only, local_only=local_only, multi_bias=multi_bias,
)
self.postprocessing(ligand_scores_list=zip(ligands, scores_list),
save_dir=save_dir,
Expand Down Expand Up @@ -222,6 +241,33 @@ def main(args: dict):
exit(1)
logging.info(f"[UniDock Pipeline] {len(ligands)} ligands found.")

bias_file = None
if args.get("bias_file"):
bias_file = Path(args["bias_file"]).resolve()
if not bias_file.exists():
logging.error(f"Cannot find {bias_file}")
exit(1)

multi_bias_file_list = []
if args["multi_bias"]:
if args.get("multi_bias_file"):
for multi_bias_file in args["multi_bias_file"]:
if not Path(multi_bias_file).exists():
logging.error(f"Cannot find {multi_bias_file}")
continue
multi_bias_file_list.append(Path(multi_bias_file).resolve())
elif args.get("multi_bias_index"):
with open(args["multi_bias_index"], "r") as f:
index_content = f.read()
index_lines1 = [line.strip() for line in index_content.split("\n") if line.strip()]
index_lines2 = [line.strip() for line in index_content.split(" ") if line.strip()]
multi_bias_file_list.extend(index_lines2 if len(index_lines2) > len(index_lines1) else index_lines1)
multi_bias_file_list = [Path(multi_bias_file).resolve() for multi_bias_file in multi_bias_file_list if Path(multi_bias_file).exists()]

if len(multi_bias_file_list) != len(ligands):
logging.error("Number of ligands and bias files should be equal in multi-bias mode.")
exit(1)

logging.info("[UniDock Pipeline] Start")
start_time = time.time()
runner = UniDock(
Expand All @@ -233,7 +279,9 @@ def main(args: dict):
size_x=float(args["size_x"]),
size_y=float(args["size_y"]),
size_z=float(args["size_z"]),
workdir=workdir
bias_file=bias_file,
multi_bias_files=multi_bias_file_list,
workdir=workdir,
)
logging.info("[UniDock Pipeline] Start docking")
runner.docking(
Expand All @@ -249,6 +297,7 @@ def main(args: dict):
batch_size=int(args["batch_size"]),
score_only=bool(args["score_only"]),
local_only=bool(args["local_only"]),
multi_bias=bool(args["multi_bias"]),
score_name="docking_score",
docking_dir_name="docking_dir",
topn=int(args["topn"]),
Expand All @@ -268,6 +317,12 @@ def get_parser() -> argparse.ArgumentParser:
help="Ligand file in sdf format. Specify multiple files separated by commas.")
parser.add_argument("-i", "--ligand_index", type=str, default=None,
help="A text file containing the path of ligand files in sdf format.")
parser.add_argument("-b", "--bias_file", type=str, default=None,
help="Bias file in bpf format. Default: None.")
parser.add_argument("-mbf", "--multi_bias_file", type=lambda s: s.split(','), default=None,
help="multi Bias file in bpf format separated by commas. Number should be equal to ligands. Default: None.")
parser.add_argument("-mbi", "--multi_bias_index", type=str, default=None,
help="A text file containing the path of multi bias files in bpf format. Number should be equal to ligands. Default: None.")

parser.add_argument("-cx", "--center_x", type=float, required=True,
help="X-coordinate of the docking box center.")
Expand Down Expand Up @@ -317,6 +372,8 @@ def get_parser() -> argparse.ArgumentParser:
help="Whether to use score_only mode.")
parser.add_argument("--local_only", action="store_true",
help="Whether to use local_only mode.")
parser.add_argument("--multi_bias", action="store_true",
help="Whether to use multi_bias mode.")

parser.add_argument("--seed", type=int, default=181129,
help="Uni-Dock random seed")
Expand Down
14 changes: 11 additions & 3 deletions unidock_tools/src/unidock_tools/modules/docking/unidock.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ def __init__(self,
max_step: int = 10,
energy_range: float = 3.0,
refine_step: int = 5,
bias_file: Optional[Path] = None,
seed : int = 181129,
score_only: bool = False,
local_only: bool = False
local_only: bool = False,
multi_bias: bool = False,
):
self.mgltools_python_path = ""
self.prepare_gpf4_script_path = ""
Expand Down Expand Up @@ -86,10 +88,14 @@ def __init__(self,
"--verbosity", "2",
"--keep_nonpolar_H",
]
if bias_file:
cmd += ["--bias", str(bias_file)]
if score_only:
cmd.append("--score_only")
if local_only:
cmd.append("--local_only")
if multi_bias:
cmd.append("--multi_bias")

logging.info(f"unidock cmd: {' '.join(cmd)}")
self.cmd = cmd
Expand Down Expand Up @@ -263,9 +269,11 @@ def run_unidock(
max_step: int = 10,
energy_range: float = 3.0,
refine_step: int = 5,
bias_file: Optional[Path] = None,
seed: int = 181129,
score_only: bool = False,
local_only: bool = False,
multi_bias: bool = False,
) -> Tuple[List[Path], List[List[float]]]:
runner = UniDockRunner(
receptor=receptor, ligands=ligands,
Expand All @@ -275,8 +283,8 @@ def run_unidock(
scoring=scoring, num_modes=num_modes,
search_mode=search_mode,
exhaustiveness=exhaustiveness, max_step=max_step,
energy_range=energy_range, refine_step=refine_step, seed=seed,
score_only=score_only, local_only=local_only,
energy_range=energy_range, refine_step=refine_step, seed=seed, bias_file=bias_file,
score_only=score_only, local_only=local_only, multi_bias=multi_bias,
)
result_ligands = runner.run()
scores_list = [UniDockRunner.read_scores(ligand) for ligand in result_ligands]
Expand Down
21 changes: 21 additions & 0 deletions unidock_tools/tests/applications/test_unidock.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ def pocket():
def get_docking_args(testset_name):
receptor = os.path.join(testset_dir_path, testset_name, "protein.pdb")
ligand = os.path.join(testset_dir_path, testset_name, "ligand.sdf")
multi_bias_file = os.path.join(testset_dir_path, testset_name, "ligand.bpf")
with open(os.path.join(testset_dir_path, testset_name, "docking_grid.json")) as f:
pocket = json.load(f)
testset_info = {
"receptor": receptor,
"ligand": ligand,
"pocket": pocket,
"multi_bias_file": multi_bias_file if os.path.exists(multi_bias_file) else None
}
return testset_info

Expand Down Expand Up @@ -119,6 +121,25 @@ def test_unidock_pipeline_multi_pose(receptor, ligand, pocket):
assert -20 <= score <= 0, f"Uni-Dock score not in range: {score}"
shutil.rmtree(results_dir, ignore_errors=True)

def test_unidock_pipeline_multi_bias():
testset_name = "1gkc"
testset_info = get_docking_args(testset_name)
receptor, ligand, pocket, multi_bias_file = testset_info["receptor"], testset_info["ligand"], testset_info["pocket"], testset_info["multi_bias_file"]
results_dir = "unidock_results_multi_bias"
cmd = f"unidocktools unidock_pipeline -r {receptor} -l {ligand} -mbf {multi_bias_file} -sd {results_dir} \
-cx {pocket['center_x']} -cy {pocket['center_y']} -cz {pocket['center_z']} \
-sx {pocket['size_x']} -sy {pocket['size_y']} -sz {pocket['size_z']} \
-sf vina -nm 4 --seed 181129 --multi_bias --debug"
print(cmd)
resp = subprocess.run(cmd, shell=True, capture_output=True, encoding="utf-8")
print(resp.stdout)
assert resp.returncode == 0, f"run unidock pipeline app err:\n{resp.stderr}"

result_file = os.path.join(results_dir, Path(ligand).name)
assert os.path.exists(result_file), f"docking result file not found"

score_list = read_scores(result_file, "docking_score")
shutil.rmtree(results_dir, ignore_errors=True)

@pytest.mark.parametrize("testset_name", testset_name_list)
def test_unidock_pipeline_default_arg(testset_name):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"center_x": 40.2,
"center_y": 14.41,
"center_z": 141.58,
"size_x": 16.51,
"size_y": 16.17,
"size_z": 18.82
}
23 changes: 23 additions & 0 deletions unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.bpf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
x y z Vset r type atom
40.812 14.326 141.745 -1.00 1.49 map C
41.195 15.202 145.465 -1.00 1.41 map N
41.790 14.064 141.049 -1.00 1.40 map OA
39.236 13.318 139.119 -1.00 1.49 map C
39.553 13.856 141.464 -1.00 1.41 map N
38.239 13.830 138.621 -1.00 1.40 map OA
40.410 13.158 138.399 -1.00 1.41 map N
40.934 15.242 142.960 -1.00 1.49 map C
42.023 16.325 142.803 -1.00 1.49 map C
42.041 17.061 141.448 -1.00 1.49 map C
42.352 15.594 146.097 -1.00 1.49 map C
42.472 15.765 147.306 -1.00 1.40 map OA
41.197 14.416 144.234 -1.00 1.49 map C
40.126 14.875 146.326 -1.00 1.40 map OA
39.301 12.719 140.557 -1.00 1.49 map C
38.068 11.859 140.989 -1.00 1.49 map C
41.056 18.229 141.397 -1.00 1.49 map C
43.458 17.566 141.156 -1.00 1.49 map C
36.930 12.732 141.550 -1.00 1.49 map C
37.533 11.023 139.806 -1.00 1.49 map C
38.478 10.844 142.081 -1.00 1.49 map C
40.378 12.708 137.032 -1.00 1.49 map C
107 changes: 107 additions & 0 deletions unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
1GKC ligand
3D

51 50 0 0 1 0 999 V2000
41.0934 14.0383 141.8702 C 0 0 0 0 0 0
41.8346 15.1186 145.4892 N 0 0 0 0 0 0
42.0373 13.3340 141.5168 O 0 0 0 0 0 0
39.7692 13.4251 138.9333 C 0 0 0 0 0 0
39.8590 13.9070 141.3537 N 0 0 0 0 0 0
39.3556 14.5376 138.6133 O 0 0 0 0 0 0
40.4448 12.6120 138.1086 N 0 0 0 0 0 0
41.3173 15.0785 142.9866 C 0 0 2 0 0 0
42.4695 16.0680 142.6365 C 0 0 0 0 0 0
42.3216 16.8916 141.3317 C 0 0 0 0 0 0
43.0333 15.4532 145.9995 C 0 0 0 0 0 0
43.2104 16.1573 146.9924 O 0 0 0 0 0 0
41.6055 14.3010 144.2999 C 0 0 0 0 0 0
40.7153 15.7294 146.0614 O 0 0 0 0 0 0
39.4946 12.8863 140.3562 C 0 0 1 0 0 0
38.0521 12.2924 140.5705 C 0 0 0 0 0 0
41.0356 17.7322 141.3046 C 0 0 0 0 0 0
43.5543 17.7823 141.1169 C 0 0 0 0 0 0
37.7476 11.1469 139.5698 C 0 0 0 0 0 0
37.9569 11.6901 141.9975 C 0 0 0 0 0 0
36.9428 13.3679 140.4439 C 0 0 0 0 0 0
40.7560 12.9302 136.7236 C 0 0 0 0 0 0
39.1127 14.5146 141.6644 H 0 0 0 0 0 0
40.7769 11.7200 138.4563 H 0 0 0 0 0 0
40.4097 15.6628 143.1381 H 0 0 0 0 0 0
43.4128 15.5186 142.6005 H 0 0 0 0 0 0
42.5814 16.7779 143.4562 H 0 0 0 0 0 0
42.2798 16.2014 140.4884 H 0 0 0 0 0 0
43.8506 15.0170 145.4066 H 0 0 0 0 0 0
40.7747 13.6269 144.5204 H 0 0 0 0 0 0
42.4791 13.6614 144.1562 H 0 0 0 0 0 0
40.3227 15.0122 146.5719 H 0 0 0 0 0 0
40.1732 12.0442 140.4999 H 0 0 0 0 0 0
41.0187 18.4113 140.4528 H 0 0 0 0 0 0
40.1600 17.0922 141.2201 H 0 0 0 0 0 0
40.9243 18.3345 142.2044 H 0 0 0 0 0 0
43.4856 18.3455 140.1865 H 0 0 0 0 0 0
43.6791 18.4984 141.9281 H 0 0 0 0 0 0
44.4572 17.1778 141.0688 H 0 0 0 0 0 0
36.7529 10.7289 139.7309 H 0 0 0 0 0 0
37.7777 11.4837 138.5331 H 0 0 0 0 0 0
38.4574 10.3266 139.6757 H 0 0 0 0 0 0
36.9834 11.2303 142.1738 H 0 0 0 0 0 0
38.7088 10.9155 142.1577 H 0 0 0 0 0 0
38.0889 12.4456 142.7733 H 0 0 0 0 0 0
35.9519 12.9339 140.5834 H 0 0 0 0 0 0
37.0478 14.1488 141.1956 H 0 0 0 0 0 0
36.9375 13.8428 139.4628 H 0 0 0 0 0 0
40.9353 12.0097 136.1681 H 0 0 0 0 0 0
39.9408 13.4669 136.2363 H 0 0 0 0 0 0
41.6573 13.5389 136.6646 H 0 0 0 0 0 0
1 3 2 0 0 0
1 5 1 0 0 0
1 8 1 0 0 0
2 11 1 0 0 0
2 13 1 0 0 0
2 14 1 0 0 0
4 6 2 0 0 0
4 7 1 0 0 0
4 15 1 0 0 0
5 15 1 0 0 0
5 23 1 0 0 0
7 22 1 0 0 0
7 24 1 0 0 0
8 9 1 0 0 0
8 13 1 0 0 0
8 25 1 0 0 0
9 10 1 0 0 0
9 26 1 0 0 0
9 27 1 0 0 0
10 17 1 0 0 0
10 18 1 0 0 0
10 28 1 0 0 0
11 12 2 0 0 0
11 29 1 0 0 0
13 30 1 0 0 0
13 31 1 0 0 0
14 32 1 0 0 0
15 16 1 0 0 0
15 33 1 0 0 0
16 19 1 0 0 0
16 20 1 0 0 0
16 21 1 0 0 0
17 34 1 0 0 0
17 35 1 0 0 0
17 36 1 0 0 0
18 37 1 0 0 0
18 38 1 0 0 0
18 39 1 0 0 0
19 40 1 0 0 0
19 41 1 0 0 0
19 42 1 0 0 0
20 43 1 0 0 0
20 44 1 0 0 0
20 45 1 0 0 0
21 46 1 0 0 0
21 47 1 0 0 0
21 48 1 0 0 0
22 49 1 0 0 0
22 50 1 0 0 0
22 51 1 0 0 0
M END
$$$$
Loading

0 comments on commit fef9c34

Please sign in to comment.