feat:support bias (#131)

dptech-corp · May 15, 2024 · fef9c34 · fef9c34
1 parent 6c4c72d
commit fef9c34
Show file tree

Hide file tree

Showing 7 changed files with 2,650 additions and 7 deletions.
diff --git a/unidock_tools/src/unidock_tools/application/unidock_pipeline.py b/unidock_tools/src/unidock_tools/application/unidock_pipeline.py
@@ -44,6 +44,10 @@
     "local_only": False,
     "seed": 181129,
     "debug": False,
+    "bias_file": None,
+    "multi_bias": False,
+    "multi_bias_file": None,
+    "multi_bias_index": None,
 }
 
 
@@ -57,6 +61,8 @@ def __init__(self,
                  size_x: float = 22.5,
                  size_y: float = 22.5,
                  size_z: float = 22.5,
+                 bias_file: Optional[Path] = None,
+                 multi_bias_files: List[Path] = [],
                  workdir: Path = Path("docking_pipeline"),
                  ):
         """
@@ -87,6 +93,8 @@ def __init__(self,
         self.receptor = receptor
         self.mols = sum([read_ligand(ligand) for ligand in ligands], [])
         self.mols = [PropertyMol(mol) for mol in self.mols]
+        self.bias_file = bias_file
+        self.multi_bias_files_dict = {f.stem: f for f in multi_bias_files}
         self.center_x = center_x
         self.center_y = center_y
         self.center_z = center_z
@@ -127,11 +135,20 @@ def prepare_topology_sdf(self, mol_list: List[Chem.Mol], savedir: Path) -> List[
         return prepared_sdf_list
 
     @time_logger
-    def init_docking_data(self, input_dir: Path, batch_size: int = 20):
+    def init_docking_data(self, input_dir: Path, multi_bias: bool = False, batch_size: int = 20):
         real_batch_size = math.ceil(len(self.mols) / math.ceil(len(self.mols) / batch_size))
         batched_mol_list = [self.mols[i:i+real_batch_size] for i in range(0, len(self.mols), real_batch_size)]
         for one_batch_mol_list in batched_mol_list:
             input_list = self.prepare_topology_sdf(one_batch_mol_list, input_dir)
+            if multi_bias:
+                for input_file in input_list:
+                    filename = input_file.stem
+                    logging.info(filename)
+                    logging.info(self.multi_bias_files_dict)
+                    if filename in self.multi_bias_files_dict:
+                        shutil.copyfile(self.multi_bias_files_dict[filename], os.path.join(input_dir, f"{filename}.bpf"))
+                    else:
+                        logging.warning(f"Cannot find bias file in multi-bias mode for {filename}")
             yield input_list
 
     @time_logger
@@ -148,6 +165,7 @@ def docking(self,
                 batch_size: int = 1200,
                 score_only: bool = False,
                 local_only: bool = False,
+                multi_bias: bool = False,
                 score_name: str = "docking_score",
                 docking_dir_name : str = "docking_dir",
                 topn: int = 10,
@@ -157,6 +175,7 @@ def docking(self,
         for ligand_list in self.init_docking_data(
                 input_dir=input_dir,
                 batch_size=batch_size,
+                multi_bias=multi_bias,
         ):
             # Run docking
             ligands, scores_list = run_unidock(
@@ -165,8 +184,8 @@ def docking(self,
                 size_x=self.size_x, size_y=self.size_y, size_z=self.size_z,
                 scoring=scoring_function, num_modes=num_modes,
                 search_mode=search_mode, exhaustiveness=exhaustiveness, max_step=max_step, 
-                seed=seed, refine_step=refine_step, energy_range=energy_range,
-                score_only=score_only, local_only=local_only,
+                seed=seed, refine_step=refine_step, energy_range=energy_range, bias_file=self.bias_file,
+                score_only=score_only, local_only=local_only, multi_bias=multi_bias,
             )
             self.postprocessing(ligand_scores_list=zip(ligands, scores_list), 
                                 save_dir=save_dir,
@@ -222,6 +241,33 @@ def main(args: dict):
         exit(1)
     logging.info(f"[UniDock Pipeline] {len(ligands)} ligands found.")
 
+    bias_file = None
+    if args.get("bias_file"):
+        bias_file = Path(args["bias_file"]).resolve()
+        if not bias_file.exists():
+            logging.error(f"Cannot find {bias_file}")
+            exit(1)
+
+    multi_bias_file_list = []
+    if args["multi_bias"]:
+        if args.get("multi_bias_file"):
+            for multi_bias_file in args["multi_bias_file"]:
+                if not Path(multi_bias_file).exists():
+                    logging.error(f"Cannot find {multi_bias_file}")
+                    continue
+                multi_bias_file_list.append(Path(multi_bias_file).resolve())
+        elif args.get("multi_bias_index"):
+            with open(args["multi_bias_index"], "r") as f:
+                index_content = f.read()
+            index_lines1 = [line.strip() for line in index_content.split("\n") if line.strip()]
+            index_lines2 = [line.strip() for line in index_content.split(" ") if line.strip()]
+            multi_bias_file_list.extend(index_lines2 if len(index_lines2) > len(index_lines1) else index_lines1)
+            multi_bias_file_list = [Path(multi_bias_file).resolve() for multi_bias_file in multi_bias_file_list if Path(multi_bias_file).exists()]
+
+        if len(multi_bias_file_list) != len(ligands):
+            logging.error("Number of ligands and bias files should be equal in multi-bias mode.")
+            exit(1)
+
     logging.info("[UniDock Pipeline] Start")
     start_time = time.time()
     runner = UniDock(
@@ -233,7 +279,9 @@ def main(args: dict):
         size_x=float(args["size_x"]),
         size_y=float(args["size_y"]),
         size_z=float(args["size_z"]),
-        workdir=workdir
+        bias_file=bias_file,
+        multi_bias_files=multi_bias_file_list,
+        workdir=workdir,
     )
     logging.info("[UniDock Pipeline] Start docking")
     runner.docking(
@@ -249,6 +297,7 @@ def main(args: dict):
         batch_size=int(args["batch_size"]),
         score_only=bool(args["score_only"]),
         local_only=bool(args["local_only"]),
+        multi_bias=bool(args["multi_bias"]),
         score_name="docking_score",
         docking_dir_name="docking_dir",
         topn=int(args["topn"]),
@@ -268,6 +317,12 @@ def get_parser() -> argparse.ArgumentParser:
                         help="Ligand file in sdf format. Specify multiple files separated by commas.")
     parser.add_argument("-i", "--ligand_index", type=str, default=None,
                         help="A text file containing the path of ligand files in sdf format.")
+    parser.add_argument("-b", "--bias_file", type=str, default=None,
+                        help="Bias file in bpf format. Default: None.")
+    parser.add_argument("-mbf", "--multi_bias_file", type=lambda s: s.split(','), default=None,
+                        help="multi Bias file in bpf format separated by commas. Number should be equal to ligands. Default: None.")
+    parser.add_argument("-mbi", "--multi_bias_index", type=str, default=None,
+                        help="A text file containing the path of multi bias files in bpf format. Number should be equal to ligands. Default: None.")
 
     parser.add_argument("-cx", "--center_x", type=float, required=True,
                         help="X-coordinate of the docking box center.")
@@ -317,6 +372,8 @@ def get_parser() -> argparse.ArgumentParser:
                         help="Whether to use score_only mode.")
     parser.add_argument("--local_only", action="store_true",
                         help="Whether to use local_only mode.")
+    parser.add_argument("--multi_bias", action="store_true",
+                        help="Whether to use multi_bias mode.")
 
     parser.add_argument("--seed", type=int, default=181129, 
                         help="Uni-Dock random seed")

diff --git a/unidock_tools/src/unidock_tools/modules/docking/unidock.py b/unidock_tools/src/unidock_tools/modules/docking/unidock.py
@@ -27,9 +27,11 @@ def __init__(self,
                  max_step: int = 10,
                  energy_range: float = 3.0,
                  refine_step: int = 5,
+                 bias_file: Optional[Path] = None,
                  seed : int = 181129,
                  score_only: bool = False,
-                 local_only: bool = False
+                 local_only: bool = False,
+                 multi_bias: bool = False,
                  ):
         self.mgltools_python_path = ""
         self.prepare_gpf4_script_path = ""
@@ -86,10 +88,14 @@ def __init__(self,
             "--verbosity", "2",
             "--keep_nonpolar_H",
         ]
+        if bias_file:
+            cmd += ["--bias", str(bias_file)]
         if score_only:
             cmd.append("--score_only")
         if local_only:
             cmd.append("--local_only")
+        if multi_bias:
+            cmd.append("--multi_bias")
 
         logging.info(f"unidock cmd: {' '.join(cmd)}")
         self.cmd = cmd
@@ -263,9 +269,11 @@ def run_unidock(
         max_step: int = 10,
         energy_range: float = 3.0,
         refine_step: int = 5,
+        bias_file: Optional[Path] = None,
         seed: int = 181129,
         score_only: bool = False,
         local_only: bool = False,
+        multi_bias: bool = False,
 ) -> Tuple[List[Path], List[List[float]]]:
     runner = UniDockRunner(
         receptor=receptor, ligands=ligands,
@@ -275,8 +283,8 @@ def run_unidock(
         scoring=scoring, num_modes=num_modes,
         search_mode=search_mode,
         exhaustiveness=exhaustiveness, max_step=max_step,
-        energy_range=energy_range, refine_step=refine_step, seed=seed,
-        score_only=score_only, local_only=local_only,
+        energy_range=energy_range, refine_step=refine_step, seed=seed, bias_file=bias_file,
+        score_only=score_only, local_only=local_only, multi_bias=multi_bias,
     )
     result_ligands = runner.run()
     scores_list = [UniDockRunner.read_scores(ligand) for ligand in result_ligands]

diff --git a/unidock_tools/tests/applications/test_unidock.py b/unidock_tools/tests/applications/test_unidock.py
@@ -34,12 +34,14 @@ def pocket():
 def get_docking_args(testset_name):
     receptor = os.path.join(testset_dir_path, testset_name, "protein.pdb")
     ligand = os.path.join(testset_dir_path, testset_name, "ligand.sdf")
+    multi_bias_file = os.path.join(testset_dir_path, testset_name, "ligand.bpf")
     with open(os.path.join(testset_dir_path, testset_name, "docking_grid.json")) as f:
         pocket = json.load(f)
     testset_info = {
         "receptor": receptor,
         "ligand": ligand,
         "pocket": pocket,
+        "multi_bias_file": multi_bias_file if os.path.exists(multi_bias_file) else None
     }
     return testset_info
 
@@ -119,6 +121,25 @@ def test_unidock_pipeline_multi_pose(receptor, ligand, pocket):
         assert -20 <= score <= 0, f"Uni-Dock score not in range: {score}"
     shutil.rmtree(results_dir, ignore_errors=True)
 
+def test_unidock_pipeline_multi_bias():
+    testset_name = "1gkc"
+    testset_info = get_docking_args(testset_name)
+    receptor, ligand, pocket, multi_bias_file = testset_info["receptor"], testset_info["ligand"], testset_info["pocket"], testset_info["multi_bias_file"]
+    results_dir = "unidock_results_multi_bias"
+    cmd = f"unidocktools unidock_pipeline -r {receptor} -l {ligand} -mbf {multi_bias_file} -sd {results_dir} \
+            -cx {pocket['center_x']} -cy {pocket['center_y']} -cz {pocket['center_z']} \
+            -sx {pocket['size_x']} -sy {pocket['size_y']} -sz {pocket['size_z']} \
+            -sf vina -nm 4 --seed 181129 --multi_bias --debug"
+    print(cmd)
+    resp = subprocess.run(cmd, shell=True, capture_output=True, encoding="utf-8")
+    print(resp.stdout)
+    assert resp.returncode == 0, f"run unidock pipeline app err:\n{resp.stderr}"
+
+    result_file = os.path.join(results_dir, Path(ligand).name)
+    assert os.path.exists(result_file), f"docking result file not found"
+
+    score_list = read_scores(result_file, "docking_score")
+    shutil.rmtree(results_dir, ignore_errors=True)
 
 @pytest.mark.parametrize("testset_name", testset_name_list)
 def test_unidock_pipeline_default_arg(testset_name):

diff --git a/unidock_tools/tests/inputs/unidock_pipeline/1gkc/docking_grid.json b/unidock_tools/tests/inputs/unidock_pipeline/1gkc/docking_grid.json
@@ -0,0 +1,8 @@
+{
+    "center_x": 40.2, 
+    "center_y": 14.41, 
+    "center_z": 141.58,
+    "size_x": 16.51, 
+    "size_y": 16.17, 
+    "size_z": 18.82
+}
diff --git a/unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.bpf b/unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.bpf
@@ -0,0 +1,23 @@
+x y z Vset r type atom
+40.812 14.326 141.745  -1.00   1.49 map C 
+41.195 15.202 145.465  -1.00   1.41 map N 
+41.790 14.064 141.049  -1.00   1.40 map OA
+39.236 13.318 139.119  -1.00   1.49 map C 
+39.553 13.856 141.464  -1.00   1.41 map N 
+38.239 13.830 138.621  -1.00   1.40 map OA
+40.410 13.158 138.399  -1.00   1.41 map N 
+40.934 15.242 142.960  -1.00   1.49 map C 
+42.023 16.325 142.803  -1.00   1.49 map C 
+42.041 17.061 141.448  -1.00   1.49 map C 
+42.352 15.594 146.097  -1.00   1.49 map C 
+42.472 15.765 147.306  -1.00   1.40 map OA
+41.197 14.416 144.234  -1.00   1.49 map C 
+40.126 14.875 146.326  -1.00   1.40 map OA
+39.301 12.719 140.557  -1.00   1.49 map C 
+38.068 11.859 140.989  -1.00   1.49 map C 
+41.056 18.229 141.397  -1.00   1.49 map C 
+43.458 17.566 141.156  -1.00   1.49 map C 
+36.930 12.732 141.550  -1.00   1.49 map C 
+37.533 11.023 139.806  -1.00   1.49 map C 
+38.478 10.844 142.081  -1.00   1.49 map C 
+40.378 12.708 137.032  -1.00   1.49 map C 
diff --git a/unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.sdf b/unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.sdf
@@ -0,0 +1,107 @@
+1GKC ligand
+                    3D
+
+ 51 50  0  0  1  0            999 V2000
+   41.0934   14.0383  141.8702 C   0  0  0  0  0  0
+   41.8346   15.1186  145.4892 N   0  0  0  0  0  0
+   42.0373   13.3340  141.5168 O   0  0  0  0  0  0
+   39.7692   13.4251  138.9333 C   0  0  0  0  0  0
+   39.8590   13.9070  141.3537 N   0  0  0  0  0  0
+   39.3556   14.5376  138.6133 O   0  0  0  0  0  0
+   40.4448   12.6120  138.1086 N   0  0  0  0  0  0
+   41.3173   15.0785  142.9866 C   0  0  2  0  0  0
+   42.4695   16.0680  142.6365 C   0  0  0  0  0  0
+   42.3216   16.8916  141.3317 C   0  0  0  0  0  0
+   43.0333   15.4532  145.9995 C   0  0  0  0  0  0
+   43.2104   16.1573  146.9924 O   0  0  0  0  0  0
+   41.6055   14.3010  144.2999 C   0  0  0  0  0  0
+   40.7153   15.7294  146.0614 O   0  0  0  0  0  0
+   39.4946   12.8863  140.3562 C   0  0  1  0  0  0
+   38.0521   12.2924  140.5705 C   0  0  0  0  0  0
+   41.0356   17.7322  141.3046 C   0  0  0  0  0  0
+   43.5543   17.7823  141.1169 C   0  0  0  0  0  0
+   37.7476   11.1469  139.5698 C   0  0  0  0  0  0
+   37.9569   11.6901  141.9975 C   0  0  0  0  0  0
+   36.9428   13.3679  140.4439 C   0  0  0  0  0  0
+   40.7560   12.9302  136.7236 C   0  0  0  0  0  0
+   39.1127   14.5146  141.6644 H   0  0  0  0  0  0
+   40.7769   11.7200  138.4563 H   0  0  0  0  0  0
+   40.4097   15.6628  143.1381 H   0  0  0  0  0  0
+   43.4128   15.5186  142.6005 H   0  0  0  0  0  0
+   42.5814   16.7779  143.4562 H   0  0  0  0  0  0
+   42.2798   16.2014  140.4884 H   0  0  0  0  0  0
+   43.8506   15.0170  145.4066 H   0  0  0  0  0  0
+   40.7747   13.6269  144.5204 H   0  0  0  0  0  0
+   42.4791   13.6614  144.1562 H   0  0  0  0  0  0
+   40.3227   15.0122  146.5719 H   0  0  0  0  0  0
+   40.1732   12.0442  140.4999 H   0  0  0  0  0  0
+   41.0187   18.4113  140.4528 H   0  0  0  0  0  0
+   40.1600   17.0922  141.2201 H   0  0  0  0  0  0
+   40.9243   18.3345  142.2044 H   0  0  0  0  0  0
+   43.4856   18.3455  140.1865 H   0  0  0  0  0  0
+   43.6791   18.4984  141.9281 H   0  0  0  0  0  0
+   44.4572   17.1778  141.0688 H   0  0  0  0  0  0
+   36.7529   10.7289  139.7309 H   0  0  0  0  0  0
+   37.7777   11.4837  138.5331 H   0  0  0  0  0  0
+   38.4574   10.3266  139.6757 H   0  0  0  0  0  0
+   36.9834   11.2303  142.1738 H   0  0  0  0  0  0
+   38.7088   10.9155  142.1577 H   0  0  0  0  0  0
+   38.0889   12.4456  142.7733 H   0  0  0  0  0  0
+   35.9519   12.9339  140.5834 H   0  0  0  0  0  0
+   37.0478   14.1488  141.1956 H   0  0  0  0  0  0
+   36.9375   13.8428  139.4628 H   0  0  0  0  0  0
+   40.9353   12.0097  136.1681 H   0  0  0  0  0  0
+   39.9408   13.4669  136.2363 H   0  0  0  0  0  0
+   41.6573   13.5389  136.6646 H   0  0  0  0  0  0
+  1  3  2  0  0  0
+  1  5  1  0  0  0
+  1  8  1  0  0  0
+  2 11  1  0  0  0
+  2 13  1  0  0  0
+  2 14  1  0  0  0
+  4  6  2  0  0  0
+  4  7  1  0  0  0
+  4 15  1  0  0  0
+  5 15  1  0  0  0
+  5 23  1  0  0  0
+  7 22  1  0  0  0
+  7 24  1  0  0  0
+  8  9  1  0  0  0
+  8 13  1  0  0  0
+  8 25  1  0  0  0
+  9 10  1  0  0  0
+  9 26  1  0  0  0
+  9 27  1  0  0  0
+ 10 17  1  0  0  0
+ 10 18  1  0  0  0
+ 10 28  1  0  0  0
+ 11 12  2  0  0  0
+ 11 29  1  0  0  0
+ 13 30  1  0  0  0
+ 13 31  1  0  0  0
+ 14 32  1  0  0  0
+ 15 16  1  0  0  0
+ 15 33  1  0  0  0
+ 16 19  1  0  0  0
+ 16 20  1  0  0  0
+ 16 21  1  0  0  0
+ 17 34  1  0  0  0
+ 17 35  1  0  0  0
+ 17 36  1  0  0  0
+ 18 37  1  0  0  0
+ 18 38  1  0  0  0
+ 18 39  1  0  0  0
+ 19 40  1  0  0  0
+ 19 41  1  0  0  0
+ 19 42  1  0  0  0
+ 20 43  1  0  0  0
+ 20 44  1  0  0  0
+ 20 45  1  0  0  0
+ 21 46  1  0  0  0
+ 21 47  1  0  0  0
+ 21 48  1  0  0  0
+ 22 49  1  0  0  0
+ 22 50  1  0  0  0
+ 22 51  1  0  0  0
+M  END
+$$$$