From ab204256d5f98643c2a391ab3b3a20ca12b49bec Mon Sep 17 00:00:00 2001 From: Jintao Date: Tue, 20 Aug 2024 10:51:58 +0800 Subject: [PATCH 1/3] Define `compare_confs_fl` as a cheaper preprocess for `compare_confs` Compare two Cartesian coordinates representing conformers using first and last atom distances. If the `fl_distances` are similar, the distance matrices are computed and returned. --- arc/species/converter.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arc/species/converter.py b/arc/species/converter.py index 101bd04be6..e0955a2396 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -2011,6 +2011,43 @@ def compare_zmats(z1, z2, r_tol=0.01, a_tol=2, d_tol=2, verbose=False, symmetric symmetric_torsions=symmetric_torsions) +def compare_confs_fl(xyz1: dict, + conf2: dict, + rtol: float = 0.01, + ) -> Tuple[float, Optional[np.ndarray], dict, bool]: + """ + Compare two Cartesian coordinates representing conformers using first and last atom distances. If the distances are the same, + the distance matrices are computed and returned. + + The relative difference (``rtol`` * fl_distance1) is compared against the absolute difference abs(fl_distance1 - fl_distance2). + + Args: + xyz1 (dict): Conformer 1. + conf2 (dict): Conformer 2. + rtol (float): The relative tolerance parameter (see Notes). + + Returns: + Tuple containing distances and matrices: + - (fl_distance1, dmat1, conf2, similar): The first and last atom distance of conformer 1, its distance matrix, + conformer 2, and whether the two conformers have almost equal atom distances. 
+ """ + similar = False + conf2['fl_distance'] = conf2.get('fl_distance') + conf2['dmat'] = conf2.get('dmat') + xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(conf2['xyz']) + dmat1 = None + fl_distance1 = np.linalg.norm(np.array(xyz1['coords'][0]) - np.array(xyz1['coords'][-1])) + if conf2['fl_distance'] is None: + conf2['fl_distance'] = np.linalg.norm(np.array(xyz2['coords'][0]) - np.array(xyz2['coords'][-1])) + if not np.isclose(fl_distance1, conf2['fl_distance'], rtol=rtol): + return fl_distance1, dmat1, conf2, similar + similar = True + dmat1 = xyz_to_dmat(xyz1) + if conf2['dmat'] is None: + conf2['dmat'] = xyz_to_dmat(xyz2) + return fl_distance1, dmat1, conf2, similar + + def compare_confs(xyz1: dict, xyz2: dict, rtol: float = 0.01, From 2418f649f5ab2a6b8bd91eae0654c8d306d47fc7 Mon Sep 17 00:00:00 2001 From: Jintao Date: Tue, 20 Aug 2024 10:54:16 +0800 Subject: [PATCH 2/3] Modify `compare_confs` for different cases If we don't need the conversion, meaning `dmat1` and `dmat2` are both given, we can start the subsequent steps directly. --- arc/species/converter.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arc/species/converter.py b/arc/species/converter.py index e0955a2396..50718e9487 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -2053,6 +2053,9 @@ def compare_confs(xyz1: dict, rtol: float = 0.01, atol: float = 0.1, rmsd_score: bool = False, + skip_conversion: bool = False, + dmat1: Optional[np.ndarray] = None, + dmat2: Optional[np.ndarray] = None, ) -> Union[float, bool]: """ Compare two Cartesian coordinates representing conformers using distance matrices. @@ -2066,6 +2069,9 @@ def compare_confs(xyz1: dict, rtol (float): The relative tolerance parameter (see Notes). atol (float): The absolute tolerance parameter (see Notes). rmsd_score (bool): Whether to output a root-mean-square deviation score of the two distance matrices. 
+ skip_conversion (bool): Whether to skip converting xyz to distance matrices. + dmat1 (np.ndarray, optional): The distance matrix of conformer 1. + dmat2 (np.ndarray, optional): The distance matrix of conformer 2. Returns: Union[float, bool]: @@ -2073,8 +2079,9 @@ def compare_confs(xyz1: dict, ``True`` if they do. - If ``rmsd_score`` is ``True``: The RMSD score of two distance matrices. """ - xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2) - dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2) + if not skip_conversion: + xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2) + dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2) if rmsd_score: # Distance matrix is symmetric, only need the upper triangular part to compute rmsd. rmsd = calc_rmsd(np.triu(dmat1), np.triu(dmat2)) From 693b2f3334afd290630492ceab3b5b293238cc24 Mon Sep 17 00:00:00 2001 From: Jintao Date: Tue, 20 Aug 2024 10:55:48 +0800 Subject: [PATCH 3/3] Modify the conformer comparison logic for efficiency Save `fl_distance` and `dmat` to each of the `conformer` for efficient processing. --- arc/species/conformers.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/arc/species/conformers.py b/arc/species/conformers.py index 239c8267ab..bc077cb801 100644 --- a/arc/species/conformers.py +++ b/arc/species/conformers.py @@ -519,15 +519,23 @@ def conformers_combinations_by_lowest_conformer(label, mol, base_xyz, multiple_t newest_conformers_dict[tor] = list() # Keys are torsions for plotting. 
for xyz, energy, dihedral in zip(xyzs, energies, sampling_points): exists = False - if any([converter.compare_confs(xyz, conf['xyz']) for conf in new_conformers + newest_conformer_list]): - exists = True + dmat1, fl_distance1 = None, None + for conf in new_conformers + newest_conformer_list: + fl_distance1, dmat1, conf, similar = converter.compare_confs_fl(xyz,conf) + if not similar: + break + if converter.compare_confs(xyz, conf['xyz'], skip_conversion=True, dmat1=dmat1, dmat2=conf['dmat']): + exists = True + break if xyz is not None and energy is not None: conformer = {'index': len_conformers + len(new_conformers) + len(newest_conformer_list), 'xyz': xyz, 'FF energy': round(energy, 3), 'source': f'Changing dihedrals on most stable conformer, iteration {i}', 'torsion': tor, - 'dihedral': round(dihedral, 2)} + 'dihedral': round(dihedral, 2), + 'dmat': dmat1, + 'fl_distance': fl_distance1} newest_conformers_dict[tor].append(conformer) if not exists: newest_conformer_list.append(conformer) @@ -541,7 +549,9 @@ def conformers_combinations_by_lowest_conformer(label, mol, base_xyz, multiple_t 'FF energy': None, 'source': f'Changing dihedrals on most stable conformer, iteration {i}, but FF energy is None', 'torsion': tor, - 'dihedral': round(dihedral, 2)}) + 'dihedral': round(dihedral, 2), + 'dmat': dmat1, + 'fl_distance': fl_distance1}) new_conformers.extend(newest_conformer_list) if not newest_conformer_list: newest_conformer_list = [lowest_conf_i] @@ -1113,9 +1123,15 @@ def get_lowest_confs(label: str, for index in range(len(conformer_list)): if (e is not None and conformer_list[index][energy] > min_e + e) or (n is not None and len(lowest_confs) >= n): break - if index > 0 and not any([converter.compare_confs(lowest_conf['xyz'], conformer_list[index]['xyz']) - for lowest_conf in lowest_confs]): - lowest_confs.append(conformer_list[index]) + if index > 0: + for lowest_conf in lowest_confs: + _, dmat1, lowest_conf, similar = 
converter.compare_confs_fl(conformer_list[index]['xyz'],lowest_conf) + if not similar or not converter.compare_confs(conformer_list[index]['xyz'], lowest_conf['xyz'], + skip_conversion=True, + dmat1=dmat1,dmat2=lowest_conf['dmat']): + lowest_confs.append(conformer_list[index]) if lowest_conf==lowest_confs[-1] else None + else: + break return lowest_confs