Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize conformer comparison algorithm #765

Merged
merged 3 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions arc/species/conformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,15 +519,23 @@ def conformers_combinations_by_lowest_conformer(label, mol, base_xyz, multiple_t
newest_conformers_dict[tor] = list() # Keys are torsions for plotting.
for xyz, energy, dihedral in zip(xyzs, energies, sampling_points):
exists = False
if any([converter.compare_confs(xyz, conf['xyz']) for conf in new_conformers + newest_conformer_list]):
exists = True
dmat1, fl_distance1 = None, None
for conf in new_conformers + newest_conformer_list:
fl_distance1, dmat1, conf, similar = converter.compare_confs_fl(xyz,conf)
if not similar:
break
if converter.compare_confs(xyz, conf['xyz'], skip_conversion=True, dmat1=dmat1, dmat2=conf['dmat']):
exists = True
break
if xyz is not None and energy is not None:
conformer = {'index': len_conformers + len(new_conformers) + len(newest_conformer_list),
'xyz': xyz,
'FF energy': round(energy, 3),
'source': f'Changing dihedrals on most stable conformer, iteration {i}',
'torsion': tor,
'dihedral': round(dihedral, 2)}
'dihedral': round(dihedral, 2),
'dmat': dmat1,
'fl_distance': fl_distance1}
newest_conformers_dict[tor].append(conformer)
if not exists:
newest_conformer_list.append(conformer)
Expand All @@ -541,7 +549,9 @@ def conformers_combinations_by_lowest_conformer(label, mol, base_xyz, multiple_t
'FF energy': None,
'source': f'Changing dihedrals on most stable conformer, iteration {i}, but FF energy is None',
'torsion': tor,
'dihedral': round(dihedral, 2)})
'dihedral': round(dihedral, 2),
'dmat': dmat1,
'fl_distance': fl_distance1})
new_conformers.extend(newest_conformer_list)
if not newest_conformer_list:
newest_conformer_list = [lowest_conf_i]
Expand Down Expand Up @@ -1113,9 +1123,15 @@ def get_lowest_confs(label: str,
for index in range(len(conformer_list)):
if (e is not None and conformer_list[index][energy] > min_e + e) or (n is not None and len(lowest_confs) >= n):
break
if index > 0 and not any([converter.compare_confs(lowest_conf['xyz'], conformer_list[index]['xyz'])
for lowest_conf in lowest_confs]):
lowest_confs.append(conformer_list[index])
if index > 0:
alongd marked this conversation as resolved.
Show resolved Hide resolved
for lowest_conf in lowest_confs:
_, dmat1, lowest_conf, similar = converter.compare_confs_fl(conformer_list[index]['xyz'],lowest_conf)
if not similar or not converter.compare_confs(conformer_list[index]['xyz'], lowest_conf['xyz'],
skip_conversion=True,
dmat1=dmat1,dmat2=lowest_conf['dmat']):
lowest_confs.append(conformer_list[index]) if lowest_conf==lowest_confs[-1] else None
else:
break
return lowest_confs


Expand Down
48 changes: 46 additions & 2 deletions arc/species/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2011,11 +2011,51 @@ def compare_zmats(z1, z2, r_tol=0.01, a_tol=2, d_tol=2, verbose=False, symmetric
symmetric_torsions=symmetric_torsions)


def compare_confs_fl(xyz1: dict,
conf2: dict,
rtol: float = 0.01,
) -> Tuple[float, Optional[np.ndarray], dict, bool]:
"""
Compare two Cartesian coordinates representing conformers using first and last atom distances. If the distances are the same,
the distance matrices are computed and returned.

The relative difference (``rtol`` * fl_distance1) is compared against the absolute difference abs(fl_distance1 - fl_distance2).

Args:
xyz1 (dict): Conformer 1.
conf2 (dict): Conformer 2.
rtol (float): The relative tolerance parameter (see Notes).

Returns:
Tuple containing distances and matrices:
- (fl_distance1, dmat1, conf2, similar): The first and last atom distance of conformer 1, its distance matrix,
conformer 2, and whether the two conformers have almost equal atom distances.
"""
similar = False
conf2['fl_distance'] = conf2.get('fl_distance')
conf2['dmat'] = conf2.get('dmat')
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(conf2['xyz'])
dmat1 = None
fl_distance1 = np.linalg.norm(np.array(xyz1['coords'][0]) - np.array(xyz1['coords'][-1]))
if conf2['fl_distance'] is None:
conf2['fl_distance'] = np.linalg.norm(np.array(xyz2['coords'][0]) - np.array(xyz2['coords'][-1]))
if not np.isclose(fl_distance1, conf2['fl_distance'], rtol=rtol):
return fl_distance1, dmat1, conf2, similar
similar = True
dmat1 = xyz_to_dmat(xyz1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we compute dmat1 in this function?
This function also doesn't use conf2['dmat'], maybe we can simplify it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of this function compare_confs_fl, is as following:

  1. To compare two Cartesian coordinates representing conformers by examining the distances between the first and last atoms.
  2. Distance matrices are computed and returned only if these distances are identical.

Considering potential modifications to the second purpose of this process, here are three options:

  1. Rename the function to compare_confs_pre to indicate that it performs a preliminary, quicker comparison before the more resource-intensive compare_confs calculation.
  2. Create a new function to do this calculation.
  3. Merge this part to compare_confs, though feasible, I do not recommend this due to the complexity it would add, especially in returning the variable conf2, which could complicate the primary function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

if conf2['dmat'] is None:
conf2['dmat'] = xyz_to_dmat(xyz2)
return fl_distance1, dmat1, conf2, similar


def compare_confs(xyz1: dict,
xyz2: dict,
rtol: float = 0.01,
atol: float = 0.1,
rmsd_score: bool = False,
skip_conversion: bool = False,
dmat1: Optional[np.ndarray] = None,
dmat2: Optional[np.ndarray] = None,
) -> Union[float, bool]:
"""
Compare two Cartesian coordinates representing conformers using distance matrices.
Expand All @@ -2029,15 +2069,19 @@ def compare_confs(xyz1: dict,
rtol (float): The relative tolerance parameter (see Notes).
atol (float): The absolute tolerance parameter (see Notes).
rmsd_score (bool): Whether to output a root-mean-square deviation score of the two distance matrices.
skip_conversion (bool): Whether to skip converting xyz to distance matrices.
dmat1 (np.ndarray, optional): The distance matrix of conformer 1.
dmat2 (np.ndarray, optional): The distance matrix of conformer 2.

Returns:
Union[float, bool]:
- If ``rmsd_score`` is ``False`` (default): Whether the two conformers have almost equal atom distances.
``True`` if they do.
- If ``rmsd_score`` is ``True``: The RMSD score of two distance matrices.
"""
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2)
dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2)
if not skip_conversion:
xyz1, xyz2 = check_xyz_dict(xyz1), check_xyz_dict(xyz2)
dmat1, dmat2 = xyz_to_dmat(xyz1), xyz_to_dmat(xyz2)
if rmsd_score:
# Distance matrix is symmetric, only need the upper triangular part to compute rmsd.
rmsd = calc_rmsd(np.triu(dmat1), np.triu(dmat2))
Expand Down
Loading