Summary of this patch (free text placed ahead of the first file header).

aiidalab_widgets_base/structures.py
  * _rdkit_opt: the value returned by AllChem.EmbedMolecule is now kept in
    conf_id. When it is negative the method writes
    "RDKit ERROR: Could not generate conformer" to self.output.value and
    returns None, so the UFF parameter check and optimization are not reached.
  * _mol_from_smiles: the input is first passed through the new
    canonicalize_smiles method. A falsy result makes the method return None.
    When the canonical string differs from the input it is shown in
    self.output.value, and the canonical string (not the raw input) is what
    gets handed to _rdkit_opt.
  * _on_button_pressed: one blank line added, no change in behavior.
  * canonicalize_smiles (new): parses the string with
    Chem.MolFromSmiles(sanitize=True) and converts it back with
    Chem.MolToSmiles(isomericSmiles=True, canonical=True). On either failure
    it writes an error message to self.output.value and returns None;
    otherwise it returns the canonical SMILES string.

tests/test_structures.py
  * test_smiles_canonicalization (new): checks that "C" is returned unchanged,
    that "O=CC=C" becomes "C=CC=O", that canonicalizing the result again gives
    the same string, and that _rdkit_opt can be called with the long
    non-canonical string from issue 505 (the test makes no assertion on the
    value returned by that call).

NOTE(review): line breaks and indentation were rebuilt from the hunk line
counts and from Python syntax; confirm the context lines against the target
files before applying.

diff --git a/aiidalab_widgets_base/structures.py b/aiidalab_widgets_base/structures.py
index 055215b63..ec9f286c3 100644
--- a/aiidalab_widgets_base/structures.py
+++ b/aiidalab_widgets_base/structures.py
@@ -772,7 +772,10 @@ def _rdkit_opt(self, smiles, steps):
             return None
         mol = Chem.AddHs(mol)
 
-        AllChem.EmbedMolecule(mol, maxAttempts=20, randomSeed=42)
+        conf_id = AllChem.EmbedMolecule(mol, maxAttempts=20, randomSeed=42)
+        if conf_id < 0:
+            self.output.value = "RDKit ERROR: Could not generate conformer"
+            return None
         if AllChem.UFFHasAllMoleculeParams(mol):
             AllChem.UFFOptimizeMolecule(mol, maxIters=steps)
         else:
@@ -785,8 +788,18 @@ def _rdkit_opt(self, smiles, steps):
 
     def _mol_from_smiles(self, smiles, steps=1000):
         """Convert SMILES to ase structure try rdkit then pybel"""
+
+        # Canonicalize the SMILES code
+        # https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system#Terminology
+        canonical_smiles = self.canonicalize_smiles(smiles)
+        if not canonical_smiles:
+            return None
+
+        if canonical_smiles != smiles:
+            self.output.value = f"Canonical SMILES: {canonical_smiles}"
+
         try:
-            return self._rdkit_opt(smiles, steps)
+            return self._rdkit_opt(canonical_smiles, steps)
         except ValueError as e:
             self.output.value = str(e)
             if self.disable_openbabel:
@@ -802,11 +815,27 @@ def _on_button_pressed(self, change=None):
             return
         spinner = f"Screening possible conformers {self.SPINNER}"  # font-size:20em;
         self.output.value = spinner
+
         self.structure = self._mol_from_smiles(self.smiles.value)
         # Don't overwrite possible error/warning messages
         if self.output.value == spinner:
             self.output.value = ""
 
+    def canonicalize_smiles(self, smiles):
+        from rdkit import Chem
+
+        mol = Chem.MolFromSmiles(smiles, sanitize=True)
+        if mol is None:
+            # Something is seriously wrong with the SMILES code,
+            # just return None and don't attempt anything else.
+            self.output.value = "RDkit ERROR: Invalid SMILES string"
+            return None
+        canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
+        if not canonical_smiles:
+            self.output.value = "RDkit ERROR: Could not canonicalize SMILES"
+            return None
+        return canonical_smiles
+
     @tl.default("structure")
     def _default_structure(self):
         return None
diff --git a/tests/test_structures.py b/tests/test_structures.py
index 0fafd5d4c..f9119ddc8 100644
--- a/tests/test_structures.py
+++ b/tests/test_structures.py
@@ -157,6 +157,27 @@ def test_smiles_widget():
     assert widget.structure.get_chemical_formula() == "N2"
 
 
+@pytest.mark.usefixtures("aiida_profile_clean")
+def test_smiles_canonicalization():
+    """Test the SMILES canonicalization via RdKit."""
+    widget = awb.SmilesWidget()
+
+    # Should not change canonical smiles
+    assert widget.canonicalize_smiles("C") == "C"
+
+    # Should canonicalize this
+    canonical = widget.canonicalize_smiles("O=CC=C")
+    assert canonical == "C=CC=O"
+
+    # Should be idempotent
+    assert canonical == widget.canonicalize_smiles(canonical)
+
+    # Regression test for https://github.com/aiidalab/aiidalab-widgets-base/issues/505
+    # Throwing in this non-canonical string should not raise
+    nasty_smiles = "C=CC1=C(C2=CC=C(C3=CC=CC=C3)C=C2)C=C(C=C)C(C4=CC=C(C(C=C5)=CC=C5C(C=C6C=C)=C(C=C)C=C6C7=CC=C(C(C=C8)=CC=C8C(C=C9C=C)=C(C=C)C=C9C%10=CC=CC=C%10)C=C7)C=C4)=C1"
+    widget._rdkit_opt(nasty_smiles, steps=1)
+
+
 @pytest.mark.usefixtures("aiida_profile_clean")
 def test_basic_cell_editor_widget(structure_data_object):
     """Test the `BasicCellEditor`."""