ReactionMechanismGenerator · rwest · Jun 9, 2023 · Jun 1, 2023 · Jun 1, 2023 · Jun 1, 2023
diff --git a/documentation/source/reference/molecule/adjlist.rst b/documentation/source/reference/molecule/adjlist.rst
@@ -34,8 +34,8 @@ alphanumeric characters and the underscore, as if an identifier in many popular
 programming languages. However, strictly speaking any non-space ASCII character
 is allowed.
 
-The subsequent lines may contain keyword-value pairs. Currently there is only
-one keyword, ``multiplicity``.
+The subsequent lines may contain keyword-value pairs. Currently there can be: 
+``multiplicity``, ``metal`` and ``facet``.
 
 For species or molecule declarations, the value after ``multiplicity`` defines 
 the spin multiplicity of the molecule. E.g. ``multiplicity 1`` for most ground state 
@@ -44,7 +44,7 @@ and ``multiplicity 3`` for a triplet biradical.
 If the ``multiplicity`` line is not present then a value of 
 (1 + number of unpaired electrons) is assumed. 
 Thus, it can usually be omitted, but if present can be used to distinguish,
-for example, singlet CH2 from triplet CH2.
+for example, singlet CH2 from triplet CH2. 
 
 If defining a Functional :class:`~rmgpy.molecule.Group`, then the value must be a list,
 which defines the multiplicities that will be matched by the group, eg. 
@@ -53,6 +53,9 @@ If a wildcard is desired, the line ``'multiplicity x`` can be used instead to ac
 all multiplicities.  If the multiplicity line is omitted altogether, then a wildcard 
 is assumed.
 
+``metal`` and ``facet`` work similarly and will correspond to lines like ``metal Fe``, 
+``metal [Fe,Cu,Ag]``, ``facet 111``, ``facet [111,211,110]``. 
+
 e.g. the following two group adjlists represent identical groups. ::
 
     group1
@@ -69,7 +72,7 @@ each subsequent line describes a single atom and its
 local bond structure. The format of these lines is a whitespace-delimited list
 with tokens ::
 
-    <number> [<label>] <element> u<unpaired> [p<pairs>] [c<charge>] <bondlist>
+    <number> [<label>] <element> u<unpaired> [p<pairs>] [c<charge>] [s<site>] [m<morphology>] <bondlist>
 
 The first item is the number used to identify that atom. Any number may be used,
 though it is recommended to number the atoms sequentially starting from one.
@@ -86,6 +89,8 @@ a sequence of tokens describing the electronic state of the atom:
 * ``p0`` number of lone **pairs** of electrons, common on oxygen and nitrogen.
 * ``c0`` formal **charge** on the atom, e.g. ``c-1`` (negatively charged),
   ``c0``, ``c+1`` (positively charged)
+* ``s`` the site type a site atom is e.g. ``s"fcc"``
+* ``m`` the morphology of a site atom e.g. ``m"terrace"``
 
 For :class:`~rmgpy.molecule.Molecule` definitions:
 The value must be a single integer (and for charge must have a + or - sign if not equal to 0)
@@ -94,7 +99,7 @@ The number of lone pairs and the formal charge are assumed to be zero if omitted
 
 For :class:`~rmgpy.molecule.Group` definitions:
 The value can be an integer or a list of integers (with signs, for charges),
-eg. ``u[0,1,2]`` or ``c[0,+1,+2,+3,+4]``, or may be a wildcard ``x`` 
+eg. ``u[0,1,2]``, ``c[0,+1,+2,+3,+4]``, or ``s["hcp","fcc"] or may be a wildcard ``x`` 
 which matches any valid value, 
 eg. ``px`` is the same as ``p[0,1,2,3,4, ...]`` and ``cx`` is the same as
 ``c[...,-4,-3,-2,-1,0,+1,+2,+3,+4,...]``. Lists must be enclosed is square brackets,
@@ -169,6 +174,12 @@ The allowed element types, radicals, and bonds are listed in the following table
  |                      | T        | Triple bond         |
  |                      +----------+---------------------+
  |                      | B        | Benzene bond        |
+ |                      +--------------------------------+
+ |                      | vdW      | Van der Waals bond  |
+ |                      +--------------------------------+
+ |                      | H        | Hydrogen bond       |
+ |                      +--------------------------------+
+ |                      | R        | Reaction bond       |
  +----------------------+----------+---------------------+
 
 

diff --git a/rmgpy/molecule/adjlist.py b/rmgpy/molecule/adjlist.py
@@ -438,8 +438,10 @@ def from_old_adjacency_list(adjlist, group=False, saturate_h=False):
     except InvalidAdjacencyListError:
         logging.error("Troublesome adjacency list:\n" + adjlist)
         raise
-
-    return atoms, multiplicity
+    if group:
+        return atoms, multiplicity, [], []
+    else:
+        return atoms, multiplicity, '', ''
 
 
 ###############################
@@ -460,7 +462,7 @@ def from_old_adjacency_list(adjlist, group=False, saturate_h=False):
                             r'\s*$')  # the end!
 
 
-def from_adjacency_list(adjlist, group=False, saturate_h=False):
+def from_adjacency_list(adjlist, group=False, saturate_h=False, check_consistency=True):
     """
     Convert a string adjacency list `adjlist` into a set of :class:`Atom` and
     :class:`Bond` objects.
@@ -524,11 +526,17 @@ def from_adjacency_list(adjlist, group=False, saturate_h=False):
             multiplicity = int(line.split()[1])
         if len(lines) == 0:
             raise InvalidAdjacencyListError('No atoms specified in adjacency list: \n{0}'.format(adjlist))
-
+        
     mistake1 = re.compile(r'\{[^}]*\s+[^}]*\}')
+    if group:
+        metal = []
+        facet = []
+    else:
+        metal = ''
+        facet = ''
     # Iterate over the remaining lines, generating Atom or GroupAtom objects
     for line in lines:
-
+            
         # Sometimes people put spaces after commas, which messes up the
         # parse-by-whitespace. Examples include '[Cd, Ct]'.
         if mistake1.search(line):
@@ -545,7 +553,49 @@ def from_adjacency_list(adjlist, group=False, saturate_h=False):
         # Skip if blank line
         if len(data) == 0:
             continue
-
+
+        if line.split()[0] == 'metal':
+            if group:
+                match = re.match(r'\s*metal\s+\[\s*[\w,\s]+\s*\]\s*$', line)
+                if not match:
+                    rematch = re.match(r'\s*metal\s+x\s*$', line)
+                    if not rematch:
+                        raise InvalidAdjacencyListError("Invalid metal line '{0}'. Should be a list like "
+                                                        "'metal [Cu,Fe,Ag]' or a wildcard 'metal x'".format(line))
+                else:
+                    out = line.split('[')[1][:-1]
+                    metal = [x.strip() for x in out.split(',') if x.strip() != '']
+
+            else:
+                match = re.match(r'\s*metal\s+\w+\s*$', line)
+                if not match:
+                    raise InvalidAdjacencyListError("Invalid metal line '{0}'. Should be a string like "
+                                                "'metal Fe'".format(line))
+                metal = line.split()[1].strip()
+
+            continue
+
+        if line.split()[0] == 'facet':
+            if group:
+                match = re.match(r'\s*facet\s+\[\s*[\w,\s]+\s*\]\s*$', line)
+                if not match:
+                    rematch = re.match(r'\s*facet\s+x\s*$', line)
+                    if not rematch:
+                        raise InvalidAdjacencyListError("Invalid facet line '{0}'. Should be a list like "
+                                                        "'facet [111,211,110]' or a wildcard 'facet x'".format(line))
+                else:
+                    out = line.split('[')[1][:-1]
+                    facet = [x.strip() for x in out.split(',') if x.strip() != '']
+
+            else:
+                match = re.match(r'\s*facet\s+\w+\s*$', line)
+                if not match:
+                    raise InvalidAdjacencyListError("Invalid facet line '{0}'. Should be a string like "
+                                                "'facet 111'".format(line))
+                facet = line.split()[1].strip()
+
+            continue
+
         # First item is index for atom
         # Sometimes these have a trailing period (as if in a numbered list),
         # so remove it just in case
@@ -677,6 +727,32 @@ def from_adjacency_list(adjlist, group=False, saturate_h=False):
             if not group:
                 partial_charges.append(0)
 
+        # Next the sites (if provided)
+        sites = []
+        if len(data) > index:
+            s_state = data[index]
+            if s_state[0] == 's':
+                if s_state[1] == '[':
+                    s_state = s_state[2:-1].split(',')
+                else:
+                    s_state = [s_state[1:]]
+                for s in s_state:
+                    sites.append(s[1:-1])
+                index += 1
+
+        # Next the morphologys (if provided)
+        morphologies = []
+        if len(data) > index:
+            m_state = data[index]
+            if m_state[0] == 'm':
+                if m_state[1] == '[':
+                    m_state = m_state[2:-1].split(',')
+                else:
+                    m_state = [m_state[1:]]
+                for m in m_state:
+                    morphologies.append(m[1:-1])
+                index += 1
+
         # Next the isotope (if provided)
         isotope = -1
         if len(data) > index:
@@ -695,12 +771,20 @@ def from_adjacency_list(adjlist, group=False, saturate_h=False):
 
         # Create a new atom based on the above information
         if group:
-            atom = GroupAtom(atom_type, unpaired_electrons, partial_charges, label, lone_pairs, props)
+            atom = GroupAtom(atom_type, unpaired_electrons, partial_charges, label, lone_pairs, sites, morphologies, props)
         else:
             # detect if this is cutting label or atom
             _ , cutting_label_list = Fragment().detect_cutting_label(atom_type[0])
             if cutting_label_list == []:
-                atom = Atom(atom_type[0], unpaired_electrons[0], partial_charges[0], label, lone_pairs[0])
+                if sites == []:
+                    site = ''
+                else:
+                    site = sites[0]
+                if morphologies == []:
+                    morphology = ''
+                else:
+                    morphology = morphologies[0]
+                atom = Atom(atom_type[0], unpaired_electrons[0], partial_charges[0], label, lone_pairs[0], site, morphology)
                 if isotope != -1:
                     atom.element = get_element(atom.number, isotope)
             else:
@@ -772,7 +856,7 @@ def from_adjacency_list(adjlist, group=False, saturate_h=False):
             Saturator.saturate(atoms)
 
     # Consistency checks
-    if not group:
+    if not group and check_consistency:
         # Molecule consistency check
         # Electron and valency consistency check for each atom
         for atom in atoms:
@@ -787,13 +871,19 @@ def from_adjacency_list(adjlist, group=False, saturate_h=False):
         ConsistencyChecker.check_multiplicity(n_rad, multiplicity)
         for atom in atoms:
             ConsistencyChecker.check_hund_rule(atom, multiplicity)
-        return atoms, multiplicity
+        return atoms, multiplicity, metal, facet
     else:
         # Currently no group consistency check
-        return atoms, multiplicity
+        if not group:
+            if multiplicity is None:
+                n_rad = sum([atom.radical_electrons for atom in atoms])
+                multiplicity = n_rad + 1
+
+        return atoms, multiplicity, metal, facet
+
 
 
-def to_adjacency_list(atoms, multiplicity, label=None, group=False, remove_h=False, remove_lone_pairs=False,
+def to_adjacency_list(atoms, multiplicity, metal='', facet='', label=None, group=False, remove_h=False, remove_lone_pairs=False,
                       old_style=False):
     """
     Convert a chemical graph defined by a list of `atoms` into a string
@@ -821,10 +911,18 @@ def to_adjacency_list(atoms, multiplicity, label=None, group=False, remove_h=Fal
             # Functional group should have a list of possible multiplicities.  
             # If the list is empty, then it does not need to be written
             adjlist += 'multiplicity [{0!s}]\n'.format(','.join(str(i) for i in multiplicity))
+        if metal:
+            adjlist += 'metal [{0!s}]\n'.format(','.join(i for i in metal))
+        if facet:
+            adjlist += 'facet [{0!s}]\n'.format(','.join(i for i in facet))
     else:
         assert isinstance(multiplicity, int), "Molecule should have an integer multiplicity"
         if multiplicity != 1 or any(atom.radical_electrons for atom in atoms):
             adjlist += 'multiplicity {0!r}\n'.format(multiplicity)
+        if metal:
+            adjlist += f"metal {metal}\n"
+        if facet:
+            adjlist += f"facet {facet}\n"
 
     # Determine the numbers to use for each atom
     atom_numbers = {}
@@ -843,6 +941,8 @@ def to_adjacency_list(atoms, multiplicity, label=None, group=False, remove_h=Fal
     atom_charge = {}
     atom_isotope = {}
     atom_props = {}
+    atom_site = {}
+    atom_morphology = {}
     if group:
         for atom in atom_numbers:
             # Atom type(s)
@@ -874,6 +974,22 @@ def to_adjacency_list(atoms, multiplicity, label=None, group=False, remove_h=Fal
             else:
                 atom_charge[atom] = '[{0}]'.format(','.join(['+'+str(charge) if charge > 0 else ''+str(charge) for charge in atom.charge]))
 
+            # Sites
+            if len(atom.site) == 1:
+                atom_site[atom] = "\"" + atom.site[0] + "\""
+            elif len(atom.site) == 0:
+                atom_site[atom] = None  # Empty list indicates wildcard
+            else:
+                atom_site[atom] = '["{0}"]'.format('","'.join(s for s in atom.site))
+
+            # Morphologies
+            if len(atom.morphology) == 1:
+                atom_morphology[atom] = "\"" + atom.morphology[0] + "\""
+            elif len(atom.morphology) == 0:
+                atom_morphology[atom] = None  # Empty list indicates wildcard
+            else:
+                atom_morphology[atom] = '["{0}"]'.format('","'.join(s for s in atom.morphology))
+
             # Isotopes
             atom_isotope[atom] = -1
 
@@ -892,6 +1008,16 @@ def to_adjacency_list(atoms, multiplicity, label=None, group=False, remove_h=Fal
             atom_lone_pairs[atom] = str(atom.lone_pairs)
             # Partial Charge(s)
             atom_charge[atom] = '+' + str(atom.charge) if atom.charge > 0 else '' + str(atom.charge)
+            # Sites
+            if atom.site:
+                atom_site[atom] = "\"" + atom.site + "\""
+            else:
+                atom_site[atom] = None
+            # Morphology
+            if atom.morphology:
+                atom_morphology[atom] = "\"" + atom.morphology + "\""
+            else:
+                atom_morphology[atom] = None
             # Isotopes
             if isinstance(atom, Atom):
                 atom_isotope[atom] = atom.element.isotope
@@ -926,6 +1052,12 @@ def to_adjacency_list(atoms, multiplicity, label=None, group=False, remove_h=Fal
         # Partial charges
         if atom_charge[atom] is not None:
             adjlist += ' c{0}'.format(atom_charge[atom])
+        # Sites
+        if atom_site[atom]:
+            adjlist += ' s{0}'.format(atom_site[atom])
+        # Morphologies
+        if atom_morphology[atom]:
+            adjlist += ' m{0}'.format(atom_morphology[atom])
         # Isotopes
         if atom_isotope[atom] != -1:
             adjlist += ' i{0}'.format(atom_isotope[atom])

diff --git a/rmgpy/molecule/adjlistTest.py b/rmgpy/molecule/adjlistTest.py
@@ -171,6 +171,58 @@ def test_atom_props(self):
         adjlist2 = group.to_adjacency_list()
 
         self.assertEqual(adjlist.strip(), adjlist2.strip())
+
+    def test_metal_facet_site_morphology(self):
+        adjlist1 = """metal Cu
+facet 111
+1 *3 X u0 p0 c0 s"atop" m"terrace"  {2,S} {4,S}
+2 *1 O u0 p2 c0 {1,S} {3,R}
+3 *2 H u0 p0 c0 {2,R} {4,R}
+4 *4 X u0 p0 c0 s"hcp" m"terrace" {3,R} {1,S}"""
+
+        adjlist2 = """multiplicity [1]
+metal [Cu, Fe, CuO2 ]
+facet [111, 211, 1101, 110, ]
+1 *3 X u0 p0 c0 s["atop","fcc"] m"terrace"  {2,S} {4,S}
+2 *1 O u0 p2 c0 {1,S} {3,R}
+3 *2 H u0 p0 c0 {2,R} {4,R}
+4 *4 X u0 p0 c0 s"hcp" m["terrace","sc"] {3,R} {1,S}"""
+
+        adjlist1test = """metal Cu
+facet 111
+1 *3 X u0 p0 c0 s"atop" m"terrace" {2,S} {4,S}
+2 *1 O u0 p2 c0 {1,S} {3,R}
+3 *2 H u0 p0 c0 {2,R} {4,R}
+4 *4 X u0 p0 c0 s"hcp" m"terrace" {1,S} {3,R}"""
+
+        adjlist2test = """multiplicity [1]
+metal [Cu,Fe,CuO2]
+facet [111,211,1101,110]
+1 *3 X u0 p0 c0 s["atop","fcc"] m"terrace" {2,S} {4,S}
+2 *1 O u0 p2 c0 {1,S} {3,R}
+3 *2 H u0 p0 c0 {2,R} {4,R}
+4 *4 X u0 p0 c0 s"hcp" m["terrace","sc"] {1,S} {3,R}"""
+        mol = Molecule().from_adjacency_list(adjlist1,check_consistency=False)
+        group = Group().from_adjacency_list(adjlist2)
+
+        self.assertEqual(mol.metal,"Cu")
+        self.assertEqual(mol.facet,"111")
+        self.assertEqual(group.metal,["Cu","Fe","CuO2"])
+        self.assertEqual(group.facet,["111","211","1101","110"])
+
+        self.assertEqual(mol.atoms[0].site,"atop")
+        self.assertEqual(mol.atoms[3].site,"hcp")
+        self.assertEqual(mol.atoms[0].morphology, "terrace")
+        self.assertEqual(mol.atoms[3].morphology, "terrace")
+
+        self.assertEqual(group.atoms[0].site,["atop","fcc"])
+        self.assertEqual(group.atoms[3].site,["hcp"])
+        self.assertEqual(group.atoms[0].morphology, ["terrace"])
+        self.assertEqual(group.atoms[3].morphology,["terrace","sc"])
+
+        self.assertEqual(mol.to_adjacency_list().strip(),adjlist1test.strip())
+        self.assertEqual(group.to_adjacency_list().strip(), adjlist2test.strip())
+
 
 
 class TestMoleculeAdjLists(unittest.TestCase):