Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HETATM record type written out #2880

Merged
merged 19 commits into from
Aug 14, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ Chronological list of authors
- Andrea Rizzi
- William Glass
- Marcello Sega
- Mieczyslaw Torchala

External code
-------------
Expand Down
5 changes: 4 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ The rules for this file:

------------------------------------------------------------------------------
??/??/?? tylerjereddy, richardjgowers, IAlibay, hmacdope, orbeckst, cbouy,
lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney
lilyminium, daveminh, jbarnoud, yuxuanzhuang, VOD555, ianmkenney,
mieczyslaw

* 2.0.0

Fixes
* Instead of using ATOM for both ATOM and HETATM, HETATM record type
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
keyword is properly written out by PDB writer (Issue #1753, PR #2880)
* Bond attribute is no longer set if PDB file contains no CONECT records
(Issue #2832)
* ResidueAttrs now have Atom as a target class by default; ICodes and
Expand Down
39 changes: 32 additions & 7 deletions package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,8 @@ class PDBWriter(base.WriterBase):
.. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
.. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL
.. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT
.. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
.. _HETATM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#HETATM


Note
Expand All @@ -493,6 +495,11 @@ class PDBWriter(base.WriterBase):
are not set (:code:`None` or :code:`np.zeros(6)`,
see Issue #2698).

When the record_types attribute is present (e.g. Universe object was
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
created by loading a PDB file), ATOM_ and HETATM_ record type keywords
are written out accordingly. Otherwise, the ATOM_ record type is the
default output.

See Also
--------
This class is identical to :class:`MultiPDBWriter` with the one
Expand Down Expand Up @@ -528,6 +535,11 @@ class PDBWriter(base.WriterBase):
"{chainID:1s}{resSeq:4d}{iCode:1s}"
" {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}"
"{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"),
'HETATM': (
"HETATM{serial:5d} {name:<4s}{altLoc:<1s}{resName:<4s}"
"{chainID:1s}{resSeq:4d}{iCode:1s}"
" {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}"
"{tempFactor:6.2f} {segID:<4s}{element:>2s}\n"),
'REMARK': "REMARK {0}\n",
'COMPND': "COMPND {0}\n",
'HEADER': "HEADER {0}\n",
Expand Down Expand Up @@ -987,13 +999,19 @@ def _write_timestep(self, ts, multiframe=False):

.. versionchanged:: 0.7.6
The *multiframe* keyword was added, which completely determines if
MODEL records are written. (Previously, this was decided based on the
underlying trajectory and only if ``len(traj) > 1`` would MODEL records
have been written.)
MODEL records are written. (Previously, this was decided based on
the underlying trajectory and only if ``len(traj) > 1`` would
MODEL records have been written.)

.. versionchanged:: 1.0.0
ChainID now comes from the last character of segid, as stated in the documentation.
An indexing issue meant it previously used the first charater (Issue #2224)
ChainID now comes from the last character of segid, as stated in
the documentation. An indexing issue meant it previously used the
first charater (Issue #2224)

.. versionchanged:: 2.0.0
When only record_type attribute is present, instead of using ATOM
for both ATOM and HETATM, HETATM record types are properly written
out (Issue #1753).
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved

"""
atoms = self.obj.atoms
Expand Down Expand Up @@ -1028,6 +1046,7 @@ def get_attr(attrname, default):
occupancies = get_attr('occupancies', 1.0)
tempfactors = get_attr('tempfactors', 0.0)
atomnames = get_attr('names', 'X')
record_types = get_attr('record_types', 'ATOM')

for i, atom in enumerate(atoms):
vals = {}
Expand All @@ -1044,8 +1063,14 @@ def get_attr(attrname, default):
vals['segID'] = segids[i][:4]
vals['element'] = guess_atom_element(atomnames[i].strip())[:2]

# .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM
self.pdbfile.write(self.fmt['ATOM'].format(**vals))
# record_type attribute, if exists, can be ATOM or HETATM
try:
self.pdbfile.write(self.fmt[record_types[i]].format(**vals))
except KeyError:
errmsg = (f"Found {record_types[i]} for the record type, but "
f"only allowed types are ATOM or HETATM")
raise ValueError(errmsg) from None

if multiframe:
self.ENDMDL()
self.frames_written += 1
Expand Down
71 changes: 68 additions & 3 deletions testsuite/MDAnalysisTests/coordinates/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
import pytest
from io import StringIO
import os
from io import StringIO

import MDAnalysis as mda
import numpy as np
import pytest
from MDAnalysisTests import make_Universe
from MDAnalysisTests.coordinates.base import _SingleFrameReader
from MDAnalysisTests.coordinates.reference import (RefAdKSmall,
Expand All @@ -36,7 +36,8 @@
INC_PDB, PDB_xlserial, ALIGN, ENT,
PDB_cm, PDB_cm_gz, PDB_cm_bz2,
PDB_mc, PDB_mc_gz, PDB_mc_bz2,
PDB_CRYOEM_BOX, MMTF_NOCRYST)
PDB_CRYOEM_BOX, MMTF_NOCRYST,
PDB_HOLE, mol2_molecule)
from numpy.testing import (assert_equal,
assert_array_almost_equal,
assert_almost_equal)
Expand Down Expand Up @@ -188,6 +189,14 @@ def universe2(self):
def universe3(self):
return mda.Universe(PDB)

@pytest.fixture
def universe4(self):
return mda.Universe(PDB_HOLE, PDB_HOLE)
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved

@pytest.fixture
def universe5(self):
return mda.Universe(mol2_molecule, mol2_molecule)

@pytest.fixture(params=[
[PDB_CRYOEM_BOX, np.zeros(6)],
[MMTF_NOCRYST, None]
Expand Down Expand Up @@ -446,6 +455,62 @@ def test_stringio_outofrange(self, universe3):
with mda.coordinates.PDB.PDBWriter(outstring) as writer:
writer.write(u.atoms)

def test_hetatm_written(self, universe4, tmpdir):
"""
Checks that HETATM record types are written.
"""

u = universe4
u_atoms = u.select_atoms("resname ETA and record_type HETATM")
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
assert_equal(len(u_atoms), 8)

outfile = str(tmpdir.join('test-hetatm.pdb'))
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
u.atoms.write(outfile)
written = mda.Universe(outfile, outfile)
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
written_atoms = written.select_atoms("resname ETA and "
"record_type HETATM")

assert_equal(len(u_atoms), len(written_atoms))
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved

def test_default_atom_record_type_written(self, universe5, tmpdir):
"""
Checks that ATOM record types are written when there is no
record_type attribute.
"""

u = universe5
outfile = str(tmpdir.join('test-mol2-to-pdb.pdb'))

expected_msg = "Found no information for attr: " \
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
"'record_types' Using default value of 'ATOM'"
with pytest.warns(UserWarning, match=expected_msg):
u.atoms.write(outfile)

written = mda.Universe(outfile, outfile)
assert_equal(len(u.atoms), len(written.atoms))
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved

atms = written.select_atoms("record_type ATOM")
assert_equal(len(atms.atoms), len(u.atoms))

hetatms = written.select_atoms("record_type HETATM")
assert_equal(len(hetatms.atoms), 0)

def test_abnormal_record_type(self, universe5, tmpdir):
"""
Checks whether KeyError is raised when record type is
neither ATOM or HETATM.
"""
u = universe5
u.add_TopologyAttr('record_type', ['ABNORM']*len(u.atoms))
outfile = str(tmpdir.join('test-abnormal-record_type.pdb'))

expected_msg = "Found 'ABNORM' for record type, but allowed " \
"types are ATOM or HETATM"

with pytest.raises(ValueError):
mieczyslaw marked this conversation as resolved.
Show resolved Hide resolved
with pytest.warns(UserWarning, match=expected_msg):
u.atoms.write(outfile)


class TestMultiPDBReader(object):
@staticmethod
Expand Down