Skip to content

Commit

Permalink
Support for protein-protein complex (#57)
Browse files Browse the repository at this point in the history
* feat: support protein-protein complex calculation

* version 0.1.7
  • Loading branch information
Aunity authored Oct 16, 2024
1 parent 207a777 commit 6543335
Show file tree
Hide file tree
Showing 13 changed files with 1,177 additions and 29 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,8 @@ dmypy.json

# Pyre type checker
.pyre/


# examples
example/3f/#*
example/3f/complex_reres.pdb
4 changes: 4 additions & 0 deletions ChangeLog.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Changes in Release 0.1.7
--------------------------------------------------------------------------------
* Uni-GBSA now support calculation for protein-protein complexes
* Fix some code styles.
494 changes: 494 additions & 0 deletions example/protein-protein/1vf6_LIG.pdb

Large diffs are not rendered by default.

477 changes: 477 additions & 0 deletions example/protein-protein/1vf6_REC.pdb

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions example/protein-protein/default.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
; parameters for simulation
[simulation]
; input pose process method:
; input - just use input pose to calculation
; em - run a simple energy minimizaion for the input poses
; md - run a md simulation for the input poses
mode = em

; simulation box type: triclinic, cubic, dodecahedron, octahedron
boxtype = triclinic

; Distance between the solute and the simulation box
boxsize = 0.9

; Specify salt concentration (mol/liter). This will add sufficient ions to reach up to the specified concentration
conc = 0.15

; number of md simulation steps
nsteps = 500

; number of equilibrium simulation(nvt, npt) steps
eqsteps = 50

; number of structure to save for the md simulation
nframe = 10

; protein forcefield (gromacs engine)
proteinforcefield = amber03

; ligand forcefield (acpype engine): gaff, gaff2
ligandforcefield = gaff
; ligand charge method: bcc, gas
; eem Assign Electronegativity Equilization Method (EEM) atomic partial charges. Bultinck B3LYP/6-31G*/MPA
; eem2015ba Assign Electronegativity Equilization Method (EEM) atomic partial charges. Cheminf B3LYP/6-311G/AIM
; eem2015bm Assign Electronegativity Equilization Method (EEM) atomic partial charges. Cheminf B3LYP/6-311G/MPA
; eem2015bn Assign Electronegativity Equilization Method (EEM) atomic partial charges. Cheminf B3LYP/6-311G/NPA
; eem2015ha Assign Electronegativity Equilization Method (EEM) atomic partial charges. Cheminf HF/6-311G/AIM
; eem2015hm Assign Electronegativity Equilization Method (EEM) atomic partial charges. Cheminf HF/6-311G/MPA
; eem2015hn Assign Electronegativity Equilization Method (EEM) atomic partial charges. Cheminf HF/6-311G/NPA
; gas/gasteiger Assign Gasteiger-Marsili sigma partial charges
; mmff94 Assign MMFF94 partial charges
; qeq Assign QEq (charge equilibration) partial charges (Rappe and Goddard, 1991)
; qtpie Assign QTPIE (charge transfer, polarization and equilibration) partial charges (Chen and Martinez, 2007)
; bcc Assign Am1-BCC atomic partial charge.
ligandCharge = bcc


; parameters for PBSA/GBSA calculation, support all the gmx_MMPBSA parameters
[GBSA]
; calculation name
sys_name = GBSA

; calculation mode, Separated by commas. gb,pb,decomposition
modes = gb

; best parameters for PBSA/GBSA calculations obtained from Wang, Ercheng, et al. Chemical reviews 119.16 (2019): 9478-9508.
igb = 2
indi = 4.0
exdi = 80.0
10 changes: 10 additions & 0 deletions example/protein-protein/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
unigbsa-pipeline -i 1vf6_REC.pdb -l 1vf6_LIG.pdb -c default.ini
# 10/16/2024 16:32:16 PM - INFO - Build protein topology.
# 10/16/2024 16:32:17 PM - INFO - Build ligand topology: 1vf6_LIG
# 10/16/2024 16:32:18 PM - INFO - Running energy minimization: 1vf6_LIG
# 10/16/2024 16:32:20 PM - INFO - Run the MMPB(GB)SA.
# 10/16/2024 16:32:28 PM - INFO - Clean the results.
# ================================================================================
# Results: Energy.csv Dec.csv
# Frames mode delta_G(kcal/mole)
# 1 gb -57.3575
8 changes: 7 additions & 1 deletion tests/test_pipline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@

TEST_EM_CONFIG = os.path.dirname(os.path.abspath(__file__)) + '/config/em.ini'
TEST_MD_CONFIG = os.path.dirname(os.path.abspath(__file__)) + '/config/md.ini'


class TestPipline(unittest.TestCase):
def setUpClass():
warnings.simplefilter('ignore', ResourceWarning)

def base(self, pdbfile, ligandfile):
pdbfile = os.path.abspath(pdbfile)
ligandfile = os.path.abspath(ligandfile)
Expand Down Expand Up @@ -65,6 +67,10 @@ def test_md(self):
ligandfiles = ['../example/1ceb/1ceb_ligand.sdf']
self.pipeline_md(pdbfile, ligandfiles)

def test_protein_protein(self):
pdbfile = '../example/protein-protein/1vf6_REC.pdb'
ligandfiles = ['../example/protein-protein/1vf6_LIG.pdb']
self.pipeline_minima(pdbfile, ligandfiles)

if __name__ == "__main__":
unittest.main()
29 changes: 16 additions & 13 deletions unigbsa/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def traj_pipeline(complexfile, trajfile, topolfile, indexfile, pbsaParas=None, m
print('%6d %4s %18.4f '%(irow['Frames'], irow['mode'], irow['TOTAL']))
return delta_G


def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfile='BindingEnergy.csv', validate=False, verbose=False):
"""
This function takes a receptorfile and ligandfile, and build a complex.pdb and complex.top file
Expand All @@ -68,7 +69,7 @@ def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfi
receptorfile = os.path.abspath(receptorfile)
logging.info('Build protein topology.')
receptor = build_protein(receptorfile, forcefield=simParas['proteinforcefield'])

cwd = os.getcwd()
df = None
ligandnames = []
Expand All @@ -88,15 +89,16 @@ def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfi
topfile = 'complex.top'
logging.info('Build ligand topology: %s'%ligandName)
try:
build_topol(receptor, ligandfile, outpdb=grofile, outtop=topfile, ligandforce=simParas['ligandforcefield'], charge_method=simParas['ligandCharge'], nt=nt)
indexfile = build_topol(receptor, ligandfile, outpdb=grofile, outtop=topfile, ligandforce=simParas['ligandforcefield'], charge_method=simParas['ligandCharge'], nt=nt)
except Exception as e:
if len(ligandfiles)==1:
traceback.print_exc()
statu = 'F_top'
dl = d
logging.warning('Failed to generate forcefield for ligand: %s'%ligandName)

indexfile = generate_index_file(grofile)
if not os.path.exists(indexfile):
indexfile = generate_index_file(grofile)

if statu == 'S':
try:
Expand Down Expand Up @@ -136,9 +138,9 @@ def single(arg):
if len(ligandfiles) == 1:
logging.info('Build ligand topology: %s' % ligandName)
try:
build_topol(receptor, ligandfile, outpdb=grofile, outtop=topfile,
ligandforce=simParas['ligandforcefield'],
charge_method=simParas['ligandCharge'], nt=nt)
indexfile = build_topol(receptor, ligandfile, outpdb=grofile, outtop=topfile,
ligandforce=simParas['ligandforcefield'],
charge_method=simParas['ligandCharge'], nt=nt)
except Exception as e:
statu = 'F_top'
if len(ligandfiles) == 1:
Expand Down Expand Up @@ -169,7 +171,8 @@ def single(arg):
statu = 'F_md'
if statu == 'S':
try:
indexfile = generate_index_file(grofile)
if not os.path.exists(indexfile):
indexfile = generate_index_file(grofile)
d1 = traj_pipeline(grofile, trajfile=grofile, topolfile=topfile, indexfile=indexfile, pbsaParas=pbsaParas, mmpbsafile=mmpbsafile, verbose=verbose, nt=nt, input_pdb=receptorfile)
except:
if len(ligandfiles) == 1:
Expand Down Expand Up @@ -247,11 +250,10 @@ def md_pipeline(receptorfile, ligandfiles, paras, mmpbsafile=None, nt=1, outfile
topfile = 'complex.top'
xtcfile = 'traj_com.xtc'
logging.info('Build ligand topology: %s'%ligandName)
build_topol(receptor, ligandfile, outpdb=grofile, outtop=topfile, ligandforce=simParas['ligandforcefield'], charge_method=simParas['ligandCharge'], nt=nt)
indexfile = build_topol(receptor, ligandfile, outpdb=grofile, outtop=topfile, ligandforce=simParas['ligandforcefield'], charge_method=simParas['ligandCharge'], nt=nt)

logging.info('Running simulation: %s'%ligandName)
engine = GMXEngine()

mdgro, mdxtc, outtop = engine.run_to_md(grofile, topfile, boxtype=simParas['boxtype'], boxsize=simParas['boxsize'], conc=simParas['conc'], nsteps=simParas['nsteps'], nframe=simParas['nframe'], eqsteps=simParas['eqsteps'], nt=nt)

cmd = '%s editconf -f %s -o %s -resnr 1 >/dev/null 2>&1'%(GMXEXE, mdgro, grofile)
Expand All @@ -263,7 +265,8 @@ def md_pipeline(receptorfile, ligandfiles, paras, mmpbsafile=None, nt=1, outfile
shutil.copy(mdxtc, xtcfile)

#logging.info('Running GBSA: %s'%ligandName)
indexfile = generate_index_file(grofile)
if not os.path.exists(indexfile):
indexfile = generate_index_file(grofile)
if 'startframe' not in pbsaParas:
pbsaParas["startframe"] = 2
deltaG = traj_pipeline(grofile, trajfile=xtcfile, topolfile=topfile, indexfile=indexfile, pbsaParas=pbsaParas, mmpbsafile=mmpbsafile, nt=nt, verbose=verbose, input_pdb=receptorfile)
Expand All @@ -284,7 +287,7 @@ def md_pipeline(receptorfile, ligandfiles, paras, mmpbsafile=None, nt=1, outfile
def main(args=None):
parser = argparse.ArgumentParser(description='MM/GB(PB)SA Calculation. Version: %s'%__version__)
parser.add_argument('-i', dest='receptor', help='Input protein file in pdb format.', required=True)
parser.add_argument('-l', dest='ligand', help='Ligand files to calculate binding energy for.', nargs='+', default=None)
parser.add_argument('-l', dest='ligand', help='Ligand files to calculate binding energy. For small molecular, please use format of sdf or mol, for protein ligand, please use format of pdb.', nargs='+', default=None)
parser.add_argument('-c', dest='config', help='Config file, default: %s'%DEFAULT_CONFIGURE_FILE, default=DEFAULT_CONFIGURE_FILE)
parser.add_argument('-d', dest='ligdir', help='Directory containing many ligand files. file format: .mol or .sdf', default=None)
parser.add_argument('-f', dest='pbsafile', help='gmx_MMPBSA input file. default=None', default=None)
Expand All @@ -297,12 +300,12 @@ def main(args=None):

args = parser.parse_args(args)
receptor, ligands, conf, ligdir, outfile, decomposition, nt, verbose = args.receptor, args.ligand, args.config, args.ligdir, args.outfile, args.decomp, args.threads, args.verbose

if ligands is None:
ligands = []
if ligdir:
for fileName in os.listdir(ligdir):
if fileName.endswith(('mol','sdf')):
if fileName.endswith(('mol', 'sdf')):
ligands.append(os.path.join(ligdir, fileName))
if len(ligands)==0:
raise Exception('No ligand files found.')
Expand Down
7 changes: 3 additions & 4 deletions unigbsa/simulation/mdrun.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import shutil
from unigbsa.settings import GMXEXE, MDPFILESDIR, OMP_NUM_THREADS
from unigbsa.utils import generate_index_file, process_pbc
from unigbsa.utils import generate_index_file_from_topol, process_pbc
from unigbsa.simulation.utils import write_position_restrain

class BaseObject(object):
Expand Down Expand Up @@ -447,21 +447,20 @@ def run_to_md(self, pdbfile, topfile, rundir=None, boxtype='triclinic', boxsize=
cwd = os.getcwd()
os.chdir(rundir)
mdgro, mdxtc = self.gmx_md(nptpdb, topfile, mdpfile=os.path.join(MDPFILESDIR, 'md.mdp'), nsteps=nsteps, nframe=nframe, nt=nt)
indexfile = generate_index_file(mdgro, pbc=True)
indexfile = generate_index_file_from_topol(topfile)
mdxtcpbc = process_pbc(mdxtc, 'md.tpr', indexfile=indexfile, logfile=self.gmxlog)
mdgropbc = process_pbc(mdgro, 'md.tpr', indexfile=indexfile, logfile=self.gmxlog)
mdgro, mdxtc, topfile = os.path.abspath(mdgropbc), os.path.abspath(mdxtcpbc), os.path.abspath(topfile)
os.chdir(cwd)

return mdgro, mdxtc, topfile


def clean(self, pdbfile=None, rundir=None):
"""
If you pass a directory
to the function, it will delete the directory. If you don't pass a directory,
it will delete the file
Args:
pdbfile: the name of the PDB file to be cleaned
rundir: the directory where the simulation will be run.
Expand Down
24 changes: 15 additions & 9 deletions unigbsa/simulation/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from unigbsa.simulation.mdrun import GMXEngine
from unigbsa.simulation.utils import convert_format, guess_filetype, fix_insertions
from unigbsa.simulation.utils import assign_partial_charge, write_position_restrain
from unigbsa.simulation.utils import obtain_net_charge
from unigbsa.simulation.utils import obtain_net_charge, gen_index_for_gbsa

def build_lignad(ligandfile, forcefield="gaff2", charge_method="bcc", engine="acpype", verbose=False, outtop=None, outcoord=None, molname='MOL', itpfile=None, sqm_opt=True, nt=1):
"""
Expand All @@ -31,15 +31,15 @@ def build_lignad(ligandfile, forcefield="gaff2", charge_method="bcc", engine="ac
ligandfile = os.path.abspath(ligandfile)
ligandName = os.path.split(ligandfile)[-1][:-4]+'.TOP'
filetype = guess_filetype(ligandfile)
acceptFileTypes = ('pdb', 'mol2', 'mol')
acceptFileTypes = ('mol2', 'mol')

acpype_charge_methods = ['gas', 'bcc']
if charge_method not in acpype_charge_methods:
ligandfile = assign_partial_charge(ligandfile, filetype, charge_method=charge_method)
charge_method = 'user'
elif filetype != 'mol':
ligandfile = convert_format(ligandfile, filetype)
if not os.path.exists(ligandName):
if not os.path.exists(ligandName):
os.mkdir(ligandName)
charge = obtain_net_charge(ligandfile)
cwd = os.getcwd()
Expand Down Expand Up @@ -145,12 +145,12 @@ def build_protein_tleap(pdbfile, forcefield='', outtop=None, outcoor=None):
def build_protein(pdbfile, forcefield='amber99sb-ildn', outtop=None, outcoord=None):
"""
Build a protein topology and coordinate file from a PDB file
Args:
pdbfile: The name of the PDB file to be used as input.
forcefield: The forcefield to use. Options are 'amber03', 'amber94', 'amber96', 'amber99',
'amber99sb', 'amber99sb-ildn', 'amber99sb-star', 'amber14sb'. Defaults to amber99sb-ildn
Returns:
a tuple of two objects: a topology object and a Structure object.
"""
Expand All @@ -177,7 +177,7 @@ def build_protein(pdbfile, forcefield='amber99sb-ildn', outtop=None, outcoord=No
print(cmd)
os.system('tail -n 50 gromacs.log')
raise Exception('ERROR run gmx! see the log file for details %s'%os.path.abspath("gromacs.log"))

engine = GMXEngine()
boxpdb = engine.gmx_box('1-pdb2gmx.gro', boxtype='triclinic', boxsize=0.9)
#solpdb = engine.gmx_solvate(boxpdb, 'topol.top', maxsol=5)
Expand Down Expand Up @@ -214,9 +214,13 @@ def build_topol(receptor, ligand, outpdb, outtop, proteinforce='amber99sb-ildn',
prottop, protgro = build_protein(receptor, forcefield=proteinforce)
else:
prottop, protgro = receptor

indexfile = 'index.ndx'
if isinstance(ligand, str):
moltop, molgro = build_lignad(ligand, forcefield=ligandforce, charge_method=charge_method, nt=nt, verbose=verbose)
if ligand.endswith('.pdb'):
moltop, molgro = build_protein(ligand, forcefield=proteinforce)
gen_index_for_gbsa(prottop, moltop, indexfile)
else:
moltop, molgro = build_lignad(ligand, forcefield=ligandforce, charge_method=charge_method, nt=nt, verbose=verbose)
elif ligand:
moltop, molgro = ligand

Expand All @@ -225,6 +229,7 @@ def build_topol(receptor, ligand, outpdb, outtop, proteinforce='amber99sb-ildn',
else:
systop = moltop + prottop
sysgro = molgro + protgro

systop.write(outtop)
sysgro.write_pdb(outpdb)
newlines = []
Expand Down Expand Up @@ -260,8 +265,9 @@ def build_topol(receptor, ligand, outpdb, outtop, proteinforce='amber99sb-ildn',
for k,v in atomtypes.items():
if k not in mtypes:
fw.write(v+'\n')

write_position_restrain(outtop)
return indexfile

def main():
pdbfile, ligandfile = sys.argv[1], sys.argv[2]
Expand Down
23 changes: 23 additions & 0 deletions unigbsa/simulation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,3 +496,26 @@ def ligand_validate(sdfile, outfile=None):
return outfile
else:
return sdfile


def gen_index_for_gbsa(rec, lig, outfile='index.ndx'):
lig_atoms, rec_atoms = 0, 0
for key, (mol, _) in rec.molecules.items():
if key.startswith(('MOL', 'protein', 'system', 'Protein')):
rec_atoms += len(mol.atoms)
for key, (mol, _) in lig.molecules.items():
if key.startswith(('MOL', 'protein', 'system', 'Protein')):
lig_atoms += len(mol.atoms)

atom_dict = {
'System': (1, lig_atoms+rec_atoms+1),
'ligand': (1, lig_atoms+1),
'receptor': (lig_atoms+1, rec_atoms+lig_atoms+1)
}
with open(outfile, 'w') as fw:
for k, (s, e) in atom_dict.items():
fw.write(f'\n[ {k} ]\n')
for i, ndx in enumerate(range(s, e)):
fw.write(str(ndx) + ' ')
if i != 0 and i % 10 == 0:
fw.write('\n')
Loading

0 comments on commit 6543335

Please sign in to comment.