Skip to content

Commit

Permalink
Allow for fast generation of datasets using all-to-all options (#71)
Browse files Browse the repository at this point in the history
  • Loading branch information
bieniekmateusz committed Jul 4, 2024
1 parent 1b4ba87 commit 6435c9e
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
13 changes: 10 additions & 3 deletions fegrow/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ def add_scaffold(self, template, atom_replacement_index=None):

self._scaffolds.append(template)

def add_rgroups(self, rgroups_linkers, rgroups2=None):
def add_rgroups(self, rgroups_linkers, rgroups2=None, alltoall=False):
"""
Note that if they are Smiles:
- if they have an * atom (e.g. RDKit atom.SetAtomicNum(0)), this will be used for attachment to the scaffold
Expand All @@ -828,11 +828,18 @@ def add_rgroups(self, rgroups_linkers, rgroups2=None):
delayed_build_molecule = dask.delayed(build_molecule)

jobs = [delayed_build_molecule(scaffold, linker) for linker in rgroups_linkers]
# if more rgroups were attached
if rgroups2 is not None:

# if linkers and rgroups are attached, add them in two iterations
if rgroups2 is not None and not alltoall:
# for each attached linker, attach an rgroup with the same position
jobs = [delayed_build_molecule(scaffold_linked, rgroup)
for rgroup, scaffold_linked in
itertools.zip_longest(rgroups2, jobs, fillvalue=jobs[0])]
elif rgroups2 is not None and alltoall:
jobs = [delayed_build_molecule(scaffold_linked, rgroup)
for rgroup, scaffold_linked in
itertools.product(rgroups2, jobs)]

results = self.dask_client.compute(jobs)
built_mols = [r.result() for r in results]

Expand Down
21 changes: 21 additions & 0 deletions fegrow/testing/test_chemspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,27 @@ def test_pipeline_2linkers_2rgroups(sars_scaffold_chunk_sdf):
assert df.loc[1].Mol.HasSubstructMatch(Chem.MolFromSmiles('OC'))


def test_pipeline_2linkers_2rgroups_alltoall():
    """
    With alltoall=True, two linkers and two R-groups grown on one
    scaffold are expected to give four molecules: every linker
    paired with every R-group.
    """
    space = ChemSpace()

    # linkers carry two labelled attachment points, R-groups a single one
    linkers = [Chem.MolFromSmiles(smiles)
               for smiles in ("[*:0]N=C[*:1]", "[*:0]S[*:1]")]
    rgroups = [Chem.MolFromSmiles(smiles)
               for smiles in ("*CO", "*Br")]

    space.add_scaffold(Chem.MolFromSmiles("F*"))
    space.add_rgroups(linkers, rgroups, alltoall=True)

    table = space.df
    expected_smiles = {'FN=CBr', 'FSBr', 'OCC=NF', 'OCSF'}
    assert len(table) == 4
    assert set(table.Smiles.to_list()) == expected_smiles


def test_pipeline_1linker_1rgroup_check_h_attachment(sars_scaffold_chunk_sdf):
"""
During multiple mergings, we want to make sure that
Expand Down

0 comments on commit 6435c9e

Please sign in to comment.