Skip to content

Commit

Permalink
remove duplicates if any from the optim process
Browse files Browse the repository at this point in the history
  • Loading branch information
tigranfah committed May 17, 2024
1 parent cff4db7 commit efd8106
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
5 changes: 2 additions & 3 deletions chemlactica/mol_opt/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def optimize(
current_optim_entries = []
with multiprocessing.Pool(processes=config["num_processes"]) as pol:
for i, entry in enumerate(pol.map(create_molecule_entry, output_texts)):
if entry:
if entry and not optim_entries[i].contains_entry(entry):
current_mol_entries.append(entry)
current_optim_entries.append(optim_entries[i])

Expand All @@ -130,7 +130,6 @@ def optimize(
tol_level = 0
if oracle.finish or len(iter_optim_entries) >= config["num_gens_per_iter"]:
break


if oracle.finish:
break
Expand All @@ -152,7 +151,7 @@ def optimize(
# round_entries = list(np.unique(round_entries))[::-1]
# top_k = int(len(all_entries) * config["rej_sample_config"]["rej_perc"])
# if top_k >= config["rej_sample_config"]["num_samples_per_round"]:
if num_iter % 3 == 0 and num_iter > initial_num_iter:
if num_iter % 5 == 0 and num_iter > initial_num_iter:
training_entries = pool.optim_entries
print(f"Num of train examples {len(training_entries)}.")
file.write("Training entries\n")
Expand Down
8 changes: 7 additions & 1 deletion chemlactica/mol_opt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,10 @@ def to_prompt(self, is_generation, config):
else:
prompt += f"[START_SMILES]{self.last_entry.smiles}[END_SMILES]"

return prompt
return prompt

def contains_entry(self, mol_entry: MoleculeEntry):
    """Return True if ``mol_entry`` compares equal to any entry in ``self.mol_entries``.

    Each stored entry is checked with ``mol_entry == entry``; the scan stops
    at the first match. Returns False when no stored entry matches or when
    ``self.mol_entries`` is empty.
    """
    return any(mol_entry == candidate for candidate in self.mol_entries)

0 comments on commit efd8106

Please sign in to comment.