Skip to content

Commit

Permalink
add check for time interval
Browse files Browse the repository at this point in the history
  • Loading branch information
kierandidi committed Mar 26, 2024
1 parent 6b1912c commit 878fa96
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
4 changes: 2 additions & 2 deletions proteinworkshop/config/dataset/pdb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ datamodule:

pdb_dataset:
_target_: "proteinworkshop.datasets.pdb_dataset.PDBData"
fraction: 1.0 # Fraction of dataset to use
fraction: 0.01 # Fraction of dataset to use
molecule_type: "protein" # Type of molecule for which to select
experiment_types: ["diffraction", "NMR", "EM", "other"] # All experiment types
max_length: 1000 # Exclude polypeptides greater than length 1000
max_length: 150 # Exclude polypeptides greater than length 150
min_length: 10 # Exclude peptides shorter than length 10
oligomeric_min: 1 # Lower bound on oligomeric state (1 = monomers are included)
oligomeric_max: 5 # Include up to 5-meric proteins
Expand Down
8 changes: 7 additions & 1 deletion proteinworkshop/datasets/pdb_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,13 @@ def __init__(
self.split_type = split_type
self.split_sequence_similarity = split_sequence_similiarity
self.overwrite_sequence_clusters = overwrite_sequence_clusters
self.split_time_frames = [np.datetime64(date) for date in split_time_frames]
if split_time_frames is None:
self.split_time_frames = split_time_frames
else:
try:
self.split_time_frames = [np.datetime64(date) for date in split_time_frames]
except:
raise TypeError(f"{split_time_frames} does not contain valid dates for np.datetime64 format")
self.splits = ["train", "val", "test"]

def create_dataset(self):
Expand Down

0 comments on commit 878fa96

Please sign in to comment.