Skip to content

Commit

Permalink
Patch cath (#25)
Browse files Browse the repository at this point in the history
* add missing attribute

* update version strings

* add overwrite attr to FoldClassification

* add test to check for overwrite

* fix operator typo

* fix config interpolation error

* fix formatting

* fix paths in test

* add overwrite to EC

* add missing overwrites

* ignore atom3d

* update changelog

---------

Co-authored-by: Arian Jamasb <[email protected]>
  • Loading branch information
a-r-j and Arian Jamasb authored Aug 31, 2023
1 parent 3092377 commit 932841b
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
### 0.2.3 (31/08/2023)

* Minor patch; adds the missing `overwrite` attribute to `CATHDataModule`, `FoldClassificationDataModule` and `GeneOntologyDataModule`, as well as to the datamodules in `ec_reaction.py` and `masif_site.py`. ([#25](https://github.com/a-r-j/ProteinWorkshop/pull/25))

### 0.2.2 (30/08/2023)

* Fixes raw data download triggered by absence of PDB when using pre-processed datasets ([#24](https://github.com/a-r-j/ProteinWorkshop/pull/24))
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = "Protein Workshop"
author = "Arian R. Jamasb"
release = "0.2.2"
release = "0.2.3"
copyright = f"{datetime.datetime.now().year}, {author}"

# -- General configuration ---------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions proteinworkshop/datasets/cath.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def __init__(
self.transform = None

self.in_memory = in_memory
self.overwrite = overwrite

self.batch_size = batch_size
self.pin_memory = pin_memory
Expand Down
1 change: 1 addition & 0 deletions proteinworkshop/datasets/ec_reaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
self.num_workers = num_workers
self.shuffle_labels = shuffle_labels
self.format = format
self.overwrite = overwrite

self.prepare_data_per_node = True
logger.info(
Expand Down
1 change: 1 addition & 0 deletions proteinworkshop/datasets/fold_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(
self.structure_dir = self.data_dir / "pdbstyle-1.75"

self.in_memory = in_memory
self.overwrite = overwrite

self.dataset_fraction = dataset_fraction
self.batch_size = batch_size
Expand Down
1 change: 1 addition & 0 deletions proteinworkshop/datasets/go.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
self.format = format

self.in_memory = in_memory
self.overwrite = overwrite

self.batch_size = batch_size
self.pin_memory = pin_memory
Expand Down
1 change: 1 addition & 0 deletions proteinworkshop/datasets/masif_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(
else:
self.transform = None

self.overwrite = overwrite
self.in_memory = in_memory
self.dataset_fraction = dataset_fraction
self.obsolete = obsolete
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "proteinworkshop"
version = "0.2.2"
version = "0.2.3"
description = ""
authors = ["Arian Jamasb <[email protected]>"]
readme = "README.md"
Expand Down
27 changes: 27 additions & 0 deletions tests/dataset/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,30 @@ def test_instantiate_datasets(tmp_path):

assert dataset, f"Dataset {t} not instantiated!"
assert isinstance(dataset, LightningDataModule)


def test_datasets_have_overwrite_attr(tmp_path):
    """Verify that instantiated datamodules carry an ``overwrite`` attribute.

    Every config file found in ``DATASET_CONFIG_DIR`` is loaded, any
    path-like options present are redirected to ``tmp_path``, any transform
    options present are set to ``None``, and the resulting datamodule is
    instantiated and checked. Two targets are skipped without being
    instantiated.
    """
    # Options that are pointed at the temporary directory when present.
    path_keys = ("data_dir", "path", "pdb_dir")
    # Options that are cleared when present.
    transform_keys = ("transforms", "transform")
    # Targets this test does not instantiate.
    skipped_targets = {
        "graphein.ml.datasets.foldcomp_dataset.FoldCompLightningDataModule",
        "proteinworkshop.datasets.atom3d_datamodule.ATOM3DDataModule",
    }

    for config_name in os.listdir(DATASET_CONFIG_DIR):
        cfg = omegaconf.OmegaConf.load(DATASET_CONFIG_DIR / config_name)

        for key in path_keys:
            if key in cfg.datamodule:
                setattr(cfg.datamodule, key, tmp_path)

        for key in transform_keys:
            if key in cfg.datamodule:
                setattr(cfg.datamodule, key, None)

        if cfg.datamodule._target_ not in skipped_targets:
            dm = instantiate(cfg.datamodule)
            assert hasattr(
                dm, "overwrite"
            ), f"Datamodules {dm} has no overwrite attribute"

0 comments on commit 932841b

Please sign in to comment.