Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(plots): plot inferred time-clonal fate potency correlation #647

Draft
wants to merge 177 commits into
base: beta
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
177 commits
Select commit Hold shift + click to select a range
4fb202b
fix(plots): use vector field basis in shared time uncertainty plot
cameronraysmith Aug 23, 2024
81aef5d
fix(utils): add array-based type class aliases
cameronraysmith Aug 23, 2024
9292cdd
fix(utils): remove unused comments
cameronraysmith Aug 23, 2024
04b8e7d
feat(utils): include size and category information in data string rep…
cameronraysmith Aug 23, 2024
ee3ae34
feat(plots): draft lineage fate correlation plot
cameronraysmith Aug 24, 2024
8fc0d94
refactor(plots): rename vector field uncertainty plot function
cameronraysmith Aug 24, 2024
4843c4b
fix(plots): enable type-checking of plot lineage fate correlation
cameronraysmith Aug 24, 2024
601e0c8
fix(plots): export lineage fate correlation plot from plots package
cameronraysmith Aug 24, 2024
86e54f0
fix(io): serialize `np.bool_` to `bool`
cameronraysmith Aug 24, 2024
3d1316c
refactor(tasks): migrate data subset functionality to io package
cameronraysmith Aug 24, 2024
0adac20
refactor(tasks): move load data function to utils
cameronraysmith Aug 24, 2024
7ede4bc
feat(utils): add function to compute file hashes
cameronraysmith Aug 25, 2024
7f438b3
fix(plots): type-check get clone trajectory function
cameronraysmith Aug 25, 2024
34cfd15
fix(plots): use pandas regex str replacement in lineage fate correlation
cameronraysmith Aug 25, 2024
b7eadc4
refactor(plots): archive unused code from get clone trajectory
cameronraysmith Aug 25, 2024
7622639
feat(styles): add module for explicit specification of colors
cameronraysmith Aug 25, 2024
155a131
fix(plots): parameterize posterior time color map
cameronraysmith Aug 25, 2024
81c5edf
nit(plots): update comments
cameronraysmith Aug 25, 2024
f45f308
fix(plots): include time and angle uncertainty
cameronraysmith Aug 25, 2024
d07e961
feat(tasks): draft time fate correlation task
cameronraysmith Aug 25, 2024
ff63d31
fix(plots): disable transparency in lineage fate correlation
cameronraysmith Aug 25, 2024
7a55e69
feat(workflows): add demo flag
cameronraysmith Aug 26, 2024
f3cf7aa
fix(plots): make posterior time titles and colorbar optional
cameronraysmith Aug 26, 2024
7683f2e
fix(plots): reorder angle uncertainty of lineage fate correlation
cameronraysmith Aug 26, 2024
3e301b7
fix(tasks): map time fate correlation over all data set configurations
cameronraysmith Aug 26, 2024
4f5d517
fix(plots): allow scaled measures as well as vector fields
cameronraysmith Aug 26, 2024
febd9a8
fix(styles): enable unicode in latex
cameronraysmith Aug 26, 2024
e682a63
feat(styles): add function to set matplotlib style globally
cameronraysmith Aug 26, 2024
c83153e
fix(plots): update lineage fate correlation titles and font size
cameronraysmith Aug 26, 2024
9b8e3a9
fix(tasks): update time lineage fate correlation layout parameters
cameronraysmith Aug 26, 2024
5f3bcba
fix(bazel): add mplstyle as data
cameronraysmith Aug 27, 2024
74ec198
fix(summarize): use constant cell type colors
cameronraysmith Aug 27, 2024
03889ba
fix(plots): accept `List[Axes]` input type
cameronraysmith Aug 27, 2024
d8d277d
fix(plots): add legend data to cell type plot
cameronraysmith Aug 27, 2024
fde8e62
fix(tasks): add row labels and external legend
cameronraysmith Aug 27, 2024
e900db0
fix(plots): update vector field summary interface
cameronraysmith Aug 27, 2024
68a82d1
fix(tasks): update time lineage fate correlation legend spacing
cameronraysmith Aug 27, 2024
321dfca
fix(plots): reduce likelihood of mutation by copying mutated input
cameronraysmith Aug 27, 2024
0845fab
fix(plots): automatically increment axis indices in lineage fate corr…
cameronraysmith Aug 27, 2024
0aa0db1
fix(plots): parameterize transparency of posterior time
cameronraysmith Aug 27, 2024
6bfb6fb
fix(tasks): include column-level colorbars in gridspec
cameronraysmith Aug 27, 2024
93a1fae
deps(pyproject): add ipdb to test group
cameronraysmith Aug 27, 2024
81f557f
chore(bazel): sync lock
cameronraysmith Aug 27, 2024
978c2d0
fix(plots): remove redundant data input from lineage fate correlation
cameronraysmith Aug 28, 2024
f738c7b
fix(tasks): refactor time fate correlation and note redundancy with w…
cameronraysmith Aug 28, 2024
c32f530
fix(workflows): add posterior samples and postprocessed data to summa…
cameronraysmith Aug 28, 2024
e5a69c4
feat(workflows): add task to generate time lineage fate correlation a…
cameronraysmith Aug 28, 2024
c77d878
chore(workflows): bump summarize cache to `2024.8.15.1`
cameronraysmith Aug 28, 2024
5c8d858
feat(io): add function to generate and save separate and combined met…
cameronraysmith Aug 28, 2024
bd51032
fix(workflows): include png in time lineage fate correlation plot upl…
cameronraysmith Aug 28, 2024
c418960
fix(workflows): include individual metric tables in CombinedMetricsOu…
cameronraysmith Aug 28, 2024
e43baff
fix(workflows): use helper function to generate and save metrics tables
cameronraysmith Aug 28, 2024
62803f0
refactor(io): rename json module to metrics
cameronraysmith Aug 28, 2024
260af1f
fix(io): default to sans serif in table tex wrapper
cameronraysmith Aug 28, 2024
186ef1e
feat(plots): add functions to render graphical model sketches
cameronraysmith Aug 28, 2024
39f28af
fix(workflows): io.json -> io.metrics
cameronraysmith Aug 28, 2024
cd00e8a
feat(io): log dataset hash on load
cameronraysmith Aug 29, 2024
cd56435
fix(utils): log data hash on load
cameronraysmith Aug 29, 2024
6ef6954
refactor(utils): move hash function to io
cameronraysmith Aug 29, 2024
941653e
refactor(datasets): update hash file import
cameronraysmith Aug 29, 2024
5a0c660
fix(io): log data hash on pickle save/load
cameronraysmith Aug 29, 2024
cbac59e
refactor(utils): move hash file tests to separate module
cameronraysmith Aug 29, 2024
dc77b0d
fix(styles): remove unused mplstyle key
cameronraysmith Aug 29, 2024
5e57d40
fix(plots): filter invalid adjustText warning
cameronraysmith Aug 29, 2024
a9b559b
fix(tasks): set 4 columns for time fate correlation legend
cameronraysmith Aug 29, 2024
8986fc6
fix(plots): update vector field summary plot
cameronraysmith Aug 29, 2024
508ee80
fix(plots): filter invalid adjustText warning if logger is available
cameronraysmith Aug 29, 2024
9479789
test(plots): add modular layout function
cameronraysmith Aug 30, 2024
daf7716
fix(io): use dill to support pickling matplotlib objects
cameronraysmith Aug 30, 2024
061b795
test(scripts): add h5 scripts
cameronraysmith Aug 30, 2024
cb2bbbe
fix(deps): explicitly lower bound dill
cameronraysmith Aug 30, 2024
74254ce
feat(plots): serialize figure layout and its panels separately
cameronraysmith Aug 30, 2024
7db1074
fix(plots): show mean and CV histograms in shared time uncertainty
cameronraysmith Aug 30, 2024
46c28c5
fix(plots): use gridspec for shared time uncertainty plot
cameronraysmith Aug 31, 2024
9529dd7
fix(plots): default to CV in vector field plot
cameronraysmith Aug 31, 2024
b33d7b4
fix(io): change sparsification notice from warning to debug level
cameronraysmith Aug 31, 2024
2049cd8
feat(plots): set base for report
cameronraysmith Aug 31, 2024
2dc9414
fix(models): include std of predictive samples in posterior dict
cameronraysmith Sep 1, 2024
1972b79
fix(plots): allow setting axis-level label for colorbars
cameronraysmith Sep 1, 2024
c2efd4f
fix(analysis): remove unused list wrappers
cameronraysmith Sep 1, 2024
d5bcbe1
feat(plots): support SubplotSpec for plot_gene_ranking
cameronraysmith Sep 3, 2024
989c19b
feat(plots): support SubplotSpec for plot_parameter_posteriors
cameronraysmith Sep 3, 2024
3685fd8
feat(plots): support SubplotSpec for plot_vector_field_summary
cameronraysmith Sep 3, 2024
e0c9466
fix(plots): use pure text title for plot gene ranking
cameronraysmith Sep 4, 2024
daabaee
fix(plots): update major log tick labels for parameter posteriors
cameronraysmith Sep 4, 2024
13e3afd
fix(plots): force min-max colorbar tick labels for bounded variables
cameronraysmith Sep 4, 2024
f205304
feat(plots): support SubplotSpec for rainbowplot
cameronraysmith Sep 4, 2024
7b9f22a
refactor(plots): extract report subgrid plots
cameronraysmith Sep 4, 2024
03eea70
fix(plots): parametrize rainbow plot default fontsize
cameronraysmith Sep 5, 2024
c5a284d
fix(plots): organize reporting function for use in summarization task
cameronraysmith Sep 5, 2024
21543d5
fix(tasks): reorganize summarize in preparation for integration of re…
cameronraysmith Sep 5, 2024
8755cde
feat(workflows): add selected genes configuration to data sets
cameronraysmith Sep 6, 2024
8f85769
fix(plots): move reporting function imports to docstring
cameronraysmith Sep 6, 2024
27d1c2c
refactor(plots): import reporting functions through plots package
cameronraysmith Sep 6, 2024
bfb4599
fix(tasks): import reporting functions in summarize task
cameronraysmith Sep 6, 2024
2b14a7e
refactor(plots): rename selected genes variable
cameronraysmith Sep 6, 2024
4478989
refactor(plots): rename selected genes variable in report
cameronraysmith Sep 6, 2024
8a93aa6
fix(plots): disable text axes in self-generated figure
cameronraysmith Sep 6, 2024
b467baf
nit(plots): reorder report arguments and produce png output
cameronraysmith Sep 6, 2024
2808a98
fix(tasks): use report instead of plot gene summary
cameronraysmith Sep 6, 2024
bbafdfd
fix(workflows): use correct WorkflowConfiguration field name
cameronraysmith Sep 6, 2024
46d4df1
fix(workflows): use SummarizeConfiguration to access selected genes
cameronraysmith Sep 6, 2024
b602d2b
chore(workflows): bump postprocess cache `2024.8.15.1`
cameronraysmith Sep 6, 2024
40b57d2
fix(plots): unpack width ratios in rainbow plot
cameronraysmith Sep 6, 2024
5aab2a5
fix(plots): parameterize state color dictionary
cameronraysmith Sep 6, 2024
e5b9719
fix(tasks): use parameterized state color dictionary
cameronraysmith Sep 6, 2024
9ef2a76
fix(workflows): reset configurations
cameronraysmith Sep 6, 2024
0217660
fix(models): retain gene ranking data for all genes
cameronraysmith Sep 6, 2024
0267bc4
chore(workflows): bump postprocess cache `2024.8.15.2`
cameronraysmith Sep 6, 2024
4b5a75b
feat(io): check and log error for mismatching data download hashes
cameronraysmith Sep 6, 2024
b0d0e76
fix(analysis): add max genes per bin argument to top_mae_genes
cameronraysmith Sep 6, 2024
1952b43
fix(tasks): set top mae genes max genes per bin to 5
cameronraysmith Sep 6, 2024
a77f19e
fix(workflows): upload figure object files
cameronraysmith Sep 6, 2024
917ee7f
fix(plots): use black box plots
cameronraysmith Sep 6, 2024
890244b
fix(analysis): parameterize top mae genes gene filter
cameronraysmith Sep 6, 2024
4cd2ac7
fix(plots): retain order of selected genes in report
cameronraysmith Sep 6, 2024
73352bf
fix(tasks): update vector field summary parameters
cameronraysmith Sep 6, 2024
8653653
feat(io): check data download hashes before and after download prepro…
cameronraysmith Sep 6, 2024
dfe987f
fix(plots): default parameter posteriors to box plot
cameronraysmith Sep 7, 2024
320cb9a
fix(plots): disable default box plot
cameronraysmith Sep 7, 2024
63115c9
feat(utils): add function to compute the quartile coefficient of disp…
cameronraysmith Sep 7, 2024
a90e182
fix(plots): use quartile coefficient of dispersion to quantify tempor…
cameronraysmith Sep 7, 2024
02457d7
fix(report): update docstring
cameronraysmith Sep 7, 2024
388cdb5
fix(plots): update report dpi
cameronraysmith Sep 7, 2024
e875f2e
fix(plots): set vector field uncertainty default plot type to hexbin
cameronraysmith Sep 7, 2024
6fcb630
fix(plots): set posterior time default plot type to hexbin
cameronraysmith Sep 7, 2024
9f6e601
fix(tasks): set min mae percentile from candidate dataframe length
cameronraysmith Sep 7, 2024
0913328
fix(plots): increase box plot width
cameronraysmith Sep 7, 2024
1e2b244
fix(plots): add parameter to truncate high error genes by percentile
cameronraysmith Sep 7, 2024
b5a99ac
fix(plots): set rainbow default plot type to hexbin
cameronraysmith Sep 7, 2024
5d1118f
chore(workflows): bump summarize cache to `2024.8.15.3`
cameronraysmith Sep 9, 2024
ca31986
nit(models): default to kwargs
cameronraysmith Sep 10, 2024
7844204
test(models): exercise guide_type and add_offset parameters of the Ve…
cameronraysmith Sep 12, 2024
50d4970
test(models): remove unused model parameters
cameronraysmith Sep 13, 2024
b18e120
refactor(models): remove unused parameters from velocity_model
cameronraysmith Sep 13, 2024
439d33f
Revert "refactor(models): remove unused parameters from velocity_model"
cameronraysmith Sep 13, 2024
1bb5b24
Revert "test(models): remove unused model parameters"
cameronraysmith Sep 13, 2024
4684551
fix(models): mark apparently unused sample sites
cameronraysmith Sep 13, 2024
9bfa149
fix(plots): include median in parameter plots
cameronraysmith Sep 13, 2024
e472755
refactor(plots): move setup_colors to utils
cameronraysmith Sep 13, 2024
fd6c652
fix(plots): use QCD instead of CV in shared time histogram
cameronraysmith Sep 13, 2024
76ffc3f
fix(plots): synchronize vector field color map with rainbowplots
cameronraysmith Sep 13, 2024
1d71472
fix(plots): pass boxplot flag to parameter posteriors
cameronraysmith Sep 13, 2024
e6b5343
feat(plots): use mae percentile in gene ranking plot
cameronraysmith Sep 14, 2024
8af8450
fix(analysis): update compute volcano data interface types
cameronraysmith Sep 14, 2024
24c3f27
fix(utils): include type in dictionary log
cameronraysmith Sep 15, 2024
c0a0d27
fix(plots): make mae scale default for gene ranking
cameronraysmith Sep 16, 2024
44cd705
fix(plots): remove unused comment from posterior predictive extrapola…
cameronraysmith Sep 16, 2024
8174ff9
fix(plots): remove additional unused comments from posterior predicti…
cameronraysmith Sep 16, 2024
0ac2cdf
fix(analysis): update mae_per_gene function and add docstring
cameronraysmith Sep 17, 2024
d62db2f
fix(analysis): remove unused comments from compute_volcano_data
cameronraysmith Sep 17, 2024
c2ede44
fix(plots): optionally force recomputation of volcano data
cameronraysmith Sep 17, 2024
f31d116
fix(plots): parameterize inclusion of complete angular scale
cameronraysmith Sep 17, 2024
46ad2b5
fix(analysis): add shadow support for jax arrays
cameronraysmith Sep 17, 2024
40a0125
test(analysis): add basic tests for mae_per_gene
cameronraysmith Sep 17, 2024
3986cd4
feat(tests): add function to generate fixture data
cameronraysmith Sep 17, 2024
a7ec817
feat(tests): test ability to load and log data string diff with seria…
cameronraysmith Sep 17, 2024
6d376d9
fix(tests): add serialization test for uns dict with nested numpy array
cameronraysmith Sep 17, 2024
0c71a68
feat(tests): add preprocessed pancreas 50x13 fixture data
cameronraysmith Sep 17, 2024
7910665
fix(io): add debug logging of attempted serialization dictionary to s…
cameronraysmith Sep 17, 2024
d3227ac
fix(tasks): add soft support for selected genes to preprocessing
cameronraysmith Sep 18, 2024
ea08a4a
fix(io): restore types when deserializing json
cameronraysmith Sep 18, 2024
351bc17
fix(tests): note handling of selected genes in generate fixture data
cameronraysmith Sep 18, 2024
d520dcc
fix(tests): add unprocessed data fixture
cameronraysmith Sep 18, 2024
24e8d28
fix(tests): add processed data fixture
cameronraysmith Sep 18, 2024
6ec2b0c
wip(tests): sketch smoke test execution of VelocityModule
cameronraysmith Sep 18, 2024
a096c8c
fix(tests): rename small deserialized data fixture in plots
cameronraysmith Sep 18, 2024
8752319
fix(tests): load preprocessed deserialized data fixture globally
cameronraysmith Sep 18, 2024
be70f2c
fix(interfaces): add default factory for preprocess selected_genes em…
cameronraysmith Sep 18, 2024
0b63cc6
feat(io): optionally check file hashes when (de)serializing json files
cameronraysmith Sep 18, 2024
f03454d
test(io): update serialization tests with expectation specs
cameronraysmith Sep 18, 2024
4198735
fix(tests): update fixture name for preprocessed_3_4 in plot histograms
cameronraysmith Sep 18, 2024
9163a92
fix(tests): exercise hash check in data serialization
cameronraysmith Sep 18, 2024
793a72e
feat(tests): add function to generate postprocessed fixture data
cameronraysmith Sep 18, 2024
f96a775
test(train): add unit test for train task on static fixture data
cameronraysmith Sep 18, 2024
847bbc9
tests(data): add trained pancreas 50 by 7 fixture
cameronraysmith Sep 18, 2024
5baa10a
tests(data): add postprocessed pancreas 50 by 7 fixture
cameronraysmith Sep 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 16 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ click = ">=8.1.7"
colorlog = ">=6.7.0"
daft = ">=0.1.2"
diffrax = ">=0.5.0"
dill = ">=0.3.8"
diskcache = ">=5.6.1"
duckdb = ">=1.0.0"
einops = ">=0.7.0"
Expand All @@ -60,6 +61,7 @@ fsspec = ">=2024.3.0"
greenlet = ">=3.0.3"
httpx = ">=0.27.0"
h5py = ">=3.9.0"
# hdf5plugin = ">=4.4.0"
ibis-framework = { extras = ["duckdb"], version = ">=9.2.0" }
jax = ">=0.4.23"
jaxlib = ">=0.4.23"
Expand Down Expand Up @@ -136,6 +138,7 @@ google-api-python-client = { version = ">=2.79.0", optional = true }
hydra-core = { version = ">=1.3.2", optional = true }
hydra-zen = { version = ">=0.12.1", optional = true }
hypothesis = { version = ">=6.71.0", optional = true }
ipdb = { version = ">=0.13.13", optional = true }
ipython = { version = ">=8.11.0", optional = true }
ipywidgets = { version = ">=8.0.0", optional = true }
# isort = { version = ">=5.10.1", optional = true }
Expand Down Expand Up @@ -218,6 +221,7 @@ optional = true
[tool.poetry.group.test.dependencies]
coverage = { version = ">=6.2", extras = ["toml"] }
hypothesis = ">=6.72.1"
ipdb = ">=0.13.13"
ipython = ">=8.11.0"
# poethepoet = ">=0.19.0"
pygments = ">=2.15.0"
Expand Down Expand Up @@ -423,6 +427,9 @@ log_level = "INFO"
# We exclude markers associated with slow tests by default
# but run them in CI with the make target `test-cov-xml`
# which overrides this behavior.
# Add these options to enable the ipdb debugger:
# --pdb
# --pdbcls=IPython.terminal.debugger:Pdb
addopts = """
-m "not slow and not pyensembl"
-rA
Expand Down
202 changes: 202 additions & 0 deletions scripts/h5/h5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import weakref
from pathlib import Path

import h5py
import hdf5plugin
import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Any, Dict, Tuple

from pyrovelocity.io.hash import hash_file
from pyrovelocity.logging import configure_logging

# Names exported by `from ... import *`: the HDF5 writer and the lazy reader.
__all__ = ["save_to_h5", "load_from_h5"]

# Module-level logger created through the project's logging helper.
logger = configure_logging(__name__)


@beartype
def save_to_h5(
    data: Dict[str, Any],
    filename: str | Path,
) -> Tuple[Path, str]:
    """
    Write a dictionary of arrays, data frames and lists to an HDF5 file.

    Supported value types:

    - ``np.ndarray``: stored as a dataset named after its key.
    - ``pd.DataFrame``: stored as a group named after its key, with one
      dataset per column. The column labels and the index are stored in the
      group attributes ``"columns"`` and ``"index"``.
    - ``list``: stored as a variable-length string dataset.

    Values of any other type are skipped with a warning. All datasets are
    written with the same Blosc2 (zstd, level 3, byte shuffle) filter.

    Args:
        data: Mapping from dataset/group name to the value to store.
        filename: Destination path. An existing file is overwritten.

    Returns:
        The destination as a ``Path`` and the file hash returned by
        ``_log_hash``.
    """
    # A single filter specification shared by every dataset written below.
    compression = hdf5plugin.Blosc2(
        cname="zstd",
        clevel=3,
        filters=hdf5plugin.Blosc2.SHUFFLE,
    )

    with h5py.File(filename, "w") as f:
        for key, value in data.items():
            if isinstance(value, np.ndarray):
                f.create_dataset(key, data=value, **compression)
            elif isinstance(value, pd.DataFrame):
                group = f.create_group(key)
                # HDF5 object names must be strings. Stringify the labels
                # once and use the same names for the datasets and for the
                # "columns" attribute, so non-string labels (e.g. integers)
                # can be written and found again on load.
                # NOTE(review): labels containing "/" would create nested
                # groups; not handled here.
                column_names = [str(label) for label in value.columns]
                for column_name, label in zip(column_names, value.columns):
                    group.create_dataset(
                        column_name,
                        data=value[label].values,
                        **compression,
                    )
                group.attrs["columns"] = column_names
                # NOTE(review): the whole index is stored as an attribute;
                # very long indices may exceed HDF5 attribute size limits.
                group.attrs["index"] = value.index.tolist()
            elif isinstance(value, list):
                f.create_dataset(
                    key,
                    data=np.array(value, dtype=h5py.special_dtype(vlen=str)),
                    **compression,
                )
            else:
                logger.warning(
                    f"Skipping {key}: unsupported type {type(value)}"
                )
    # Hash only after the file has been closed so all data is on disk.
    file_hash = _log_hash(filename, mode="saved")
    return Path(filename), file_hash


class LazyArray:
    """
    Thin wrapper around an array-like dataset that reads data on indexing.

    Indexing returns the selected data as a ``np.ndarray``. ``shape`` and
    ``dtype`` are forwarded from the wrapped dataset, and any other public
    attribute is looked up on the wrapped dataset.
    """

    def __init__(self, dataset):
        # The wrapped object; only needs to support indexing and expose
        # `shape` / `dtype` (an h5py dataset when created by H5Accessor).
        self.dataset = dataset

    def __getitem__(self, key):
        """Read the selection ``key`` and return it as a NumPy array."""
        return np.array(self.dataset[key])

    @property
    def shape(self):
        """Shape reported by the wrapped dataset."""
        return self.dataset.shape

    @property
    def dtype(self):
        """Dtype reported by the wrapped dataset."""
        return self.dataset.dtype

    def __getattr__(self, name):
        """
        Forward unknown public attributes to the wrapped dataset.

        Raises:
            AttributeError: For private/dunder names, and for ``dataset``
                itself when it has not been set yet.
        """
        # `__getattr__` only runs after normal lookup failed. If the missing
        # name is `dataset`, the instance is not initialized; forwarding
        # would re-enter this method forever, so fail with AttributeError.
        if name.startswith("_") or name == "dataset":
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{name}'"
            )
        return getattr(self.dataset, name)


class LazyDataFrame:
    """
    Lazy, read-only view of a data frame stored as a group of column datasets.

    The group is expected to carry the attributes ``"columns"`` (column
    names, each also the name of a dataset in the group) and ``"index"``
    (row labels). Column data is only read when it is requested.
    """

    def __init__(self, group):
        self.group = group
        self.columns = list(self.group.attrs["columns"])
        self.index = list(self.group.attrs["index"])

    def _to_pandas(self):
        """Read every column and return the complete ``pd.DataFrame``."""
        # Passing `index=` (instead of assigning it afterwards) also works
        # when there are no columns at all.
        return pd.DataFrame(
            {col: self.group[col][()] for col in self.columns},
            index=self.index,
        )

    def __getitem__(self, key):
        """
        Select data from the frame.

        A string key returns that column as a ``LazyArray`` without reading
        it. Any other key is applied to the fully materialized
        ``pd.DataFrame``.
        """
        if isinstance(key, str):
            return LazyArray(self.group[key])
        return self._to_pandas()[key]

    @property
    def shape(self):
        """``(n_rows, n_columns)`` derived from the stored index and columns."""
        return (len(self.index), len(self.columns))

    def head(self, n=5):
        """Return the first ``n`` rows as a ``pd.DataFrame``."""
        return pd.DataFrame(
            {col: self.group[col][:n] for col in self.columns},
            index=self.index[:n],
        )

    def __getattr__(self, name):
        """
        Resolve attributes not found on the wrapper itself.

        Column names return a ``LazyArray``. Any other name that exists on
        ``pd.DataFrame`` is resolved on the materialized frame, so methods
        come back bound to real data. This reads every column.

        Raises:
            AttributeError: For private/dunder names, for the wrapper's own
                attributes when they are not set yet, and for names that
                ``pd.DataFrame`` does not define.
        """
        # The own-attribute guard stops unbounded recursion when `columns`
        # (read below) is missing on a partially initialized instance.
        if name.startswith("_") or name in ("group", "columns", "index"):
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{name}'"
            )
        if name in self.columns:
            return LazyArray(self.group[name])
        # Check the class first so unknown names fail without touching disk.
        if not hasattr(pd.DataFrame, name):
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{name}'"
            )
        # Delegate to an actual frame: `getattr(pd.DataFrame, name)` would
        # return unbound functions / property objects that cannot be used.
        return getattr(self._to_pandas(), name)


class H5Accessor:
    """
    Attribute-style, read-only access to the top-level items of an HDF5 file.

    ``accessor.<name>`` returns:

    - a list of ``str`` (or a single ``str`` for a scalar) for string
      datasets,
    - a ``LazyArray`` for any other dataset,
    - a ``LazyDataFrame`` for groups that carry a ``"columns"`` attribute,
    - a ``dict`` of recursively resolved members for any other group.

    Lookups are best-effort: if the name is missing or reading fails, the
    error is printed and ``None`` is returned.
    NOTE(review): as a consequence ``hasattr(accessor, name)`` is true for
    every public name.

    The file is opened on construction, reopened on demand after ``close()``,
    and closed when the accessor is garbage-collected. The accessor can also
    be used as a context manager.
    """

    def __init__(self, filename):
        self._filename = filename
        self._file = None
        self._finalizer = None
        self._open_file()

    @staticmethod
    def _release(handle):
        """Close ``handle`` if it is an open file; otherwise do nothing."""
        if handle is not None and handle.id:
            handle.close()

    def _open_file(self):
        """Open the file read-only unless a valid handle already exists."""
        if self._file is not None and self._file.id:
            return
        if self._finalizer is not None:
            # The previous handle is stale; drop its finalizer.
            self._finalizer.detach()
        self._file = h5py.File(self._filename, "r")
        # The finalizer callback must not reference `self` (a bound method
        # would keep the accessor alive forever), so it gets a static
        # function plus the file handle it has to close.
        self._finalizer = weakref.finalize(
            self, H5Accessor._release, self._file
        )

    def _close_file(self):
        """Close the current handle and cancel its pending finalizer."""
        if self._finalizer is not None:
            self._finalizer.detach()
            self._finalizer = None
        self._release(self._file)

    def _wrap(self, name, item):
        """Convert the HDF5 object ``item`` stored at ``name`` for the caller."""
        if isinstance(item, h5py.Dataset):
            # `check_string_dtype` recognizes fixed-length and
            # variable-length strings. The latter have dtype kind "O", so
            # testing for kind "S" alone misses lists written by save_to_h5.
            if h5py.check_string_dtype(item.dtype) is not None:
                values = item.asstr()[()]
                if isinstance(values, np.ndarray):
                    return values.tolist()
                return values
            return LazyArray(item)
        if isinstance(item, h5py.Group):
            if "columns" in item.attrs:
                return LazyDataFrame(item)
            # Plain group: resolve each member through the same best-effort
            # lookup, using its path relative to the file root.
            return {
                key: self.__getattr__(f"{name}/{key}") for key in item.keys()
            }
        return item

    def __getattr__(self, name):
        """
        Look up ``name`` in the file and return a wrapped view of it.

        Returns:
            The wrapped item, or ``None`` if the name does not exist or
            reading failed. The error is printed in that case.

        Raises:
            AttributeError: For private/dunder names, which are never
                looked up in the file.
        """
        if name.startswith("_"):
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{name}'"
            )

        try:
            self._open_file()
            if name not in self._file:
                raise AttributeError(f"No such attribute: {name}")
            return self._wrap(name, self._file[name])
        except Exception as e:
            # Deliberate best-effort: report and hand back None.
            print(f"Error accessing {name}: {str(e)}")
            return None

    def __dir__(self):
        """List the top-level keys of the file (empty list on error)."""
        try:
            self._open_file()
            return list(self._file.keys())
        except Exception as e:
            print(f"Error listing attributes: {str(e)}")
            return []

    def close(self):
        """Close the underlying file; a later lookup reopens it."""
        self._close_file()

    def __enter__(self):
        """Return the accessor itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block."""
        self.close()

    def __repr__(self):
        return f"H5Accessor(filename='{self._filename}')"


@beartype
def load_from_h5(filename: str | Path) -> H5Accessor:
    """
    Open an HDF5 file for lazy, attribute-style reading and log its hash.

    Args:
        filename: Path of the HDF5 file to open.

    Returns:
        An ``H5Accessor`` bound to ``filename``. Call ``close()`` on it when
        it is no longer needed.
    """
    accessor = H5Accessor(filename)
    try:
        _log_hash(filename, mode="loaded")
    except Exception:
        # The file is already open here; do not leave the handle dangling
        # when hashing fails.
        accessor.close()
        raise
    return accessor


@beartype
def _log_hash(file_path: str | Path, mode: str = "loaded or saved") -> str:
    """
    Hash a file, log the result at INFO level and return the hash.

    Args:
        file_path: Path of the file passed to ``hash_file``.
        mode: Verb inserted into the log message after "Successfully".

    Returns:
        The value returned by ``hash_file`` (labelled as a SHA-256 hash in
        the log message).
    """
    digest = hash_file(file_path=file_path)
    message = (
        f"\nSuccessfully {mode} file: {file_path}\n"
        f"SHA-256 hash: {digest}\n"
    )
    logger.info(message)
    return digest
Loading