
Commit

checkpoint
mavaylon1 committed May 5, 2024
1 parent 6bd72bc commit e9c76db
Showing 3 changed files with 28 additions and 27 deletions.
48 changes: 22 additions & 26 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -49,23 +49,6 @@ def can_read(path):
except IOError:
return False

-    @staticmethod
-    def resolve_data_shape(data, options):
-        """
-        This method is used to get the dimensions of the data in order to setup
-        the maxshape.
-        """
-        if isinstance(options['dtype'], np.dtype):
-            data_shape = (len(data),)
-        else:
-            data_shape = get_data_shape(data)
-
-        if data_shape is None:
-            msg = "Could not resolve the shape of the data."
-            raise ValueError(msg)
-        else:
-            return data_shape
-
@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
{'name': 'mode', 'type': str,
'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). '
@@ -381,7 +364,9 @@ def copy_file(self, **kwargs):
'default': True},
{'name': 'herd', 'type': 'hdmf.common.resources.HERD',
'doc': 'A HERD object to populate with references.',
-             'default': None})
+             'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'})
def write(self, **kwargs):
"""Write the container to an HDF5 file."""
if self.__mode == 'r':
@@ -821,10 +806,15 @@ def close_linked_files(self):
'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently',
'default': True},
{'name': 'export_source', 'type': str,
-             'doc': 'The source of the builders when exporting', 'default': None})
+             'doc': 'The source of the builders when exporting', 'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'})
def write_builder(self, **kwargs):
f_builder = popargs('builder', kwargs)
-        link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
+        link_data, exhaust_dci, export_source = getargs('link_data',
+                                                         'exhaust_dci',
+                                                         'export_source',
+                                                         kwargs)
self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
% (f_builder.name, self.source, kwargs))
for name, gbldr in f_builder.groups.items():
@@ -1095,14 +1085,16 @@ def write_link(self, **kwargs):
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'},
returns='the Dataset that was created', rtype=Dataset)
def write_dataset(self, **kwargs): # noqa: C901
""" Write a dataset to HDF5
The function uses other dataset-dependent write functions, e.g.,
``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data.
"""
-        parent, builder = popargs('parent', 'builder', kwargs)
+        parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs)
link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if self.get_written(builder):
@@ -1224,7 +1216,7 @@ def _filler():
return
# If the compound data type contains only regular data (i.e., no references) then we can write it as usual
else:
-                dset = self.__list_fill__(parent, name, data, options)
+                dset = self.__list_fill__(parent, name, data, expandable, options)
# Write a dataset containing references, i.e., a region or object reference.
# NOTE: we can ignore options['io_settings'] for scalar data
elif self.__is_ref(options['dtype']):
@@ -1319,7 +1311,7 @@ def _filler():
self.__dci_queue.append(dataset=dset, data=data)
# Write a regular in memory array (e.g., numpy array, list etc.)
elif hasattr(data, '__len__'):
-            dset = self.__list_fill__(parent, name, data, options)
+            dset = self.__list_fill__(parent, name, data, expandable, options)
# Write a regular scalar dataset
else:
dset = self.__scalar_fill__(parent, name, data, options)
@@ -1447,7 +1439,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
return dset

@classmethod
-    def __list_fill__(cls, parent, name, data, options=None):
+    def __list_fill__(cls, parent, name, data, expandable, options=None):
# define the io settings and data type if necessary
io_settings = {}
dtype = None
@@ -1469,8 +1461,12 @@ def __list_fill__(cls, parent, name, data, options=None):
data_shape = (len(data),)
else:
data_shape = get_data_shape(data)
-        if 'maxshape' in io_settings:
-            breakpoint()
+        if expandable:
+            if 'maxshape' not in io_settings:
+                io_settings['maxshape'] = tuple([None]*len(data_shape))
+            else:
+                # Don't override existing settings
+                pass
# Create the dataset
try:
dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings)
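The substantive change in `__list_fill__` is the `maxshape` default: when `expandable` is True and the caller has not already supplied a `maxshape` via `io_settings`, every dimension is set to `None`, so h5py creates a chunked, resizable dataset. A minimal sketch of that behavior using h5py directly (the file name, dataset name, and array are illustrative, not part of the commit):

```python
import h5py
import numpy as np

data = np.arange(30).reshape(5, 2, 3)
io_settings = {}
expandable = True

# Mirror of the new __list_fill__ logic: default to an unlimited maxshape
# unless the caller already set one via io_settings.
if expandable and 'maxshape' not in io_settings:
    io_settings['maxshape'] = tuple([None] * len(data.shape))

with h5py.File("example.h5", "w") as f:
    dset = f.create_dataset("test_dataset", shape=data.shape, dtype=data.dtype, **io_settings)
    dset[:] = data
    # Because maxshape is (None, None, None), h5py stores the dataset chunked
    # and it can be grown in place after the initial write.
    dset.resize((10, 2, 3))
```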
5 changes: 4 additions & 1 deletion src/hdmf/backends/io.py
@@ -77,7 +77,10 @@ def read(self, **kwargs):
@docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'},
{'name': 'herd', 'type': 'hdmf.common.resources.HERD',
'doc': 'A HERD object to populate with references.',
-             'default': None}, allow_extra=True)
+             'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'},
+            allow_extra=True)
def write(self, **kwargs):
container = popargs('container', kwargs)
herd = popargs('herd', kwargs)
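With the matching parameter added to the base `HDMFIO.write` docval, callers can pass `expandable` straight through the front-end `write` call. A hedged usage sketch, assuming the `hdmf.common` type map; the table, column, and file names are illustrative:

```python
from hdmf.backends.hdf5 import HDF5IO
from hdmf.common import DynamicTable, VectorData, get_manager

# Illustrative container; any Container built against the common type map would do.
table = DynamicTable(
    name="example_table",
    description="an illustrative table",
    columns=[VectorData(name="col1", description="a column", data=[1, 2, 3])],
)

# expandable=True (the new default) writes list/array datasets with
# maxshape=(None, ...) so they can be resized after writing;
# expandable=False keeps the previous fixed-shape behavior.
with HDF5IO("data.h5", manager=get_manager(), mode="w") as io:
    io.write(table, expandable=True)
```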
2 changes: 2 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
@@ -28,6 +28,7 @@
from hdmf.testing import TestCase, remove_test_file
from hdmf.common.resources import HERD
from hdmf.term_set import TermSet, TermSetWrapper
+from hdmf.utils import get_data_shape


from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
@@ -163,6 +164,7 @@ def test_write_dataset_list(self):
self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a.tolist(), attributes={}))
dset = self.f['test_dataset']
self.assertTrue(np.all(dset[:] == a))
+        self.assertEqual(get_data_shape(dset), (None, None, None))

def test_write_dataset_list_compress_gzip(self):
a = H5DataIO(np.arange(30).reshape(5, 2, 3),
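The new assertion works because `get_data_shape` reports an h5py dataset's `maxshape` when one is available, so a dataset written with the new default shows all-`None` dimensions. A small follow-on check, reusing the illustrative `example.h5` from the first sketch above:

```python
import h5py
from hdmf.utils import get_data_shape

with h5py.File("example.h5", "r") as f:
    dset = f["test_dataset"]
    # get_data_shape prefers maxshape for h5py datasets, so an expandable
    # dataset written with expandable=True reports (None, None, None).
    assert get_data_shape(dset) == (None, None, None)
```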
