
Commit

checkpoint
mavaylon1 committed May 5, 2024
1 parent 6bd72bc commit e9c76db
Showing 3 changed files with 28 additions and 27 deletions.
48 changes: 22 additions & 26 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -49,23 +49,6 @@ def can_read(path):
except IOError:
return False

-    @staticmethod
-    def resolve_data_shape(data, options):
-        """
-        This method is used to get the dimensions of the data in order to setup
-        the maxshape.
-        """
-        if isinstance(options['dtype'], np.dtype):
-            data_shape = (len(data),)
-        else:
-            data_shape = get_data_shape(data)
-
-        if data_shape is None:
-            msg = "Could not resolve the shape of the data."
-            raise ValueError(msg)
-        else:
-            return data_shape
-
@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
{'name': 'mode', 'type': str,
'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). '
@@ -381,7 +364,9 @@ def copy_file(self, **kwargs):
'default': True},
{'name': 'herd', 'type': 'hdmf.common.resources.HERD',
'doc': 'A HERD object to populate with references.',
-             'default': None})
+             'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'})
def write(self, **kwargs):
"""Write the container to an HDF5 file."""
if self.__mode == 'r':
@@ -821,10 +806,15 @@ def close_linked_files(self):
'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently',
'default': True},
{'name': 'export_source', 'type': str,
-             'doc': 'The source of the builders when exporting', 'default': None})
+             'doc': 'The source of the builders when exporting', 'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'})
def write_builder(self, **kwargs):
f_builder = popargs('builder', kwargs)
-        link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
+        link_data, exhaust_dci, export_source = getargs('link_data',
+                                                         'exhaust_dci',
+                                                         'export_source',
+                                                         kwargs)
self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
% (f_builder.name, self.source, kwargs))
for name, gbldr in f_builder.groups.items():
@@ -1095,14 +1085,16 @@ def write_link(self, **kwargs):
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'},
returns='the Dataset that was created', rtype=Dataset)
def write_dataset(self, **kwargs): # noqa: C901
""" Write a dataset to HDF5
The function uses other dataset-dependent write functions, e.g.,
``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data.
"""
-        parent, builder = popargs('parent', 'builder', kwargs)
+        parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs)
link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if self.get_written(builder):
@@ -1224,7 +1216,7 @@ def _filler():
return
# If the compound data type contains only regular data (i.e., no references) then we can write it as usual
else:
-                dset = self.__list_fill__(parent, name, data, options)
+                dset = self.__list_fill__(parent, name, data, expandable, options)
# Write a dataset containing references, i.e., a region or object reference.
# NOTE: we can ignore options['io_settings'] for scalar data
elif self.__is_ref(options['dtype']):
@@ -1319,7 +1311,7 @@ def _filler():
self.__dci_queue.append(dataset=dset, data=data)
# Write a regular in memory array (e.g., numpy array, list etc.)
elif hasattr(data, '__len__'):
-            dset = self.__list_fill__(parent, name, data, options)
+            dset = self.__list_fill__(parent, name, data, expandable, options)
# Write a regular scalar dataset
else:
dset = self.__scalar_fill__(parent, name, data, options)
@@ -1447,7 +1439,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
return dset

@classmethod
-    def __list_fill__(cls, parent, name, data, options=None):
+    def __list_fill__(cls, parent, name, data, expandable, options=None):
# define the io settings and data type if necessary
io_settings = {}
dtype = None
@@ -1469,8 +1461,12 @@ def __list_fill__(cls, parent, name, data, options=None):
data_shape = (len(data),)
else:
data_shape = get_data_shape(data)
-        if 'maxshape' in io_settings:
-            breakpoint()
+        if expandable:
+            if 'maxshape' not in io_settings:
+                io_settings['maxshape'] = tuple([None]*len(data_shape))
+            else:
+                # Don't override existing settings
+                pass
# Create the dataset
try:
dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings)
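The substantive change in `__list_fill__` is the `maxshape` default: when `expandable` is True and the caller has not already supplied a `maxshape` via `io_settings`, every dimension is set to `None`, so h5py creates a chunked, resizable dataset. A minimal sketch of that behavior using h5py directly (the file name, dataset name, and array are illustrative, not part of the commit):

```python
import h5py
import numpy as np

data = np.arange(30).reshape(5, 2, 3)
io_settings = {}
expandable = True

# Mirror of the new __list_fill__ logic: default to an unlimited maxshape
# unless the caller already set one via io_settings.
if expandable and 'maxshape' not in io_settings:
    io_settings['maxshape'] = tuple([None] * len(data.shape))

with h5py.File("example.h5", "w") as f:
    dset = f.create_dataset("test_dataset", shape=data.shape, dtype=data.dtype, **io_settings)
    dset[:] = data
    # Because maxshape is (None, None, None), h5py stores the dataset chunked
    # and it can be grown in place after the initial write.
    dset.resize((10, 2, 3))
```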
5 changes: 4 additions & 1 deletion src/hdmf/backends/io.py
@@ -77,7 +77,10 @@ def read(self, **kwargs):
@docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'},
{'name': 'herd', 'type': 'hdmf.common.resources.HERD',
'doc': 'A HERD object to populate with references.',
-             'default': None}, allow_extra=True)
+             'default': None},
+            {'name': 'expandable', 'type': bool, 'default': True,
+             'doc': 'Bool to set whether datasets are expandable through chunking by default.'},
+            allow_extra=True)
def write(self, **kwargs):
container = popargs('container', kwargs)
herd = popargs('herd', kwargs)
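With the matching parameter added to the base `HDMFIO.write` docval, callers can pass `expandable` straight through the front-end `write` call. A hedged usage sketch, assuming the `hdmf.common` type map; the table, column, and file names are illustrative:

```python
from hdmf.backends.hdf5 import HDF5IO
from hdmf.common import DynamicTable, VectorData, get_manager

# Illustrative container; any Container built against the common type map would do.
table = DynamicTable(
    name="example_table",
    description="an illustrative table",
    columns=[VectorData(name="col1", description="a column", data=[1, 2, 3])],
)

# expandable=True (the new default) writes list/array datasets with
# maxshape=(None, ...) so they can be resized after writing;
# expandable=False keeps the previous fixed-shape behavior.
with HDF5IO("data.h5", manager=get_manager(), mode="w") as io:
    io.write(table, expandable=True)
```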
2 changes: 2 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
@@ -28,6 +28,7 @@
from hdmf.testing import TestCase, remove_test_file
from hdmf.common.resources import HERD
from hdmf.term_set import TermSet, TermSetWrapper
+from hdmf.utils import get_data_shape


from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
@@ -163,6 +164,7 @@ def test_write_dataset_list(self):
self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a.tolist(), attributes={}))
dset = self.f['test_dataset']
self.assertTrue(np.all(dset[:] == a))
+        self.assertEqual(get_data_shape(dset), (None, None, None))

def test_write_dataset_list_compress_gzip(self):
a = H5DataIO(np.arange(30).reshape(5, 2, 3),
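The new assertion works because `get_data_shape` reports an h5py dataset's `maxshape` when one is available, so a dataset written with the new default shows all-`None` dimensions. A small follow-on check, reusing the illustrative `example.h5` from the first sketch above:

```python
import h5py
from hdmf.utils import get_data_shape

with h5py.File("example.h5", "r") as f:
    dset = f["test_dataset"]
    # get_data_shape prefers maxshape for h5py datasets, so an expandable
    # dataset written with expandable=True reports (None, None, None).
    assert get_data_shape(dset) == (None, None, None)
```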
