From 23ed57fd091f498db891bc4a01d10739363daef4 Mon Sep 17 00:00:00 2001 From: oruebel Date: Mon, 15 Jan 2024 22:31:17 -0800 Subject: [PATCH 1/5] Fix #154 determine datashape for unlimited HDF5 datasets on write --- src/hdmf_zarr/backend.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 39b1dc9e..41241baf 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -1193,7 +1193,13 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 data_shape = io_settings.pop('shape') # If we have a numeric numpy array then use its shape elif isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.number) or dtype == np.bool_: - data_shape = get_data_shape(data) + # HDMF's get_data_shape may return the maxshape of an HDF5 dataset which can include None values + # which Zarr does not allow for dataset shape. Check for the shape attribute first before falling + # back on get_data_shape + if hasattr(data, 'shape') and data.shape is not None: + data_shape = data.shape + else: + data_shape = get_data_shape(data) # Deal with object dtype elif isinstance(dtype, np.dtype): data = data[:] # load the data in case we come from HDF5 or another on-disk data source we don't know From 52ce004c69233b99f28e4b0cb3082c50db9b60a4 Mon Sep 17 00:00:00 2001 From: oruebel Date: Mon, 15 Jan 2024 22:36:26 -0800 Subject: [PATCH 2/5] Update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 358ad12e..dea286db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ ### Enhancements * Enhanced `ZarrIO` and `ZarrDataIO` to infer io settings (e.g., chunking and compression) from HDF5 datasets to preserve storage settings on export if possible @oruebel [#153](https://github.com/hdmf-dev/hdmf-zarr/pull/153) +### Bug Fixes +* Fixed bug when converting HDF5 datasets with unlimited dimensions @oruebel 
[#155](https://github.com/hdmf-dev/hdmf-zarr/pull/155) + ## 0.5.0 (December 8, 2023) ### Enhancements From 224574fb8145aefafc9fee9976a24672df9f5a62 Mon Sep 17 00:00:00 2001 From: oruebel Date: Mon, 15 Jan 2024 22:44:01 -0800 Subject: [PATCH 3/5] Add unit test using maxshape with None values in HDF5 --- tests/unit/test_io_convert.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py index b7f119a2..0f320b95 100644 --- a/tests/unit/test_io_convert.py +++ b/tests/unit/test_io_convert.py @@ -868,6 +868,12 @@ def __get_data_array(self, foo_container): """For a container created by __roundtrip_data return the data array""" return foo_container.buckets['bucket1'].foos['foo1'].my_data + def test_maxshape(self): + """test when maxshape is set for the dataset""" + data = H5DataIO(data=list(range(5)), maxshape=(None,)) + self.__roundtrip_data(data=data) + self.assertContainerEqual(self.out_container, self.read_container, ignore_hdmf_attrs=True) + def test_nofilters(self): """basic test that export without any options specified is working as expected""" data = list(range(5)) From d12299e594fe030797ce0fc3399cd6dbb7bcb43f Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Tue, 16 Jan 2024 12:13:25 -0800 Subject: [PATCH 4/5] Exclude backup code from codecov --- src/hdmf_zarr/backend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 41241baf..bccada32 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -1191,15 +1191,16 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 # Determine the shape and update the dtype if necessary when dtype==object if 'shape' in io_settings: # Use the shape set by the user data_shape = io_settings.pop('shape') - # If we have a numeric numpy array then use its shape + # If we have a numeric numpy-like array (e.g., numpy.array or h5py.Dataset) then use its shape 
elif isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.number) or dtype == np.bool_: # HDMF's get_data_shape may return the maxshape of an HDF5 dataset which can include None values # which Zarr does not allow for dataset shape. Check for the shape attribute first before falling # back on get_data_shape if hasattr(data, 'shape') and data.shape is not None: data_shape = data.shape - else: - data_shape = get_data_shape(data) + # This is a fall-back just in case. However this should not happen for standard numpy and h5py arrays + else: # pragma: no cover + data_shape = get_data_shape(data) # pragma: no cover # Deal with object dtype elif isinstance(dtype, np.dtype): data = data[:] # load the data in case we come from HDF5 or another on-disk data source we don't know From 21d5a6db2842c44c5eb8562872bd43e00ca796dd Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Tue, 16 Jan 2024 12:27:30 -0800 Subject: [PATCH 5/5] Simplify io options logic --- src/hdmf_zarr/backend.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index bccada32..7ca788c1 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -1174,9 +1174,8 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 io_settings = dict() if options is not None: dtype = options.get('dtype') - io_settings = options.get('io_settings') - if io_settings is None: - io_settings = dict() + if options.get('io_settings') is not None: + io_settings = options.get('io_settings') # Determine the dtype if not isinstance(dtype, type): try: