From b0f068ee956870ce640dabf118260a97b18b1564 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Mon, 19 Aug 2024 00:34:38 -0400 Subject: [PATCH 1/3] speed up loading of namespaces: skip register type when already registered when loading namespace (#1102) * skip register type when already registered when loading namespace * update CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update namespace.py --------- Co-authored-by: Matthew Avaylon --- CHANGELOG.md | 1 + src/hdmf/spec/namespace.py | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a6369094..c83a7ac3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Added new attribute "dimension_labels" on `DatasetBuilder` which specifies the names of the dimensions used in the dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081) +- Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102) ## HDMF 3.14.2 (July 7, 2024) diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index a2ae0bd37..f0417175f 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -466,15 +466,19 @@ def __load_namespace(self, namespace, reader, resolve=True): return included_types def __register_type(self, ndt, inc_ns, catalog, registered_types): - spec = inc_ns.get_spec(ndt) - spec_file = inc_ns.catalog.get_spec_source_file(ndt) - self.__register_dependent_types(spec, inc_ns, catalog, registered_types) - if isinstance(spec, DatasetSpec): - built_spec = self.dataset_spec_cls.build_spec(spec) + if ndt in registered_types: + # already registered + pass else: - built_spec = self.group_spec_cls.build_spec(spec) - registered_types.add(ndt) - catalog.register_spec(built_spec, spec_file) + spec = inc_ns.get_spec(ndt) + spec_file = inc_ns.catalog.get_spec_source_file(ndt) + self.__register_dependent_types(spec, inc_ns, catalog, registered_types) + if isinstance(spec, DatasetSpec): + built_spec = self.dataset_spec_cls.build_spec(spec) + else: + built_spec = self.group_spec_cls.build_spec(spec) + registered_types.add(ndt) + catalog.register_spec(built_spec, spec_file) def __register_dependent_types(self, spec, inc_ns, catalog, registered_types): """Ensure that classes for all types used by this type are registered From 875712be786974606730db4e36668138bb1e09fe Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Mon, 19 Aug 2024 09:14:50 -0400 Subject: [PATCH 2/3] return shallow copy in build_const_args (#1103) Co-authored-by: Matthew Avaylon --- CHANGELOG.md | 1 + src/hdmf/spec/spec.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c83a7ac3b..5d904fc5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ dataset based on the shape of the dataset data and the dimension names in the spec for the data type. This attribute is available on build (during the write process), but not on read of a dataset from a file. @rly [#1081](https://github.com/hdmf-dev/hdmf/pull/1081) - Speed up loading namespaces by skipping register_type when already registered. @magland [#1102](https://github.com/hdmf-dev/hdmf/pull/1102) +- Speed up namespace loading: return a shallow copy rather than a deep copy in build_const_args. @magland [#1103](https://github.com/hdmf-dev/hdmf/pull/1103) ## HDMF 3.14.2 (July 7, 2024) diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 358cc3256..64af0171f 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1,7 +1,6 @@ import re from abc import ABCMeta from collections import OrderedDict -from copy import deepcopy from warnings import warn from ..utils import docval, getargs, popargs, get_docval @@ -84,7 +83,7 @@ class ConstructableDict(dict, metaclass=ABCMeta): def build_const_args(cls, spec_dict): ''' Build constructor arguments for this ConstructableDict class from a dictionary ''' # main use cases are when spec_dict is a ConstructableDict or a spec dict read from a file - return deepcopy(spec_dict) + return spec_dict.copy() @classmethod def build_spec(cls, spec_dict): From 2f339d14bf50d1a58bbd6f88b389a42523e4f191 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 19 Aug 2024 06:20:49 -0700 Subject: [PATCH 3/3] Improve "already exists" error message (#1165) * Improve "already exists" error message * Fix test, improve msg * Update changelog --------- Co-authored-by: Matthew Avaylon --- CHANGELOG.md | 1 + src/hdmf/container.py | 4 +++- tests/unit/test_multicontainerinterface.py | 5 ++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d904fc5c..63106bcd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) +- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165) ## HDMF 3.14.3 (July 29, 2024) diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 287809406..67d8bcc2d 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -1142,7 +1142,9 @@ def _func(self, **kwargs): # still need to mark self as modified self.set_modified() if tmp.name in d: - msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name) + msg = (f"Cannot add {tmp.__class__} '{tmp.name}' at 0x{id(tmp)} to dict attribute '{attr_name}' in " + f"{cls} '{self.name}'. {d[tmp.name].__class__} '{tmp.name}' at 0x{id(d[tmp.name])} " + f"already exists in '{attr_name}' and has the same name.") raise ValueError(msg) d[tmp.name] = tmp return container diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py index c705d0a6e..6da81c2cc 100644 --- a/tests/unit/test_multicontainerinterface.py +++ b/tests/unit/test_multicontainerinterface.py @@ -198,7 +198,10 @@ def test_add_single_dup(self): """Test that adding a container to the attribute dict correctly adds the container.""" obj1 = Container('obj1') foo = Foo(obj1) - msg = "'obj1' already exists in Foo 'Foo'" + msg = (f"Cannot add 'obj1' at 0x{id(obj1)} to dict attribute " + "'containers' in 'Foo'. " + f" 'obj1' at 0x{id(obj1)} already exists in 'containers' " + "and has the same name.") with self.assertRaisesWith(ValueError, msg): foo.add_container(obj1)