From ef424a53c754859bcbb862a1d5cb1a05cc38fac8 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 29 Dec 2023 13:07:25 +0100
Subject: [PATCH 1/2] Implement AttrsDict.group() (syntactic sugar)

---
 src/legendmeta/jsondb.py | 59 +++++++++++++++++++++++++++++++++++++++-
 tests/test_jsondb.py     |  3 ++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/src/legendmeta/jsondb.py b/src/legendmeta/jsondb.py
index 7536a48..8e39e98 100644
--- a/src/legendmeta/jsondb.py
+++ b/src/legendmeta/jsondb.py
@@ -195,6 +195,49 @@ def map(self, label: str, unique: bool = True) -> AttrsDict:
         self.__cached_remaps__[label] = newmap
         return newmap
 
+    def group(self, label: str) -> AttrsDict:
+        """Group dictionary according to a `label`.
+
+        This is equivalent to :meth:`.map` with `unique` set to ``False``.
+
+        Parameters
+        ----------
+        label
+            name (key) at which the new label can be found. If nested in
+            dictionaries, use ``.`` to separate levels, e.g.
+            ``level1.level2.label``.
+
+        Examples
+        --------
+        >>> d = AttrsDict({
+        ...     "a": {
+        ...         "type": "A",
+        ...         "data": 1
+        ...     },
+        ...     "b": {
+        ...         "type": "A",
+        ...         "data": 2
+        ...     },
+        ...     "c": {
+        ...         "type": "B",
+        ...         "data": 3
+        ...     },
+        ... })
+        >>> d.group("type").keys()
+        dict_keys(['A', 'B'])
+        >>> d.group("type").A.values()
+        dict_values([{'type': 'A', 'data': 1}, {'type': 'A', 'data': 2}])
+        >>> d.group("type").B.values()
+        dict_values([{'type': 'B', 'data': 3}])
+        >>> d.group("type").A.map("data")[1]
+        {'type': 'A', 'data': 1}
+
+        See Also
+        --------
+        map
+        """
+        return self.map(label, unique=False)
+
     # d |= other_d should still produce a valid AttrsDict
     def __ior__(self, other: dict | AttrsDict) -> AttrsDict:
         return AttrsDict(super().__ior__(other))
@@ -353,7 +396,7 @@ def on(
         return db_ptr
 
     def map(self, label: str, unique: bool = True) -> AttrsDict:
-        """Remap dictionary according to a second unique `key`.
+        """Remap dictionary according to a second unique `label`.
 
         See Also
         --------
@@ -366,6 +409,20 @@
         """
         return self.__store__.map(label, unique=unique)
 
+    def group(self, label: str) -> AttrsDict:
+        """Group dictionary according to a second `label`.
+
+        See Also
+        --------
+        AttrsDict.group
+
+        Warning
+        -------
+        If the database is lazy, you must call :meth:`.scan` in advance to
+        populate it, otherwise groupings cannot be created.
+        """
+        return self.__store__.group(label)
+
     def __getitem__(self, item: str | Path) -> JsonDB | AttrsDict | list:
         """Access files or directories in the database."""
         # resolve relative paths / links, but keep it relative to self.__path__
diff --git a/tests/test_jsondb.py b/tests/test_jsondb.py
index 6dc37fa..a8722e0 100644
--- a/tests/test_jsondb.py
+++ b/tests/test_jsondb.py
@@ -201,6 +201,9 @@ def test_mapping():
     assert jdb.map("system", unique=False)[2].map("label")[1].data == 3
     assert jdb.map("system", unique=False)[1].map("label")[2].data == 1
 
+    assert jdb.group("system")[2].map("label")[1].data == 3
+    assert jdb.group("system")[1].map("label")[2].data == 1
+
     with pytest.raises(ValueError):
         jdb.map("non-existent-label")

From 3ad061a1d436d350ad14f761376299f15971272d Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 29 Dec 2023 13:15:09 +0100
Subject: [PATCH 2/2] Update docs to use JsonDB.group()

---
 docs/source/kitchen-sink.rst | 22 +++++++++-------------
 docs/source/tutorial.rst     | 12 ++++++------
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/docs/source/kitchen-sink.rst b/docs/source/kitchen-sink.rst
index 121df02..75d5622 100644
--- a/docs/source/kitchen-sink.rst
+++ b/docs/source/kitchen-sink.rst
@@ -30,7 +30,7 @@ How many kilograms of germanium are currently deployed in the LEGEND cryostat?
 or, alternatively:
 
 >>> # get only HPGe channels by mapping for "system"
->>> geds = lmeta.channelmap(datetime.now()).map("system", unique=False).geds
+>>> geds = lmeta.channelmap(datetime.now()).group("system").geds
 >>> # collect and sum up masses
 >>> masses = [v.production.mass_in_g for v in geds.values()]
 >>> numpy.cumsum(masses)[-1]
@@ -45,9 +45,9 @@ Calls to :meth:`.AttrsDict.map` can be chained together to build complex queries
 >>> # get HPGes, only ICPCs and only if their analysis status is ON
 >>> dets = (
 ...     lmeta.channelmap(datetime.now())
-...     .map("system", unique=False).geds
-...     .map("type", unique=False).icpc
-...     .map("analysis.usability", unique=False).on
+...     .group("system").geds
+...     .group("type").icpc
+...     .group("analysis.usability").on
 ...)
 >>> # collect and sum up mass * enrichment (assuming that the enrichment fraction is also in mass)
 >>> data = [v.production.mass_in_g * v.production.enrichment for v in dets.values()]
@@ -60,8 +60,8 @@ How many kilograms of germanium were not "OFF" on 23 Aug 2023?
 
 >>> geds = (
 ...     lmeta.channelmap(datetime(2023, 8, 23))
-...     .map("system", unique=False).geds
-...     .map("analysis.usability", unique=False)
+...     .group("system").geds
+...     .group("analysis.usability")
 ...)
 >>> mass = 0
 >>>
@@ -77,7 +77,7 @@ Which channel IDs correspond to detectors in string 1?
 
 >>> ids = (
 ...     lmeta.channelmap()
-...     .map("location.string", unique=False)[1]
+...     .group("location.string")[1]
 ...     .map("daq.rawid")
 ...).keys()
 dict_keys([1104000, 1104001, 1104002, 1104003, 1104004, 1104005, 1105600, 1105602, 1105603])
@@ -111,7 +111,7 @@ What is the current amount of exposure of HPGes usable for analysis?
             continue
 
         runinfo = lmeta.dataprod.runinfo[period][run].phy
-        chmap = lmeta.channelmap(runinfo.start_key).map("system", unique=False).geds
+        chmap = lmeta.channelmap(runinfo.start_key).group("system").geds
 
         for _, gedet in chmap.items():
             if gedet.analysis.usability not in ("off", "ac"):
@@ -145,11 +145,7 @@ What is the exposure of each single HPGe usable for analysis over a selection of
 
         runinfo = lmeta.dataprod.runinfo[period][run].phy
         chmap = lmeta.channelmap(runinfo.start_key)
-        chmap = (
-            chmap.map("system", unique=False)
-            .geds.map("analysis.usability", unique=False)
-            .on
-        )
+        chmap = chmap.group("system").geds.group("analysis.usability").on
 
         for _, gedet in chmap.items():
             exposures.setdefault(gedet.name, 0)
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 9db9b4d..d9a2963 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -116,8 +116,8 @@ channel map:
    call above and, in addition, augments the channel map with the information
    from the detector database. Check it out!
 
-Remapping metadata
-------------------
+Remapping and grouping metadata
+-------------------------------
 
 A second important method of ``JsonDB`` is :meth:`.JsonDB.map`, which allows
 to query ``(key, value)`` dictionaries with an alternative unique key defined in
@@ -137,12 +137,12 @@ corresponding to a certain DAQ channel:
 If the requested key is not unique, an exception will be raised.
 :meth:`.JsonDB.map` can, however, handle non-unique keys too and return a
 dictionary of matching entries instead, keyed by an arbitrary integer to allow
-further :meth:`.JsonDB.map` calls. The behavior is achieved by setting the
-``unique`` argument flag. A typical application is retrieving all channels
-attached to the same CC4:
+further :meth:`.JsonDB.map` calls. The behavior is achieved by using
+:meth:`.JsonDB.group` or by setting the ``unique`` argument flag. A typical
+application is retrieving all channels attached to the same CC4:
 
 >>> chmap = lmeta.hardware.configuration.channelmaps.on(datetime.now())
->>> chmap.map("electronics.cc4.id", unique=False)["C3"]
+>>> chmap.group("electronics.cc4.id")["C3"]
 {0: {'name': 'V02160A',
   'system': 'geds',
   'location': {'string': 1, 'position': 1},
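
For reviewers who want to try the new API locally, a minimal doctest-style sketch
(it assumes this branch is installed and that ``AttrsDict`` is imported from
``legendmeta.jsondb``, as in the diff above; expected outputs follow the doctest
added in patch 1/2):

>>> from legendmeta.jsondb import AttrsDict
>>> d = AttrsDict({
...     "a": {"type": "A", "data": 1},
...     "b": {"type": "A", "data": 2},
...     "c": {"type": "B", "data": 3},
... })
>>> # group() is sugar for map(label, unique=False): entries sharing the
>>> # same "type" value are collected under that value
>>> sorted(d.group("type").keys())
['A', 'B']
>>> # chaining with map() then picks out a single entry
>>> d.group("type").A.map("data")[1]
{'type': 'A', 'data': 1}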