From 0a2765e22329028a85083a3291b928a0f9cc585e Mon Sep 17 00:00:00 2001 From: Derek Jones Date: Tue, 29 Oct 2024 14:43:38 -0700 Subject: [PATCH] Invalidate nest-resolver caches during assignment. Resolves GH#159. --- src/nested_pandas/nestedframe/core.py | 16 +++++++++++----- .../nestedframe/test_nestedframe.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py index bf2ea6f..0890732 100644 --- a/src/nested_pandas/nestedframe/core.py +++ b/src/nested_pandas/nestedframe/core.py @@ -94,13 +94,19 @@ def __getitem__(self, item): super().__setitem__(top_nest, _NestedFieldResolver(top_nest, self._outer)) return super().__getitem__(top_nest) - def __setitem__(self, key, value): + def __setitem__(self, item, _): # Called to update the resolver with intermediate values. # The important point is to intercept the call so that the evaluator - # does not create any new resolvers on the fly. Storing the value - # is not important, since that will have been done already in - # the NestedFrame. - pass + # does not create any new resolvers on the fly. We do NOT want to + # store the given value, since the resolver does lazy-loading. + # What we DO want to do, however, is to invalidate the cache for + # any field resolver for a given nest that is receiving an assignment. + # Since the resolvers are created as-needed in __getitem__, all we need + # to do is delete them from the local cache when this pattern is detected. + if "." 
in item: + top_nest = item.split(".")[0].strip() + if top_nest in self._outer.nested_columns and super().__contains__(top_nest): + del self[top_nest] # force re-creation in __getitem__ class _NestedFieldResolver: diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index eb7c3f3..fa5da34 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -884,14 +884,20 @@ def test_eval_assignment(): assert (nf_n3["p2.d"] == nf_n2["p2.c2"] + nf["packed.d"] * 2 + nf["b"]).all() # Now test multiline and inplace=True + # Verify the resolution of GH#159, where a nested column created in + # an existing nest during a multi-line eval was not being recognized + # in a subsequent line. nf.eval( """ c = a + b - p2.e = packed.d * 2 + c + packed.e = packed.d * 2 + p2.e = packed.e + c p2.f = p2.e + b """, inplace=True, ) - assert len(nf.p2.nest.fields) == 2 + assert set(nf.nested_columns) == {"packed", "p2"} + assert set(nf.packed.nest.fields) == {"c", "d", "e", "time"} + assert set(nf.p2.nest.fields) == {"e", "f"} assert (nf["p2.e"] == nf["packed.d"] * 2 + nf.c).all() assert (nf["p2.f"] == nf["p2.e"] + nf.b).all()