Bugfix for incorrect dimension-accessors when initializing from pandas (#763)
IAMconsortium · Aug 4, 2023 · c902b2c · c902b2c
1 parent 4bfcec0
commit c902b2c
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 0 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -14,6 +14,7 @@ When importing an xlsx file created with pyam < 2.0, which has an "exclude" colu
 
 ## Individual updates
 
+- [#763](https://github.com/IAMconsortium/pyam/pull/763) Implement a fix against carrying over unused levels when initializing from an indexed pandas object
 - [#759](https://github.com/IAMconsortium/pyam/pull/759) Excise "exclude" column from meta and add a own attribute
 - [#747](https://github.com/IAMconsortium/pyam/pull/747) Drop support for Python 3.7 #747
 

diff --git a/pyam/utils.py b/pyam/utils.py
@@ -378,6 +378,9 @@ def format_data(df, index, **kwargs):
 
         df = df.reorder_levels(index + REQUIRED_COLS + [time_col] + extra_cols).dropna()
 
+        # remove unused levels to guard against issue #762
+        df.index = df.index.remove_unused_levels()
+
     else:
         if isinstance(df, pd.Series):
             if not df.name:

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -128,6 +128,20 @@ def test_init_df_from_timeseries(test_df):
     pd.testing.assert_frame_equal(df.timeseries(), test_df.timeseries())
 
 
+def test_init_df_from_timeseries_unused_levels(test_df):
+    # regression test for https://github.com/IAMconsortium/pyam/issues/762:
+    # initializing from a groupby slice must not carry over unused index levels
+
+    for (model, scenario), data in test_df.timeseries().groupby(["model", "scenario"]):
+        # keep only the model_a/scen_b slice, all other groups are skipped
+        if model == "model_a" and scenario == "scen_b":
+            df = IamDataFrame(data)
+
+    # pandas 2.0 keeps unused levels (here: "Primary Energy|Coal") on groupby slices,
+    # so the IamDataFrame initialization itself has to drop them from `variable`
+    assert df.variable == ["Primary Energy"]
+
+
 def test_init_df_with_extra_col(test_pd_df):
     tdf = test_pd_df.copy()