Remove unused index levels when initializing from an indexed pandas object.

  RELEASE_NOTES.md    adds the changelog entry for pull request #763.
  pyam/utils.py       format_data() now calls remove_unused_levels() on the
                      index directly after the reorder_levels(...).dropna() step.
  tests/test_core.py  adds a regression test for issue #762: an IamDataFrame
                      built from one group of a pandas groupby must only list
                      the variables that actually occur in that group.

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 171057e9b..b5df329c5 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -14,6 +14,7 @@ When importing an xlsx file created with pyam < 2.0, which has an "exclude" colu
 
 ## Individual updates
 
+- [#763](https://github.com/IAMconsortium/pyam/pull/763) Implement a fix against carrying over unused levels when initializing from an indexed pandas object
 - [#759](https://github.com/IAMconsortium/pyam/pull/759) Excise "exclude" column from meta and add a own attribute
 - [#747](https://github.com/IAMconsortium/pyam/pull/747) Drop support for Python 3.7 #747
 
diff --git a/pyam/utils.py b/pyam/utils.py
index c64b3be5e..87017aad4 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -378,6 +378,9 @@ def format_data(df, index, **kwargs):
 
         df = df.reorder_levels(index + REQUIRED_COLS + [time_col] + extra_cols).dropna()
 
+        # remove unused levels to guard against issue #762
+        df.index = df.index.remove_unused_levels()
+
     else:
         if isinstance(df, pd.Series):
             if not df.name:
diff --git a/tests/test_core.py b/tests/test_core.py
index 41ba0da4e..698f1babc 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -128,6 +128,20 @@ def test_init_df_from_timeseries(test_df):
     pd.testing.assert_frame_equal(df.timeseries(), test_df.timeseries())
 
 
+def test_init_df_from_timeseries_unused_levels(test_df):
+    # this test guards against regression for the bug
+    # reported in https://github.com/IAMconsortium/pyam/issues/762
+
+    for (model, scenario), data in test_df.timeseries().groupby(["model", "scenario"]):
+        # we're only interested in the second model-scenario combination
+        if model == "model_a" and scenario == "scen_b":
+            df = IamDataFrame(data)
+
+    # pandas 2.0 does not remove unused levels (here: "Primary Energy|Coal") in groupby
+    # we check that unused levels are removed at initialization of the IamDataFrame
+    assert df.variable == ["Primary Energy"]
+
+
 def test_init_df_with_extra_col(test_pd_df):
     tdf = test_pd_df.copy()
 