From c902b2c14d697aefb13782273e30c54395834e1d Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 4 Aug 2023 10:24:31 +0200
Subject: [PATCH] Bugfix for incorrect dimension-accessors when initializing
 from pandas  (#763)

---
 RELEASE_NOTES.md   |  1 +
 pyam/utils.py      |  3 +++
 tests/test_core.py | 14 ++++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 171057e9b..b5df329c5 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -14,6 +14,7 @@ When importing an xlsx file created with pyam < 2.0, which has an "exclude" colu
 
 ## Individual updates
 
+- [#763](https://github.com/IAMconsortium/pyam/pull/763) Implement a fix against carrying over unused levels when initializing from an indexed pandas object
 - [#759](https://github.com/IAMconsortium/pyam/pull/759) Excise "exclude" column from meta and add a own attribute
 - [#747](https://github.com/IAMconsortium/pyam/pull/747) Drop support for Python 3.7 #747
 
diff --git a/pyam/utils.py b/pyam/utils.py
index c64b3be5e..87017aad4 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -378,6 +378,9 @@ def format_data(df, index, **kwargs):
 
         df = df.reorder_levels(index + REQUIRED_COLS + [time_col] + extra_cols).dropna()
 
+        # remove unused levels to guard against issue #762
+        df.index = df.index.remove_unused_levels()
+
     else:
         if isinstance(df, pd.Series):
             if not df.name:
diff --git a/tests/test_core.py b/tests/test_core.py
index 41ba0da4e..698f1babc 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -128,6 +128,20 @@ def test_init_df_from_timeseries(test_df):
     pd.testing.assert_frame_equal(df.timeseries(), test_df.timeseries())
 
 
+def test_init_df_from_timeseries_unused_levels(test_df):
+    # regression test for https://github.com/IAMconsortium/pyam/issues/762
+    groups = test_df.timeseries().groupby(["model", "scenario"])
+
+    # we're only interested in the second model-scenario combination
+    selected = [data for key, data in groups if key == ("model_a", "scen_b")]
+    assert selected, "fixture lacks the 'model_a'/'scen_b' timeseries"
+    df = IamDataFrame(selected[0])
+
+    # pandas 2.0 does not remove unused levels (here: "Primary Energy|Coal") in groupby
+    # we check that unused levels are removed at initialization of the IamDataFrame
+    assert df.variable == ["Primary Energy"]
+
+
 def test_init_df_with_extra_col(test_pd_df):
     tdf = test_pd_df.copy()