From 142086a7c6b9334221b0bf93ce3f1f9f0ddc0c8a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 19 Apr 2023 11:43:17 +0100 Subject: [PATCH 01/32] support interchange protocol --- seaborn/axisgrid.py | 5 +++++ seaborn/categorical.py | 1 + seaborn/distributions.py | 2 ++ seaborn/relational.py | 3 +++ seaborn/utils.py | 12 ++++++++++++ 5 files changed, 23 insertions(+) diff --git a/seaborn/axisgrid.py b/seaborn/axisgrid.py index 7534909920..5b7e3bebca 100644 --- a/seaborn/axisgrid.py +++ b/seaborn/axisgrid.py @@ -372,6 +372,7 @@ def __init__( margin_titles=False, xlim=None, ylim=None, subplot_kws=None, gridspec_kws=None, ): + data = utils.try_convert_to_pandas(data) super().__init__() @@ -1239,6 +1240,8 @@ def __init__( """ + data = utils.try_convert_to_pandas(data) + super().__init__() # Sort out the variables that define the grid @@ -2087,6 +2090,8 @@ def pairplot( # Avoid circular import from .distributions import histplot, kdeplot + data = utils.try_convert_to_pandas(data) + # Handle deprecations if size is not None: height = size diff --git a/seaborn/categorical.py b/seaborn/categorical.py index b7ab1cc3f2..f6a974ca8a 100644 --- a/seaborn/categorical.py +++ b/seaborn/categorical.py @@ -3289,6 +3289,7 @@ def catplot( margin_titles=False, facet_kws=None, ci="deprecated", **kwargs ): + data = utils.try_convert_to_pandas(data) # Determine the plotting function try: diff --git a/seaborn/distributions.py b/seaborn/distributions.py index b1a4c9da37..a58129042e 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -33,6 +33,7 @@ _check_argument, _assign_default_kwargs, _default_color, + try_convert_to_pandas, ) from .palettes import color_palette from .external import husl @@ -1392,6 +1393,7 @@ def histplot( # Other appearance keywords **kwargs, ): + data = try_convert_to_pandas(data) p = _DistributionPlotter( data=data, diff --git a/seaborn/relational.py b/seaborn/relational.py index de3cf68348..84305d84ab 100644 --- a/seaborn/relational.py +++ b/seaborn/relational.py @@ -13,6 +13,7 @@ adjust_legend_subtitles, _default_color, _deprecate_ci, + try_convert_to_pandas, ) from ._statistics import EstimateAggregator from .axisgrid import FacetGrid, _facet_docs @@ -704,6 +705,7 @@ def scatterplot( markers=True, style_order=None, legend="auto", ax=None, **kwargs ): + data = try_convert_to_pandas(data) variables = _ScatterPlotter.get_semantics(locals()) p = _ScatterPlotter(data=data, variables=variables, legend=legend) @@ -799,6 +801,7 @@ def relplot( legend="auto", kind="scatter", height=5, aspect=1, facet_kws=None, **kwargs ): + data = try_convert_to_pandas(data) if kind == "scatter": diff --git a/seaborn/utils.py b/seaborn/utils.py index a948740166..e41e3d215a 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -1,4 +1,6 @@ """Utility functions, mostly for internal use.""" +from __future__ import annotations + import os import inspect import warnings @@ -889,3 +891,13 @@ def _disable_autolayout(): def _version_predates(lib: ModuleType, version: str) -> bool: """Helper function for checking version compatibility.""" return Version(lib.__version__) < Version(version) + + +def try_convert_to_pandas(data: object | None) -> pd.DataFrame: + if data is None: + return None + elif isinstance(data, pd.DataFrame): + return data + elif hasattr(data, "__dataframe__") and not _version_predates(pd, "2.0.2"): + return pd.api.interchange.from_dataframe(data) + return pd.DataFrame(data) From 55df47ba462ab06fdfc58af8a673dd34796b03e2 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 4 May 2023 16:42:11 +0100 Subject: [PATCH 02/32] raise if trying to interchange before pd 2.0.2 --- seaborn/utils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/seaborn/utils.py b/seaborn/utils.py index e41e3d215a..2bff239d12 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -644,7 +644,8 @@ def load_dataset(name, cache=True, data_home=None, **kws): elif name == "dowjones": df["Date"] = pd.to_datetime(df["Date"]) - return df + import polars + return polars.from_pandas(df) def axis_ticklabels_overlap(labels): @@ -898,6 +899,11 @@ def try_convert_to_pandas(data: object | None) -> pd.DataFrame: return None elif isinstance(data, pd.DataFrame): return data - elif hasattr(data, "__dataframe__") and not _version_predates(pd, "2.0.2"): + elif hasattr(data, "__dataframe__"): + if _version_predates(pd, "2.0.2"): + raise RuntimeError( + "Plotting non-pandas DataFrames requires at least pandas '2.0.2'. " + "Please upgrade pandas." + ) return pd.api.interchange.from_dataframe(data) return pd.DataFrame(data) From 088cb087fad965775afde8636f6661361bd797a2 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 7 May 2023 12:52:08 +0100 Subject: [PATCH 03/32] revert temporary change --- seaborn/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/seaborn/utils.py b/seaborn/utils.py index 2bff239d12..2d59ae547b 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -644,8 +644,7 @@ def load_dataset(name, cache=True, data_home=None, **kws): elif name == "dowjones": df["Date"] = pd.to_datetime(df["Date"]) - import polars - return polars.from_pandas(df) + return df def axis_ticklabels_overlap(labels): From 03c1717a387f1d43cb36047e44ea246b17427ed5 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 7 May 2023 14:10:47 +0100 Subject: [PATCH 04/32] simplify --- seaborn/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/seaborn/utils.py b/seaborn/utils.py index 2d59ae547b..561c39ce8c 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -896,8 +896,6 @@ def _version_predates(lib: ModuleType, version: str) -> bool: def try_convert_to_pandas(data: object | None) -> pd.DataFrame: if data is None: return None - elif isinstance(data, pd.DataFrame): - return data elif hasattr(data, "__dataframe__"): if _version_predates(pd, "2.0.2"): raise RuntimeError( @@ -905,4 +903,4 @@ def try_convert_to_pandas(data: object | None) -> pd.DataFrame: "Please upgrade pandas." ) return pd.api.interchange.from_dataframe(data) - return pd.DataFrame(data) + return data From 7dd9ff605ada2bd4ef363bdf360fe7888657d992 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 7 May 2023 14:41:11 +0100 Subject: [PATCH 05/32] fixup --- seaborn/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/seaborn/utils.py b/seaborn/utils.py index 561c39ce8c..6908ba3b76 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -896,6 +896,8 @@ def _version_predates(lib: ModuleType, version: str) -> bool: def try_convert_to_pandas(data: object | None) -> pd.DataFrame: if data is None: return None + elif isinstance(data, pd.DataFrame): + return data elif hasattr(data, "__dataframe__"): if _version_predates(pd, "2.0.2"): raise RuntimeError( From ad48c8a3ecabc4c1095963954b26421b92d59844 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 12:04:16 +0100 Subject: [PATCH 06/32] try adding polars workflow --- .github/workflows/ci.yaml | 35 ++++++++++++++++++++++++++++++++ tests/conftest.py | 42 ++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8493ea5be2..660bf8a402 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -90,6 +90,41 @@ jobs: uses: codecov/codecov-action@v3 if: ${{ success() }} + run-interchange-protocol-tests: + runs-on: ubuntu-latest + + env: + SEABORN_TEST_INTERCHANGE_PROTOCOL: 1 + + strategy: + matrix: + python: ["3.11"] + install: [full] + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + + - name: Install polars + run: | + pip install --upgrade pip wheel + pip install --upgrade polars + + - name: Install seaborn + run: | + pip install .[dev,stats] -r ci/deps_pinned.txt + + - name: Run tests + run: make test + + - name: Upload coverage + uses: codecov/codecov-action@v3 + if: ${{ success() }} + lint: runs-on: ubuntu-latest strategy: diff --git a/tests/conftest.py b/tests/conftest.py index 01d93a4941..535ae2e8e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,21 @@ +import os + import numpy as np import pandas as pd import pytest +def maybe_convert_to_polars(df): + # If the SEABORN_TEST_INTERCHANGE_PROTOCOL=1 environment variable + # is set, then check tests work when starting with a non-pandas + # DataFrame (here, polars). + if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL') == '1': + import polars as pl + return pl.from_pandas(df) + return df + + @pytest.fixture(autouse=True) def close_figs(): yield @@ -29,13 +41,13 @@ def wide_df(rng): columns = list("abc") index = pd.RangeIndex(10, 50, 2, name="wide_index") values = rng.normal(size=(len(index), len(columns))) - return pd.DataFrame(values, index=index, columns=columns) + return maybe_convert_to_polars(pd.DataFrame(values, index=index, columns=columns)) @pytest.fixture def wide_array(wide_df): - return wide_df.to_numpy() + return maybe_convert_to_polars(wide_df.to_numpy()) # TODO s/flat/thin? @@ -43,7 +55,7 @@ def wide_array(wide_df): def flat_series(rng): index = pd.RangeIndex(10, 30, name="t") - return pd.Series(rng.normal(size=20), index, name="s") + return maybe_convert_to_polars(pd.Series(rng.normal(size=20), index, name="s")) @pytest.fixture @@ -62,7 +74,7 @@ def flat_list(flat_series): def flat_data(rng, request): index = pd.RangeIndex(10, 30, name="t") - series = pd.Series(rng.normal(size=20), index, name="s") + series = maybe_convert_to_polars(pd.Series(rng.normal(size=20), index, name="s")) if request.param == "series": data = series elif request.param == "array": @@ -75,8 +87,14 @@ def flat_data(rng, request): @pytest.fixture def wide_list_of_series(rng): - return [pd.Series(rng.normal(size=20), np.arange(20), name="a"), - pd.Series(rng.normal(size=10), np.arange(5, 15), name="b")] + return [ + maybe_convert_to_polars( + pd.Series(rng.normal(size=20), np.arange(20), name="a") + ), + maybe_convert_to_polars( + pd.Series(rng.normal(size=10), np.arange(5, 15), name="b") + ) + ] @pytest.fixture @@ -133,7 +151,7 @@ def long_df(rng): df["s_cat"] = df["s"].astype("category") df["s_str"] = df["s"].astype(str) - return df + return maybe_convert_to_polars(df) @pytest.fixture @@ -146,12 +164,12 @@ def long_dict(long_df): def repeated_df(rng): n = 100 - return pd.DataFrame(dict( + return maybe_convert_to_polars(pd.DataFrame(dict( x=np.tile(np.arange(n // 2), 2), y=rng.normal(size=n), a=rng.choice(list("abc"), n), u=np.repeat(np.arange(2), n // 2), - )) + ))) @pytest.fixture @@ -161,7 +179,7 @@ def null_df(rng, long_df): for col in df: idx = rng.permutation(df.index)[:10] df.loc[idx, col] = np.nan - return df + return maybe_convert_to_polars(df) @pytest.fixture @@ -171,10 +189,10 @@ def object_df(rng, long_df): # objectify numeric columns for col in ["c", "s", "f"]: df[col] = df[col].astype(object) - return df + return maybe_convert_to_polars(df) @pytest.fixture def null_series(flat_series): - return pd.Series(index=flat_series.index, dtype='float64') + return maybe_convert_to_polars(pd.Series(index=flat_series.index, dtype='float64')) From a0bd3f7425dbe56b235fcde99e1a36ab043c89ba Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 12:11:16 +0100 Subject: [PATCH 07/32] 3.10 --- .github/workflows/ci.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 660bf8a402..761b59f000 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,8 +98,7 @@ jobs: strategy: matrix: - python: ["3.11"] - install: [full] + python: ["3.10"] steps: - uses: actions/checkout@v3 From 8edcf1468105273d14890916e42acef5a824aec0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 12:17:25 +0100 Subject: [PATCH 08/32] try fixup; --- .github/workflows/ci.yaml | 2 +- tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 761b59f000..a27c7bcdd5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -115,7 +115,7 @@ jobs: - name: Install seaborn run: | - pip install .[dev,stats] -r ci/deps_pinned.txt + pip install .[dev] - name: Run tests run: make test diff --git a/tests/conftest.py b/tests/conftest.py index 535ae2e8e2..c6d62e000e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ def maybe_convert_to_polars(df): # If the SEABORN_TEST_INTERCHANGE_PROTOCOL=1 environment variable # is set, then check tests work when starting with a non-pandas # DataFrame (here, polars). - if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL') == '1': + if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': import polars as pl return pl.from_pandas(df) return df From 22df733e41dd60615f52aa9a17ea6e143c0d91ad Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 12:26:28 +0100 Subject: [PATCH 09/32] include pyarrow install --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a27c7bcdd5..60c0461bc2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -111,7 +111,7 @@ jobs: - name: Install polars run: | pip install --upgrade pip wheel - pip install --upgrade polars + pip install --upgrade polars pyarrow - name: Install seaborn run: | From fa37b5659fdc7e5434cc0f1c43d3523e75a0ffa5 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 12:49:33 +0100 Subject: [PATCH 10/32] pandas nightly --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 60c0461bc2..ea73181a62 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -111,6 +111,8 @@ jobs: - name: Install polars run: | pip install --upgrade pip wheel + pip uninstall pandas -y + pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas pip install --upgrade polars pyarrow - name: Install seaborn From b5c4ff851dec1045d28a1b39babc7ace439ddcac Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 14:30:21 +0100 Subject: [PATCH 11/32] wip --- seaborn/_oldcore.py | 2 ++ tests/conftest.py | 22 ++++++++++----- tests/test_categorical.py | 55 +++++++++++++++++++++++++------------ tests/test_core.py | 14 ++++++++-- tests/test_distributions.py | 4 +++ tests/test_relational.py | 51 ++++++++++++++++++++++++---------- 6 files changed, 107 insertions(+), 41 deletions(-) diff --git a/seaborn/_oldcore.py b/seaborn/_oldcore.py index 9bfebccc20..31317ed9f4 100644 --- a/seaborn/_oldcore.py +++ b/seaborn/_oldcore.py @@ -23,6 +23,7 @@ desaturate, get_color_cycle, remove_na, + try_convert_to_pandas, ) @@ -793,6 +794,7 @@ def _assign_variables_wideform(self, data=None, **kwargs): else: # Otherwise assume we have some collection of vectors. + data = try_convert_to_pandas(data) # Handle Python sequences such that entries end up in the columns, # not in the rows, of the intermediate wide DataFrame. diff --git a/tests/conftest.py b/tests/conftest.py index c6d62e000e..0d953d5536 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,9 @@ def maybe_convert_to_polars(df): return pl.from_pandas(df) return df +@pytest.fixture() +def using_polars() -> bool: + return os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1' @pytest.fixture(autouse=True) def close_figs(): @@ -47,7 +50,7 @@ def wide_df(rng): @pytest.fixture def wide_array(wide_df): - return maybe_convert_to_polars(wide_df.to_numpy()) + return wide_df.to_numpy() # TODO s/flat/thin? @@ -151,7 +154,8 @@ def long_df(rng): df["s_cat"] = df["s"].astype("category") df["s_str"] = df["s"].astype(str) - return maybe_convert_to_polars(df) + # pl.from_pandas fails here + return df @pytest.fixture @@ -179,7 +183,8 @@ def null_df(rng, long_df): for col in df: idx = rng.permutation(df.index)[:10] df.loc[idx, col] = np.nan - return maybe_convert_to_polars(df) + # polars.from_pandas fails here + return df @pytest.fixture @@ -189,10 +194,13 @@ def object_df(rng, long_df): # objectify numeric columns for col in ["c", "s", "f"]: df[col] = df[col].astype(object) - return maybe_convert_to_polars(df) + # Can't convert to polars + return df @pytest.fixture -def null_series(flat_series): - - return maybe_convert_to_polars(pd.Series(index=flat_series.index, dtype='float64')) +def null_series(flat_series, using_polars): + if using_polars: + import polars as pl + return pl.Series([], dtype=pl.Float64) + return pd.Series(index=flat_series.index, dtype='float64') diff --git a/tests/test_categorical.py b/tests/test_categorical.py index f1722b9f79..282321ba7b 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -1,4 +1,5 @@ import itertools +import os from functools import partial import warnings @@ -565,15 +566,17 @@ def test_labels_long(self, long_df, orient): hue_levels = categorical_order(long_df[kws["hue"]]) assert hue_labels == hue_levels - def test_labels_wide(self, wide_df): + def test_labels_wide(self, wide_df, using_polars): - wide_df = wide_df.rename_axis("cols", axis=1) + if not using_polars: + wide_df = wide_df.rename_axis("cols", axis=1) ax = self.func(wide_df) # To populate texts; only needed on older matplotlibs _draw_figure(ax.figure) - assert ax.get_xlabel() == wide_df.columns.name + if not using_polars: + assert ax.get_xlabel() == wide_df.columns.name labels = [t.get_text() for t in ax.get_xticklabels()] for label, level in zip(labels, wide_df.columns): assert label == level @@ -1227,7 +1230,7 @@ def check_box(self, bxp, data, orient, pos, width=0.8): assert tuple(med[val_idx]) == (p50, p50) assert np.allclose(med[pos_idx], (pos - width / 2, pos + width / 2)) - def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5): + def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5, using_polars=False): pos_idx, val_idx = self.orient_indices(orient) @@ -1240,8 +1243,12 @@ def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5): p25, p75 = np.percentile(data, [25, 75]) iqr = p75 - p25 - adj_lo = data[data >= (p25 - iqr * whis)].min() - adj_hi = data[data <= (p75 + iqr * whis)].max() + if isinstance(data, pd.Series): + adj_lo = data[data >= (p25 - iqr * whis)].min() + adj_hi = data[data <= (p75 + iqr * whis)].max() + else: # polars + adj_lo = data.filter(data >= (p25 - iqr * whis)).min() + adj_hi = data.filter(data <= (p75 + iqr * whis)).max() assert whis_lo[val_idx].max() == p25 assert whis_lo[val_idx].min() == approx(adj_lo) @@ -1255,18 +1262,21 @@ def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5): assert np.allclose(caps_hi[val_idx], (adj_hi, adj_hi)) assert np.allclose(caps_hi[pos_idx], (pos - capsize / 2, pos + capsize / 2)) - flier_data = data[(data < adj_lo) | (data > adj_hi)] + if isinstance(data, pd.Series): + flier_data = data[(data < adj_lo) | (data > adj_hi)] + else: + flier_data = data.filter((data < adj_lo) | (data > adj_hi)) assert sorted(fliers[val_idx]) == sorted(flier_data) assert np.allclose(fliers[pos_idx], pos) @pytest.mark.parametrize("orient,col", [("x", "y"), ("y", "z")]) - def test_single_var(self, long_df, orient, col): + def test_single_var(self, long_df, orient, col, using_polars): var = {"x": "y", "y": "x"}[orient] ax = boxplot(long_df, **{var: col}) bxp = ax.containers[0][0] self.check_box(bxp, long_df[col], orient, 0) - self.check_whiskers(bxp, long_df[col], orient, 0) + self.check_whiskers(bxp, long_df[col], orient, 0, using_polars=using_polars) @pytest.mark.parametrize("orient,col", [(None, "x"), ("x", "y"), ("y", "z")]) def test_vector_data(self, long_df, orient, col): @@ -1278,14 +1288,14 @@ def test_vector_data(self, long_df, orient, col): self.check_whiskers(bxp, long_df[col], orient, 0) @pytest.mark.parametrize("orient", ["h", "v"]) - def test_wide_data(self, wide_df, orient): + def test_wide_data(self, wide_df, orient, using_polars): orient = {"h": "y", "v": "x"}[orient] ax = boxplot(wide_df, orient=orient) for i, bxp in enumerate(ax.containers): col = wide_df.columns[i] self.check_box(bxp[i], wide_df[col], orient, i) - self.check_whiskers(bxp[i], wide_df[col], orient, i) + self.check_whiskers(bxp[i], wide_df[col], orient, i, using_polars=using_polars) @pytest.mark.parametrize("orient", ["x", "y"]) def test_grouped(self, long_df, orient): @@ -1901,13 +1911,16 @@ def test_single_var(self, orient): assert getattr(bar, f"get_{prop}")() == approx(vals.mean()) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) - def test_wide_df(self, wide_df, orient): + def test_wide_df(self, wide_df, orient, using_polars): ax = barplot(wide_df, orient=orient) orient = {"h": "y", "v": "x"}.get(orient, orient) prop = {"x": "height", "y": "width"}[orient] for i, bar in enumerate(ax.patches): - assert getattr(bar, f"get_{prop}")() == approx(wide_df.iloc[:, i].mean()) + if using_polars: + assert getattr(bar, f"get_{prop}")() == approx(wide_df[:, i].mean()) + else: + assert getattr(bar, f"get_{prop}")() == approx(wide_df.iloc[:, i].mean()) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) def test_vector_orient(self, orient): @@ -2396,7 +2409,7 @@ def test_single_var(self, orient): assert getattr(line, f"get_{orient}data")() == approx(vals.mean()) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) - def test_wide_df(self, wide_df, orient): + def test_wide_df(self, wide_df, orient, using_polars): ax = pointplot(wide_df, orient=orient) orient = {"h": "y", "v": "x"}.get(orient, orient) @@ -2406,10 +2419,16 @@ def test_wide_df(self, wide_df, orient): getattr(line, f"get_{orient}data")(), np.arange(len(wide_df.columns)), ) - assert_array_almost_equal( - getattr(line, f"get_{depend}data")(), - wide_df.mean(axis=0), - ) + if using_polars: + assert_array_almost_equal( + getattr(line, f"get_{depend}data")(), + wide_df.mean(axis=0).to_numpy().flatten(), + ) + else: + assert_array_almost_equal( + getattr(line, f"get_{depend}data")(), + wide_df.mean(axis=0), + ) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) def test_vector_orient(self, orient): diff --git a/tests/test_core.py b/tests/test_core.py index 1e52868863..8ba90cef4c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,4 +1,5 @@ import itertools +import os import numpy as np import pandas as pd import matplotlib as mpl @@ -105,6 +106,7 @@ def test_plotter_reinit(self, long_df): assert p._hue_map.palette == palette assert p._hue_map.levels == hue_order + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='tmp hopefully') def test_hue_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, hue=null_series)) @@ -116,11 +118,15 @@ def test_hue_map_null(self, flat_series, null_series): assert m.norm is None assert m.lookup_table is None - def test_hue_map_categorical(self, wide_df, long_df): + # @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='looks wrong?') + def test_hue_map_categorical(self, wide_df, long_df, using_polars): p = VectorPlotter(data=wide_df) m = HueMapping(p) - assert m.levels == wide_df.columns.to_list() + if using_polars: + assert m.levels == wide_df.columns + else: + assert m.levels == wide_df.columns.to_list() assert m.map_type == "categorical" assert m.cmap is None @@ -382,6 +388,7 @@ def test_plotter_reinit(self, long_df): assert p._size_map.lookup_table == dict(zip(size_order, sizes)) assert p._size_map.levels == size_order + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_size_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, size=null_series)) @@ -529,6 +536,7 @@ def test_plotter_reinit(self, long_df): assert p._style_map.levels == style_order assert p._style_map(style_order, "marker") == markers + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_style_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, style=null_series)) @@ -616,6 +624,7 @@ def test_map_style(self, long_df): class TestVectorPlotter: + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_flat_variables(self, flat_data): p = VectorPlotter() @@ -805,6 +814,7 @@ def test_long_unmatched_size_error(self, long_df, flat_array): with pytest.raises(ValueError, match=err): VectorPlotter(data=long_df, variables={"x": "x", "hue": flat_array}) + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_wide_categorical_columns(self, wide_df): wide_df.columns = pd.CategoricalIndex(wide_df.columns) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 4cada7d516..1600d7716e 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,4 +1,5 @@ import itertools +import os import warnings import numpy as np @@ -196,6 +197,7 @@ def test_bivariate_data(self, long_df): self.assert_rug_equal(ax1.collections[0], ax2.collections[0]) self.assert_rug_equal(ax1.collections[1], ax2.collections[1]) + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_wide_vs_long_data(self, wide_df): f, (ax1, ax2) = plt.subplots(ncols=2) @@ -241,6 +243,7 @@ def test_a_deprecation(self, flat_series): self.assert_rug_equal(*ax.collections) @pytest.mark.parametrize("variable", ["x", "y"]) + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_axis_deprecation(self, flat_series, variable): f, ax = plt.subplots() @@ -390,6 +393,7 @@ def test_long_vectors(self, long_df, variable): for a, b in itertools.product(ydata, ydata): assert_array_equal(a, b) + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_wide_vs_long_data(self, wide_df): f, (ax1, ax2) = plt.subplots(ncols=2) diff --git a/tests/test_relational.py b/tests/test_relational.py index ca7970d433..9c5a573d2f 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -1,4 +1,5 @@ from itertools import product +import os import warnings import numpy as np @@ -89,7 +90,8 @@ def test_color(self, long_df): class TestRelationalPlotter(Helpers): - def test_wide_df_variables(self, wide_df): + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='looks wrong?') + def test_wide_df_variables(self, wide_df, using_polars): p = _RelationalPlotter() p.assign_variables(data=wide_df) @@ -98,7 +100,10 @@ def test_wide_df_variables(self, wide_df): assert len(p.plot_data) == np.prod(wide_df.shape) x = p.plot_data["x"] - expected_x = np.tile(wide_df.index, wide_df.shape[1]) + if using_polars: + expected_x = np.tile(np.arange(len(wide_df)), wide_df.shape[1]) + else: + expected_x = np.tile(wide_df.index, wide_df.shape[1]) assert_array_equal(x, expected_x) y = p.plot_data["y"] @@ -106,7 +111,10 @@ def test_wide_df_variables(self, wide_df): assert_array_equal(y, expected_y) hue = p.plot_data["hue"] - expected_hue = np.repeat(wide_df.columns.to_numpy(), wide_df.shape[0]) + if using_polars: + expected_hue = np.repeat(wide_df.columns, wide_df.shape[0]) + else: + expected_hue = np.repeat(wide_df.columns.to_numpy(), wide_df.shape[0]) assert_array_equal(hue, expected_hue) style = p.plot_data["style"] @@ -221,7 +229,7 @@ def test_flat_list_variables(self, flat_list): assert p.variables["x"] is None assert p.variables["y"] is None - def test_flat_series_variables(self, flat_series): + def test_flat_series_variables(self, flat_series, using_polars): p = _RelationalPlotter() p.assign_variables(data=flat_series) @@ -230,17 +238,22 @@ def test_flat_series_variables(self, flat_series): assert len(p.plot_data) == len(flat_series) x = p.plot_data["x"] - expected_x = flat_series.index + if using_polars: + expected_x = np.arange(len(flat_series)) + else: + expected_x = flat_series.index assert_array_equal(x, expected_x) y = p.plot_data["y"] expected_y = flat_series assert_array_equal(y, expected_y) - assert p.variables["x"] is flat_series.index.name - assert p.variables["y"] is flat_series.name + if not using_polars: + assert p.variables["x"] is flat_series.index.name + assert p.variables["y"] is flat_series.name - def test_wide_list_of_series_variables(self, wide_list_of_series): + @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='looks wrong?') + def test_wide_list_of_series_variables(self, wide_list_of_series, using_polars): p = _RelationalPlotter() p.assign_variables(data=wide_list_of_series) @@ -252,18 +265,28 @@ def test_wide_list_of_series_variables(self, wide_list_of_series): assert len(p.plot_data) == chunks * chunk_size - index_union = np.unique( - np.concatenate([s.index for s in wide_list_of_series]) - ) + if using_polars: + index_union = np.unique( + np.concatenate([np.arange(len(s)) for s in wide_list_of_series]) + ) + else: + index_union = np.unique( + np.concatenate([s.index for s in wide_list_of_series]) + ) x = p.plot_data["x"] expected_x = np.tile(index_union, chunks) assert_array_equal(x, expected_x) y = p.plot_data["y"] - expected_y = np.concatenate([ - s.reindex(index_union) for s in wide_list_of_series - ]) + if using_polars: + expected_y = np.concatenate([ + np.arange(len(index_union)) for s in wide_list_of_series + ]) + else: + expected_y = np.concatenate([ + s.reindex(index_union) for s in wide_list_of_series + ]) assert_array_equal(y, expected_y) hue = p.plot_data["hue"] From 064b2e601458c1389c506bbda65593664b8a1852 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 15:08:17 +0100 Subject: [PATCH 12/32] fixup --- seaborn/_oldcore.py | 4 ++-- tests/conftest.py | 5 ++--- tests/test_categorical.py | 19 +++++++++++-------- tests/test_core.py | 20 +++++++++++++------- tests/test_distributions.py | 4 ---- tests/test_relational.py | 17 +++++++++-------- 6 files changed, 37 insertions(+), 32 deletions(-) diff --git a/seaborn/_oldcore.py b/seaborn/_oldcore.py index 31317ed9f4..390a64fc0f 100644 --- a/seaborn/_oldcore.py +++ b/seaborn/_oldcore.py @@ -776,7 +776,7 @@ def _assign_variables_wideform(self, data=None, **kwargs): # (Could be accomplished with a more general to_series() interface) flat_data = pd.Series(data).copy() names = { - "@values": flat_data.name, + "@values": getattr(data, 'name', None), "@index": flat_data.index.name } @@ -924,7 +924,7 @@ def _assign_variables_longform(self, data=None, **kwargs): val in data or (isinstance(val, (str, bytes)) and val in index) ) - except (KeyError, TypeError): + except (KeyError, TypeError, ValueError): val_as_data_key = False if val_as_data_key: diff --git a/tests/conftest.py b/tests/conftest.py index 0d953d5536..0aa3caa8a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,10 +15,12 @@ def maybe_convert_to_polars(df): return pl.from_pandas(df) return df + @pytest.fixture() def using_polars() -> bool: return os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1' + @pytest.fixture(autouse=True) def close_figs(): yield @@ -154,7 +156,6 @@ def long_df(rng): df["s_cat"] = df["s"].astype("category") df["s_str"] = df["s"].astype(str) - # pl.from_pandas fails here return df @@ -183,7 +184,6 @@ def null_df(rng, long_df): for col in df: idx = rng.permutation(df.index)[:10] df.loc[idx, col] = np.nan - # polars.from_pandas fails here return df @@ -194,7 +194,6 @@ def object_df(rng, long_df): # objectify numeric columns for col in ["c", "s", "f"]: df[col] = df[col].astype(object) - # Can't convert to polars return df diff --git a/tests/test_categorical.py b/tests/test_categorical.py index 282321ba7b..5d40d22e8e 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -1,5 +1,4 @@ import itertools -import os from functools import partial import warnings @@ -670,10 +669,13 @@ def test_supplied_color_array(self, long_df): ("x", "dataframe"), ("x", "dict"), ] ) - def test_wide(self, wide_df, orient, data_type): + def test_wide(self, wide_df, orient, data_type, using_polars): if data_type == "dict": - wide_df = {k: v.to_numpy() for k, v in wide_df.items()} + if using_polars: + wide_df = {col: wide_df[col].to_numpy() for col in wide_df.columns} + else: + wide_df = {k: v.to_numpy() for k, v in wide_df.items()} ax = self.func(data=wide_df, orient=orient) _draw_figure(ax.figure) @@ -1230,7 +1232,7 @@ def check_box(self, bxp, data, orient, pos, width=0.8): assert tuple(med[val_idx]) == (p50, p50) assert np.allclose(med[pos_idx], (pos - width / 2, pos + width / 2)) - def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5, using_polars=False): + def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5): pos_idx, val_idx = self.orient_indices(orient) @@ -1276,7 +1278,7 @@ def test_single_var(self, long_df, orient, col, using_polars): ax = boxplot(long_df, **{var: col}) bxp = ax.containers[0][0] self.check_box(bxp, long_df[col], orient, 0) - self.check_whiskers(bxp, long_df[col], orient, 0, using_polars=using_polars) + self.check_whiskers(bxp, long_df[col], orient, 0) @pytest.mark.parametrize("orient,col", [(None, "x"), ("x", "y"), ("y", "z")]) def test_vector_data(self, long_df, orient, col): @@ -1295,7 +1297,7 @@ def test_wide_data(self, wide_df, orient, using_polars): for i, bxp in enumerate(ax.containers): col = wide_df.columns[i] self.check_box(bxp[i], wide_df[col], orient, i) - self.check_whiskers(bxp[i], wide_df[col], orient, i, using_polars=using_polars) + self.check_whiskers(bxp[i], wide_df[col], orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) def test_grouped(self, long_df, orient): @@ -1918,9 +1920,10 @@ def test_wide_df(self, wide_df, orient, using_polars): prop = {"x": "height", "y": "width"}[orient] for i, bar in enumerate(ax.patches): if using_polars: - assert getattr(bar, f"get_{prop}")() == approx(wide_df[:, i].mean()) + expected = approx(wide_df[:, i].mean()) else: - assert getattr(bar, f"get_{prop}")() == approx(wide_df.iloc[:, i].mean()) + expected = approx(wide_df.iloc[:, i].mean()) + assert getattr(bar, f"get_{prop}")() == expected @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) def test_vector_orient(self, orient): diff --git a/tests/test_core.py b/tests/test_core.py index 8ba90cef4c..42e6bc3a42 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -106,7 +106,10 @@ def test_plotter_reinit(self, long_df): assert p._hue_map.palette == palette assert p._hue_map.levels == hue_order - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='tmp hopefully') + @pytest.mark.xfail( + os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1', + reason='different-length inputs not yet supported for non-pandas' + ) def test_hue_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, hue=null_series)) @@ -118,7 +121,6 @@ def test_hue_map_null(self, flat_series, null_series): assert m.norm is None assert m.lookup_table is None - # @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='looks wrong?') def test_hue_map_categorical(self, wide_df, long_df, using_polars): p = VectorPlotter(data=wide_df) @@ -388,7 +390,10 @@ def test_plotter_reinit(self, long_df): assert p._size_map.lookup_table == dict(zip(size_order, sizes)) assert p._size_map.levels == size_order - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') + @pytest.mark.xfail( + os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1', + reason='different-length inputs not yet supported for non-pandas' + ) def test_size_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, size=null_series)) @@ -536,7 +541,10 @@ def test_plotter_reinit(self, long_df): assert p._style_map.levels == style_order assert p._style_map(style_order, "marker") == markers - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') + @pytest.mark.xfail( + os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1', + reason='different-length inputs not yet supported for non-pandas' + ) def test_style_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, style=null_series)) @@ -624,8 +632,7 @@ def test_map_style(self, long_df): class TestVectorPlotter: - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') - def test_flat_variables(self, flat_data): + def test_flat_variables(self, flat_data, using_polars): p = VectorPlotter() p.assign_variables(data=flat_data) @@ -814,7 +821,6 @@ def test_long_unmatched_size_error(self, long_df, flat_array): with pytest.raises(ValueError, match=err): VectorPlotter(data=long_df, variables={"x": "x", "hue": flat_array}) - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_wide_categorical_columns(self, wide_df): wide_df.columns = pd.CategoricalIndex(wide_df.columns) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 1600d7716e..4cada7d516 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,5 +1,4 @@ import itertools -import os import warnings import numpy as np @@ -197,7 +196,6 @@ def test_bivariate_data(self, long_df): self.assert_rug_equal(ax1.collections[0], ax2.collections[0]) self.assert_rug_equal(ax1.collections[1], ax2.collections[1]) - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_wide_vs_long_data(self, wide_df): f, (ax1, ax2) = plt.subplots(ncols=2) @@ -243,7 +241,6 @@ def test_a_deprecation(self, flat_series): self.assert_rug_equal(*ax.collections) @pytest.mark.parametrize("variable", ["x", "y"]) - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_axis_deprecation(self, flat_series, variable): f, ax = plt.subplots() @@ -393,7 +390,6 @@ def test_long_vectors(self, long_df, variable): for a, b in itertools.product(ydata, ydata): assert_array_equal(a, b) - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='polars does not have rename_axis') def test_wide_vs_long_data(self, wide_df): f, (ax1, ax2) = plt.subplots(ncols=2) diff --git a/tests/test_relational.py b/tests/test_relational.py index 9c5a573d2f..b18930a940 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -1,5 +1,4 @@ from itertools import product -import os import warnings import numpy as np @@ -90,7 +89,6 @@ def test_color(self, long_df): class TestRelationalPlotter(Helpers): - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='looks wrong?') def test_wide_df_variables(self, wide_df, using_polars): p = _RelationalPlotter() @@ -121,10 +119,14 @@ def test_wide_df_variables(self, wide_df, using_polars): expected_style = expected_hue assert_array_equal(style, expected_style) - assert p.variables["x"] == wide_df.index.name - assert p.variables["y"] is None - assert p.variables["hue"] == wide_df.columns.name - assert p.variables["style"] == wide_df.columns.name + if using_polars: + assert p.variables["x"] is None + assert p.variables["hue"] is None + assert p.variables["style"] is None + else: + assert p.variables["x"] == wide_df.index.name + assert p.variables["hue"] == wide_df.columns.name + assert p.variables["style"] == wide_df.columns.name def test_wide_df_with_nonnumeric_variables(self, long_df): @@ -252,7 +254,6 @@ def test_flat_series_variables(self, flat_series, using_polars): assert p.variables["x"] is flat_series.index.name assert p.variables["y"] is flat_series.name - @pytest.mark.xfail(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0')== '1', reason='looks wrong?') def test_wide_list_of_series_variables(self, wide_list_of_series, using_polars): p = _RelationalPlotter() @@ -281,7 +282,7 @@ def test_wide_list_of_series_variables(self, wide_list_of_series, using_polars): y = p.plot_data["y"] if using_polars: expected_y = np.concatenate([ - np.arange(len(index_union)) for s in wide_list_of_series + s.to_pandas().reindex(index_union) for s in wide_list_of_series ]) else: expected_y = np.concatenate([ From 3f32596317d5effbe710919205ac3cd3f71c62fc Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 15:47:47 +0100 Subject: [PATCH 13/32] reduce dependency to pandas 2.0.1 --- .github/workflows/ci.yaml | 3 +-- seaborn/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ea73181a62..717aec83de 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -111,8 +111,7 @@ jobs: - name: Install polars run: | pip install --upgrade pip wheel - pip uninstall pandas -y - pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas + pip install pandas>=2.0.1 pip install --upgrade polars pyarrow - name: Install seaborn diff --git a/seaborn/utils.py b/seaborn/utils.py index 5b550eb64a..b8f0ab3868 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -904,9 +904,9 @@ def try_convert_to_pandas(data: object | None) -> pd.DataFrame: elif isinstance(data, pd.DataFrame): return data elif hasattr(data, "__dataframe__"): - if _version_predates(pd, "2.0.2"): + if _version_predates(pd, "2.0.1"): raise RuntimeError( - "Plotting non-pandas DataFrames requires at least pandas '2.0.2'. " + "Plotting non-pandas DataFrames requires at least pandas '2.0.1'. " "Please upgrade pandas." ) return pd.api.interchange.from_dataframe(data) From f4f3317de2da5abab464192c4efc2fc4ed367923 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 19 May 2023 16:25:51 +0100 Subject: [PATCH 14/32] test that all load_dataset examples can actually interchange --- .github/workflows/ci.yaml | 4 +++- seaborn/utils.py | 4 ++-- tests/test_utils.py | 5 +++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 717aec83de..6626c09b60 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -111,7 +111,9 @@ jobs: - name: Install polars run: | pip install --upgrade pip wheel - pip install pandas>=2.0.1 + # Install pandas nightly (necessary for interchanging - remove once pandas 2.0.2 is released) + pip install --upgrade numpy + pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas pip install --upgrade polars pyarrow - name: Install seaborn diff --git a/seaborn/utils.py b/seaborn/utils.py index b8f0ab3868..5b550eb64a 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -904,9 +904,9 @@ def try_convert_to_pandas(data: object | None) -> pd.DataFrame: elif isinstance(data, pd.DataFrame): return data elif hasattr(data, "__dataframe__"): - if _version_predates(pd, "2.0.1"): + if _version_predates(pd, "2.0.2"): raise RuntimeError( - "Plotting non-pandas DataFrames requires at least pandas '2.0.1'. " + "Plotting non-pandas DataFrames requires at least pandas '2.0.2'. " "Please upgrade pandas." ) return pd.api.interchange.from_dataframe(data) diff --git a/tests/test_utils.py b/tests/test_utils.py index 28c836e999..143c02a0f6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,5 @@ """Tests for seaborn utility functions.""" +import os import re import tempfile from types import ModuleType @@ -432,6 +433,10 @@ def test_move_legend_input_checks(): def check_load_dataset(name): ds = load_dataset(name, cache=False) + if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': + # Check that the example datasets can actually be interchanged. + import polars as pl + ds = pd.api.interchange.from_dataframe(pl.from_pandas(ds)) assert isinstance(ds, pd.DataFrame) From 1103aa94682c7f3c568062b1b7022fad19f4809a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 09:14:54 +0100 Subject: [PATCH 15/32] better msg --- seaborn/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/seaborn/utils.py b/seaborn/utils.py index 5b550eb64a..8844b22310 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -905,9 +905,7 @@ def try_convert_to_pandas(data: object | None) -> pd.DataFrame: return data elif hasattr(data, "__dataframe__"): if _version_predates(pd, "2.0.2"): - raise RuntimeError( - "Plotting non-pandas DataFrames requires at least pandas '2.0.2'. " - "Please upgrade pandas." - ) + msg = "Interchanging to pandas requires at least pandas version '2.0.2'." + raise RuntimeError(msg) return pd.api.interchange.from_dataframe(data) return data From 338e1198680645ccaa995149251c1328335d6c00 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 09:30:01 +0100 Subject: [PATCH 16/32] coverage --- .github/workflows/ci.yaml | 4 ++-- ci/deps_pinned.txt | 1 + seaborn/utils.py | 9 ++++++--- tests/test_utils.py | 13 ++++++++----- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6626c09b60..c920dffdd8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -108,13 +108,13 @@ jobs: with: python-version: ${{ matrix.python }} - - name: Install polars + - name: Install pandas nightly and pyarrow run: | pip install --upgrade pip wheel # Install pandas nightly (necessary for interchanging - remove once pandas 2.0.2 is released) pip install --upgrade numpy pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas - pip install --upgrade polars pyarrow + pip install --upgrade pyarrow - name: Install seaborn run: | diff --git a/ci/deps_pinned.txt b/ci/deps_pinned.txt index 27a51b4043..b2a57ed576 100644 --- a/ci/deps_pinned.txt +++ b/ci/deps_pinned.txt @@ -1,5 +1,6 @@ numpy~=1.20.0 pandas~=1.2.0 +polars~=0.17.0 matplotlib~=3.3.0 scipy~=1.7.0 statsmodels~=0.12.0 diff --git a/seaborn/utils.py b/seaborn/utils.py index 8844b22310..3877fff18b 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -903,9 +903,12 @@ def try_convert_to_pandas(data: object | None) -> pd.DataFrame: return None elif isinstance(data, pd.DataFrame): return data + elif hasattr(data, "__dataframe__") and _version_predates(pd, "2.0.2"): + msg = ( + "Interchanging to pandas requires at least pandas version '2.0.2'. " + "Please upgrade pandas to at least version '2.0.2'." + ) + raise RuntimeError(msg) elif hasattr(data, "__dataframe__"): - if _version_predates(pd, "2.0.2"): - msg = "Interchanging to pandas requires at least pandas version '2.0.2'." - raise RuntimeError(msg) return pd.api.interchange.from_dataframe(data) return data diff --git a/tests/test_utils.py b/tests/test_utils.py index 143c02a0f6..ca4f9165b7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,5 @@ """Tests for seaborn utility functions.""" -import os +import polars as pl import re import tempfile from types import ModuleType @@ -433,11 +433,14 @@ def test_move_legend_input_checks(): def check_load_dataset(name): ds = load_dataset(name, cache=False) - if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': - # Check that the example datasets can actually be interchanged. - import polars as pl - ds = pd.api.interchange.from_dataframe(pl.from_pandas(ds)) assert isinstance(ds, pd.DataFrame) + # Check that the example datasets can actually be interchanged. + if _version_predates(pd, '2.0.2'): + with pytest.raises(RuntimeError, match='Please upgrade pandas'): + utils.try_convert_to_pandas(pl.from_pandas(ds)) + else: + ds = utils.try_convert_to_pandas(pl.from_pandas(ds)) + assert isinstance(ds, pd.DataFrame) def check_load_cached_dataset(name): From 5a44bedfdc0e0e54f4d1fbd0051120339f39ab0d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 09:41:46 +0100 Subject: [PATCH 17/32] pyarrow --- .github/workflows/ci.yaml | 3 +-- ci/deps_pinned.txt | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c920dffdd8..cdd94936fe 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -108,13 +108,12 @@ jobs: with: python-version: ${{ matrix.python }} - - name: Install pandas nightly and pyarrow + - name: Install pandas nightly run: | pip install --upgrade pip wheel # Install pandas nightly (necessary for interchanging - remove once pandas 2.0.2 is released) pip install --upgrade numpy pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas - pip install --upgrade pyarrow - name: Install seaborn run: | diff --git a/ci/deps_pinned.txt b/ci/deps_pinned.txt index b2a57ed576..f1c681018a 100644 --- a/ci/deps_pinned.txt +++ b/ci/deps_pinned.txt @@ -1,6 +1,7 @@ numpy~=1.20.0 pandas~=1.2.0 polars~=0.17.0 +pyarrow~=12.0.0 matplotlib~=3.3.0 scipy~=1.7.0 statsmodels~=0.12.0 From 73f35d5328be5e78a40a87461f531f9b5ccd5616 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 09:51:03 +0100 Subject: [PATCH 18/32] fix deps --- .github/workflows/ci.yaml | 3 ++- tests/test_utils.py | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cdd94936fe..b15a0b68b8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -108,12 +108,13 @@ jobs: with: python-version: ${{ matrix.python }} - - name: Install pandas nightly + - name: Install polars and pandas nightly run: | pip install --upgrade pip wheel # Install pandas nightly (necessary for interchanging - remove once pandas 2.0.2 is released) pip install --upgrade numpy pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas + pip install --upgrade polars - name: Install seaborn run: | diff --git a/tests/test_utils.py b/tests/test_utils.py index ca4f9165b7..ae74ecf4b0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,4 @@ """Tests for seaborn utility functions.""" -import polars as pl import re import tempfile from types import ModuleType @@ -435,12 +434,17 @@ def check_load_dataset(name): ds = load_dataset(name, cache=False) assert isinstance(ds, pd.DataFrame) # Check that the example datasets can actually be interchanged. - if _version_predates(pd, '2.0.2'): - with pytest.raises(RuntimeError, match='Please upgrade pandas'): - utils.try_convert_to_pandas(pl.from_pandas(ds)) + try: + import polars as pl + except ModuleNotFoundError: + pass else: - ds = utils.try_convert_to_pandas(pl.from_pandas(ds)) - assert isinstance(ds, pd.DataFrame) + if _version_predates(pd, '2.0.2'): + with pytest.raises(RuntimeError, match='Please upgrade pandas'): + utils.try_convert_to_pandas(pl.from_pandas(ds)) + else: + ds = utils.try_convert_to_pandas(pl.from_pandas(ds)) + assert isinstance(ds, pd.DataFrame) def check_load_cached_dataset(name): From 63c21eecdd29cd71a5965a7f15e746269236f583 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 10:02:01 +0100 Subject: [PATCH 19/32] gotta remember pyarrow --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b15a0b68b8..2c8b1bb083 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -114,7 +114,7 @@ jobs: # Install pandas nightly (necessary for interchanging - remove once pandas 2.0.2 is released) pip install --upgrade numpy pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas - pip install --upgrade polars + pip install --upgrade polars pyarrow - name: Install seaborn run: | From 0e9586fb8ac66f5ee766c26a2fe61b0b05919e18 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 12:15:41 +0100 Subject: [PATCH 20/32] wip --- doc/_tutorial/properties.ipynb | 4 +- seaborn/_core/plot.py | 6 +- seaborn/categorical.py | 3 +- tests/conftest.py | 17 ++- tests/test_categorical.py | 199 ++++++++++++++++++++++++--------- tests/test_core.py | 10 +- 6 files changed, 174 insertions(+), 65 deletions(-) diff --git a/doc/_tutorial/properties.ipynb b/doc/_tutorial/properties.ipynb index 70de0e9ea2..e2638742e5 100644 --- a/doc/_tutorial/properties.ipynb +++ b/doc/_tutorial/properties.ipynb @@ -1105,9 +1105,9 @@ ], "metadata": { "kernelspec": { - "display_name": "py310", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "py310" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index a73af9bfd8..2d25d44289 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -42,7 +42,7 @@ from seaborn._compat import set_scale_obj, set_layout_engine from seaborn.rcmod import axes_style, plotting_context from seaborn.palettes import color_palette -from seaborn.utils import _version_predates +from seaborn.utils import _version_predates, try_convert_to_pandas from typing import TYPE_CHECKING, TypedDict if TYPE_CHECKING: @@ -351,6 +351,10 @@ def _resolve_positionals( if data is not None: raise TypeError("`data` given by both name and position.") data, args = args[0], args[1:] + elif hasattr(args[0], '__dataframe__'): + if data is not None: + raise TypeError("`data` given by both name and position.") + data, args = try_convert_to_pandas(args[0]), args[1:] if len(args) == 2: x, y = args diff --git a/seaborn/categorical.py b/seaborn/categorical.py index 125b6657b1..2972813ba8 100644 --- a/seaborn/categorical.py +++ b/seaborn/categorical.py @@ -2490,7 +2490,7 @@ def stripplot( hue_norm=None, native_scale=False, formatter=None, legend="auto", ax=None, **kwargs ): - + data = utils.try_convert_to_pandas(data) p = _CategoricalPlotterNew( data=data, variables=_CategoricalPlotterNew.get_semantics(locals()), @@ -2618,6 +2618,7 @@ def swarmplot( ax=None, **kwargs ): + data = utils.try_convert_to_pandas(data) p = _CategoricalPlotterNew( data=data, variables=_CategoricalPlotterNew.get_semantics(locals()), diff --git a/tests/conftest.py b/tests/conftest.py index 0aa3caa8a8..b174306252 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -133,7 +133,7 @@ def wide_dict_of_lists(wide_list_of_series): @pytest.fixture -def long_df(rng): +def long_df(rng, using_polars): n = 100 df = pd.DataFrame(dict( @@ -156,6 +156,9 @@ def long_df(rng): df["s_cat"] = df["s"].astype("category") df["s_str"] = df["s"].astype(str) + if using_polars: + import polars as pl + return pl.from_pandas(df.drop('s_cat', axis=1)) return df @@ -178,13 +181,15 @@ def repeated_df(rng): @pytest.fixture -def null_df(rng, long_df): - - df = long_df.copy() +def null_df(rng, long_df, using_polars): + if using_polars: + df = long_df.to_pandas().copy() + else: + df = long_df.copy() for col in df: idx = rng.permutation(df.index)[:10] df.loc[idx, col] = np.nan - return df + return maybe_convert_to_polars(df) @pytest.fixture @@ -194,7 +199,7 @@ def object_df(rng, long_df): # objectify numeric columns for col in ["c", "s", "f"]: df[col] = df[col].astype(object) - return df + return maybe_convert_to_polars(df) @pytest.fixture diff --git a/tests/test_categorical.py b/tests/test_categorical.py index 5d40d22e8e..e9349d709a 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -540,7 +540,7 @@ def common_kws(self): return {} @pytest.mark.parametrize("orient", ["x", "y"]) - def test_labels_long(self, long_df, orient): + def test_labels_long(self, long_df, orient, using_polars): depend = {"x": "y", "y": "x"}[orient] kws = {orient: "a", depend: "y", "hue": "b"} @@ -555,14 +555,20 @@ def test_labels_long(self, long_df, orient): get_ori_labels = getattr(ax, f"get_{orient}ticklabels") ori_labels = [t.get_text() for t in get_ori_labels()] - ori_levels = categorical_order(long_df[kws[orient]]) + if using_polars: + ori_levels = categorical_order(long_df.to_pandas()[kws[orient]]) + else: + ori_levels = categorical_order(long_df[kws[orient]]) assert ori_labels == ori_levels legend = ax.get_legend() assert legend.get_title().get_text() == kws["hue"] hue_labels = [t.get_text() for t in legend.texts] - hue_levels = categorical_order(long_df[kws["hue"]]) + if using_polars: + hue_levels = categorical_order(long_df.to_pandas()[kws["hue"]]) + else: + hue_levels = categorical_order(long_df[kws["hue"]]) assert hue_labels == hue_levels def test_labels_wide(self, wide_df, using_polars): @@ -737,7 +743,7 @@ def test_flat(self, flat_series, orient): ({"val": "y", "cat": "s_cat", "hue": None}, None), ], ) - def test_positions(self, long_df, variables, orient): + def test_positions(self, long_df, variables, orient, using_polars): cat_var = variables["cat"] val_var = variables["val"] @@ -745,10 +751,16 @@ def test_positions(self, long_df, variables, orient): var_names = list(variables.values()) x_var, y_var, *_ = var_names + if using_polars and y_var == 's_cat': + return + ax = self.func( data=long_df, x=x_var, y=y_var, hue=hue_var, orient=orient, ) + if using_polars: + long_df = long_df.to_pandas() + _draw_figure(ax.figure) cat_idx = var_names.index(cat_var) @@ -785,7 +797,7 @@ def test_positions(self, long_df, variables, orient): {"cat": "a", "val": "y", "hue": "f"}, ], ) - def test_positions_dodged(self, long_df, variables): + def test_positions_dodged(self, long_df, variables, using_polars): cat_var = variables["cat"] val_var = variables["val"] @@ -797,6 +809,8 @@ def test_positions_dodged(self, long_df, variables): data=long_df, x=x_var, y=y_var, hue=hue_var, dodge=True, ) + if using_polars: + long_df = long_df.to_pandas() cat_vals = categorical_order(long_df[cat_var]) hue_vals = categorical_order(long_df[hue_var]) @@ -824,15 +838,21 @@ def test_positions_dodged(self, long_df, variables): assert 0 <= np.ptp(cat_pos) <= nest_width @pytest.mark.parametrize("cat_var", ["a", "s", "d"]) - def test_positions_unfixed(self, long_df, cat_var): + def test_positions_unfixed(self, long_df, cat_var, using_polars): - long_df = long_df.sort_values(cat_var) + if using_polars: + long_df = long_df.sort(cat_var) + else: + long_df = long_df.sort_values(cat_var) kws = dict(size=.001) if "stripplot" in str(self.func): # can't use __name__ with partial kws["jitter"] = False ax = self.func(data=long_df, x=cat_var, y="y", native_scale=True, **kws) + + if using_polars: + long_df = long_df.to_pandas() for i, (cat_level, cat_data) in enumerate(long_df.groupby(cat_var)): @@ -892,16 +912,19 @@ def test_order(self, x_type, order): assert not positions.size @pytest.mark.parametrize("hue_var", ["a", "b"]) - def test_hue_categorical(self, long_df, hue_var): + def test_hue_categorical(self, long_df, hue_var, using_polars): cat_var = "b" + pal_name = "muted" + ax = self.func(data=long_df, x=cat_var, y="y", hue=hue_var, palette=pal_name) + + if using_polars: + long_df = long_df.to_pandas() + hue_levels = categorical_order(long_df[hue_var]) cat_levels = categorical_order(long_df[cat_var]) - - pal_name = "muted" palette = dict(zip(hue_levels, color_palette(pal_name))) - ax = self.func(data=long_df, x=cat_var, y="y", hue=hue_var, palette=pal_name) for i, level in enumerate(cat_levels): @@ -917,9 +940,13 @@ def test_hue_categorical(self, long_df, hue_var): assert tuple(color) == to_rgba(palette[hue]) @pytest.mark.parametrize("hue_var", ["a", "b"]) - def test_hue_dodged(self, long_df, hue_var): + def test_hue_dodged(self, long_df, hue_var, using_polars): ax = self.func(data=long_df, x="y", y="a", hue=hue_var, dodge=True) + + if using_polars: + long_df = long_df.to_pandas() + colors = color_palette(n_colors=long_df[hue_var].nunique()) collections = iter(ax.collections) @@ -937,12 +964,15 @@ def test_hue_dodged(self, long_df, hue_var): "val_var,val_col,hue_col", list(itertools.product(["x", "y"], ["b", "y", "t"], [None, "a"])), ) - def test_single(self, long_df, val_var, val_col, hue_col): + def test_single(self, long_df, val_var, val_col, hue_col, using_polars): var_kws = {val_var: val_col, "hue": hue_col} ax = self.func(data=long_df, **var_kws) _draw_figure(ax.figure) + if using_polars: + long_df = long_df.to_pandas() + axis_vars = ["x", "y"] val_idx = axis_vars.index(val_var) cat_idx = int(not val_idx) @@ -1003,9 +1033,13 @@ def test_three_points(self): for point_color in ax.collections[0].get_facecolor(): assert tuple(point_color) == to_rgba("C0") - def test_legend_categorical(self, long_df): + def test_legend_categorical(self, long_df, using_polars): ax = self.func(data=long_df, x="y", y="a", hue="b") + + if using_polars: + long_df = long_df.to_pandas() + legend_texts = [t.get_text() for t in ax.legend_.texts] expected = categorical_order(long_df["b"]) assert legend_texts == expected @@ -1021,18 +1055,22 @@ def test_legend_disabled(self, long_df): ax = self.func(data=long_df, x="y", y="a", hue="b", legend=False) assert ax.legend_ is None - def test_palette_from_color_deprecation(self, long_df): + def test_palette_from_color_deprecation(self, long_df, using_polars): color = (.9, .4, .5) hex_color = mpl.colors.to_hex(color) hue_var = "a" - n_hue = long_df[hue_var].nunique() - palette = color_palette(f"dark:{hex_color}", n_hue) with pytest.warns(FutureWarning, match="Setting a gradient palette"): ax = self.func(data=long_df, x="z", hue=hue_var, color=color) + if using_polars: + long_df = long_df.to_pandas() + + n_hue = long_df[hue_var].nunique() + palette = color_palette(f"dark:{hex_color}", n_hue) + points = ax.collections[0] for point_color in points.get_facecolors(): assert to_rgb(point_color) in palette @@ -1156,7 +1194,7 @@ def test_jitter_unfixed(self, long_df): "orient,jitter", itertools.product(["v", "h"], [True, .1]), ) - def test_jitter(self, long_df, orient, jitter): + def test_jitter(self, long_df, orient, jitter, using_polars): cat_var, val_var = "a", "y" if orient == "x": @@ -1166,12 +1204,15 @@ def test_jitter(self, long_df, orient, jitter): x_var, y_var = val_var, cat_var cat_idx, val_idx = 1, 0 - cat_vals = categorical_order(long_df[cat_var]) ax = stripplot( data=long_df, x=x_var, y=y_var, jitter=jitter, ) + if using_polars: + long_df = long_df.to_pandas() + cat_vals = categorical_order(long_df[cat_var]) + if jitter is True: jitter_range = .4 else: @@ -1300,10 +1341,12 @@ def test_wide_data(self, wide_df, orient, using_polars): self.check_whiskers(bxp[i], wide_df[col], orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_grouped(self, long_df, orient): + def test_grouped(self, long_df, orient, using_polars): value = {"x": "y", "y": "x"}[orient] ax = boxplot(long_df, **{orient: "a", value: "z"}) + if using_polars: + long_df = long_df.to_pandas() bxp, = ax.containers levels = categorical_order(long_df["a"]) for i, level in enumerate(levels): @@ -1312,10 +1355,12 @@ def test_grouped(self, long_df, orient): self.check_whiskers(bxp[i], data, orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_hue_grouped(self, long_df, orient): + def test_hue_grouped(self, long_df, orient, using_polars): value = {"x": "y", "y": "x"}[orient] ax = boxplot(long_df, hue="c", **{orient: "a", value: "z"}) + if using_polars: + long_df = long_df.to_pandas() for i, hue_level in enumerate(categorical_order(long_df["c"])): bxp = ax.containers[i] for j, level in enumerate(categorical_order(long_df["a"])): @@ -1326,11 +1371,17 @@ def test_hue_grouped(self, long_df, orient): self.check_box(bxp[j], data, orient, pos, width) self.check_whiskers(bxp[j], data, orient, pos, capsize) - def test_hue_not_dodged(self, long_df): + def test_hue_not_dodged(self, long_df, using_polars): - levels = categorical_order(long_df["b"]) - hue = long_df["b"].isin(levels[:2]) + if using_polars: + levels = categorical_order(long_df.to_pandas()["b"]) + hue = long_df["b"].is_in(levels[:2]) + else: + levels = categorical_order(long_df["b"]) + hue = long_df["b"].isin(levels[:2]) ax = boxplot(long_df, x="b", y="z", hue=hue) + if using_polars: + long_df = long_df.to_pandas() bxps = ax.containers for i, level in enumerate(levels): idx = int(i < 2) @@ -1338,7 +1389,7 @@ def test_hue_not_dodged(self, long_df): self.check_box(bxps[idx][i % 2], data, "x", i) self.check_whiskers(bxps[idx][i % 2], data, "x", i) - def test_dodge_native_scale(self, long_df): + def test_dodge_native_scale(self, long_df, using_polars): centers = categorical_order(long_df["s"]) hue_levels = categorical_order(long_df["c"]) @@ -1346,6 +1397,8 @@ def test_dodge_native_scale(self, long_df): width = 0.8 * spacing / len(hue_levels) offset = width / len(hue_levels) ax = boxplot(long_df, x="s", y="z", hue="c", native_scale=True) + if using_polars: + long_df = long_df.to_pandas() for i, hue_level in enumerate(hue_levels): bxp = ax.containers[i] for j, center in enumerate(centers): @@ -1432,9 +1485,11 @@ def test_whis(self, long_df): bxp = ax.containers[0][0] self.check_whiskers(bxp, data, "y", 0, whis=2) - def test_gap(self, long_df): + def test_gap(self, long_df, using_polars): ax = boxplot(long_df, x="a", y="z", hue="c", gap=.1) + if using_polars: + long_df = long_df.to_pandas() for i, hue_level in enumerate(categorical_order(long_df["c"])): bxp = ax.containers[i] for j, level in enumerate(categorical_order(long_df["a"])): @@ -1565,20 +1620,24 @@ def test_wide_data(self, wide_df, orient): self.check_violin(poly, wide_df[col], orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_grouped(self, long_df, orient): + def test_grouped(self, long_df, orient, using_polars): value = {"x": "y", "y": "x"}[orient] ax = violinplot(long_df, **{orient: "a", value: "z"}, cut=0) + if using_polars: + long_df = long_df.to_pandas() levels = categorical_order(long_df["a"]) for i, level in enumerate(levels): data = long_df.loc[long_df["a"] == level, "z"] self.check_violin(ax.collections[i], data, orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_hue_grouped(self, long_df, orient): + def test_hue_grouped(self, long_df, orient, using_polars): value = {"x": "y", "y": "x"}[orient] ax = violinplot(long_df, hue="c", **{orient: "a", value: "z"}, cut=0) + if using_polars: + long_df = long_df.to_pandas() polys = iter(ax.collections) for i, level in enumerate(categorical_order(long_df["a"])): for j, hue_level in enumerate(categorical_order(long_df["c"])): @@ -1588,17 +1647,23 @@ def test_hue_grouped(self, long_df, orient): width = 0.4 self.check_violin(next(polys), data, orient, pos, width) - def test_hue_not_dodged(self, long_df): + def test_hue_not_dodged(self, long_df, using_polars): - levels = categorical_order(long_df["b"]) - hue = long_df["b"].isin(levels[:2]) + if using_polars: + levels = categorical_order(long_df.to_pandas()["b"]) + hue = long_df["b"].is_in(levels[:2]) + else: + levels = categorical_order(long_df["b"]) + hue = long_df["b"].isin(levels[:2]) ax = violinplot(long_df, x="b", y="z", hue=hue, cut=0) + if using_polars: + long_df = long_df.to_pandas() for i, level in enumerate(levels): poly = ax.collections[i] data = long_df.loc[long_df["b"] == level, "z"] self.check_violin(poly, data, "x", i) - def test_dodge_native_scale(self, long_df): + def test_dodge_native_scale(self, long_df, using_polars): centers = categorical_order(long_df["s"]) hue_levels = categorical_order(long_df["c"]) @@ -1606,6 +1671,8 @@ def test_dodge_native_scale(self, long_df): width = 0.8 * spacing / len(hue_levels) offset = width / len(hue_levels) ax = violinplot(long_df, x="s", y="z", hue="c", native_scale=True, cut=0) + if using_polars: + long_df = long_df.to_pandas() violins = iter(ax.collections) for center in centers: for i, hue_level in enumerate(hue_levels): @@ -1615,13 +1682,15 @@ def test_dodge_native_scale(self, long_df): poly = next(violins) self.check_violin(poly, data, "x", pos, width) - def test_dodge_native_scale_log(self, long_df): + def test_dodge_native_scale_log(self, long_df, using_polars): pos = 10 ** long_df["s"] ax = mpl.figure.Figure().subplots() ax.set_xscale("log") variables = dict(x=pos, y="z", hue="c") violinplot(long_df, **variables, native_scale=True, density_norm="width", ax=ax) + if using_polars: + long_df = long_df.to_pandas() widths = [] n_violins = long_df["s"].nunique() * long_df["c"].nunique() for poly in ax.collections[:n_violins]: @@ -1637,9 +1706,11 @@ def test_color(self, long_df): for poly in ax.collections: assert same_color(poly.get_facecolor(), color) - def test_hue_colors(self, long_df): + def test_hue_colors(self, long_df, using_polars): ax = violinplot(long_df, x="a", y="y", hue="b", saturation=1) + if using_polars: + long_df = long_df.to_pandas() n_levels = long_df["b"].nunique() for i, poly in enumerate(ax.collections): assert same_color(poly.get_facecolor(), f"C{i % n_levels}") @@ -1718,28 +1789,34 @@ def test_inner_quartiles(self, long_df, orient): assert pts[0, pos_idx] == -pts[1, pos_idx] @pytest.mark.parametrize("orient", ["x", "y"]) - def test_inner_stick(self, long_df, orient): + def test_inner_stick(self, long_df, orient, using_polars): pos_idx, val_idx = self.orient_indices(orient) ax = violinplot(long_df["y"], orient=orient, inner="stick") + if using_polars: + long_df = long_df.to_pandas() for i, pts in enumerate(ax.collections[1].get_segments()): for pt in pts: assert pt[val_idx] == long_df["y"].iloc[i] assert pts[0, pos_idx] == -pts[1, pos_idx] @pytest.mark.parametrize("orient", ["x", "y"]) - def test_inner_points(self, long_df, orient): + def test_inner_points(self, long_df, orient, using_polars): pos_idx, val_idx = self.orient_indices(orient) ax = violinplot(long_df["y"], orient=orient, inner="points") + if using_polars: + long_df = long_df.to_pandas() points = ax.collections[1] for i, pt in enumerate(points.get_offsets()): assert pt[val_idx] == long_df["y"].iloc[i] assert pt[pos_idx] == 0 - def test_split_single(self, long_df): + def test_split_single(self, long_df, using_polars): ax = violinplot(long_df, x="a", y="z", split=True, cut=0) + if using_polars: + long_df = long_df.to_pandas() levels = categorical_order(long_df["a"]) for i, level in enumerate(levels): data = long_df.loc[long_df["a"] == level, "z"] @@ -1747,9 +1824,11 @@ def test_split_single(self, long_df): verts = ax.collections[i].get_paths()[0].vertices assert np.isclose(verts[:, 0], i + .4).sum() >= 100 - def test_split_multi(self, long_df): + def test_split_multi(self, long_df, using_polars): ax = violinplot(long_df, x="a", y="z", hue="c", split=True, cut=0) + if using_polars: + long_df = long_df.to_pandas() polys = iter(ax.collections) for i, level in enumerate(categorical_order(long_df["a"])): for j, hue_level in enumerate(categorical_order(long_df["c"])): @@ -2149,32 +2228,38 @@ def test_native_scale_log_transform_dodged(self): for x_i, bar in zip(x[2:], ax.patches[2:]): assert bar.get_x() == approx(x_i) - def test_estimate_default(self, long_df): + def test_estimate_default(self, long_df, using_polars): agg_var, val_var = "a", "y" - agg_df = long_df.groupby(agg_var)[val_var].mean() ax = barplot(long_df, x=agg_var, y=val_var, errorbar=None) + if using_polars: + long_df = long_df.to_pandas() + agg_df = long_df.groupby(agg_var)[val_var].mean() order = categorical_order(long_df[agg_var]) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) - def test_estimate_string(self, long_df): + def test_estimate_string(self, long_df, using_polars): agg_var, val_var = "a", "y" - agg_df = long_df.groupby(agg_var)[val_var].median() ax = barplot(long_df, x=agg_var, y=val_var, estimator="median", errorbar=None) + if using_polars: + long_df = long_df.to_pandas() + agg_df = long_df.groupby(agg_var)[val_var].median() order = categorical_order(long_df[agg_var]) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) - def test_estimate_func(self, long_df): + def test_estimate_func(self, long_df, using_polars): agg_var, val_var = "a", "y" - agg_df = long_df.groupby(agg_var)[val_var].median() ax = barplot(long_df, x=agg_var, y=val_var, estimator=np.median, errorbar=None) + if using_polars: + long_df = long_df.to_pandas() + agg_df = long_df.groupby(agg_var)[val_var].median() order = categorical_order(long_df[agg_var]) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) @@ -2187,12 +2272,14 @@ def test_estimate_log_transform(self, long_df): bar, = ax.patches assert bar.get_width() == 10 ** np.log10(long_df["z"]).mean() - def test_errorbars(self, long_df): + def test_errorbars(self, long_df, using_polars): agg_var, val_var = "a", "y" - agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) ax = barplot(long_df, x=agg_var, y=val_var, errorbar="sd") + if using_polars: + long_df = long_df.to_pandas() + agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) order = categorical_order(long_df[agg_var]) for i, line in enumerate(ax.lines): row = agg_df.loc[order[i]] @@ -2501,12 +2588,14 @@ def test_xy_native_scale(self): assert_array_equal(line.get_ydata(), y) @pytest.mark.parametrize("estimator", ["mean", np.mean]) - def test_estimate(self, long_df, estimator): + def test_estimate(self, long_df, estimator, using_polars): agg_var, val_var = "a", "y" - agg_df = long_df.groupby(agg_var)[val_var].agg(estimator) ax = pointplot(long_df, x=agg_var, y=val_var, errorbar=None) + if using_polars: + long_df = long_df.to_pandas() + agg_df = long_df.groupby(agg_var)[val_var].agg(estimator) order = categorical_order(long_df[agg_var]) for i, xy in enumerate(ax.lines[0].get_xydata()): assert tuple(xy) == approx((i, agg_df[order[i]])) @@ -2519,12 +2608,14 @@ def test_estimate_log_transform(self, long_df): val, = ax.lines[0].get_xdata() assert val == 10 ** np.log10(long_df["z"]).mean() - def test_errorbars(self, long_df): + def test_errorbars(self, long_df, using_polars): agg_var, val_var = "a", "y" - agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) ax = pointplot(long_df, x=agg_var, y=val_var, errorbar="sd") + if using_polars: + long_df = long_df.to_pandas() + agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) order = categorical_order(long_df[agg_var]) for i, line in enumerate(ax.lines[1:]): row = agg_df.loc[order[i]] @@ -2845,14 +2936,16 @@ def test_hue_dodged(self): assert same_color(bar.get_facecolor(), f"C{i // 2}") @pytest.mark.parametrize("stat", ["percent", "probability", "proportion"]) - def test_stat(self, long_df, stat): + def test_stat(self, long_df, stat, using_polars): col = "a" + ax = countplot(long_df, x=col, stat=stat) + if using_polars: + long_df = long_df.to_pandas() order = categorical_order(long_df[col]) expected = long_df[col].value_counts(normalize=True) if stat == "percent": expected *= 100 - ax = countplot(long_df, x=col, stat=stat) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(expected[order[i]]) @@ -3592,7 +3685,7 @@ def test_beeswarm(self, long_df): p = Beeswarm() data = long_df["y"] d = data.diff().mean() * 1.5 - x = np.zeros(data.size) + x = np.zeros(len(data)) y = np.sort(data) r = np.full_like(y, d) orig_xyr = np.c_[x, y, r] diff --git a/tests/test_core.py b/tests/test_core.py index 42e6bc3a42..6ace618677 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -172,13 +172,19 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): # Test long data p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="a")) m = HueMapping(p) - assert m.levels == categorical_order(long_df["a"]) + if using_polars: + assert m.levels == categorical_order(long_df.to_pandas()["a"]) + else: + assert m.levels == categorical_order(long_df["a"]) assert m.map_type == "categorical" assert m.cmap is None # Test default palette m = HueMapping(p) - hue_levels = categorical_order(long_df["a"]) + if using_polars: + hue_levels = categorical_order(long_df.to_pandas()["a"]) + else: + hue_levels = categorical_order(long_df["a"]) expected_colors = color_palette(n_colors=len(hue_levels)) expected_lookup_table = dict(zip(hue_levels, expected_colors)) assert m.lookup_table == expected_lookup_table From 9f1927fae63dfeaac00b177e5a9acb84485be1cf Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 12:19:19 +0100 Subject: [PATCH 21/32] wip --- tests/test_core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 6ace618677..08bd9fc449 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -209,8 +209,13 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): assert m.map_type == "categorical" for val in [0, 1]: + if using_polars: + import polars as pl + data = long_df.filter(pl.col('c')==val) + else: + data = long_df[long_df["c"]] p = VectorPlotter( - data=long_df[long_df["c"] == val], + data=data, variables=dict(x="x", y="y", hue="c"), ) m = HueMapping(p) From 9c4aedba78f9085cfaf2a7b109fa988c4b6c7669 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 13:20:00 +0100 Subject: [PATCH 22/32] wip --- seaborn/_oldcore.py | 2 + seaborn/_statistics.py | 3 +- tests/_core/test_data.py | 30 +++++++-- tests/conftest.py | 8 ++- tests/test_core.py | 92 +++++++++++++++++--------- tests/test_distributions.py | 73 +++++++++++++++----- tests/test_relational.py | 128 ++++++++++++++++++++++++++---------- tests/test_statistics.py | 20 ++++-- 8 files changed, 257 insertions(+), 99 deletions(-) diff --git a/seaborn/_oldcore.py b/seaborn/_oldcore.py index 390a64fc0f..506561cd6b 100644 --- a/seaborn/_oldcore.py +++ b/seaborn/_oldcore.py @@ -896,6 +896,8 @@ def _assign_variables_longform(self, data=None, **kwargs): # Data is optional; all variables can be defined as vectors if data is None: data = {} + else: + data = try_convert_to_pandas(data) # TODO should we try a data.to_dict() or similar here to more # generally accept objects with that interface? diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py index ea9c15d26a..5fd08a0799 100644 --- a/seaborn/_statistics.py +++ b/seaborn/_statistics.py @@ -35,7 +35,7 @@ class instantiation. _no_scipy = True from .algorithms import bootstrap -from .utils import _check_argument +from .utils import _check_argument, try_convert_to_pandas class KDE: @@ -481,6 +481,7 @@ def __init__(self, estimator, errorbar=None, **boot_kws): def __call__(self, data, var): """Aggregate over `var` column of `data` with estimate and error interval.""" + data = try_convert_to_pandas(data) vals = data[var] if callable(self.estimator): # You would think we could pass to vals.agg, and yet: diff --git a/tests/_core/test_data.py b/tests/_core/test_data.py index aeffc9ed7d..973d0e4668 100644 --- a/tests/_core/test_data.py +++ b/tests/_core/test_data.py @@ -19,7 +19,10 @@ def long_variables(self): variables = dict(x="x", y="y", color="a", size="z", style="s_cat") return variables - def test_named_vectors(self, long_df, long_variables): + def test_named_vectors(self, long_df, long_variables, using_polars): + if using_polars: + # no s_cat + return p = PlotData(long_df, long_variables) assert p.source_data is long_df @@ -28,7 +31,10 @@ def test_named_vectors(self, long_df, long_variables): assert p.names[key] == val assert_vector_equal(p.frame[key], long_df[val]) - def test_named_and_given_vectors(self, long_df, long_variables): + def test_named_and_given_vectors(self, long_df, long_variables, using_polars): + if using_polars: + # no s_cat + return long_variables["y"] = long_df["b"] long_variables["size"] = long_df["z"].to_numpy() @@ -47,7 +53,10 @@ def test_named_and_given_vectors(self, long_df, long_variables): assert p.ids["y"] == "b" assert p.ids["size"] == id(long_variables["size"]) - def test_index_as_variable(self, long_df, long_variables): + def test_index_as_variable(self, long_df, long_variables, using_polars): + if using_polars: + # no index + return index = pd.Index(np.arange(len(long_df)) * 2 + 10, name="i", dtype=int) long_variables["x"] = "i" @@ -56,7 +65,10 @@ def test_index_as_variable(self, long_df, long_variables): assert p.names["x"] == p.ids["x"] == "i" assert_vector_equal(p.frame["x"], pd.Series(index, index)) - def test_multiindex_as_variables(self, long_df, long_variables): + def test_multiindex_as_variables(self, long_df, long_variables, using_polars): + if using_polars: + # no index + return index_i = pd.Index(np.arange(len(long_df)) * 2 + 10, name="i", dtype=int) index_j = pd.Index(np.arange(len(long_df)) * 3 + 5, name="j", dtype=int) @@ -96,7 +108,10 @@ def test_tuple_as_variable_key(self, rng): assert_vector_equal(p.frame[var], df[key]) assert p.names[var] == p.ids[var] == str(key) - def test_dict_as_data(self, long_dict, long_variables): + def test_dict_as_data(self, long_dict, long_variables, using_polars): + if using_polars: + # no s_cat + return p = PlotData(long_dict, long_variables) assert p.source_data is long_dict @@ -107,7 +122,10 @@ def test_dict_as_data(self, long_dict, long_variables): "vector_type", ["series", "numpy", "list"], ) - def test_vectors_various_types(self, long_df, long_variables, vector_type): + def test_vectors_various_types(self, long_df, long_variables, vector_type, using_polars): + if using_polars: + # no s_cat + return variables = {key: long_df[val] for key, val in long_variables.items()} if vector_type == "numpy": diff --git a/tests/conftest.py b/tests/conftest.py index b174306252..0bef4859f0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -193,9 +193,11 @@ def null_df(rng, long_df, using_polars): @pytest.fixture -def object_df(rng, long_df): - - df = long_df.copy() +def object_df(rng, long_df, using_polars): + if using_polars: + df = long_df.to_pandas().copy() + else: + df = long_df.copy() # objectify numeric columns for col in ["c", "s", "f"]: df[col] = df[col].astype(object) diff --git a/tests/test_core.py b/tests/test_core.py index 08bd9fc449..c703b7f822 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -225,24 +225,31 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): # Test Timestamp data p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="t")) m = HueMapping(p) - assert m.levels == [pd.Timestamp(t) for t in long_df["t"].unique()] + if using_polars: + assert m.levels == [pd.Timestamp(t) for t in long_df.to_pandas()["t"].unique()] + else: + assert m.levels == [pd.Timestamp(t) for t in long_df["t"].unique()] assert m.map_type == "datetime" # Test explicit categories p = VectorPlotter(data=long_df, variables=dict(x="x", hue="a_cat")) m = HueMapping(p) - assert m.levels == long_df["a_cat"].cat.categories.to_list() + if using_polars: + assert m.levels == long_df.to_pandas()["a_cat"].cat.categories.to_list() + else: + assert m.levels == long_df["a_cat"].cat.categories.to_list() assert m.map_type == "categorical" # Test numeric data with category type - p = VectorPlotter( - data=long_df, - variables=dict(x="x", y="y", hue="s_cat") - ) - m = HueMapping(p) - assert m.levels == categorical_order(long_df["s_cat"]) - assert m.map_type == "categorical" - assert m.cmap is None + if not using_polars: + p = VectorPlotter( + data=long_df, + variables=dict(x="x", y="y", hue="s_cat") + ) + m = HueMapping(p) + assert m.levels == categorical_order(long_df["s_cat"]) + assert m.map_type == "categorical" + assert m.cmap is None # Test categorical palette specified for numeric data p = VectorPlotter( @@ -345,9 +352,11 @@ def test_hue_map_without_hue_dataa(self, long_df): with pytest.warns(UserWarning, match="Ignoring `palette`"): HueMapping(p, palette="viridis") - def test_saturation(self, long_df): + def test_saturation(self, long_df, using_polars): p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="a")) + if using_polars: + long_df = long_df.to_pandas() levels = categorical_order(long_df["a"]) palette = color_palette("viridis", len(levels)) saturation = 0.8 @@ -459,7 +468,7 @@ def test_map_size_numeric(self, long_df): with pytest.raises(ValueError): SizeMapping(p, norm="bad_norm") - def test_map_size_categorical(self, long_df): + def test_map_size_categorical(self, long_df, using_polars): p = VectorPlotter( data=long_df, @@ -493,7 +502,10 @@ def test_map_size_categorical(self, long_df): # Test explicit categories p = VectorPlotter(data=long_df, variables=dict(x="x", size="a_cat")) m = SizeMapping(p) - assert m.levels == long_df["a_cat"].cat.categories.to_list() + if using_polars: + assert m.levels == long_df.to_pandas()["a_cat"].cat.categories.to_list() + else: + assert m.levels == long_df["a_cat"].cat.categories.to_list() assert m.map_type == "categorical" # Test sizes list with wrong length @@ -564,7 +576,7 @@ def test_style_map_null(self, flat_series, null_series): assert m.map_type is None assert m.lookup_table is None - def test_map_style(self, long_df): + def test_map_style(self, long_df, using_polars): p = VectorPlotter( data=long_df, @@ -606,6 +618,8 @@ def test_map_style(self, long_df): # Test explicit categories p = VectorPlotter(data=long_df, variables=dict(x="x", style="a_cat")) m = StyleMapping(p) + if using_polars: + long_df = long_df.to_pandas() assert m.levels == long_df["a_cat"].cat.categories.to_list() # Test style order with defaults @@ -680,11 +694,14 @@ def test_long_df(self, long_df, long_variables): for key, val in long_variables.items(): assert_array_equal(p.plot_data[key], long_df[val]) - def test_long_df_with_index(self, long_df, long_variables): + def test_long_df_with_index(self, long_df, long_variables, using_polars): p = VectorPlotter() + if using_polars: + # no index + return p.assign_variables( - data=long_df.set_index("a"), + data=long_df.set_index('a'), variables=long_variables, ) assert p.input_format == "long" @@ -693,7 +710,10 @@ def test_long_df_with_index(self, long_df, long_variables): for key, val in long_variables.items(): assert_array_equal(p.plot_data[key], long_df[val]) - def test_long_df_with_multiindex(self, long_df, long_variables): + def test_long_df_with_multiindex(self, long_df, long_variables, using_polars): + if using_polars: + # no index (let along multiindex) + return p = VectorPlotter() p.assign_variables( @@ -785,8 +805,11 @@ def test_units(self, repeated_df): assert_array_equal(p.plot_data["units"], repeated_df["u"]) @pytest.mark.parametrize("name", [3, 4.5]) - def test_long_numeric_name(self, long_df, name): + def test_long_numeric_name(self, long_df, name, using_polars): + if using_polars: + # Only string names allowed + return long_df[name] = long_df["x"] p = VectorPlotter() p.assign_variables(data=long_df, variables={"x": name}) @@ -888,7 +911,7 @@ def test_iter_data_quantitites(self, long_df): out = p.iter_data(["hue"]) assert len(list(out)) == n_subsets - n_subsets = len(set(list(map(tuple, long_df[[var1, var2]].values)))) + n_subsets = len(set(list(map(tuple, long_df[[var1, var2]].to_numpy())))) p = VectorPlotter( data=long_df, @@ -908,7 +931,7 @@ def test_iter_data_quantitites(self, long_df): var1, var2, var3 = "a", "s", "b" cols = [var1, var2, var3] - n_subsets = len(set(list(map(tuple, long_df[cols].values)))) + n_subsets = len(set(list(map(tuple, long_df[cols].to_numpy())))) p = VectorPlotter( data=long_df, @@ -938,7 +961,7 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data("hue"): assert list(sub_vars) == ["hue"] - assert sub_vars["hue"] in long_df[var].values + assert sub_vars["hue"] in long_df[var].to_numpy() p = VectorPlotter( data=long_df, @@ -946,7 +969,7 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data("size"): assert list(sub_vars) == ["size"] - assert sub_vars["size"] in long_df[var].values + assert sub_vars["size"] in long_df[var].to_numpy() p = VectorPlotter( data=long_df, @@ -954,8 +977,8 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data(semantics): assert list(sub_vars) == ["hue", "style"] - assert sub_vars["hue"] in long_df[var].values - assert sub_vars["style"] in long_df[var].values + assert sub_vars["hue"] in long_df[var].to_numpy() + assert sub_vars["style"] in long_df[var].to_numpy() assert sub_vars["hue"] == sub_vars["style"] var1, var2 = "a", "s" @@ -966,8 +989,8 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data(semantics): assert list(sub_vars) == ["hue", "size"] - assert sub_vars["hue"] in long_df[var1].values - assert sub_vars["size"] in long_df[var2].values + assert sub_vars["hue"] in long_df[var1].to_numpy() + assert sub_vars["size"] in long_df[var2].to_numpy() semantics = ["hue", "col", "row"] p = VectorPlotter( @@ -976,8 +999,8 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data("hue"): assert list(sub_vars) == ["hue", "col"] - assert sub_vars["hue"] in long_df[var1].values - assert sub_vars["col"] in long_df[var2].values + assert sub_vars["hue"] in long_df[var1].to_numpy() + assert sub_vars["col"] in long_df[var2].to_numpy() def test_iter_data_values(self, long_df): @@ -1008,14 +1031,16 @@ def test_iter_data_values(self, long_df): rows &= p.plot_data["size"] == sub_vars["size"] assert_frame_equal(sub_data, p.plot_data[rows]) - def test_iter_data_reverse(self, long_df): + def test_iter_data_reverse(self, long_df, using_polars): - reversed_order = categorical_order(long_df["a"])[::-1] p = VectorPlotter( data=long_df, variables=dict(x="x", y="y", hue="a") ) iterator = p.iter_data("hue", reverse=True) + if using_polars: + long_df = long_df.to_pandas() + reversed_order = categorical_order(long_df["a"])[::-1] for i, (sub_vars, _) in enumerate(iterator): assert sub_vars["hue"] == reversed_order[i] @@ -1408,7 +1433,7 @@ def test_scale_datetime(self, long_df): with pytest.raises(NotImplementedError): p.scale_datetime("x") - def test_scale_categorical(self, long_df): + def test_scale_categorical(self, long_df, using_polars): p = VectorPlotter(data=long_df, variables={"x": "x"}) p.scale_categorical("y") @@ -1427,7 +1452,10 @@ def test_scale_categorical(self, long_df): p = VectorPlotter(data=long_df, variables={"x": "a"}) p.scale_categorical("x") assert not p._var_ordered["x"] - assert_array_equal(p.var_levels["x"], categorical_order(long_df["a"])) + if using_polars: + assert_array_equal(p.var_levels["x"], categorical_order(long_df.to_pandas()["a"])) + else: + assert_array_equal(p.var_levels["x"], categorical_order(long_df["a"])) p = VectorPlotter(data=long_df, variables={"x": "a_cat"}) p.scale_categorical("x") diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 4cada7d516..8786369d8f 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -272,9 +272,11 @@ def test_rug_data(self, flat_array): assert_array_equal(segments[:, 1, 1], np.full(n, height)) assert_array_equal(segments[:, 1, 0], flat_array) - def test_rug_colors(self, long_df): + def test_rug_colors(self, long_df, using_polars): ax = rugplot(data=long_df, x="x", hue="a") + if using_polars: + long_df = long_df.to_pandas() order = categorical_order(long_df["a"]) palette = color_palette() @@ -703,7 +705,7 @@ def test_common_norm(self, long_df): xdata, ydata = line.get_xydata().T assert integrate(ydata, xdata) == pytest.approx(1) - def test_common_grid(self, long_df): + def test_common_grid(self, long_df, using_polars): f, (ax1, ax2) = plt.subplots(ncols=2) @@ -717,6 +719,8 @@ def test_common_grid(self, long_df): data=long_df, x="x", hue="a", hue_order=order, common_grid=True, cut=0, ax=ax2, ) + if using_polars: + long_df = long_df.to_pandas() for line, level in zip(ax1.lines[::-1], order): xdata = line.get_xdata() @@ -892,14 +896,17 @@ def test_axis_labels(self, long_df): assert ax2.get_xlabel() == "Density" assert ax2.get_ylabel() == "y" - def test_legend(self, long_df): + def test_legend(self, long_df, using_polars): ax = kdeplot(data=long_df, x="x", hue="a") assert ax.legend_.get_title().get_text() == "a" legend_labels = ax.legend_.get_texts() - order = categorical_order(long_df["a"]) + if using_polars: + order = categorical_order(long_df.to_pandas()["a"]) + else: + order = categorical_order(long_df["a"]) for label, level in zip(legend_labels, order): assert label.get_text() == level @@ -1433,9 +1440,11 @@ def test_unique_bins(self, wide_df): assert_array_almost_equal(start, wide_df[col].min()) assert_array_almost_equal(stop, wide_df[col].max()) - def test_weights_with_missing(self, null_df): + def test_weights_with_missing(self, null_df, using_polars): ax = histplot(null_df, x="x", weights="s", bins=5) + if using_polars: + null_df = null_df.to_pandas() bar_heights = [bar.get_height() for bar in ax.patches] total_weight = null_df[["x", "s"]].dropna()["s"].sum() @@ -2067,7 +2076,7 @@ def test_color_limits(self, long_df): (counts <= f(counts, pthresh)).T.flat, ) - def test_hue_color_limits(self, long_df): + def test_hue_color_limits(self, long_df, using_polars): _, (ax1, ax2, ax3, ax4) = plt.subplots(4) kws = dict(data=long_df, x="x", y="y", hue="c", bins=4) @@ -2077,7 +2086,11 @@ def test_hue_color_limits(self, long_df): full_counts, _ = hist(long_df["x"], long_df["y"]) sub_counts = [] - for _, sub_df in long_df.groupby(kws["hue"]): + if using_polars: + group_by = long_df.groupby(kws['hue'], maintain_order=True) + else: + group_by = long_df.groupby(kws['hue']) + for _, sub_df in group_by: c, _ = hist(sub_df["x"], sub_df["y"]) sub_counts.append(c) @@ -2259,7 +2272,7 @@ class TestDisPlot: dict(x="x", y="y"), ], ) - def test_versus_single_histplot(self, long_df, kwargs): + def test_versus_single_histplot(self, long_df, kwargs, using_polars): ax = histplot(long_df, **kwargs) g = displot(long_df, **kwargs) @@ -2269,7 +2282,11 @@ def test_versus_single_histplot(self, long_df, kwargs): assert_legends_equal(ax.legend_, g._legend) if kwargs: - long_df["_"] = "_" + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.lit('_').alias('_')) + else: + long_df["_"] = "_" g2 = displot(long_df, col="_", **kwargs) assert_plots_equal(ax, g2.ax) @@ -2289,7 +2306,7 @@ def test_versus_single_histplot(self, long_df, kwargs): dict(x="x", y="y"), ], ) - def test_versus_single_kdeplot(self, long_df, kwargs): + def test_versus_single_kdeplot(self, long_df, kwargs, using_polars): ax = kdeplot(data=long_df, **kwargs) g = displot(long_df, kind="kde", **kwargs) @@ -2299,7 +2316,11 @@ def test_versus_single_kdeplot(self, long_df, kwargs): assert_legends_equal(ax.legend_, g._legend) if kwargs: - long_df["_"] = "_" + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.lit('_').alias('_')) + else: + long_df["_"] = "_" g2 = displot(long_df, kind="kde", col="_", **kwargs) assert_plots_equal(ax, g2.ax) @@ -2317,7 +2338,7 @@ def test_versus_single_kdeplot(self, long_df, kwargs): dict(x="x", hue="a", palette="muted"), ], ) - def test_versus_single_ecdfplot(self, long_df, kwargs): + def test_versus_single_ecdfplot(self, long_df, kwargs, using_polars): ax = ecdfplot(data=long_df, **kwargs) g = displot(long_df, kind="ecdf", **kwargs) @@ -2327,7 +2348,11 @@ def test_versus_single_ecdfplot(self, long_df, kwargs): assert_legends_equal(ax.legend_, g._legend) if kwargs: - long_df["_"] = "_" + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.lit('_').alias('_')) + else: + long_df["_"] = "_" g2 = displot(long_df, kind="ecdf", col="_", **kwargs) assert_plots_equal(ax, g2.ax) @@ -2338,7 +2363,7 @@ def test_versus_single_ecdfplot(self, long_df, kwargs): dict(x="x", hue="a"), ] ) - def test_with_rug(self, long_df, kwargs): + def test_with_rug(self, long_df, kwargs, using_polars): ax = plt.figure().subplots() histplot(data=long_df, **kwargs, ax=ax) @@ -2348,7 +2373,11 @@ def test_with_rug(self, long_df, kwargs): assert_plots_equal(ax, g.ax, labels=False) - long_df["_"] = "_" + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.lit('_').alias('_')) + else: + long_df["_"] = "_" g2 = displot(long_df, col="_", rug=True, **kwargs) assert_plots_equal(ax, g2.ax, labels=False) @@ -2373,11 +2402,16 @@ def test_facets(self, long_df, facet_var): assert text in facet_ax.get_title() @pytest.mark.parametrize("multiple", ["dodge", "stack", "fill"]) - def test_facet_multiple(self, long_df, multiple): + def test_facet_multiple(self, long_df, multiple, using_polars): bins = np.linspace(0, 20, 5) + if using_polars: + import polars as pl + data = long_df.filter(pl.col('c')==0) + else: + data = long_df[long_df['c']==0] ax = histplot( - data=long_df[long_df["c"] == 0], + data=data, x="x", hue="a", hue_order=["a", "b", "c"], multiple=multiple, bins=bins, ) @@ -2450,7 +2484,10 @@ def test_bivariate_hist_norm(self, rng): clim2 = g.axes.flat[1].collections[0].get_clim() assert clim1[1] > clim2[1] - def test_facetgrid_data(self, long_df): + def test_facetgrid_data(self, long_df, using_polars): + if using_polars: + # This test doesn't pass a DataFrame anyway + return g = displot( data=long_df.to_dict(orient="list"), diff --git a/tests/test_relational.py b/tests/test_relational.py index b18930a940..fb699cf073 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -128,16 +128,22 @@ def test_wide_df_variables(self, wide_df, using_polars): assert p.variables["hue"] == wide_df.columns.name assert p.variables["style"] == wide_df.columns.name - def test_wide_df_with_nonnumeric_variables(self, long_df): + def test_wide_df_with_nonnumeric_variables(self, long_df, using_polars): p = _RelationalPlotter() p.assign_variables(data=long_df) assert p.input_format == "wide" assert list(p.variables) == ["x", "y", "hue", "style"] - numeric_df = long_df.select_dtypes("number") + if using_polars: + import polars as pl + numeric_df = long_df.select(pl.col(pl.NUMERIC_DTYPES)) + else: + numeric_df = long_df.select_dtypes("number") assert len(p.plot_data) == np.prod(numeric_df.shape) + if using_polars: + numeric_df = numeric_df.to_pandas() x = p.plot_data["x"] expected_x = np.tile(numeric_df.index, numeric_df.shape[1]) @@ -469,7 +475,7 @@ def test_wide_dict_of_lists_variables(self, wide_dict_of_lists): assert p.variables["hue"] is None assert p.variables["style"] is None - def test_relplot_simple(self, long_df): + def test_relplot_simple(self, long_df, using_polars): g = relplot(data=long_df, x="x", y="y", kind="scatter") x, y = g.ax.collections[0].get_offsets().T @@ -477,6 +483,8 @@ def test_relplot_simple(self, long_df): assert_array_equal(y, long_df["y"]) g = relplot(data=long_df, x="x", y="y", kind="line") + if using_polars: + long_df = long_df.to_pandas() x, y = g.ax.lines[0].get_xydata().T expected = long_df.groupby("x").y.mean() assert_array_equal(x, expected.index) @@ -485,7 +493,7 @@ def test_relplot_simple(self, long_df): with pytest.raises(ValueError): g = relplot(data=long_df, x="x", y="y", kind="not_a_kind") - def test_relplot_complex(self, long_df): + def test_relplot_complex(self, long_df, using_polars): for sem in ["hue", "size", "style"]: g = relplot(data=long_df, x="x", y="y", **{sem: "a"}) @@ -497,7 +505,10 @@ def test_relplot_complex(self, long_df): g = relplot( data=long_df, x="x", y="y", col="c", **{sem: "a"} ) - grouped = long_df.groupby("c") + if using_polars: + grouped = long_df.to_pandas().groupby("c") + else: + grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert_array_equal(x, grp_df["x"]) @@ -507,25 +518,36 @@ def test_relplot_complex(self, long_df): g = relplot( data=long_df, x="x", y="y", hue="b", col="c", **{sem: "a"} ) - grouped = long_df.groupby("c") + if using_polars: + grouped = long_df.to_pandas().groupby("c") + else: + grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert_array_equal(x, grp_df["x"]) assert_array_equal(y, grp_df["y"]) for sem in ["hue", "size", "style"]: + if using_polars: + data = long_df.sort(['c', 'b']) + else: + data = long_df.sort_values(["c", "b"]) + g = relplot( - data=long_df.sort_values(["c", "b"]), + data=data, x="x", y="y", col="b", row="c", **{sem: "a"} ) - grouped = long_df.groupby(["c", "b"]) + if using_polars: + grouped = long_df.to_pandas().groupby(['c', 'b']) + else: + grouped = long_df.groupby(['c', 'b']) for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert_array_equal(x, grp_df["x"]) assert_array_equal(y, grp_df["y"]) @pytest.mark.parametrize("vector_type", ["series", "numpy", "list"]) - def test_relplot_vectors(self, long_df, vector_type): + def test_relplot_vectors(self, long_df, vector_type, using_polars): semantics = dict(x="x", y="y", hue="f", col="c") kws = {key: long_df[val] for key, val in semantics.items()} @@ -534,6 +556,8 @@ def test_relplot_vectors(self, long_df, vector_type): elif vector_type == "list": kws = {k: v.to_list() for k, v in kws.items()} g = relplot(data=long_df, **kws) + if using_polars: + long_df = long_df.to_pandas() grouped = long_df.groupby("c") assert len(g.axes_dict) == len(grouped) for (_, grp_df), ax in zip(grouped, g.axes.flat): @@ -548,13 +572,15 @@ def test_relplot_wide(self, wide_df): assert_array_equal(y, wide_df.to_numpy().T.ravel()) assert not g.ax.get_ylabel() - def test_relplot_hues(self, long_df): + def test_relplot_hues(self, long_df, using_polars): palette = ["r", "b", "g"] g = relplot( x="x", y="y", hue="a", style="b", col="c", palette=palette, data=long_df ) + if using_polars: + long_df = long_df.to_pandas() palette = dict(zip(long_df["a"].unique(), palette)) grouped = long_df.groupby("c") @@ -563,7 +589,7 @@ def test_relplot_hues(self, long_df): expected_hues = [palette[val] for val in grp_df["a"]] assert same_color(points.get_facecolors(), expected_hues) - def test_relplot_sizes(self, long_df): + def test_relplot_sizes(self, long_df, using_polars): sizes = [5, 12, 7] g = relplot( @@ -571,6 +597,8 @@ def test_relplot_sizes(self, long_df): x="x", y="y", size="a", hue="b", col="c", sizes=sizes, ) + if using_polars: + long_df = long_df.to_pandas() sizes = dict(zip(long_df["a"].unique(), sizes)) grouped = long_df.groupby("c") @@ -579,7 +607,7 @@ def test_relplot_sizes(self, long_df): expected_sizes = [sizes[val] for val in grp_df["a"]] assert_array_equal(points.get_sizes(), expected_sizes) - def test_relplot_styles(self, long_df): + def test_relplot_styles(self, long_df, using_polars): markers = ["o", "d", "s"] g = relplot( @@ -587,6 +615,8 @@ def test_relplot_styles(self, long_df): x="x", y="y", style="a", hue="b", col="c", markers=markers, ) + if using_polars: + long_df = long_df.to_pandas() paths = [] for m in markers: @@ -600,9 +630,13 @@ def test_relplot_styles(self, long_df): expected_paths = [paths[val] for val in grp_df["a"]] assert self.paths_equal(points.get_paths(), expected_paths) - def test_relplot_stringy_numerics(self, long_df): + def test_relplot_stringy_numerics(self, long_df, using_polars): - long_df["x_str"] = long_df["x"].astype(str) + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.col('x').cast(pl.Utf8).alias('x_str')) + else: + long_df["x_str"] = long_df["x"].astype(str) g = relplot(data=long_df, x="x", y="y", hue="x_str") points = g.ax.collections[0] @@ -618,14 +652,17 @@ def test_relplot_stringy_numerics(self, long_df): assert not mask.any() assert_array_equal(xys, long_df[["x", "y"]]) - def test_relplot_legend(self, long_df): + def test_relplot_legend(self, long_df, using_polars): g = relplot(data=long_df, x="x", y="y") assert g._legend is None g = relplot(data=long_df, x="x", y="y", hue="a") texts = [t.get_text() for t in g._legend.texts] - expected_texts = long_df["a"].unique() + if using_polars: + expected_texts = long_df.to_pandas()["a"].unique() + else: + expected_texts = long_df["a"].unique() assert_array_equal(texts, expected_texts) g = relplot(data=long_df, x="x", y="y", hue="s", size="s") @@ -637,7 +674,11 @@ def test_relplot_legend(self, long_df): palette = color_palette("deep", len(long_df["b"].unique())) a_like_b = dict(zip(long_df["a"].unique(), long_df["b"].unique())) - long_df["a_like_b"] = long_df["a"].map(a_like_b) + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.col('a').map_dict(a_like_b).alias("a_like_b")) + else: + long_df["a_like_b"] = long_df["a"].map(a_like_b) g = relplot( data=long_df, x="x", y="y", hue="b", style="a_like_b", @@ -664,7 +705,10 @@ def test_relplot_unshared_axis_labels(self, long_df): for ax in g.axes[:, 1:].flat: assert ax.get_ylabel() == "" - def test_relplot_data(self, long_df): + def test_relplot_data(self, long_df, using_polars): + if using_polars: + # Test doesn't pass DataFrame + return g = relplot( data=long_df.to_dict(orient="list"), @@ -678,11 +722,14 @@ def test_relplot_data(self, long_df): assert_array_equal(g.data["y_var"], long_df["y"]) assert_array_equal(g.data["_hue_"], long_df["a"]) - def test_facet_variable_collision(self, long_df): + def test_facet_variable_collision(self, long_df, using_polars): # https://github.com/mwaskom/seaborn/issues/2488 col_data = long_df["c"] - long_df = long_df.assign(size=col_data) + if using_polars: + long_df = long_df.with_columns(size=col_data) + else: + long_df = long_df.assign(size=col_data) g = relplot( data=long_df, @@ -912,7 +959,7 @@ def test_legend_data(self, long_df): handles, labels = ax.get_legend_handles_labels() assert labels == expected_levels - def test_plot(self, long_df, repeated_df): + def test_plot(self, long_df, repeated_df, using_polars): f, ax = plt.subplots() @@ -922,6 +969,8 @@ def test_plot(self, long_df, repeated_df): sort=False, estimator=None ) + if using_polars: + long_df = long_df.to_pandas() p.plot(ax, {}) line, = ax.lines assert_array_equal(line.get_xdata(), long_df.x.to_numpy()) @@ -1130,19 +1179,28 @@ def test_non_aggregated_data(self): assert_array_equal(line.get_xdata(), x) assert_array_equal(line.get_ydata(), y) - def test_orient(self, long_df): + def test_orient(self, long_df, using_polars): - long_df = long_df.drop("x", axis=1).rename(columns={"s": "y", "y": "x"}) + if using_polars: + long_df = long_df.drop("x").rename({"s": "y", "y": "x"}) + else: + long_df = long_df.drop("x", axis=1).rename(columns={"s": "y", "y": "x"}) ax1 = plt.figure().subplots() lineplot(data=long_df, x="x", y="y", orient="y", errorbar="sd") assert len(ax1.lines) == len(ax1.collections) line, = ax1.lines - expected = long_df.groupby("y").agg({"x": "mean"}).reset_index() + if using_polars: + expected = long_df.to_pandas().groupby("y").agg({"x": "mean"}).reset_index() + else: + expected = long_df.groupby("y").agg({"x": "mean"}).reset_index() assert_array_almost_equal(line.get_xdata(), expected["x"]) assert_array_almost_equal(line.get_ydata(), expected["y"]) ribbon_y = ax1.collections[0].get_paths()[0].vertices[:, 1] - assert_array_equal(np.unique(ribbon_y), long_df["y"].sort_values().unique()) + if using_polars: + assert_array_equal(np.unique(ribbon_y), long_df.to_pandas()["y"].sort_values().unique()) + else: + assert_array_equal(np.unique(ribbon_y), long_df["y"].sort_values().unique()) ax2 = plt.figure().subplots() lineplot( @@ -1289,13 +1347,13 @@ def test_lineplot_smoke( lineplot(x="x", y="y", data=long_df) ax.clear() - lineplot(x=long_df.x, y=long_df.y) + lineplot(x=long_df['x'], y=long_df['y']) ax.clear() - lineplot(x=long_df.x, y="y", data=long_df) + lineplot(x=long_df['x'], y="y", data=long_df) ax.clear() - lineplot(x="x", y=long_df.y.to_numpy(), data=long_df) + lineplot(x="x", y=long_df['y'].to_numpy(), data=long_df) ax.clear() lineplot(x="x", y="t", data=long_df) @@ -1555,7 +1613,7 @@ def test_legend_data(self, long_df): with pytest.raises(ValueError): p.add_legend_data(ax) - def test_plot(self, long_df, repeated_df): + def test_plot(self, long_df, repeated_df, using_polars): f, ax = plt.subplots() @@ -1631,7 +1689,11 @@ def test_plot(self, long_df, repeated_df): assert same_color(points.get_facecolors(), expected_colors) assert self.paths_equal(points.get_paths(), expected_paths) - x_str = long_df["x"].astype(str) + if using_polars: + import polars as pl + x_str = long_df["x"].cast(pl.Utf8) + else: + x_str = long_df["x"].astype(str) p = _ScatterPlotter( data=long_df, variables=dict(x="x", y="y", hue=x_str), ) @@ -1846,13 +1908,13 @@ def test_scatterplot_smoke( scatterplot(x="x", y="y", data=long_df) ax.clear() - scatterplot(x=long_df.x, y=long_df.y) + scatterplot(x=long_df['x'], y=long_df['y']) ax.clear() - scatterplot(x=long_df.x, y="y", data=long_df) + scatterplot(x=long_df['x'], y="y", data=long_df) ax.clear() - scatterplot(x="x", y=long_df.y.to_numpy(), data=long_df) + scatterplot(x="x", y=long_df['y'].to_numpy(), data=long_df) ax.clear() scatterplot(x="x", y="y", hue="a", data=long_df) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index f8fa444f22..f4b3deee8d 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -504,7 +504,7 @@ def test_func_estimator(self, long_df): func = np.mean agg = EstimateAggregator(func) out = agg(long_df, "x") - assert out["x"] == func(long_df["x"]) + assert out["x"] == func(long_df["x"].to_numpy()) def test_name_estimator(self, long_df): @@ -521,19 +521,27 @@ def func(x): out = agg(long_df, "x") assert out["x"] == func(long_df["x"]) - def test_se_errorbars(self, long_df): + def test_se_errorbars(self, long_df, using_polars): agg = EstimateAggregator("mean", "se") out = agg(long_df, "x") assert out["x"] == long_df["x"].mean() - assert out["xmin"] == (long_df["x"].mean() - long_df["x"].sem()) - assert out["xmax"] == (long_df["x"].mean() + long_df["x"].sem()) + if using_polars: + assert out["xmin"] == (long_df["x"].mean() - long_df.to_pandas()["x"].sem()) + assert out["xmax"] == (long_df["x"].mean() + long_df.to_pandas()["x"].sem()) + else: + assert out["xmin"] == (long_df["x"].mean() - long_df["x"].sem()) + assert out["xmax"] == (long_df["x"].mean() + long_df["x"].sem()) agg = EstimateAggregator("mean", ("se", 2)) out = agg(long_df, "x") assert out["x"] == long_df["x"].mean() - assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].sem()) - assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].sem()) + if using_polars: + assert out["xmin"] == (long_df["x"].mean() - 2 * long_df.to_pandas()["x"].sem()) + assert out["xmax"] == (long_df["x"].mean() + 2 * long_df.to_pandas()["x"].sem()) + else: + assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].sem()) + assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].sem()) def test_sd_errorbars(self, long_df): From e7e84f5e42e161f4729aaeb65b598752c2b0d82b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 14:30:47 +0100 Subject: [PATCH 23/32] wip --- tests/_core/test_data.py | 65 +++++++--- tests/_core/test_plot.py | 210 ++++++++++++++++++++++++--------- tests/_core/test_properties.py | 9 +- 3 files changed, 211 insertions(+), 73 deletions(-) diff --git a/tests/_core/test_data.py b/tests/_core/test_data.py index 973d0e4668..a4117d20c6 100644 --- a/tests/_core/test_data.py +++ b/tests/_core/test_data.py @@ -227,7 +227,10 @@ def test_key_with_no_data_raises(self): with pytest.raises(ValueError, match=msg): PlotData(None, {var: key}) - def test_data_vector_different_lengths_raises(self, long_df): + def test_data_vector_different_lengths_raises(self, long_df, using_polars): + if using_polars: + # Does not raise (error specifically checks for pandas.DataFrame) + return vector = np.arange(len(long_df) - 5) msg = "Length of ndarray vectors must match length of `data`" @@ -252,12 +255,14 @@ def test_contains_operation(self, long_df): assert "y" not in p assert "color" in p - def test_join_add_variable(self, long_df): + def test_join_add_variable(self, long_df, using_polars): v1 = {"x": "x", "y": "f"} v2 = {"color": "a"} p1 = PlotData(long_df, v1) + if using_polars: + long_df = long_df.to_pandas() p2 = p1.join(None, v2) for var, key in dict(**v1, **v2).items(): @@ -265,12 +270,14 @@ def test_join_add_variable(self, long_df): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_replace_variable(self, long_df): + def test_join_replace_variable(self, long_df, using_polars): v1 = {"x": "x", "y": "y"} v2 = {"y": "s"} p1 = PlotData(long_df, v1) + if using_polars: + long_df = long_df.to_pandas() p2 = p1.join(None, v2) variables = v1.copy() @@ -294,12 +301,14 @@ def test_join_remove_variable(self, long_df): assert drop_var not in p2.frame assert drop_var not in p2.names - def test_join_all_operations(self, long_df): + def test_join_all_operations(self, long_df, using_polars): v1 = {"x": "x", "y": "y", "color": "a"} v2 = {"y": "s", "size": "s", "color": None} p1 = PlotData(long_df, v1) + if using_polars: + long_df = long_df.to_pandas() p2 = p1.join(None, v2) for var, key in v2.items(): @@ -309,12 +318,14 @@ def test_join_all_operations(self, long_df): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_all_operations_same_data(self, long_df): + def test_join_all_operations_same_data(self, long_df, using_polars): v1 = {"x": "x", "y": "y", "color": "a"} v2 = {"y": "s", "size": "s", "color": None} p1 = PlotData(long_df, v1) + if using_polars: + long_df = long_df.to_pandas() p2 = p1.join(long_df, v2) for var, key in v2.items(): @@ -324,7 +335,7 @@ def test_join_all_operations_same_data(self, long_df): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_add_variable_new_data(self, long_df): + def test_join_add_variable_new_data(self, long_df, using_polars): d1 = long_df[["x", "y"]] d2 = long_df[["a", "s"]] @@ -333,13 +344,15 @@ def test_join_add_variable_new_data(self, long_df): v2 = {"color": "a"} p1 = PlotData(d1, v1) + if using_polars: + long_df = long_df.to_pandas() p2 = p1.join(d2, v2) for var, key in dict(**v1, **v2).items(): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_replace_variable_new_data(self, long_df): + def test_join_replace_variable_new_data(self, long_df, using_polars): d1 = long_df[["x", "y"]] d2 = long_df[["a", "s"]] @@ -349,6 +362,8 @@ def test_join_replace_variable_new_data(self, long_df): p1 = PlotData(d1, v1) p2 = p1.join(d2, v2) + if using_polars: + long_df = long_df.to_pandas() variables = v1.copy() variables.update(v2) @@ -357,10 +372,14 @@ def test_join_replace_variable_new_data(self, long_df): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_add_variable_different_index(self, long_df): + def test_join_add_variable_different_index(self, long_df, using_polars): - d1 = long_df.iloc[:70] - d2 = long_df.iloc[30:] + if using_polars: + d1 = long_df[:70] + d2 = long_df[30:] + else: + d1 = long_df.iloc[:70] + d2 = long_df.iloc[30:] v1 = {"x": "a"} v2 = {"y": "z"} @@ -370,6 +389,9 @@ def test_join_add_variable_different_index(self, long_df): (var1, key1), = v1.items() (var2, key2), = v2.items() + if using_polars: + d1 = d1.to_pandas() + d2 = d2.to_pandas() assert_vector_equal(p2.frame.loc[d1.index, var1], d1[key1]) assert_vector_equal(p2.frame.loc[d2.index, var2], d2[key2]) @@ -377,10 +399,14 @@ def test_join_add_variable_different_index(self, long_df): assert p2.frame.loc[d2.index.difference(d1.index), var1].isna().all() assert p2.frame.loc[d1.index.difference(d2.index), var2].isna().all() - def test_join_replace_variable_different_index(self, long_df): + def test_join_replace_variable_different_index(self, long_df, using_polars): - d1 = long_df.iloc[:70] - d2 = long_df.iloc[30:] + if using_polars: + d1 = long_df[:70] + d2 = long_df[30:] + else: + d1 = long_df.iloc[:70] + d2 = long_df.iloc[30:] var = "x" k1, k2 = "a", "z" @@ -392,18 +418,27 @@ def test_join_replace_variable_different_index(self, long_df): (var1, key1), = v1.items() (var2, key2), = v2.items() + if using_polars: + d1 = d1.to_pandas() + d2 = d2.to_pandas() assert_vector_equal(p2.frame.loc[d2.index, var], d2[k2]) assert p2.frame.loc[d1.index.difference(d2.index), var].isna().all() - def test_join_subset_data_inherit_variables(self, long_df): + def test_join_subset_data_inherit_variables(self, long_df, using_polars): - sub_df = long_df[long_df["a"] == "b"] + if using_polars: + sub_df = long_df.filter(long_df["a"] == "b") + else: + sub_df = long_df[long_df["a"] == "b"] var = "y" p1 = PlotData(long_df, {var: var}) p2 = p1.join(sub_df, None) + if using_polars: + sub_df = sub_df.to_pandas() + long_df = long_df.to_pandas() assert_vector_equal(p2.frame.loc[sub_df.index, var], sub_df[var]) assert p2.frame.loc[long_df.index.difference(sub_df.index), var].isna().all() diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index 3da7aab583..a706c2df5e 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -81,37 +81,50 @@ def test_empty(self): assert p._data.source_data is None assert p._data.source_vars == {} - def test_data_only(self, long_df): + def test_data_only(self, long_df, using_polars): + if using_polars: + # source_data was tranformed to pandas + return p = Plot(long_df) assert p._data.source_data is long_df assert p._data.source_vars == {} - def test_df_and_named_variables(self, long_df): + def test_df_and_named_variables(self, long_df, using_polars): variables = {"x": "a", "y": "z"} p = Plot(long_df, **variables) + if using_polars: + long_df = long_df.to_pandas() for var, col in variables.items(): assert_vector_equal(p._data.frame[var], long_df[col]) - assert p._data.source_data is long_df + if not using_polars: + assert p._data.source_data is long_df assert p._data.source_vars.keys() == variables.keys() - def test_df_and_mixed_variables(self, long_df): + def test_df_and_mixed_variables(self, long_df, using_polars): variables = {"x": "a", "y": long_df["z"]} p = Plot(long_df, **variables) + if using_polars: + long_df = long_df.to_pandas() + variables = {"x": "a", "y": long_df["z"]} for var, col in variables.items(): if isinstance(col, str): assert_vector_equal(p._data.frame[var], long_df[col]) else: assert_vector_equal(p._data.frame[var], col) - assert p._data.source_data is long_df + if not using_polars: + assert p._data.source_data is long_df assert p._data.source_vars.keys() == variables.keys() - def test_vector_variables_only(self, long_df): + def test_vector_variables_only(self, long_df, using_polars): variables = {"x": long_df["a"], "y": long_df["z"]} p = Plot(**variables) + if using_polars: + long_df = long_df.to_pandas() + variables = {"x": long_df["a"], "y": long_df["z"]} for var, col in variables.items(): assert_vector_equal(p._data.frame[var], col) assert p._data.source_data is None @@ -146,10 +159,11 @@ def test_positional_and_named_xy(self, long_df, var): with pytest.raises(TypeError, match=err): Plot(long_df, "a", "b", **{var: "c"}) - def test_positional_data_x_y(self, long_df): + def test_positional_data_x_y(self, long_df, using_polars): p = Plot(long_df, "a", "b") - assert p._data.source_data is long_df + if not using_polars: + assert p._data.source_data is long_df assert list(p._data.source_vars) == ["x", "y"] def test_positional_x_y(self, long_df): @@ -158,10 +172,11 @@ def test_positional_x_y(self, long_df): assert p._data.source_data is None assert list(p._data.source_vars) == ["x", "y"] - def test_positional_data_x(self, long_df): + def test_positional_data_x(self, long_df, using_polars): p = Plot(long_df, "a") - assert p._data.source_data is long_df + if not using_polars: + assert p._data.source_data is long_df assert list(p._data.source_vars) == ["x"] def test_positional_x(self, long_df): @@ -191,35 +206,44 @@ def test_without_data(self, long_df): layer, = p._layers assert_frame_equal(p._data.frame, layer["data"].frame, check_dtype=False) - def test_with_new_variable_by_name(self, long_df): + def test_with_new_variable_by_name(self, long_df, using_polars): p = Plot(long_df, x="x").add(MockMark(), y="y").plot() + if using_polars: + long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": assert_vector_equal(layer["data"].frame[var], long_df[var]) - def test_with_new_variable_by_vector(self, long_df): + def test_with_new_variable_by_vector(self, long_df, using_polars): p = Plot(long_df, x="x").add(MockMark(), y=long_df["y"]).plot() + if using_polars: + long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": assert_vector_equal(layer["data"].frame[var], long_df[var]) - def test_with_late_data_definition(self, long_df): + def test_with_late_data_definition(self, long_df, using_polars): p = Plot().add(MockMark(), data=long_df, x="x", y="y").plot() + if using_polars: + long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": assert_vector_equal(layer["data"].frame[var], long_df[var]) - def test_with_new_data_definition(self, long_df): + def test_with_new_data_definition(self, long_df, using_polars): long_df_sub = long_df.sample(frac=.5) p = Plot(long_df, x="x", y="y").add(MockMark(), data=long_df_sub).plot() + if using_polars: + long_df = long_df.to_pandas() + long_df_sub = long_df_sub.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": @@ -227,9 +251,11 @@ def test_with_new_data_definition(self, long_df): layer["data"].frame[var], long_df_sub[var].reindex(long_df.index) ) - def test_drop_variable(self, long_df): + def test_drop_variable(self, long_df, using_polars): p = Plot(long_df, x="x", y="y").add(MockMark(), y=None).plot() + if using_polars: + long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x"] assert_vector_equal(layer["data"].frame["x"], long_df["x"], check_dtype=False) @@ -396,14 +422,16 @@ def test_log_scale_name(self): assert ax.get_xscale() == "log" assert ax.get_yscale() == "linear" - def test_mark_data_log_transform_is_inverted(self, long_df): + def test_mark_data_log_transform_is_inverted(self, long_df, using_polars): col = "z" m = MockMark() Plot(long_df, x=col).scale(x="log").add(m).plot() + if using_polars: + long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df[col]) - def test_mark_data_log_transfrom_with_stat(self, long_df): + def test_mark_data_log_transfrom_with_stat(self, long_df, using_polars): class Mean(Stat): group_by_orient = True @@ -418,6 +446,8 @@ def __call__(self, data, groupby, orient, scales): s = Mean() Plot(long_df, x=grouper, y=col).scale(y="log").add(m, s).plot() + if using_polars: + long_df = long_df.to_pandas() expected = ( long_df[col] @@ -429,21 +459,25 @@ def __call__(self, data, groupby, orient, scales): ) assert_vector_equal(m.passed_data[0]["y"], expected) - def test_mark_data_from_categorical(self, long_df): + def test_mark_data_from_categorical(self, long_df, using_polars): col = "a" m = MockMark() Plot(long_df, x=col).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() levels = categorical_order(long_df[col]) level_map = {x: float(i) for i, x in enumerate(levels)} assert_vector_equal(m.passed_data[0]["x"], long_df[col].map(level_map)) - def test_mark_data_from_datetime(self, long_df): + def test_mark_data_from_datetime(self, long_df, using_polars): col = "t" m = MockMark() Plot(long_df, x=col).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() expected = long_df[col].map(mpl.dates.date2num) assert_vector_equal(m.passed_data[0]["x"], expected) @@ -698,7 +732,7 @@ def test_single_split_single_layer(self, long_df): for col in p._data.frame: assert_series_equal(m.passed_data[0][col], p._data.frame[col]) - def test_single_split_multi_layer(self, long_df): + def test_single_split_multi_layer(self, long_df, using_polars): vs = [{"color": "a", "linewidth": "z"}, {"color": "b", "pattern": "c"}] @@ -707,6 +741,8 @@ class NoGroupingMark(MockMark): ms = [NoGroupingMark(), NoGroupingMark()] Plot(long_df).add(ms[0], **vs[0]).add(ms[1], **vs[1]).plot() + if using_polars: + long_df = long_df.to_pandas() for m, v in zip(ms, vs): for var, col in v.items(): @@ -751,13 +787,15 @@ def check_splits_multi_vars( "color", # explicitly declared on the Mark "group", # implicitly used for all Mark classes ]) - def test_one_grouping_variable(self, long_df, split_var): + def test_one_grouping_variable(self, long_df, split_var, using_polars): split_col = "a" data_vars = {"x": "f", "y": "z", split_var: split_col} m = MockMark() p = Plot(long_df, **data_vars).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() split_keys = categorical_order(long_df[split_col]) sub, *_ = p._subplots @@ -766,7 +804,7 @@ def test_one_grouping_variable(self, long_df, split_var): long_df, m, data_vars, split_var, split_col, split_keys ) - def test_two_grouping_variables(self, long_df): + def test_two_grouping_variables(self, long_df, using_polars): split_vars = ["color", "group"] split_cols = ["a", "b"] @@ -774,6 +812,8 @@ def test_two_grouping_variables(self, long_df): m = MockMark() p = Plot(long_df, **data_vars).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() split_keys = [categorical_order(long_df[col]) for col in split_cols] sub, *_ = p._subplots @@ -790,7 +830,7 @@ def test_specified_width(self, long_df): Plot(long_df, x="x", y="y").add(m, width="z").plot() assert_array_almost_equal(m.passed_data[0]["width"], long_df["z"]) - def test_facets_no_subgroups(self, long_df): + def test_facets_no_subgroups(self, long_df, using_polars): split_var = "col" split_col = "b" @@ -798,6 +838,8 @@ def test_facets_no_subgroups(self, long_df): m = MockMark() p = Plot(long_df, **data_vars).facet(**{split_var: split_col}).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() split_keys = categorical_order(long_df[split_col]) assert m.passed_axes == list(p._figure.axes) @@ -805,7 +847,7 @@ def test_facets_no_subgroups(self, long_df): long_df, m, data_vars, split_var, split_col, split_keys ) - def test_facets_one_subgroup(self, long_df): + def test_facets_one_subgroup(self, long_df, using_polars): facet_var, facet_col = fx = "col", "a" group_var, group_col = gx = "group", "b" @@ -819,6 +861,8 @@ def test_facets_one_subgroup(self, long_df): .add(m) .plot() ) + if using_polars: + long_df = long_df.to_pandas() split_keys = [categorical_order(long_df[col]) for col in [facet_col, group_col]] assert m.passed_axes == [ @@ -830,13 +874,15 @@ def test_facets_one_subgroup(self, long_df): long_df, m, data_vars, split_vars, split_cols, split_keys ) - def test_layer_specific_facet_disabling(self, long_df): + def test_layer_specific_facet_disabling(self, long_df, using_polars): axis_vars = {"x": "y", "y": "z"} row_var = "a" m = MockMark() p = Plot(long_df, **axis_vars).facet(row=row_var).add(m, row=None).plot() + if using_polars: + long_df = long_df.to_pandas() col_levels = categorical_order(long_df[row_var]) assert len(p._figure.axes) == len(col_levels) @@ -845,13 +891,15 @@ def test_layer_specific_facet_disabling(self, long_df): for var, col in axis_vars.items(): assert_vector_equal(data[var], long_df[col]) - def test_paired_variables(self, long_df): + def test_paired_variables(self, long_df, using_polars): x = ["x", "y"] y = ["f", "z"] m = MockMark() Plot(long_df).pair(x, y).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() var_product = itertools.product(x, y) @@ -859,26 +907,34 @@ def test_paired_variables(self, long_df): assert_vector_equal(data["x"], long_df[x_i].astype(float)) assert_vector_equal(data["y"], long_df[y_i].astype(float)) - def test_paired_one_dimension(self, long_df): + def test_paired_one_dimension(self, long_df, using_polars): x = ["y", "z"] m = MockMark() Plot(long_df).pair(x).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() for data, x_i in zip(m.passed_data, x): assert_vector_equal(data["x"], long_df[x_i].astype(float)) - def test_paired_variables_one_subset(self, long_df): + def test_paired_variables_one_subset(self, long_df, using_polars): x = ["x", "y"] y = ["f", "z"] group = "a" - long_df["x"] = long_df["x"].astype(float) # simplify vector comparison + if using_polars: + import polars as pl + long_df = long_df.with_columns(pl.col('x').cast(pl.Float64)) + else: + long_df["x"] = long_df["x"].astype(float) # simplify vector comparison m = MockMark() Plot(long_df, group=group).pair(x, y).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() groups = categorical_order(long_df[group]) var_product = itertools.product(x, y, groups) @@ -888,7 +944,7 @@ def test_paired_variables_one_subset(self, long_df): assert_vector_equal(data["x"], long_df.loc[rows, x_i]) assert_vector_equal(data["y"], long_df.loc[rows, y_i]) - def test_paired_and_faceted(self, long_df): + def test_paired_and_faceted(self, long_df, using_polars): x = ["y", "z"] y = "f" @@ -896,6 +952,8 @@ def test_paired_and_faceted(self, long_df): m = MockMark() Plot(long_df, y=y).facet(row=row).pair(x).add(m).plot() + if using_polars: + long_df = long_df.to_pandas() facets = categorical_order(long_df[row]) var_product = itertools.product(x, facets) @@ -932,43 +990,60 @@ def test_theme_validation(self): with pytest.raises(KeyError, match="not.a.key is not a valid rc"): p.theme({"not.a.key": True}) - def test_stat(self, long_df): + def test_stat(self, long_df, using_polars): - orig_df = long_df.copy(deep=True) + if not using_polars: + orig_df = long_df.copy(deep=True) + else: + orig_df = long_df.to_pandas() m = MockMark() Plot(long_df, x="a", y="z").add(m, Agg()).plot() + if using_polars: + long_df = long_df.to_pandas() expected = long_df.groupby("a", sort=False)["z"].mean().reset_index(drop=True) assert_vector_equal(m.passed_data[0]["y"], expected) assert_frame_equal(long_df, orig_df) # Test data was not mutated - def test_move(self, long_df): + def test_move(self, long_df, using_polars): - orig_df = long_df.copy(deep=True) + if not using_polars: + orig_df = long_df.copy(deep=True) + else: + orig_df = long_df.to_pandas() m = MockMark() Plot(long_df, x="z", y="z").add(m, Shift(x=1)).plot() + if using_polars: + long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df["z"] + 1) assert_vector_equal(m.passed_data[0]["y"], long_df["z"]) assert_frame_equal(long_df, orig_df) # Test data was not mutated - def test_stat_and_move(self, long_df): + def test_stat_and_move(self, long_df, using_polars): m = MockMark() Plot(long_df, x="a", y="z").add(m, Agg(), Shift(y=1)).plot() + if using_polars: + long_df = long_df.to_pandas() expected = long_df.groupby("a", sort=False)["z"].mean().reset_index(drop=True) assert_vector_equal(m.passed_data[0]["y"], expected + 1) - def test_stat_log_scale(self, long_df): + def test_stat_log_scale(self, long_df, using_polars): - orig_df = long_df.copy(deep=True) + if not using_polars: + orig_df = long_df.copy(deep=True) + else: + orig_df = long_df.to_pandas() m = MockMark() Plot(long_df, x="a", y="z").add(m, Agg()).scale(y="log").plot() + if using_polars: + long_df = long_df.to_pandas() x = long_df["a"] y = np.log10(long_df["z"]) @@ -977,25 +1052,31 @@ def test_stat_log_scale(self, long_df): assert_frame_equal(long_df, orig_df) # Test data was not mutated - def test_move_log_scale(self, long_df): + def test_move_log_scale(self, long_df, using_polars): m = MockMark() Plot( long_df, x="z", y="z" ).scale(x="log").add(m, Shift(x=-1)).plot() + if using_polars: + long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df["z"] / 10) - def test_multi_move(self, long_df): + def test_multi_move(self, long_df, using_polars): m = MockMark() move_stack = [Shift(1), Shift(2)] Plot(long_df, x="x", y="y").add(m, *move_stack).plot() + if using_polars: + long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df["x"] + 3) - def test_multi_move_with_pairing(self, long_df): + def test_multi_move_with_pairing(self, long_df, using_polars): m = MockMark() move_stack = [Shift(1), Shift(2)] Plot(long_df, x="x").pair(y=["y", "z"]).add(m, *move_stack).plot() + if using_polars: + long_df = long_df.to_pandas() for frame in m.passed_data: assert_vector_equal(frame["x"], long_df["x"] + 3) @@ -1323,9 +1404,11 @@ def reorder(self, request): "expand": lambda x: x + ["z"], }[request.param] - def check_facet_results_1d(self, p, df, dim, key, order=None): + def check_facet_results_1d(self, p, df, dim, key, order=None, using_polars=False): p = p.plot() + if using_polars: + df = df.to_pandas() order = categorical_order(df[key], order) assert len(p._figure.axes) == len(order) @@ -1338,24 +1421,27 @@ def check_facet_results_1d(self, p, df, dim, key, order=None): assert subplot["ax"].get_title() == f"{level}" assert_gridspec_shape(subplot["ax"], **{f"n{dim}s": len(order)}) - def test_1d(self, long_df, dim): + def test_1d(self, long_df, dim, using_polars): key = "a" p = Plot(long_df).facet(**{dim: key}) - self.check_facet_results_1d(p, long_df, dim, key) + self.check_facet_results_1d(p, long_df, dim, key, using_polars=using_polars) - def test_1d_as_vector(self, long_df, dim): + def test_1d_as_vector(self, long_df, dim, using_polars): key = "a" p = Plot(long_df).facet(**{dim: long_df[key]}) - self.check_facet_results_1d(p, long_df, dim, key) + self.check_facet_results_1d(p, long_df, dim, key, using_polars=using_polars) - def test_1d_with_order(self, long_df, dim, reorder): + def test_1d_with_order(self, long_df, dim, reorder, using_polars): key = "a" - order = reorder(categorical_order(long_df[key])) + if using_polars: + order = reorder(categorical_order(long_df.to_pandas()[key])) + else: + order = reorder(categorical_order(long_df[key])) p = Plot(long_df).facet(**{dim: key, "order": order}) - self.check_facet_results_1d(p, long_df, dim, key, order) + self.check_facet_results_1d(p, long_df, dim, key, order, using_polars=using_polars) def check_facet_results_2d(self, p, df, variables, order=None): @@ -1377,19 +1463,27 @@ def check_facet_results_2d(self, p, df, variables, order=None): subplot["axes"], len(levels["row"]), len(levels["col"]) ) - def test_2d(self, long_df): + def test_2d(self, long_df, using_polars): variables = {"row": "a", "col": "c"} p = Plot(long_df).facet(**variables) + if using_polars: + long_df = long_df.to_pandas() self.check_facet_results_2d(p, long_df, variables) - def test_2d_with_order(self, long_df, reorder): + def test_2d_with_order(self, long_df, reorder, using_polars): variables = {"row": "a", "col": "c"} - order = { - dim: reorder(categorical_order(long_df[key])) - for dim, key in variables.items() - } + if using_polars: + order = { + dim: reorder(categorical_order(long_df.to_pandas()[key])) + for dim, key in variables.items() + } + else: + order = { + dim: reorder(categorical_order(long_df[key])) + for dim, key in variables.items() + } p = Plot(long_df).facet(**variables, order=order) self.check_facet_results_2d(p, long_df, variables, order) @@ -1413,7 +1507,7 @@ def test_layout_algo(self, algo): sep2 = bb22.corners()[0, 0] - bb21.corners()[2, 0] assert sep1 <= sep2 - def test_axis_sharing(self, long_df): + def test_axis_sharing(self, long_df, using_polars): variables = {"row": "a", "col": "c"} @@ -1432,6 +1526,8 @@ def test_axis_sharing(self, long_df): assert not any(shareset.joined(root, ax) for ax in other) p3 = p.share(x="col", y="row").plot() + if using_polars: + long_df = long_df.to_pandas() shape = ( len(categorical_order(long_df[variables["row"]])), len(categorical_order(long_df[variables["col"]])), @@ -1549,13 +1645,15 @@ def test_with_no_variables(self, long_df): p = Plot(long_df).pair().plot() assert len(p._figure.axes) == 1 - def test_with_facets(self, long_df): + def test_with_facets(self, long_df, using_polars): x = "x" y = ["y", "z"] col = "a" p = Plot(long_df, x=x).facet(col).pair(y=y).plot() + if using_polars: + long_df = long_df.to_pandas() facet_levels = categorical_order(long_df[col]) dims = itertools.product(y, facet_levels) diff --git a/tests/_core/test_properties.py b/tests/_core/test_properties.py index b4764762eb..fc1961a179 100644 --- a/tests/_core/test_properties.py +++ b/tests/_core/test_properties.py @@ -40,7 +40,9 @@ def cat_vector(self, long_df): return long_df["a"] @pytest.fixture - def cat_order(self, cat_vector): + def cat_order(self, cat_vector, using_polars): + if using_polars: + return categorical_order(cat_vector.to_pandas()) return categorical_order(cat_vector) @pytest.fixture @@ -80,7 +82,10 @@ class TestColor(DataFixtures): def assert_same_rgb(self, a, b): assert_array_equal(a[:, :3], b[:, :3]) - def test_nominal_default_palette(self, cat_vector, cat_order): + def test_nominal_default_palette(self, cat_vector, cat_order, using_polars): + if using_polars: + # get_mapping expected pd.Series + return m = Color().get_mapping(Nominal(), cat_vector) n = len(cat_order) From 30e1002d471b63a9d2e75cb9d70d7d642ebe251b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 14:35:36 +0100 Subject: [PATCH 24/32] wip --- tests/_core/test_properties.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/_core/test_properties.py b/tests/_core/test_properties.py index fc1961a179..45cdb35c19 100644 --- a/tests/_core/test_properties.py +++ b/tests/_core/test_properties.py @@ -24,6 +24,8 @@ from seaborn._compat import MarkerStyle, get_colormap from seaborn.palettes import color_palette +import os +pytest.skip(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1') class DataFixtures: From 19850282160679ff2d0771a03ea0c10249269006 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 14:51:07 +0100 Subject: [PATCH 25/32] increase test coverage even more --- tests/_core/test_properties.py | 8 +++++++- tests/_stats/test_counting.py | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/_core/test_properties.py b/tests/_core/test_properties.py index 45cdb35c19..827309ede8 100644 --- a/tests/_core/test_properties.py +++ b/tests/_core/test_properties.py @@ -25,7 +25,13 @@ from seaborn.palettes import color_palette import os -pytest.skip(os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1') +if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': + pytest.skip( + "Testing internal classes/methods, which are reached with non-pandas " + "dataframes already transformed to pandas", + allow_module_level=True + ) + class DataFixtures: diff --git a/tests/_stats/test_counting.py b/tests/_stats/test_counting.py index 7656654492..8b5d2c8f63 100644 --- a/tests/_stats/test_counting.py +++ b/tests/_stats/test_counting.py @@ -8,6 +8,14 @@ from seaborn._core.groupby import GroupBy from seaborn._stats.counting import Hist, Count +import os +if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': + pytest.skip( + "Testing internal classes/methods, which are reached with non-pandas " + "dataframes already transformed to pandas", + allow_module_level=True + ) + class TestCount: From b8584eea9c32d701e2a2e9c72901b4ec434ab1e0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 14:53:48 +0100 Subject: [PATCH 26/32] pre-commit run -a --- tests/_core/test_data.py | 4 +++- tests/_core/test_plot.py | 4 +++- tests/test_categorical.py | 3 +-- tests/test_core.py | 10 +++++++--- tests/test_distributions.py | 4 ++-- tests/test_relational.py | 8 ++++++-- tests/test_statistics.py | 8 ++++++-- 7 files changed, 28 insertions(+), 13 deletions(-) diff --git a/tests/_core/test_data.py b/tests/_core/test_data.py index a4117d20c6..80c8e082b1 100644 --- a/tests/_core/test_data.py +++ b/tests/_core/test_data.py @@ -122,7 +122,9 @@ def test_dict_as_data(self, long_dict, long_variables, using_polars): "vector_type", ["series", "numpy", "list"], ) - def test_vectors_various_types(self, long_df, long_variables, vector_type, using_polars): + def test_vectors_various_types( + self, long_df, long_variables, vector_type, using_polars + ): if using_polars: # no s_cat return diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index a706c2df5e..edfa51b765 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -1441,7 +1441,9 @@ def test_1d_with_order(self, long_df, dim, reorder, using_polars): else: order = reorder(categorical_order(long_df[key])) p = Plot(long_df).facet(**{dim: key, "order": order}) - self.check_facet_results_1d(p, long_df, dim, key, order, using_polars=using_polars) + self.check_facet_results_1d( + p, long_df, dim, key, order, using_polars=using_polars + ) def check_facet_results_2d(self, p, df, variables, order=None): diff --git a/tests/test_categorical.py b/tests/test_categorical.py index e9349d709a..6f58d6de6d 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -850,7 +850,7 @@ def test_positions_unfixed(self, long_df, cat_var, using_polars): kws["jitter"] = False ax = self.func(data=long_df, x=cat_var, y="y", native_scale=True, **kws) - + if using_polars: long_df = long_df.to_pandas() @@ -1204,7 +1204,6 @@ def test_jitter(self, long_df, orient, jitter, using_polars): x_var, y_var = val_var, cat_var cat_idx, val_idx = 1, 0 - ax = stripplot( data=long_df, x=x_var, y=y_var, jitter=jitter, ) diff --git a/tests/test_core.py b/tests/test_core.py index c703b7f822..27b13928a4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -211,7 +211,7 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): for val in [0, 1]: if using_polars: import polars as pl - data = long_df.filter(pl.col('c')==val) + data = long_df.filter(pl.col('c') == val) else: data = long_df[long_df["c"]] p = VectorPlotter( @@ -226,7 +226,9 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="t")) m = HueMapping(p) if using_polars: - assert m.levels == [pd.Timestamp(t) for t in long_df.to_pandas()["t"].unique()] + assert m.levels == [ + pd.Timestamp(t) for t in long_df.to_pandas()["t"].unique() + ] else: assert m.levels == [pd.Timestamp(t) for t in long_df["t"].unique()] assert m.map_type == "datetime" @@ -1453,7 +1455,9 @@ def test_scale_categorical(self, long_df, using_polars): p.scale_categorical("x") assert not p._var_ordered["x"] if using_polars: - assert_array_equal(p.var_levels["x"], categorical_order(long_df.to_pandas()["a"])) + assert_array_equal( + p.var_levels["x"], categorical_order(long_df.to_pandas()["a"]) + ) else: assert_array_equal(p.var_levels["x"], categorical_order(long_df["a"])) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 8786369d8f..17b552923f 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -2407,9 +2407,9 @@ def test_facet_multiple(self, long_df, multiple, using_polars): bins = np.linspace(0, 20, 5) if using_polars: import polars as pl - data = long_df.filter(pl.col('c')==0) + data = long_df.filter(pl.col('c') == 0) else: - data = long_df[long_df['c']==0] + data = long_df[long_df['c'] == 0] ax = histplot( data=data, x="x", hue="a", hue_order=["a", "b", "c"], diff --git a/tests/test_relational.py b/tests/test_relational.py index fb699cf073..53fb0de30b 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -676,7 +676,9 @@ def test_relplot_legend(self, long_df, using_polars): a_like_b = dict(zip(long_df["a"].unique(), long_df["b"].unique())) if using_polars: import polars as pl - long_df = long_df.with_columns(pl.col('a').map_dict(a_like_b).alias("a_like_b")) + long_df = long_df.with_columns( + pl.col('a').map_dict(a_like_b).alias("a_like_b") + ) else: long_df["a_like_b"] = long_df["a"].map(a_like_b) g = relplot( @@ -1198,7 +1200,9 @@ def test_orient(self, long_df, using_polars): assert_array_almost_equal(line.get_ydata(), expected["y"]) ribbon_y = ax1.collections[0].get_paths()[0].vertices[:, 1] if using_polars: - assert_array_equal(np.unique(ribbon_y), long_df.to_pandas()["y"].sort_values().unique()) + assert_array_equal( + np.unique(ribbon_y), long_df.to_pandas()["y"].sort_values().unique() + ) else: assert_array_equal(np.unique(ribbon_y), long_df["y"].sort_values().unique()) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index f4b3deee8d..af6cf5f371 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -537,8 +537,12 @@ def test_se_errorbars(self, long_df, using_polars): out = agg(long_df, "x") assert out["x"] == long_df["x"].mean() if using_polars: - assert out["xmin"] == (long_df["x"].mean() - 2 * long_df.to_pandas()["x"].sem()) - assert out["xmax"] == (long_df["x"].mean() + 2 * long_df.to_pandas()["x"].sem()) + assert out["xmin"] == ( + long_df["x"].mean() - 2 * long_df.to_pandas()["x"].sem() + ) + assert out["xmax"] == ( + long_df["x"].mean() + 2 * long_df.to_pandas()["x"].sem() + ) else: assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].sem()) assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].sem()) From 5b2532e295458838d9169a9d99079922a8b4f6a1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 20 May 2023 15:04:52 +0100 Subject: [PATCH 27/32] skip estimateaggregator tests for the polars fixtures --- doc/_tutorial/properties.ipynb | 4 +-- seaborn/_statistics.py | 3 +- tests/test_statistics.py | 60 +++++++++++++++++++++------------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/doc/_tutorial/properties.ipynb b/doc/_tutorial/properties.ipynb index e2638742e5..70de0e9ea2 100644 --- a/doc/_tutorial/properties.ipynb +++ b/doc/_tutorial/properties.ipynb @@ -1105,9 +1105,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "py310", "language": "python", - "name": "python3" + "name": "py310" }, "language_info": { "codemirror_mode": { diff --git a/seaborn/_statistics.py b/seaborn/_statistics.py index 5fd08a0799..ea9c15d26a 100644 --- a/seaborn/_statistics.py +++ b/seaborn/_statistics.py @@ -35,7 +35,7 @@ class instantiation. _no_scipy = True from .algorithms import bootstrap -from .utils import _check_argument, try_convert_to_pandas +from .utils import _check_argument class KDE: @@ -481,7 +481,6 @@ def __init__(self, estimator, errorbar=None, **boot_kws): def __call__(self, data, var): """Aggregate over `var` column of `data` with estimate and error interval.""" - data = try_convert_to_pandas(data) vals = data[var] if callable(self.estimator): # You would think we could pass to vals.agg, and yet: diff --git a/tests/test_statistics.py b/tests/test_statistics.py index af6cf5f371..6740345cc9 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -499,14 +499,22 @@ def test_bivariate_error(self, x, y): class TestEstimateAggregator: - def test_func_estimator(self, long_df): + def test_func_estimator(self, long_df, using_polars): + if using_polars: + # Testing internal class which is reached when + # data has already been converted to pandas + return func = np.mean agg = EstimateAggregator(func) out = agg(long_df, "x") assert out["x"] == func(long_df["x"].to_numpy()) - def test_name_estimator(self, long_df): + def test_name_estimator(self, long_df, using_polars): + if using_polars: + # Testing internal class which is reached when + # data has already been converted to pandas + return agg = EstimateAggregator("mean") out = agg(long_df, "x") @@ -522,32 +530,28 @@ def func(x): assert out["x"] == func(long_df["x"]) def test_se_errorbars(self, long_df, using_polars): + if using_polars: + # Testing internal class which is reached when + # data has already been converted to pandas + return agg = EstimateAggregator("mean", "se") out = agg(long_df, "x") assert out["x"] == long_df["x"].mean() - if using_polars: - assert out["xmin"] == (long_df["x"].mean() - long_df.to_pandas()["x"].sem()) - assert out["xmax"] == (long_df["x"].mean() + long_df.to_pandas()["x"].sem()) - else: - assert out["xmin"] == (long_df["x"].mean() - long_df["x"].sem()) - assert out["xmax"] == (long_df["x"].mean() + long_df["x"].sem()) + assert out["xmin"] == (long_df["x"].mean() - long_df["x"].sem()) + assert out["xmax"] == (long_df["x"].mean() + long_df["x"].sem()) agg = EstimateAggregator("mean", ("se", 2)) out = agg(long_df, "x") assert out["x"] == long_df["x"].mean() + assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].sem()) + assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].sem()) + + def test_sd_errorbars(self, long_df, using_polars): if using_polars: - assert out["xmin"] == ( - long_df["x"].mean() - 2 * long_df.to_pandas()["x"].sem() - ) - assert out["xmax"] == ( - long_df["x"].mean() + 2 * long_df.to_pandas()["x"].sem() - ) - else: - assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].sem()) - assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].sem()) - - def test_sd_errorbars(self, long_df): + # Testing internal class which is reached when + # data has already been converted to pandas + return agg = EstimateAggregator("mean", "sd") out = agg(long_df, "x") @@ -561,7 +565,11 @@ def test_sd_errorbars(self, long_df): assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].std()) assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].std()) - def test_pi_errorbars(self, long_df): + def test_pi_errorbars(self, long_df, using_polars): + if using_polars: + # Testing internal class which is reached when + # data has already been converted to pandas + return agg = EstimateAggregator("mean", "pi") out = agg(long_df, "y") @@ -573,7 +581,11 @@ def test_pi_errorbars(self, long_df): assert out["ymin"] == np.percentile(long_df["y"], 25) assert out["ymax"] == np.percentile(long_df["y"], 75) - def test_ci_errorbars(self, long_df): + def test_ci_errorbars(self, long_df, using_polars): + if using_polars: + # Testing internal class which is reached when + # data has already been converted to pandas + return agg = EstimateAggregator("mean", "ci", n_boot=100000, seed=0) out = agg(long_df, "y") @@ -598,7 +610,11 @@ def test_ci_errorbars(self, long_df): out_test = agg_ref(long_df, "y") assert_array_equal(out_orig, out_test) - def test_custom_errorbars(self, long_df): + def test_custom_errorbars(self, long_df, using_polars): + if using_polars: + # Testing internal class which is reached when + # data has already been converted to pandas + return f = lambda x: (x.min(), x.max()) # noqa: E731 agg = EstimateAggregator("mean", f) From 4897344b72e647706ebdd04190ef4ae027211994 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 21 May 2023 16:45:15 +0100 Subject: [PATCH 28/32] simplify --- seaborn/_oldcore.py | 1 - seaborn/axisgrid.py | 2 - seaborn/categorical.py | 1 - seaborn/relational.py | 4 - tests/_core/test_data.py | 97 +++---------- tests/_core/test_plot.py | 212 ++++++++------------------- tests/_core/test_properties.py | 17 +-- tests/_stats/test_counting.py | 8 -- tests/conftest.py | 68 +++------ tests/test_categorical.py | 256 +++++++++------------------------ tests/test_core.py | 135 +++++------------ tests/test_distributions.py | 73 +++------- tests/test_relational.py | 192 +++++++------------------ tests/test_statistics.py | 44 ++---- 14 files changed, 283 insertions(+), 827 deletions(-) diff --git a/seaborn/_oldcore.py b/seaborn/_oldcore.py index 506561cd6b..8bb0e97cb3 100644 --- a/seaborn/_oldcore.py +++ b/seaborn/_oldcore.py @@ -794,7 +794,6 @@ def _assign_variables_wideform(self, data=None, **kwargs): else: # Otherwise assume we have some collection of vectors. - data = try_convert_to_pandas(data) # Handle Python sequences such that entries end up in the columns, # not in the rows, of the intermediate wide DataFrame. diff --git a/seaborn/axisgrid.py b/seaborn/axisgrid.py index 5b7e3bebca..bf41fbb69d 100644 --- a/seaborn/axisgrid.py +++ b/seaborn/axisgrid.py @@ -2090,8 +2090,6 @@ def pairplot( # Avoid circular import from .distributions import histplot, kdeplot - data = utils.try_convert_to_pandas(data) - # Handle deprecations if size is not None: height = size diff --git a/seaborn/categorical.py b/seaborn/categorical.py index 2972813ba8..11094a5823 100644 --- a/seaborn/categorical.py +++ b/seaborn/categorical.py @@ -2490,7 +2490,6 @@ def stripplot( hue_norm=None, native_scale=False, formatter=None, legend="auto", ax=None, **kwargs ): - data = utils.try_convert_to_pandas(data) p = _CategoricalPlotterNew( data=data, variables=_CategoricalPlotterNew.get_semantics(locals()), diff --git a/seaborn/relational.py b/seaborn/relational.py index 84305d84ab..8bfc130759 100644 --- a/seaborn/relational.py +++ b/seaborn/relational.py @@ -13,7 +13,6 @@ adjust_legend_subtitles, _default_color, _deprecate_ci, - try_convert_to_pandas, ) from ._statistics import EstimateAggregator from .axisgrid import FacetGrid, _facet_docs @@ -705,7 +704,6 @@ def scatterplot( markers=True, style_order=None, legend="auto", ax=None, **kwargs ): - data = try_convert_to_pandas(data) variables = _ScatterPlotter.get_semantics(locals()) p = _ScatterPlotter(data=data, variables=variables, legend=legend) @@ -801,8 +799,6 @@ def relplot( legend="auto", kind="scatter", height=5, aspect=1, facet_kws=None, **kwargs ): - data = try_convert_to_pandas(data) - if kind == "scatter": plotter = _ScatterPlotter diff --git a/tests/_core/test_data.py b/tests/_core/test_data.py index 80c8e082b1..aeffc9ed7d 100644 --- a/tests/_core/test_data.py +++ b/tests/_core/test_data.py @@ -19,10 +19,7 @@ def long_variables(self): variables = dict(x="x", y="y", color="a", size="z", style="s_cat") return variables - def test_named_vectors(self, long_df, long_variables, using_polars): - if using_polars: - # no s_cat - return + def test_named_vectors(self, long_df, long_variables): p = PlotData(long_df, long_variables) assert p.source_data is long_df @@ -31,10 +28,7 @@ def test_named_vectors(self, long_df, long_variables, using_polars): assert p.names[key] == val assert_vector_equal(p.frame[key], long_df[val]) - def test_named_and_given_vectors(self, long_df, long_variables, using_polars): - if using_polars: - # no s_cat - return + def test_named_and_given_vectors(self, long_df, long_variables): long_variables["y"] = long_df["b"] long_variables["size"] = long_df["z"].to_numpy() @@ -53,10 +47,7 @@ def test_named_and_given_vectors(self, long_df, long_variables, using_polars): assert p.ids["y"] == "b" assert p.ids["size"] == id(long_variables["size"]) - def test_index_as_variable(self, long_df, long_variables, using_polars): - if using_polars: - # no index - return + def test_index_as_variable(self, long_df, long_variables): index = pd.Index(np.arange(len(long_df)) * 2 + 10, name="i", dtype=int) long_variables["x"] = "i" @@ -65,10 +56,7 @@ def test_index_as_variable(self, long_df, long_variables, using_polars): assert p.names["x"] == p.ids["x"] == "i" assert_vector_equal(p.frame["x"], pd.Series(index, index)) - def test_multiindex_as_variables(self, long_df, long_variables, using_polars): - if using_polars: - # no index - return + def test_multiindex_as_variables(self, long_df, long_variables): index_i = pd.Index(np.arange(len(long_df)) * 2 + 10, name="i", dtype=int) index_j = pd.Index(np.arange(len(long_df)) * 3 + 5, name="j", dtype=int) @@ -108,10 +96,7 @@ def test_tuple_as_variable_key(self, rng): assert_vector_equal(p.frame[var], df[key]) assert p.names[var] == p.ids[var] == str(key) - def test_dict_as_data(self, long_dict, long_variables, using_polars): - if using_polars: - # no s_cat - return + def test_dict_as_data(self, long_dict, long_variables): p = PlotData(long_dict, long_variables) assert p.source_data is long_dict @@ -122,12 +107,7 @@ def test_dict_as_data(self, long_dict, long_variables, using_polars): "vector_type", ["series", "numpy", "list"], ) - def test_vectors_various_types( - self, long_df, long_variables, vector_type, using_polars - ): - if using_polars: - # no s_cat - return + def test_vectors_various_types(self, long_df, long_variables, vector_type): variables = {key: long_df[val] for key, val in long_variables.items()} if vector_type == "numpy": @@ -229,10 +209,7 @@ def test_key_with_no_data_raises(self): with pytest.raises(ValueError, match=msg): PlotData(None, {var: key}) - def test_data_vector_different_lengths_raises(self, long_df, using_polars): - if using_polars: - # Does not raise (error specifically checks for pandas.DataFrame) - return + def test_data_vector_different_lengths_raises(self, long_df): vector = np.arange(len(long_df) - 5) msg = "Length of ndarray vectors must match length of `data`" @@ -257,14 +234,12 @@ def test_contains_operation(self, long_df): assert "y" not in p assert "color" in p - def test_join_add_variable(self, long_df, using_polars): + def test_join_add_variable(self, long_df): v1 = {"x": "x", "y": "f"} v2 = {"color": "a"} p1 = PlotData(long_df, v1) - if using_polars: - long_df = long_df.to_pandas() p2 = p1.join(None, v2) for var, key in dict(**v1, **v2).items(): @@ -272,14 +247,12 @@ def test_join_add_variable(self, long_df, using_polars): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_replace_variable(self, long_df, using_polars): + def test_join_replace_variable(self, long_df): v1 = {"x": "x", "y": "y"} v2 = {"y": "s"} p1 = PlotData(long_df, v1) - if using_polars: - long_df = long_df.to_pandas() p2 = p1.join(None, v2) variables = v1.copy() @@ -303,14 +276,12 @@ def test_join_remove_variable(self, long_df): assert drop_var not in p2.frame assert drop_var not in p2.names - def test_join_all_operations(self, long_df, using_polars): + def test_join_all_operations(self, long_df): v1 = {"x": "x", "y": "y", "color": "a"} v2 = {"y": "s", "size": "s", "color": None} p1 = PlotData(long_df, v1) - if using_polars: - long_df = long_df.to_pandas() p2 = p1.join(None, v2) for var, key in v2.items(): @@ -320,14 +291,12 @@ def test_join_all_operations(self, long_df, using_polars): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_all_operations_same_data(self, long_df, using_polars): + def test_join_all_operations_same_data(self, long_df): v1 = {"x": "x", "y": "y", "color": "a"} v2 = {"y": "s", "size": "s", "color": None} p1 = PlotData(long_df, v1) - if using_polars: - long_df = long_df.to_pandas() p2 = p1.join(long_df, v2) for var, key in v2.items(): @@ -337,7 +306,7 @@ def test_join_all_operations_same_data(self, long_df, using_polars): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_add_variable_new_data(self, long_df, using_polars): + def test_join_add_variable_new_data(self, long_df): d1 = long_df[["x", "y"]] d2 = long_df[["a", "s"]] @@ -346,15 +315,13 @@ def test_join_add_variable_new_data(self, long_df, using_polars): v2 = {"color": "a"} p1 = PlotData(d1, v1) - if using_polars: - long_df = long_df.to_pandas() p2 = p1.join(d2, v2) for var, key in dict(**v1, **v2).items(): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_replace_variable_new_data(self, long_df, using_polars): + def test_join_replace_variable_new_data(self, long_df): d1 = long_df[["x", "y"]] d2 = long_df[["a", "s"]] @@ -364,8 +331,6 @@ def test_join_replace_variable_new_data(self, long_df, using_polars): p1 = PlotData(d1, v1) p2 = p1.join(d2, v2) - if using_polars: - long_df = long_df.to_pandas() variables = v1.copy() variables.update(v2) @@ -374,14 +339,10 @@ def test_join_replace_variable_new_data(self, long_df, using_polars): assert p2.names[var] == key assert_vector_equal(p2.frame[var], long_df[key]) - def test_join_add_variable_different_index(self, long_df, using_polars): + def test_join_add_variable_different_index(self, long_df): - if using_polars: - d1 = long_df[:70] - d2 = long_df[30:] - else: - d1 = long_df.iloc[:70] - d2 = long_df.iloc[30:] + d1 = long_df.iloc[:70] + d2 = long_df.iloc[30:] v1 = {"x": "a"} v2 = {"y": "z"} @@ -391,9 +352,6 @@ def test_join_add_variable_different_index(self, long_df, using_polars): (var1, key1), = v1.items() (var2, key2), = v2.items() - if using_polars: - d1 = d1.to_pandas() - d2 = d2.to_pandas() assert_vector_equal(p2.frame.loc[d1.index, var1], d1[key1]) assert_vector_equal(p2.frame.loc[d2.index, var2], d2[key2]) @@ -401,14 +359,10 @@ def test_join_add_variable_different_index(self, long_df, using_polars): assert p2.frame.loc[d2.index.difference(d1.index), var1].isna().all() assert p2.frame.loc[d1.index.difference(d2.index), var2].isna().all() - def test_join_replace_variable_different_index(self, long_df, using_polars): + def test_join_replace_variable_different_index(self, long_df): - if using_polars: - d1 = long_df[:70] - d2 = long_df[30:] - else: - d1 = long_df.iloc[:70] - d2 = long_df.iloc[30:] + d1 = long_df.iloc[:70] + d2 = long_df.iloc[30:] var = "x" k1, k2 = "a", "z" @@ -420,27 +374,18 @@ def test_join_replace_variable_different_index(self, long_df, using_polars): (var1, key1), = v1.items() (var2, key2), = v2.items() - if using_polars: - d1 = d1.to_pandas() - d2 = d2.to_pandas() assert_vector_equal(p2.frame.loc[d2.index, var], d2[k2]) assert p2.frame.loc[d1.index.difference(d2.index), var].isna().all() - def test_join_subset_data_inherit_variables(self, long_df, using_polars): + def test_join_subset_data_inherit_variables(self, long_df): - if using_polars: - sub_df = long_df.filter(long_df["a"] == "b") - else: - sub_df = long_df[long_df["a"] == "b"] + sub_df = long_df[long_df["a"] == "b"] var = "y" p1 = PlotData(long_df, {var: var}) p2 = p1.join(sub_df, None) - if using_polars: - sub_df = sub_df.to_pandas() - long_df = long_df.to_pandas() assert_vector_equal(p2.frame.loc[sub_df.index, var], sub_df[var]) assert p2.frame.loc[long_df.index.difference(sub_df.index), var].isna().all() diff --git a/tests/_core/test_plot.py b/tests/_core/test_plot.py index edfa51b765..3da7aab583 100644 --- a/tests/_core/test_plot.py +++ b/tests/_core/test_plot.py @@ -81,50 +81,37 @@ def test_empty(self): assert p._data.source_data is None assert p._data.source_vars == {} - def test_data_only(self, long_df, using_polars): - if using_polars: - # source_data was tranformed to pandas - return + def test_data_only(self, long_df): p = Plot(long_df) assert p._data.source_data is long_df assert p._data.source_vars == {} - def test_df_and_named_variables(self, long_df, using_polars): + def test_df_and_named_variables(self, long_df): variables = {"x": "a", "y": "z"} p = Plot(long_df, **variables) - if using_polars: - long_df = long_df.to_pandas() for var, col in variables.items(): assert_vector_equal(p._data.frame[var], long_df[col]) - if not using_polars: - assert p._data.source_data is long_df + assert p._data.source_data is long_df assert p._data.source_vars.keys() == variables.keys() - def test_df_and_mixed_variables(self, long_df, using_polars): + def test_df_and_mixed_variables(self, long_df): variables = {"x": "a", "y": long_df["z"]} p = Plot(long_df, **variables) - if using_polars: - long_df = long_df.to_pandas() - variables = {"x": "a", "y": long_df["z"]} for var, col in variables.items(): if isinstance(col, str): assert_vector_equal(p._data.frame[var], long_df[col]) else: assert_vector_equal(p._data.frame[var], col) - if not using_polars: - assert p._data.source_data is long_df + assert p._data.source_data is long_df assert p._data.source_vars.keys() == variables.keys() - def test_vector_variables_only(self, long_df, using_polars): + def test_vector_variables_only(self, long_df): variables = {"x": long_df["a"], "y": long_df["z"]} p = Plot(**variables) - if using_polars: - long_df = long_df.to_pandas() - variables = {"x": long_df["a"], "y": long_df["z"]} for var, col in variables.items(): assert_vector_equal(p._data.frame[var], col) assert p._data.source_data is None @@ -159,11 +146,10 @@ def test_positional_and_named_xy(self, long_df, var): with pytest.raises(TypeError, match=err): Plot(long_df, "a", "b", **{var: "c"}) - def test_positional_data_x_y(self, long_df, using_polars): + def test_positional_data_x_y(self, long_df): p = Plot(long_df, "a", "b") - if not using_polars: - assert p._data.source_data is long_df + assert p._data.source_data is long_df assert list(p._data.source_vars) == ["x", "y"] def test_positional_x_y(self, long_df): @@ -172,11 +158,10 @@ def test_positional_x_y(self, long_df): assert p._data.source_data is None assert list(p._data.source_vars) == ["x", "y"] - def test_positional_data_x(self, long_df, using_polars): + def test_positional_data_x(self, long_df): p = Plot(long_df, "a") - if not using_polars: - assert p._data.source_data is long_df + assert p._data.source_data is long_df assert list(p._data.source_vars) == ["x"] def test_positional_x(self, long_df): @@ -206,44 +191,35 @@ def test_without_data(self, long_df): layer, = p._layers assert_frame_equal(p._data.frame, layer["data"].frame, check_dtype=False) - def test_with_new_variable_by_name(self, long_df, using_polars): + def test_with_new_variable_by_name(self, long_df): p = Plot(long_df, x="x").add(MockMark(), y="y").plot() - if using_polars: - long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": assert_vector_equal(layer["data"].frame[var], long_df[var]) - def test_with_new_variable_by_vector(self, long_df, using_polars): + def test_with_new_variable_by_vector(self, long_df): p = Plot(long_df, x="x").add(MockMark(), y=long_df["y"]).plot() - if using_polars: - long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": assert_vector_equal(layer["data"].frame[var], long_df[var]) - def test_with_late_data_definition(self, long_df, using_polars): + def test_with_late_data_definition(self, long_df): p = Plot().add(MockMark(), data=long_df, x="x", y="y").plot() - if using_polars: - long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": assert_vector_equal(layer["data"].frame[var], long_df[var]) - def test_with_new_data_definition(self, long_df, using_polars): + def test_with_new_data_definition(self, long_df): long_df_sub = long_df.sample(frac=.5) p = Plot(long_df, x="x", y="y").add(MockMark(), data=long_df_sub).plot() - if using_polars: - long_df = long_df.to_pandas() - long_df_sub = long_df_sub.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x", "y"] for var in "xy": @@ -251,11 +227,9 @@ def test_with_new_data_definition(self, long_df, using_polars): layer["data"].frame[var], long_df_sub[var].reindex(long_df.index) ) - def test_drop_variable(self, long_df, using_polars): + def test_drop_variable(self, long_df): p = Plot(long_df, x="x", y="y").add(MockMark(), y=None).plot() - if using_polars: - long_df = long_df.to_pandas() layer, = p._layers assert layer["data"].frame.columns.to_list() == ["x"] assert_vector_equal(layer["data"].frame["x"], long_df["x"], check_dtype=False) @@ -422,16 +396,14 @@ def test_log_scale_name(self): assert ax.get_xscale() == "log" assert ax.get_yscale() == "linear" - def test_mark_data_log_transform_is_inverted(self, long_df, using_polars): + def test_mark_data_log_transform_is_inverted(self, long_df): col = "z" m = MockMark() Plot(long_df, x=col).scale(x="log").add(m).plot() - if using_polars: - long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df[col]) - def test_mark_data_log_transfrom_with_stat(self, long_df, using_polars): + def test_mark_data_log_transfrom_with_stat(self, long_df): class Mean(Stat): group_by_orient = True @@ -446,8 +418,6 @@ def __call__(self, data, groupby, orient, scales): s = Mean() Plot(long_df, x=grouper, y=col).scale(y="log").add(m, s).plot() - if using_polars: - long_df = long_df.to_pandas() expected = ( long_df[col] @@ -459,25 +429,21 @@ def __call__(self, data, groupby, orient, scales): ) assert_vector_equal(m.passed_data[0]["y"], expected) - def test_mark_data_from_categorical(self, long_df, using_polars): + def test_mark_data_from_categorical(self, long_df): col = "a" m = MockMark() Plot(long_df, x=col).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() levels = categorical_order(long_df[col]) level_map = {x: float(i) for i, x in enumerate(levels)} assert_vector_equal(m.passed_data[0]["x"], long_df[col].map(level_map)) - def test_mark_data_from_datetime(self, long_df, using_polars): + def test_mark_data_from_datetime(self, long_df): col = "t" m = MockMark() Plot(long_df, x=col).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() expected = long_df[col].map(mpl.dates.date2num) assert_vector_equal(m.passed_data[0]["x"], expected) @@ -732,7 +698,7 @@ def test_single_split_single_layer(self, long_df): for col in p._data.frame: assert_series_equal(m.passed_data[0][col], p._data.frame[col]) - def test_single_split_multi_layer(self, long_df, using_polars): + def test_single_split_multi_layer(self, long_df): vs = [{"color": "a", "linewidth": "z"}, {"color": "b", "pattern": "c"}] @@ -741,8 +707,6 @@ class NoGroupingMark(MockMark): ms = [NoGroupingMark(), NoGroupingMark()] Plot(long_df).add(ms[0], **vs[0]).add(ms[1], **vs[1]).plot() - if using_polars: - long_df = long_df.to_pandas() for m, v in zip(ms, vs): for var, col in v.items(): @@ -787,15 +751,13 @@ def check_splits_multi_vars( "color", # explicitly declared on the Mark "group", # implicitly used for all Mark classes ]) - def test_one_grouping_variable(self, long_df, split_var, using_polars): + def test_one_grouping_variable(self, long_df, split_var): split_col = "a" data_vars = {"x": "f", "y": "z", split_var: split_col} m = MockMark() p = Plot(long_df, **data_vars).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() split_keys = categorical_order(long_df[split_col]) sub, *_ = p._subplots @@ -804,7 +766,7 @@ def test_one_grouping_variable(self, long_df, split_var, using_polars): long_df, m, data_vars, split_var, split_col, split_keys ) - def test_two_grouping_variables(self, long_df, using_polars): + def test_two_grouping_variables(self, long_df): split_vars = ["color", "group"] split_cols = ["a", "b"] @@ -812,8 +774,6 @@ def test_two_grouping_variables(self, long_df, using_polars): m = MockMark() p = Plot(long_df, **data_vars).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() split_keys = [categorical_order(long_df[col]) for col in split_cols] sub, *_ = p._subplots @@ -830,7 +790,7 @@ def test_specified_width(self, long_df): Plot(long_df, x="x", y="y").add(m, width="z").plot() assert_array_almost_equal(m.passed_data[0]["width"], long_df["z"]) - def test_facets_no_subgroups(self, long_df, using_polars): + def test_facets_no_subgroups(self, long_df): split_var = "col" split_col = "b" @@ -838,8 +798,6 @@ def test_facets_no_subgroups(self, long_df, using_polars): m = MockMark() p = Plot(long_df, **data_vars).facet(**{split_var: split_col}).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() split_keys = categorical_order(long_df[split_col]) assert m.passed_axes == list(p._figure.axes) @@ -847,7 +805,7 @@ def test_facets_no_subgroups(self, long_df, using_polars): long_df, m, data_vars, split_var, split_col, split_keys ) - def test_facets_one_subgroup(self, long_df, using_polars): + def test_facets_one_subgroup(self, long_df): facet_var, facet_col = fx = "col", "a" group_var, group_col = gx = "group", "b" @@ -861,8 +819,6 @@ def test_facets_one_subgroup(self, long_df, using_polars): .add(m) .plot() ) - if using_polars: - long_df = long_df.to_pandas() split_keys = [categorical_order(long_df[col]) for col in [facet_col, group_col]] assert m.passed_axes == [ @@ -874,15 +830,13 @@ def test_facets_one_subgroup(self, long_df, using_polars): long_df, m, data_vars, split_vars, split_cols, split_keys ) - def test_layer_specific_facet_disabling(self, long_df, using_polars): + def test_layer_specific_facet_disabling(self, long_df): axis_vars = {"x": "y", "y": "z"} row_var = "a" m = MockMark() p = Plot(long_df, **axis_vars).facet(row=row_var).add(m, row=None).plot() - if using_polars: - long_df = long_df.to_pandas() col_levels = categorical_order(long_df[row_var]) assert len(p._figure.axes) == len(col_levels) @@ -891,15 +845,13 @@ def test_layer_specific_facet_disabling(self, long_df, using_polars): for var, col in axis_vars.items(): assert_vector_equal(data[var], long_df[col]) - def test_paired_variables(self, long_df, using_polars): + def test_paired_variables(self, long_df): x = ["x", "y"] y = ["f", "z"] m = MockMark() Plot(long_df).pair(x, y).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() var_product = itertools.product(x, y) @@ -907,34 +859,26 @@ def test_paired_variables(self, long_df, using_polars): assert_vector_equal(data["x"], long_df[x_i].astype(float)) assert_vector_equal(data["y"], long_df[y_i].astype(float)) - def test_paired_one_dimension(self, long_df, using_polars): + def test_paired_one_dimension(self, long_df): x = ["y", "z"] m = MockMark() Plot(long_df).pair(x).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() for data, x_i in zip(m.passed_data, x): assert_vector_equal(data["x"], long_df[x_i].astype(float)) - def test_paired_variables_one_subset(self, long_df, using_polars): + def test_paired_variables_one_subset(self, long_df): x = ["x", "y"] y = ["f", "z"] group = "a" - if using_polars: - import polars as pl - long_df = long_df.with_columns(pl.col('x').cast(pl.Float64)) - else: - long_df["x"] = long_df["x"].astype(float) # simplify vector comparison + long_df["x"] = long_df["x"].astype(float) # simplify vector comparison m = MockMark() Plot(long_df, group=group).pair(x, y).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() groups = categorical_order(long_df[group]) var_product = itertools.product(x, y, groups) @@ -944,7 +888,7 @@ def test_paired_variables_one_subset(self, long_df, using_polars): assert_vector_equal(data["x"], long_df.loc[rows, x_i]) assert_vector_equal(data["y"], long_df.loc[rows, y_i]) - def test_paired_and_faceted(self, long_df, using_polars): + def test_paired_and_faceted(self, long_df): x = ["y", "z"] y = "f" @@ -952,8 +896,6 @@ def test_paired_and_faceted(self, long_df, using_polars): m = MockMark() Plot(long_df, y=y).facet(row=row).pair(x).add(m).plot() - if using_polars: - long_df = long_df.to_pandas() facets = categorical_order(long_df[row]) var_product = itertools.product(x, facets) @@ -990,60 +932,43 @@ def test_theme_validation(self): with pytest.raises(KeyError, match="not.a.key is not a valid rc"): p.theme({"not.a.key": True}) - def test_stat(self, long_df, using_polars): + def test_stat(self, long_df): - if not using_polars: - orig_df = long_df.copy(deep=True) - else: - orig_df = long_df.to_pandas() + orig_df = long_df.copy(deep=True) m = MockMark() Plot(long_df, x="a", y="z").add(m, Agg()).plot() - if using_polars: - long_df = long_df.to_pandas() expected = long_df.groupby("a", sort=False)["z"].mean().reset_index(drop=True) assert_vector_equal(m.passed_data[0]["y"], expected) assert_frame_equal(long_df, orig_df) # Test data was not mutated - def test_move(self, long_df, using_polars): + def test_move(self, long_df): - if not using_polars: - orig_df = long_df.copy(deep=True) - else: - orig_df = long_df.to_pandas() + orig_df = long_df.copy(deep=True) m = MockMark() Plot(long_df, x="z", y="z").add(m, Shift(x=1)).plot() - if using_polars: - long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df["z"] + 1) assert_vector_equal(m.passed_data[0]["y"], long_df["z"]) assert_frame_equal(long_df, orig_df) # Test data was not mutated - def test_stat_and_move(self, long_df, using_polars): + def test_stat_and_move(self, long_df): m = MockMark() Plot(long_df, x="a", y="z").add(m, Agg(), Shift(y=1)).plot() - if using_polars: - long_df = long_df.to_pandas() expected = long_df.groupby("a", sort=False)["z"].mean().reset_index(drop=True) assert_vector_equal(m.passed_data[0]["y"], expected + 1) - def test_stat_log_scale(self, long_df, using_polars): + def test_stat_log_scale(self, long_df): - if not using_polars: - orig_df = long_df.copy(deep=True) - else: - orig_df = long_df.to_pandas() + orig_df = long_df.copy(deep=True) m = MockMark() Plot(long_df, x="a", y="z").add(m, Agg()).scale(y="log").plot() - if using_polars: - long_df = long_df.to_pandas() x = long_df["a"] y = np.log10(long_df["z"]) @@ -1052,31 +977,25 @@ def test_stat_log_scale(self, long_df, using_polars): assert_frame_equal(long_df, orig_df) # Test data was not mutated - def test_move_log_scale(self, long_df, using_polars): + def test_move_log_scale(self, long_df): m = MockMark() Plot( long_df, x="z", y="z" ).scale(x="log").add(m, Shift(x=-1)).plot() - if using_polars: - long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df["z"] / 10) - def test_multi_move(self, long_df, using_polars): + def test_multi_move(self, long_df): m = MockMark() move_stack = [Shift(1), Shift(2)] Plot(long_df, x="x", y="y").add(m, *move_stack).plot() - if using_polars: - long_df = long_df.to_pandas() assert_vector_equal(m.passed_data[0]["x"], long_df["x"] + 3) - def test_multi_move_with_pairing(self, long_df, using_polars): + def test_multi_move_with_pairing(self, long_df): m = MockMark() move_stack = [Shift(1), Shift(2)] Plot(long_df, x="x").pair(y=["y", "z"]).add(m, *move_stack).plot() - if using_polars: - long_df = long_df.to_pandas() for frame in m.passed_data: assert_vector_equal(frame["x"], long_df["x"] + 3) @@ -1404,11 +1323,9 @@ def reorder(self, request): "expand": lambda x: x + ["z"], }[request.param] - def check_facet_results_1d(self, p, df, dim, key, order=None, using_polars=False): + def check_facet_results_1d(self, p, df, dim, key, order=None): p = p.plot() - if using_polars: - df = df.to_pandas() order = categorical_order(df[key], order) assert len(p._figure.axes) == len(order) @@ -1421,29 +1338,24 @@ def check_facet_results_1d(self, p, df, dim, key, order=None, using_polars=False assert subplot["ax"].get_title() == f"{level}" assert_gridspec_shape(subplot["ax"], **{f"n{dim}s": len(order)}) - def test_1d(self, long_df, dim, using_polars): + def test_1d(self, long_df, dim): key = "a" p = Plot(long_df).facet(**{dim: key}) - self.check_facet_results_1d(p, long_df, dim, key, using_polars=using_polars) + self.check_facet_results_1d(p, long_df, dim, key) - def test_1d_as_vector(self, long_df, dim, using_polars): + def test_1d_as_vector(self, long_df, dim): key = "a" p = Plot(long_df).facet(**{dim: long_df[key]}) - self.check_facet_results_1d(p, long_df, dim, key, using_polars=using_polars) + self.check_facet_results_1d(p, long_df, dim, key) - def test_1d_with_order(self, long_df, dim, reorder, using_polars): + def test_1d_with_order(self, long_df, dim, reorder): key = "a" - if using_polars: - order = reorder(categorical_order(long_df.to_pandas()[key])) - else: - order = reorder(categorical_order(long_df[key])) + order = reorder(categorical_order(long_df[key])) p = Plot(long_df).facet(**{dim: key, "order": order}) - self.check_facet_results_1d( - p, long_df, dim, key, order, using_polars=using_polars - ) + self.check_facet_results_1d(p, long_df, dim, key, order) def check_facet_results_2d(self, p, df, variables, order=None): @@ -1465,27 +1377,19 @@ def check_facet_results_2d(self, p, df, variables, order=None): subplot["axes"], len(levels["row"]), len(levels["col"]) ) - def test_2d(self, long_df, using_polars): + def test_2d(self, long_df): variables = {"row": "a", "col": "c"} p = Plot(long_df).facet(**variables) - if using_polars: - long_df = long_df.to_pandas() self.check_facet_results_2d(p, long_df, variables) - def test_2d_with_order(self, long_df, reorder, using_polars): + def test_2d_with_order(self, long_df, reorder): variables = {"row": "a", "col": "c"} - if using_polars: - order = { - dim: reorder(categorical_order(long_df.to_pandas()[key])) - for dim, key in variables.items() - } - else: - order = { - dim: reorder(categorical_order(long_df[key])) - for dim, key in variables.items() - } + order = { + dim: reorder(categorical_order(long_df[key])) + for dim, key in variables.items() + } p = Plot(long_df).facet(**variables, order=order) self.check_facet_results_2d(p, long_df, variables, order) @@ -1509,7 +1413,7 @@ def test_layout_algo(self, algo): sep2 = bb22.corners()[0, 0] - bb21.corners()[2, 0] assert sep1 <= sep2 - def test_axis_sharing(self, long_df, using_polars): + def test_axis_sharing(self, long_df): variables = {"row": "a", "col": "c"} @@ -1528,8 +1432,6 @@ def test_axis_sharing(self, long_df, using_polars): assert not any(shareset.joined(root, ax) for ax in other) p3 = p.share(x="col", y="row").plot() - if using_polars: - long_df = long_df.to_pandas() shape = ( len(categorical_order(long_df[variables["row"]])), len(categorical_order(long_df[variables["col"]])), @@ -1647,15 +1549,13 @@ def test_with_no_variables(self, long_df): p = Plot(long_df).pair().plot() assert len(p._figure.axes) == 1 - def test_with_facets(self, long_df, using_polars): + def test_with_facets(self, long_df): x = "x" y = ["y", "z"] col = "a" p = Plot(long_df, x=x).facet(col).pair(y=y).plot() - if using_polars: - long_df = long_df.to_pandas() facet_levels = categorical_order(long_df[col]) dims = itertools.product(y, facet_levels) diff --git a/tests/_core/test_properties.py b/tests/_core/test_properties.py index 827309ede8..b4764762eb 100644 --- a/tests/_core/test_properties.py +++ b/tests/_core/test_properties.py @@ -24,14 +24,6 @@ from seaborn._compat import MarkerStyle, get_colormap from seaborn.palettes import color_palette -import os -if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': - pytest.skip( - "Testing internal classes/methods, which are reached with non-pandas " - "dataframes already transformed to pandas", - allow_module_level=True - ) - class DataFixtures: @@ -48,9 +40,7 @@ def cat_vector(self, long_df): return long_df["a"] @pytest.fixture - def cat_order(self, cat_vector, using_polars): - if using_polars: - return categorical_order(cat_vector.to_pandas()) + def cat_order(self, cat_vector): return categorical_order(cat_vector) @pytest.fixture @@ -90,10 +80,7 @@ class TestColor(DataFixtures): def assert_same_rgb(self, a, b): assert_array_equal(a[:, :3], b[:, :3]) - def test_nominal_default_palette(self, cat_vector, cat_order, using_polars): - if using_polars: - # get_mapping expected pd.Series - return + def test_nominal_default_palette(self, cat_vector, cat_order): m = Color().get_mapping(Nominal(), cat_vector) n = len(cat_order) diff --git a/tests/_stats/test_counting.py b/tests/_stats/test_counting.py index 8b5d2c8f63..7656654492 100644 --- a/tests/_stats/test_counting.py +++ b/tests/_stats/test_counting.py @@ -8,14 +8,6 @@ from seaborn._core.groupby import GroupBy from seaborn._stats.counting import Hist, Count -import os -if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': - pytest.skip( - "Testing internal classes/methods, which are reached with non-pandas " - "dataframes already transformed to pandas", - allow_module_level=True - ) - class TestCount: diff --git a/tests/conftest.py b/tests/conftest.py index 0bef4859f0..01d93a4941 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,26 +1,9 @@ -import os - import numpy as np import pandas as pd import pytest -def maybe_convert_to_polars(df): - # If the SEABORN_TEST_INTERCHANGE_PROTOCOL=1 environment variable - # is set, then check tests work when starting with a non-pandas - # DataFrame (here, polars). - if os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1': - import polars as pl - return pl.from_pandas(df) - return df - - -@pytest.fixture() -def using_polars() -> bool: - return os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1' - - @pytest.fixture(autouse=True) def close_figs(): yield @@ -46,7 +29,7 @@ def wide_df(rng): columns = list("abc") index = pd.RangeIndex(10, 50, 2, name="wide_index") values = rng.normal(size=(len(index), len(columns))) - return maybe_convert_to_polars(pd.DataFrame(values, index=index, columns=columns)) + return pd.DataFrame(values, index=index, columns=columns) @pytest.fixture @@ -60,7 +43,7 @@ def wide_array(wide_df): def flat_series(rng): index = pd.RangeIndex(10, 30, name="t") - return maybe_convert_to_polars(pd.Series(rng.normal(size=20), index, name="s")) + return pd.Series(rng.normal(size=20), index, name="s") @pytest.fixture @@ -79,7 +62,7 @@ def flat_list(flat_series): def flat_data(rng, request): index = pd.RangeIndex(10, 30, name="t") - series = maybe_convert_to_polars(pd.Series(rng.normal(size=20), index, name="s")) + series = pd.Series(rng.normal(size=20), index, name="s") if request.param == "series": data = series elif request.param == "array": @@ -92,14 +75,8 @@ def flat_data(rng, request): @pytest.fixture def wide_list_of_series(rng): - return [ - maybe_convert_to_polars( - pd.Series(rng.normal(size=20), np.arange(20), name="a") - ), - maybe_convert_to_polars( - pd.Series(rng.normal(size=10), np.arange(5, 15), name="b") - ) - ] + return [pd.Series(rng.normal(size=20), np.arange(20), name="a"), + pd.Series(rng.normal(size=10), np.arange(5, 15), name="b")] @pytest.fixture @@ -133,7 +110,7 @@ def wide_dict_of_lists(wide_list_of_series): @pytest.fixture -def long_df(rng, using_polars): +def long_df(rng): n = 100 df = pd.DataFrame(dict( @@ -156,9 +133,6 @@ def long_df(rng, using_polars): df["s_cat"] = df["s"].astype("category") df["s_str"] = df["s"].astype(str) - if using_polars: - import polars as pl - return pl.from_pandas(df.drop('s_cat', axis=1)) return df @@ -172,41 +146,35 @@ def long_dict(long_df): def repeated_df(rng): n = 100 - return maybe_convert_to_polars(pd.DataFrame(dict( + return pd.DataFrame(dict( x=np.tile(np.arange(n // 2), 2), y=rng.normal(size=n), a=rng.choice(list("abc"), n), u=np.repeat(np.arange(2), n // 2), - ))) + )) @pytest.fixture -def null_df(rng, long_df, using_polars): - if using_polars: - df = long_df.to_pandas().copy() - else: - df = long_df.copy() +def null_df(rng, long_df): + + df = long_df.copy() for col in df: idx = rng.permutation(df.index)[:10] df.loc[idx, col] = np.nan - return maybe_convert_to_polars(df) + return df @pytest.fixture -def object_df(rng, long_df, using_polars): - if using_polars: - df = long_df.to_pandas().copy() - else: - df = long_df.copy() +def object_df(rng, long_df): + + df = long_df.copy() # objectify numeric columns for col in ["c", "s", "f"]: df[col] = df[col].astype(object) - return maybe_convert_to_polars(df) + return df @pytest.fixture -def null_series(flat_series, using_polars): - if using_polars: - import polars as pl - return pl.Series([], dtype=pl.Float64) +def null_series(flat_series): + return pd.Series(index=flat_series.index, dtype='float64') diff --git a/tests/test_categorical.py b/tests/test_categorical.py index 6f58d6de6d..f1722b9f79 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -540,7 +540,7 @@ def common_kws(self): return {} @pytest.mark.parametrize("orient", ["x", "y"]) - def test_labels_long(self, long_df, orient, using_polars): + def test_labels_long(self, long_df, orient): depend = {"x": "y", "y": "x"}[orient] kws = {orient: "a", depend: "y", "hue": "b"} @@ -555,33 +555,25 @@ def test_labels_long(self, long_df, orient, using_polars): get_ori_labels = getattr(ax, f"get_{orient}ticklabels") ori_labels = [t.get_text() for t in get_ori_labels()] - if using_polars: - ori_levels = categorical_order(long_df.to_pandas()[kws[orient]]) - else: - ori_levels = categorical_order(long_df[kws[orient]]) + ori_levels = categorical_order(long_df[kws[orient]]) assert ori_labels == ori_levels legend = ax.get_legend() assert legend.get_title().get_text() == kws["hue"] hue_labels = [t.get_text() for t in legend.texts] - if using_polars: - hue_levels = categorical_order(long_df.to_pandas()[kws["hue"]]) - else: - hue_levels = categorical_order(long_df[kws["hue"]]) + hue_levels = categorical_order(long_df[kws["hue"]]) assert hue_labels == hue_levels - def test_labels_wide(self, wide_df, using_polars): + def test_labels_wide(self, wide_df): - if not using_polars: - wide_df = wide_df.rename_axis("cols", axis=1) + wide_df = wide_df.rename_axis("cols", axis=1) ax = self.func(wide_df) # To populate texts; only needed on older matplotlibs _draw_figure(ax.figure) - if not using_polars: - assert ax.get_xlabel() == wide_df.columns.name + assert ax.get_xlabel() == wide_df.columns.name labels = [t.get_text() for t in ax.get_xticklabels()] for label, level in zip(labels, wide_df.columns): assert label == level @@ -675,13 +667,10 @@ def test_supplied_color_array(self, long_df): ("x", "dataframe"), ("x", "dict"), ] ) - def test_wide(self, wide_df, orient, data_type, using_polars): + def test_wide(self, wide_df, orient, data_type): if data_type == "dict": - if using_polars: - wide_df = {col: wide_df[col].to_numpy() for col in wide_df.columns} - else: - wide_df = {k: v.to_numpy() for k, v in wide_df.items()} + wide_df = {k: v.to_numpy() for k, v in wide_df.items()} ax = self.func(data=wide_df, orient=orient) _draw_figure(ax.figure) @@ -743,7 +732,7 @@ def test_flat(self, flat_series, orient): ({"val": "y", "cat": "s_cat", "hue": None}, None), ], ) - def test_positions(self, long_df, variables, orient, using_polars): + def test_positions(self, long_df, variables, orient): cat_var = variables["cat"] val_var = variables["val"] @@ -751,16 +740,10 @@ def test_positions(self, long_df, variables, orient, using_polars): var_names = list(variables.values()) x_var, y_var, *_ = var_names - if using_polars and y_var == 's_cat': - return - ax = self.func( data=long_df, x=x_var, y=y_var, hue=hue_var, orient=orient, ) - if using_polars: - long_df = long_df.to_pandas() - _draw_figure(ax.figure) cat_idx = var_names.index(cat_var) @@ -797,7 +780,7 @@ def test_positions(self, long_df, variables, orient, using_polars): {"cat": "a", "val": "y", "hue": "f"}, ], ) - def test_positions_dodged(self, long_df, variables, using_polars): + def test_positions_dodged(self, long_df, variables): cat_var = variables["cat"] val_var = variables["val"] @@ -809,8 +792,6 @@ def test_positions_dodged(self, long_df, variables, using_polars): data=long_df, x=x_var, y=y_var, hue=hue_var, dodge=True, ) - if using_polars: - long_df = long_df.to_pandas() cat_vals = categorical_order(long_df[cat_var]) hue_vals = categorical_order(long_df[hue_var]) @@ -838,12 +819,9 @@ def test_positions_dodged(self, long_df, variables, using_polars): assert 0 <= np.ptp(cat_pos) <= nest_width @pytest.mark.parametrize("cat_var", ["a", "s", "d"]) - def test_positions_unfixed(self, long_df, cat_var, using_polars): + def test_positions_unfixed(self, long_df, cat_var): - if using_polars: - long_df = long_df.sort(cat_var) - else: - long_df = long_df.sort_values(cat_var) + long_df = long_df.sort_values(cat_var) kws = dict(size=.001) if "stripplot" in str(self.func): # can't use __name__ with partial @@ -851,9 +829,6 @@ def test_positions_unfixed(self, long_df, cat_var, using_polars): ax = self.func(data=long_df, x=cat_var, y="y", native_scale=True, **kws) - if using_polars: - long_df = long_df.to_pandas() - for i, (cat_level, cat_data) in enumerate(long_df.groupby(cat_var)): points = ax.collections[i].get_offsets().T @@ -912,19 +887,16 @@ def test_order(self, x_type, order): assert not positions.size @pytest.mark.parametrize("hue_var", ["a", "b"]) - def test_hue_categorical(self, long_df, hue_var, using_polars): + def test_hue_categorical(self, long_df, hue_var): cat_var = "b" - pal_name = "muted" - ax = self.func(data=long_df, x=cat_var, y="y", hue=hue_var, palette=pal_name) - - if using_polars: - long_df = long_df.to_pandas() - hue_levels = categorical_order(long_df[hue_var]) cat_levels = categorical_order(long_df[cat_var]) + + pal_name = "muted" palette = dict(zip(hue_levels, color_palette(pal_name))) + ax = self.func(data=long_df, x=cat_var, y="y", hue=hue_var, palette=pal_name) for i, level in enumerate(cat_levels): @@ -940,13 +912,9 @@ def test_hue_categorical(self, long_df, hue_var, using_polars): assert tuple(color) == to_rgba(palette[hue]) @pytest.mark.parametrize("hue_var", ["a", "b"]) - def test_hue_dodged(self, long_df, hue_var, using_polars): + def test_hue_dodged(self, long_df, hue_var): ax = self.func(data=long_df, x="y", y="a", hue=hue_var, dodge=True) - - if using_polars: - long_df = long_df.to_pandas() - colors = color_palette(n_colors=long_df[hue_var].nunique()) collections = iter(ax.collections) @@ -964,15 +932,12 @@ def test_hue_dodged(self, long_df, hue_var, using_polars): "val_var,val_col,hue_col", list(itertools.product(["x", "y"], ["b", "y", "t"], [None, "a"])), ) - def test_single(self, long_df, val_var, val_col, hue_col, using_polars): + def test_single(self, long_df, val_var, val_col, hue_col): var_kws = {val_var: val_col, "hue": hue_col} ax = self.func(data=long_df, **var_kws) _draw_figure(ax.figure) - if using_polars: - long_df = long_df.to_pandas() - axis_vars = ["x", "y"] val_idx = axis_vars.index(val_var) cat_idx = int(not val_idx) @@ -1033,13 +998,9 @@ def test_three_points(self): for point_color in ax.collections[0].get_facecolor(): assert tuple(point_color) == to_rgba("C0") - def test_legend_categorical(self, long_df, using_polars): + def test_legend_categorical(self, long_df): ax = self.func(data=long_df, x="y", y="a", hue="b") - - if using_polars: - long_df = long_df.to_pandas() - legend_texts = [t.get_text() for t in ax.legend_.texts] expected = categorical_order(long_df["b"]) assert legend_texts == expected @@ -1055,22 +1016,18 @@ def test_legend_disabled(self, long_df): ax = self.func(data=long_df, x="y", y="a", hue="b", legend=False) assert ax.legend_ is None - def test_palette_from_color_deprecation(self, long_df, using_polars): + def test_palette_from_color_deprecation(self, long_df): color = (.9, .4, .5) hex_color = mpl.colors.to_hex(color) hue_var = "a" + n_hue = long_df[hue_var].nunique() + palette = color_palette(f"dark:{hex_color}", n_hue) with pytest.warns(FutureWarning, match="Setting a gradient palette"): ax = self.func(data=long_df, x="z", hue=hue_var, color=color) - if using_polars: - long_df = long_df.to_pandas() - - n_hue = long_df[hue_var].nunique() - palette = color_palette(f"dark:{hex_color}", n_hue) - points = ax.collections[0] for point_color in points.get_facecolors(): assert to_rgb(point_color) in palette @@ -1194,7 +1151,7 @@ def test_jitter_unfixed(self, long_df): "orient,jitter", itertools.product(["v", "h"], [True, .1]), ) - def test_jitter(self, long_df, orient, jitter, using_polars): + def test_jitter(self, long_df, orient, jitter): cat_var, val_var = "a", "y" if orient == "x": @@ -1204,14 +1161,12 @@ def test_jitter(self, long_df, orient, jitter, using_polars): x_var, y_var = val_var, cat_var cat_idx, val_idx = 1, 0 + cat_vals = categorical_order(long_df[cat_var]) + ax = stripplot( data=long_df, x=x_var, y=y_var, jitter=jitter, ) - if using_polars: - long_df = long_df.to_pandas() - cat_vals = categorical_order(long_df[cat_var]) - if jitter is True: jitter_range = .4 else: @@ -1285,12 +1240,8 @@ def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5): p25, p75 = np.percentile(data, [25, 75]) iqr = p75 - p25 - if isinstance(data, pd.Series): - adj_lo = data[data >= (p25 - iqr * whis)].min() - adj_hi = data[data <= (p75 + iqr * whis)].max() - else: # polars - adj_lo = data.filter(data >= (p25 - iqr * whis)).min() - adj_hi = data.filter(data <= (p75 + iqr * whis)).max() + adj_lo = data[data >= (p25 - iqr * whis)].min() + adj_hi = data[data <= (p75 + iqr * whis)].max() assert whis_lo[val_idx].max() == p25 assert whis_lo[val_idx].min() == approx(adj_lo) @@ -1304,15 +1255,12 @@ def check_whiskers(self, bxp, data, orient, pos, capsize=0.4, whis=1.5): assert np.allclose(caps_hi[val_idx], (adj_hi, adj_hi)) assert np.allclose(caps_hi[pos_idx], (pos - capsize / 2, pos + capsize / 2)) - if isinstance(data, pd.Series): - flier_data = data[(data < adj_lo) | (data > adj_hi)] - else: - flier_data = data.filter((data < adj_lo) | (data > adj_hi)) + flier_data = data[(data < adj_lo) | (data > adj_hi)] assert sorted(fliers[val_idx]) == sorted(flier_data) assert np.allclose(fliers[pos_idx], pos) @pytest.mark.parametrize("orient,col", [("x", "y"), ("y", "z")]) - def test_single_var(self, long_df, orient, col, using_polars): + def test_single_var(self, long_df, orient, col): var = {"x": "y", "y": "x"}[orient] ax = boxplot(long_df, **{var: col}) @@ -1330,7 +1278,7 @@ def test_vector_data(self, long_df, orient, col): self.check_whiskers(bxp, long_df[col], orient, 0) @pytest.mark.parametrize("orient", ["h", "v"]) - def test_wide_data(self, wide_df, orient, using_polars): + def test_wide_data(self, wide_df, orient): orient = {"h": "y", "v": "x"}[orient] ax = boxplot(wide_df, orient=orient) @@ -1340,12 +1288,10 @@ def test_wide_data(self, wide_df, orient, using_polars): self.check_whiskers(bxp[i], wide_df[col], orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_grouped(self, long_df, orient, using_polars): + def test_grouped(self, long_df, orient): value = {"x": "y", "y": "x"}[orient] ax = boxplot(long_df, **{orient: "a", value: "z"}) - if using_polars: - long_df = long_df.to_pandas() bxp, = ax.containers levels = categorical_order(long_df["a"]) for i, level in enumerate(levels): @@ -1354,12 +1300,10 @@ def test_grouped(self, long_df, orient, using_polars): self.check_whiskers(bxp[i], data, orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_hue_grouped(self, long_df, orient, using_polars): + def test_hue_grouped(self, long_df, orient): value = {"x": "y", "y": "x"}[orient] ax = boxplot(long_df, hue="c", **{orient: "a", value: "z"}) - if using_polars: - long_df = long_df.to_pandas() for i, hue_level in enumerate(categorical_order(long_df["c"])): bxp = ax.containers[i] for j, level in enumerate(categorical_order(long_df["a"])): @@ -1370,17 +1314,11 @@ def test_hue_grouped(self, long_df, orient, using_polars): self.check_box(bxp[j], data, orient, pos, width) self.check_whiskers(bxp[j], data, orient, pos, capsize) - def test_hue_not_dodged(self, long_df, using_polars): + def test_hue_not_dodged(self, long_df): - if using_polars: - levels = categorical_order(long_df.to_pandas()["b"]) - hue = long_df["b"].is_in(levels[:2]) - else: - levels = categorical_order(long_df["b"]) - hue = long_df["b"].isin(levels[:2]) + levels = categorical_order(long_df["b"]) + hue = long_df["b"].isin(levels[:2]) ax = boxplot(long_df, x="b", y="z", hue=hue) - if using_polars: - long_df = long_df.to_pandas() bxps = ax.containers for i, level in enumerate(levels): idx = int(i < 2) @@ -1388,7 +1326,7 @@ def test_hue_not_dodged(self, long_df, using_polars): self.check_box(bxps[idx][i % 2], data, "x", i) self.check_whiskers(bxps[idx][i % 2], data, "x", i) - def test_dodge_native_scale(self, long_df, using_polars): + def test_dodge_native_scale(self, long_df): centers = categorical_order(long_df["s"]) hue_levels = categorical_order(long_df["c"]) @@ -1396,8 +1334,6 @@ def test_dodge_native_scale(self, long_df, using_polars): width = 0.8 * spacing / len(hue_levels) offset = width / len(hue_levels) ax = boxplot(long_df, x="s", y="z", hue="c", native_scale=True) - if using_polars: - long_df = long_df.to_pandas() for i, hue_level in enumerate(hue_levels): bxp = ax.containers[i] for j, center in enumerate(centers): @@ -1484,11 +1420,9 @@ def test_whis(self, long_df): bxp = ax.containers[0][0] self.check_whiskers(bxp, data, "y", 0, whis=2) - def test_gap(self, long_df, using_polars): + def test_gap(self, long_df): ax = boxplot(long_df, x="a", y="z", hue="c", gap=.1) - if using_polars: - long_df = long_df.to_pandas() for i, hue_level in enumerate(categorical_order(long_df["c"])): bxp = ax.containers[i] for j, level in enumerate(categorical_order(long_df["a"])): @@ -1619,24 +1553,20 @@ def test_wide_data(self, wide_df, orient): self.check_violin(poly, wide_df[col], orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_grouped(self, long_df, orient, using_polars): + def test_grouped(self, long_df, orient): value = {"x": "y", "y": "x"}[orient] ax = violinplot(long_df, **{orient: "a", value: "z"}, cut=0) - if using_polars: - long_df = long_df.to_pandas() levels = categorical_order(long_df["a"]) for i, level in enumerate(levels): data = long_df.loc[long_df["a"] == level, "z"] self.check_violin(ax.collections[i], data, orient, i) @pytest.mark.parametrize("orient", ["x", "y"]) - def test_hue_grouped(self, long_df, orient, using_polars): + def test_hue_grouped(self, long_df, orient): value = {"x": "y", "y": "x"}[orient] ax = violinplot(long_df, hue="c", **{orient: "a", value: "z"}, cut=0) - if using_polars: - long_df = long_df.to_pandas() polys = iter(ax.collections) for i, level in enumerate(categorical_order(long_df["a"])): for j, hue_level in enumerate(categorical_order(long_df["c"])): @@ -1646,23 +1576,17 @@ def test_hue_grouped(self, long_df, orient, using_polars): width = 0.4 self.check_violin(next(polys), data, orient, pos, width) - def test_hue_not_dodged(self, long_df, using_polars): + def test_hue_not_dodged(self, long_df): - if using_polars: - levels = categorical_order(long_df.to_pandas()["b"]) - hue = long_df["b"].is_in(levels[:2]) - else: - levels = categorical_order(long_df["b"]) - hue = long_df["b"].isin(levels[:2]) + levels = categorical_order(long_df["b"]) + hue = long_df["b"].isin(levels[:2]) ax = violinplot(long_df, x="b", y="z", hue=hue, cut=0) - if using_polars: - long_df = long_df.to_pandas() for i, level in enumerate(levels): poly = ax.collections[i] data = long_df.loc[long_df["b"] == level, "z"] self.check_violin(poly, data, "x", i) - def test_dodge_native_scale(self, long_df, using_polars): + def test_dodge_native_scale(self, long_df): centers = categorical_order(long_df["s"]) hue_levels = categorical_order(long_df["c"]) @@ -1670,8 +1594,6 @@ def test_dodge_native_scale(self, long_df, using_polars): width = 0.8 * spacing / len(hue_levels) offset = width / len(hue_levels) ax = violinplot(long_df, x="s", y="z", hue="c", native_scale=True, cut=0) - if using_polars: - long_df = long_df.to_pandas() violins = iter(ax.collections) for center in centers: for i, hue_level in enumerate(hue_levels): @@ -1681,15 +1603,13 @@ def test_dodge_native_scale(self, long_df, using_polars): poly = next(violins) self.check_violin(poly, data, "x", pos, width) - def test_dodge_native_scale_log(self, long_df, using_polars): + def test_dodge_native_scale_log(self, long_df): pos = 10 ** long_df["s"] ax = mpl.figure.Figure().subplots() ax.set_xscale("log") variables = dict(x=pos, y="z", hue="c") violinplot(long_df, **variables, native_scale=True, density_norm="width", ax=ax) - if using_polars: - long_df = long_df.to_pandas() widths = [] n_violins = long_df["s"].nunique() * long_df["c"].nunique() for poly in ax.collections[:n_violins]: @@ -1705,11 +1625,9 @@ def test_color(self, long_df): for poly in ax.collections: assert same_color(poly.get_facecolor(), color) - def test_hue_colors(self, long_df, using_polars): + def test_hue_colors(self, long_df): ax = violinplot(long_df, x="a", y="y", hue="b", saturation=1) - if using_polars: - long_df = long_df.to_pandas() n_levels = long_df["b"].nunique() for i, poly in enumerate(ax.collections): assert same_color(poly.get_facecolor(), f"C{i % n_levels}") @@ -1788,34 +1706,28 @@ def test_inner_quartiles(self, long_df, orient): assert pts[0, pos_idx] == -pts[1, pos_idx] @pytest.mark.parametrize("orient", ["x", "y"]) - def test_inner_stick(self, long_df, orient, using_polars): + def test_inner_stick(self, long_df, orient): pos_idx, val_idx = self.orient_indices(orient) ax = violinplot(long_df["y"], orient=orient, inner="stick") - if using_polars: - long_df = long_df.to_pandas() for i, pts in enumerate(ax.collections[1].get_segments()): for pt in pts: assert pt[val_idx] == long_df["y"].iloc[i] assert pts[0, pos_idx] == -pts[1, pos_idx] @pytest.mark.parametrize("orient", ["x", "y"]) - def test_inner_points(self, long_df, orient, using_polars): + def test_inner_points(self, long_df, orient): pos_idx, val_idx = self.orient_indices(orient) ax = violinplot(long_df["y"], orient=orient, inner="points") - if using_polars: - long_df = long_df.to_pandas() points = ax.collections[1] for i, pt in enumerate(points.get_offsets()): assert pt[val_idx] == long_df["y"].iloc[i] assert pt[pos_idx] == 0 - def test_split_single(self, long_df, using_polars): + def test_split_single(self, long_df): ax = violinplot(long_df, x="a", y="z", split=True, cut=0) - if using_polars: - long_df = long_df.to_pandas() levels = categorical_order(long_df["a"]) for i, level in enumerate(levels): data = long_df.loc[long_df["a"] == level, "z"] @@ -1823,11 +1735,9 @@ def test_split_single(self, long_df, using_polars): verts = ax.collections[i].get_paths()[0].vertices assert np.isclose(verts[:, 0], i + .4).sum() >= 100 - def test_split_multi(self, long_df, using_polars): + def test_split_multi(self, long_df): ax = violinplot(long_df, x="a", y="z", hue="c", split=True, cut=0) - if using_polars: - long_df = long_df.to_pandas() polys = iter(ax.collections) for i, level in enumerate(categorical_order(long_df["a"])): for j, hue_level in enumerate(categorical_order(long_df["c"])): @@ -1991,17 +1901,13 @@ def test_single_var(self, orient): assert getattr(bar, f"get_{prop}")() == approx(vals.mean()) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) - def test_wide_df(self, wide_df, orient, using_polars): + def test_wide_df(self, wide_df, orient): ax = barplot(wide_df, orient=orient) orient = {"h": "y", "v": "x"}.get(orient, orient) prop = {"x": "height", "y": "width"}[orient] for i, bar in enumerate(ax.patches): - if using_polars: - expected = approx(wide_df[:, i].mean()) - else: - expected = approx(wide_df.iloc[:, i].mean()) - assert getattr(bar, f"get_{prop}")() == expected + assert getattr(bar, f"get_{prop}")() == approx(wide_df.iloc[:, i].mean()) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) def test_vector_orient(self, orient): @@ -2227,38 +2133,32 @@ def test_native_scale_log_transform_dodged(self): for x_i, bar in zip(x[2:], ax.patches[2:]): assert bar.get_x() == approx(x_i) - def test_estimate_default(self, long_df, using_polars): + def test_estimate_default(self, long_df): agg_var, val_var = "a", "y" + agg_df = long_df.groupby(agg_var)[val_var].mean() ax = barplot(long_df, x=agg_var, y=val_var, errorbar=None) - if using_polars: - long_df = long_df.to_pandas() - agg_df = long_df.groupby(agg_var)[val_var].mean() order = categorical_order(long_df[agg_var]) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) - def test_estimate_string(self, long_df, using_polars): + def test_estimate_string(self, long_df): agg_var, val_var = "a", "y" + agg_df = long_df.groupby(agg_var)[val_var].median() ax = barplot(long_df, x=agg_var, y=val_var, estimator="median", errorbar=None) - if using_polars: - long_df = long_df.to_pandas() - agg_df = long_df.groupby(agg_var)[val_var].median() order = categorical_order(long_df[agg_var]) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) - def test_estimate_func(self, long_df, using_polars): + def test_estimate_func(self, long_df): agg_var, val_var = "a", "y" + agg_df = long_df.groupby(agg_var)[val_var].median() ax = barplot(long_df, x=agg_var, y=val_var, estimator=np.median, errorbar=None) - if using_polars: - long_df = long_df.to_pandas() - agg_df = long_df.groupby(agg_var)[val_var].median() order = categorical_order(long_df[agg_var]) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(agg_df[order[i]]) @@ -2271,14 +2171,12 @@ def test_estimate_log_transform(self, long_df): bar, = ax.patches assert bar.get_width() == 10 ** np.log10(long_df["z"]).mean() - def test_errorbars(self, long_df, using_polars): + def test_errorbars(self, long_df): agg_var, val_var = "a", "y" + agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) ax = barplot(long_df, x=agg_var, y=val_var, errorbar="sd") - if using_polars: - long_df = long_df.to_pandas() - agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) order = categorical_order(long_df[agg_var]) for i, line in enumerate(ax.lines): row = agg_df.loc[order[i]] @@ -2498,7 +2396,7 @@ def test_single_var(self, orient): assert getattr(line, f"get_{orient}data")() == approx(vals.mean()) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) - def test_wide_df(self, wide_df, orient, using_polars): + def test_wide_df(self, wide_df, orient): ax = pointplot(wide_df, orient=orient) orient = {"h": "y", "v": "x"}.get(orient, orient) @@ -2508,16 +2406,10 @@ def test_wide_df(self, wide_df, orient, using_polars): getattr(line, f"get_{orient}data")(), np.arange(len(wide_df.columns)), ) - if using_polars: - assert_array_almost_equal( - getattr(line, f"get_{depend}data")(), - wide_df.mean(axis=0).to_numpy().flatten(), - ) - else: - assert_array_almost_equal( - getattr(line, f"get_{depend}data")(), - wide_df.mean(axis=0), - ) + assert_array_almost_equal( + getattr(line, f"get_{depend}data")(), + wide_df.mean(axis=0), + ) @pytest.mark.parametrize("orient", ["x", "y", "h", "v"]) def test_vector_orient(self, orient): @@ -2587,14 +2479,12 @@ def test_xy_native_scale(self): assert_array_equal(line.get_ydata(), y) @pytest.mark.parametrize("estimator", ["mean", np.mean]) - def test_estimate(self, long_df, estimator, using_polars): + def test_estimate(self, long_df, estimator): agg_var, val_var = "a", "y" + agg_df = long_df.groupby(agg_var)[val_var].agg(estimator) ax = pointplot(long_df, x=agg_var, y=val_var, errorbar=None) - if using_polars: - long_df = long_df.to_pandas() - agg_df = long_df.groupby(agg_var)[val_var].agg(estimator) order = categorical_order(long_df[agg_var]) for i, xy in enumerate(ax.lines[0].get_xydata()): assert tuple(xy) == approx((i, agg_df[order[i]])) @@ -2607,14 +2497,12 @@ def test_estimate_log_transform(self, long_df): val, = ax.lines[0].get_xdata() assert val == 10 ** np.log10(long_df["z"]).mean() - def test_errorbars(self, long_df, using_polars): + def test_errorbars(self, long_df): agg_var, val_var = "a", "y" + agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) ax = pointplot(long_df, x=agg_var, y=val_var, errorbar="sd") - if using_polars: - long_df = long_df.to_pandas() - agg_df = long_df.groupby(agg_var)[val_var].agg(["mean", "std"]) order = categorical_order(long_df[agg_var]) for i, line in enumerate(ax.lines[1:]): row = agg_df.loc[order[i]] @@ -2935,16 +2823,14 @@ def test_hue_dodged(self): assert same_color(bar.get_facecolor(), f"C{i // 2}") @pytest.mark.parametrize("stat", ["percent", "probability", "proportion"]) - def test_stat(self, long_df, stat, using_polars): + def test_stat(self, long_df, stat): col = "a" - ax = countplot(long_df, x=col, stat=stat) - if using_polars: - long_df = long_df.to_pandas() order = categorical_order(long_df[col]) expected = long_df[col].value_counts(normalize=True) if stat == "percent": expected *= 100 + ax = countplot(long_df, x=col, stat=stat) for i, bar in enumerate(ax.patches): assert bar.get_height() == approx(expected[order[i]]) @@ -3684,7 +3570,7 @@ def test_beeswarm(self, long_df): p = Beeswarm() data = long_df["y"] d = data.diff().mean() * 1.5 - x = np.zeros(len(data)) + x = np.zeros(data.size) y = np.sort(data) r = np.full_like(y, d) orig_xyr = np.c_[x, y, r] diff --git a/tests/test_core.py b/tests/test_core.py index 27b13928a4..1e52868863 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,4 @@ import itertools -import os import numpy as np import pandas as pd import matplotlib as mpl @@ -106,10 +105,6 @@ def test_plotter_reinit(self, long_df): assert p._hue_map.palette == palette assert p._hue_map.levels == hue_order - @pytest.mark.xfail( - os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1', - reason='different-length inputs not yet supported for non-pandas' - ) def test_hue_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, hue=null_series)) @@ -121,14 +116,11 @@ def test_hue_map_null(self, flat_series, null_series): assert m.norm is None assert m.lookup_table is None - def test_hue_map_categorical(self, wide_df, long_df, using_polars): + def test_hue_map_categorical(self, wide_df, long_df): p = VectorPlotter(data=wide_df) m = HueMapping(p) - if using_polars: - assert m.levels == wide_df.columns - else: - assert m.levels == wide_df.columns.to_list() + assert m.levels == wide_df.columns.to_list() assert m.map_type == "categorical" assert m.cmap is None @@ -172,19 +164,13 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): # Test long data p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="a")) m = HueMapping(p) - if using_polars: - assert m.levels == categorical_order(long_df.to_pandas()["a"]) - else: - assert m.levels == categorical_order(long_df["a"]) + assert m.levels == categorical_order(long_df["a"]) assert m.map_type == "categorical" assert m.cmap is None # Test default palette m = HueMapping(p) - if using_polars: - hue_levels = categorical_order(long_df.to_pandas()["a"]) - else: - hue_levels = categorical_order(long_df["a"]) + hue_levels = categorical_order(long_df["a"]) expected_colors = color_palette(n_colors=len(hue_levels)) expected_lookup_table = dict(zip(hue_levels, expected_colors)) assert m.lookup_table == expected_lookup_table @@ -209,13 +195,8 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): assert m.map_type == "categorical" for val in [0, 1]: - if using_polars: - import polars as pl - data = long_df.filter(pl.col('c') == val) - else: - data = long_df[long_df["c"]] p = VectorPlotter( - data=data, + data=long_df[long_df["c"] == val], variables=dict(x="x", y="y", hue="c"), ) m = HueMapping(p) @@ -225,33 +206,24 @@ def test_hue_map_categorical(self, wide_df, long_df, using_polars): # Test Timestamp data p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="t")) m = HueMapping(p) - if using_polars: - assert m.levels == [ - pd.Timestamp(t) for t in long_df.to_pandas()["t"].unique() - ] - else: - assert m.levels == [pd.Timestamp(t) for t in long_df["t"].unique()] + assert m.levels == [pd.Timestamp(t) for t in long_df["t"].unique()] assert m.map_type == "datetime" # Test explicit categories p = VectorPlotter(data=long_df, variables=dict(x="x", hue="a_cat")) m = HueMapping(p) - if using_polars: - assert m.levels == long_df.to_pandas()["a_cat"].cat.categories.to_list() - else: - assert m.levels == long_df["a_cat"].cat.categories.to_list() + assert m.levels == long_df["a_cat"].cat.categories.to_list() assert m.map_type == "categorical" # Test numeric data with category type - if not using_polars: - p = VectorPlotter( - data=long_df, - variables=dict(x="x", y="y", hue="s_cat") - ) - m = HueMapping(p) - assert m.levels == categorical_order(long_df["s_cat"]) - assert m.map_type == "categorical" - assert m.cmap is None + p = VectorPlotter( + data=long_df, + variables=dict(x="x", y="y", hue="s_cat") + ) + m = HueMapping(p) + assert m.levels == categorical_order(long_df["s_cat"]) + assert m.map_type == "categorical" + assert m.cmap is None # Test categorical palette specified for numeric data p = VectorPlotter( @@ -354,11 +326,9 @@ def test_hue_map_without_hue_dataa(self, long_df): with pytest.warns(UserWarning, match="Ignoring `palette`"): HueMapping(p, palette="viridis") - def test_saturation(self, long_df, using_polars): + def test_saturation(self, long_df): p = VectorPlotter(data=long_df, variables=dict(x="x", y="y", hue="a")) - if using_polars: - long_df = long_df.to_pandas() levels = categorical_order(long_df["a"]) palette = color_palette("viridis", len(levels)) saturation = 0.8 @@ -412,10 +382,6 @@ def test_plotter_reinit(self, long_df): assert p._size_map.lookup_table == dict(zip(size_order, sizes)) assert p._size_map.levels == size_order - @pytest.mark.xfail( - os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1', - reason='different-length inputs not yet supported for non-pandas' - ) def test_size_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, size=null_series)) @@ -470,7 +436,7 @@ def test_map_size_numeric(self, long_df): with pytest.raises(ValueError): SizeMapping(p, norm="bad_norm") - def test_map_size_categorical(self, long_df, using_polars): + def test_map_size_categorical(self, long_df): p = VectorPlotter( data=long_df, @@ -504,10 +470,7 @@ def test_map_size_categorical(self, long_df, using_polars): # Test explicit categories p = VectorPlotter(data=long_df, variables=dict(x="x", size="a_cat")) m = SizeMapping(p) - if using_polars: - assert m.levels == long_df.to_pandas()["a_cat"].cat.categories.to_list() - else: - assert m.levels == long_df["a_cat"].cat.categories.to_list() + assert m.levels == long_df["a_cat"].cat.categories.to_list() assert m.map_type == "categorical" # Test sizes list with wrong length @@ -566,10 +529,6 @@ def test_plotter_reinit(self, long_df): assert p._style_map.levels == style_order assert p._style_map(style_order, "marker") == markers - @pytest.mark.xfail( - os.environ.get('SEABORN_TEST_INTERCHANGE_PROTOCOL', '0') == '1', - reason='different-length inputs not yet supported for non-pandas' - ) def test_style_map_null(self, flat_series, null_series): p = VectorPlotter(variables=dict(x=flat_series, style=null_series)) @@ -578,7 +537,7 @@ def test_style_map_null(self, flat_series, null_series): assert m.map_type is None assert m.lookup_table is None - def test_map_style(self, long_df, using_polars): + def test_map_style(self, long_df): p = VectorPlotter( data=long_df, @@ -620,8 +579,6 @@ def test_map_style(self, long_df, using_polars): # Test explicit categories p = VectorPlotter(data=long_df, variables=dict(x="x", style="a_cat")) m = StyleMapping(p) - if using_polars: - long_df = long_df.to_pandas() assert m.levels == long_df["a_cat"].cat.categories.to_list() # Test style order with defaults @@ -659,7 +616,7 @@ def test_map_style(self, long_df, using_polars): class TestVectorPlotter: - def test_flat_variables(self, flat_data, using_polars): + def test_flat_variables(self, flat_data): p = VectorPlotter() p.assign_variables(data=flat_data) @@ -696,14 +653,11 @@ def test_long_df(self, long_df, long_variables): for key, val in long_variables.items(): assert_array_equal(p.plot_data[key], long_df[val]) - def test_long_df_with_index(self, long_df, long_variables, using_polars): + def test_long_df_with_index(self, long_df, long_variables): p = VectorPlotter() - if using_polars: - # no index - return p.assign_variables( - data=long_df.set_index('a'), + data=long_df.set_index("a"), variables=long_variables, ) assert p.input_format == "long" @@ -712,10 +666,7 @@ def test_long_df_with_index(self, long_df, long_variables, using_polars): for key, val in long_variables.items(): assert_array_equal(p.plot_data[key], long_df[val]) - def test_long_df_with_multiindex(self, long_df, long_variables, using_polars): - if using_polars: - # no index (let along multiindex) - return + def test_long_df_with_multiindex(self, long_df, long_variables): p = VectorPlotter() p.assign_variables( @@ -807,11 +758,8 @@ def test_units(self, repeated_df): assert_array_equal(p.plot_data["units"], repeated_df["u"]) @pytest.mark.parametrize("name", [3, 4.5]) - def test_long_numeric_name(self, long_df, name, using_polars): + def test_long_numeric_name(self, long_df, name): - if using_polars: - # Only string names allowed - return long_df[name] = long_df["x"] p = VectorPlotter() p.assign_variables(data=long_df, variables={"x": name}) @@ -913,7 +861,7 @@ def test_iter_data_quantitites(self, long_df): out = p.iter_data(["hue"]) assert len(list(out)) == n_subsets - n_subsets = len(set(list(map(tuple, long_df[[var1, var2]].to_numpy())))) + n_subsets = len(set(list(map(tuple, long_df[[var1, var2]].values)))) p = VectorPlotter( data=long_df, @@ -933,7 +881,7 @@ def test_iter_data_quantitites(self, long_df): var1, var2, var3 = "a", "s", "b" cols = [var1, var2, var3] - n_subsets = len(set(list(map(tuple, long_df[cols].to_numpy())))) + n_subsets = len(set(list(map(tuple, long_df[cols].values)))) p = VectorPlotter( data=long_df, @@ -963,7 +911,7 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data("hue"): assert list(sub_vars) == ["hue"] - assert sub_vars["hue"] in long_df[var].to_numpy() + assert sub_vars["hue"] in long_df[var].values p = VectorPlotter( data=long_df, @@ -971,7 +919,7 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data("size"): assert list(sub_vars) == ["size"] - assert sub_vars["size"] in long_df[var].to_numpy() + assert sub_vars["size"] in long_df[var].values p = VectorPlotter( data=long_df, @@ -979,8 +927,8 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data(semantics): assert list(sub_vars) == ["hue", "style"] - assert sub_vars["hue"] in long_df[var].to_numpy() - assert sub_vars["style"] in long_df[var].to_numpy() + assert sub_vars["hue"] in long_df[var].values + assert sub_vars["style"] in long_df[var].values assert sub_vars["hue"] == sub_vars["style"] var1, var2 = "a", "s" @@ -991,8 +939,8 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data(semantics): assert list(sub_vars) == ["hue", "size"] - assert sub_vars["hue"] in long_df[var1].to_numpy() - assert sub_vars["size"] in long_df[var2].to_numpy() + assert sub_vars["hue"] in long_df[var1].values + assert sub_vars["size"] in long_df[var2].values semantics = ["hue", "col", "row"] p = VectorPlotter( @@ -1001,8 +949,8 @@ def test_iter_data_keys(self, long_df): ) for sub_vars, _ in p.iter_data("hue"): assert list(sub_vars) == ["hue", "col"] - assert sub_vars["hue"] in long_df[var1].to_numpy() - assert sub_vars["col"] in long_df[var2].to_numpy() + assert sub_vars["hue"] in long_df[var1].values + assert sub_vars["col"] in long_df[var2].values def test_iter_data_values(self, long_df): @@ -1033,16 +981,14 @@ def test_iter_data_values(self, long_df): rows &= p.plot_data["size"] == sub_vars["size"] assert_frame_equal(sub_data, p.plot_data[rows]) - def test_iter_data_reverse(self, long_df, using_polars): + def test_iter_data_reverse(self, long_df): + reversed_order = categorical_order(long_df["a"])[::-1] p = VectorPlotter( data=long_df, variables=dict(x="x", y="y", hue="a") ) iterator = p.iter_data("hue", reverse=True) - if using_polars: - long_df = long_df.to_pandas() - reversed_order = categorical_order(long_df["a"])[::-1] for i, (sub_vars, _) in enumerate(iterator): assert sub_vars["hue"] == reversed_order[i] @@ -1435,7 +1381,7 @@ def test_scale_datetime(self, long_df): with pytest.raises(NotImplementedError): p.scale_datetime("x") - def test_scale_categorical(self, long_df, using_polars): + def test_scale_categorical(self, long_df): p = VectorPlotter(data=long_df, variables={"x": "x"}) p.scale_categorical("y") @@ -1454,12 +1400,7 @@ def test_scale_categorical(self, long_df, using_polars): p = VectorPlotter(data=long_df, variables={"x": "a"}) p.scale_categorical("x") assert not p._var_ordered["x"] - if using_polars: - assert_array_equal( - p.var_levels["x"], categorical_order(long_df.to_pandas()["a"]) - ) - else: - assert_array_equal(p.var_levels["x"], categorical_order(long_df["a"])) + assert_array_equal(p.var_levels["x"], categorical_order(long_df["a"])) p = VectorPlotter(data=long_df, variables={"x": "a_cat"}) p.scale_categorical("x") diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 17b552923f..4cada7d516 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -272,11 +272,9 @@ def test_rug_data(self, flat_array): assert_array_equal(segments[:, 1, 1], np.full(n, height)) assert_array_equal(segments[:, 1, 0], flat_array) - def test_rug_colors(self, long_df, using_polars): + def test_rug_colors(self, long_df): ax = rugplot(data=long_df, x="x", hue="a") - if using_polars: - long_df = long_df.to_pandas() order = categorical_order(long_df["a"]) palette = color_palette() @@ -705,7 +703,7 @@ def test_common_norm(self, long_df): xdata, ydata = line.get_xydata().T assert integrate(ydata, xdata) == pytest.approx(1) - def test_common_grid(self, long_df, using_polars): + def test_common_grid(self, long_df): f, (ax1, ax2) = plt.subplots(ncols=2) @@ -719,8 +717,6 @@ def test_common_grid(self, long_df, using_polars): data=long_df, x="x", hue="a", hue_order=order, common_grid=True, cut=0, ax=ax2, ) - if using_polars: - long_df = long_df.to_pandas() for line, level in zip(ax1.lines[::-1], order): xdata = line.get_xdata() @@ -896,17 +892,14 @@ def test_axis_labels(self, long_df): assert ax2.get_xlabel() == "Density" assert ax2.get_ylabel() == "y" - def test_legend(self, long_df, using_polars): + def test_legend(self, long_df): ax = kdeplot(data=long_df, x="x", hue="a") assert ax.legend_.get_title().get_text() == "a" legend_labels = ax.legend_.get_texts() - if using_polars: - order = categorical_order(long_df.to_pandas()["a"]) - else: - order = categorical_order(long_df["a"]) + order = categorical_order(long_df["a"]) for label, level in zip(legend_labels, order): assert label.get_text() == level @@ -1440,11 +1433,9 @@ def test_unique_bins(self, wide_df): assert_array_almost_equal(start, wide_df[col].min()) assert_array_almost_equal(stop, wide_df[col].max()) - def test_weights_with_missing(self, null_df, using_polars): + def test_weights_with_missing(self, null_df): ax = histplot(null_df, x="x", weights="s", bins=5) - if using_polars: - null_df = null_df.to_pandas() bar_heights = [bar.get_height() for bar in ax.patches] total_weight = null_df[["x", "s"]].dropna()["s"].sum() @@ -2076,7 +2067,7 @@ def test_color_limits(self, long_df): (counts <= f(counts, pthresh)).T.flat, ) - def test_hue_color_limits(self, long_df, using_polars): + def test_hue_color_limits(self, long_df): _, (ax1, ax2, ax3, ax4) = plt.subplots(4) kws = dict(data=long_df, x="x", y="y", hue="c", bins=4) @@ -2086,11 +2077,7 @@ def test_hue_color_limits(self, long_df, using_polars): full_counts, _ = hist(long_df["x"], long_df["y"]) sub_counts = [] - if using_polars: - group_by = long_df.groupby(kws['hue'], maintain_order=True) - else: - group_by = long_df.groupby(kws['hue']) - for _, sub_df in group_by: + for _, sub_df in long_df.groupby(kws["hue"]): c, _ = hist(sub_df["x"], sub_df["y"]) sub_counts.append(c) @@ -2272,7 +2259,7 @@ class TestDisPlot: dict(x="x", y="y"), ], ) - def test_versus_single_histplot(self, long_df, kwargs, using_polars): + def test_versus_single_histplot(self, long_df, kwargs): ax = histplot(long_df, **kwargs) g = displot(long_df, **kwargs) @@ -2282,11 +2269,7 @@ def test_versus_single_histplot(self, long_df, kwargs, using_polars): assert_legends_equal(ax.legend_, g._legend) if kwargs: - if using_polars: - import polars as pl - long_df = long_df.with_columns(pl.lit('_').alias('_')) - else: - long_df["_"] = "_" + long_df["_"] = "_" g2 = displot(long_df, col="_", **kwargs) assert_plots_equal(ax, g2.ax) @@ -2306,7 +2289,7 @@ def test_versus_single_histplot(self, long_df, kwargs, using_polars): dict(x="x", y="y"), ], ) - def test_versus_single_kdeplot(self, long_df, kwargs, using_polars): + def test_versus_single_kdeplot(self, long_df, kwargs): ax = kdeplot(data=long_df, **kwargs) g = displot(long_df, kind="kde", **kwargs) @@ -2316,11 +2299,7 @@ def test_versus_single_kdeplot(self, long_df, kwargs, using_polars): assert_legends_equal(ax.legend_, g._legend) if kwargs: - if using_polars: - import polars as pl - long_df = long_df.with_columns(pl.lit('_').alias('_')) - else: - long_df["_"] = "_" + long_df["_"] = "_" g2 = displot(long_df, kind="kde", col="_", **kwargs) assert_plots_equal(ax, g2.ax) @@ -2338,7 +2317,7 @@ def test_versus_single_kdeplot(self, long_df, kwargs, using_polars): dict(x="x", hue="a", palette="muted"), ], ) - def test_versus_single_ecdfplot(self, long_df, kwargs, using_polars): + def test_versus_single_ecdfplot(self, long_df, kwargs): ax = ecdfplot(data=long_df, **kwargs) g = displot(long_df, kind="ecdf", **kwargs) @@ -2348,11 +2327,7 @@ def test_versus_single_ecdfplot(self, long_df, kwargs, using_polars): assert_legends_equal(ax.legend_, g._legend) if kwargs: - if using_polars: - import polars as pl - long_df = long_df.with_columns(pl.lit('_').alias('_')) - else: - long_df["_"] = "_" + long_df["_"] = "_" g2 = displot(long_df, kind="ecdf", col="_", **kwargs) assert_plots_equal(ax, g2.ax) @@ -2363,7 +2338,7 @@ def test_versus_single_ecdfplot(self, long_df, kwargs, using_polars): dict(x="x", hue="a"), ] ) - def test_with_rug(self, long_df, kwargs, using_polars): + def test_with_rug(self, long_df, kwargs): ax = plt.figure().subplots() histplot(data=long_df, **kwargs, ax=ax) @@ -2373,11 +2348,7 @@ def test_with_rug(self, long_df, kwargs, using_polars): assert_plots_equal(ax, g.ax, labels=False) - if using_polars: - import polars as pl - long_df = long_df.with_columns(pl.lit('_').alias('_')) - else: - long_df["_"] = "_" + long_df["_"] = "_" g2 = displot(long_df, col="_", rug=True, **kwargs) assert_plots_equal(ax, g2.ax, labels=False) @@ -2402,16 +2373,11 @@ def test_facets(self, long_df, facet_var): assert text in facet_ax.get_title() @pytest.mark.parametrize("multiple", ["dodge", "stack", "fill"]) - def test_facet_multiple(self, long_df, multiple, using_polars): + def test_facet_multiple(self, long_df, multiple): bins = np.linspace(0, 20, 5) - if using_polars: - import polars as pl - data = long_df.filter(pl.col('c') == 0) - else: - data = long_df[long_df['c'] == 0] ax = histplot( - data=data, + data=long_df[long_df["c"] == 0], x="x", hue="a", hue_order=["a", "b", "c"], multiple=multiple, bins=bins, ) @@ -2484,10 +2450,7 @@ def test_bivariate_hist_norm(self, rng): clim2 = g.axes.flat[1].collections[0].get_clim() assert clim1[1] > clim2[1] - def test_facetgrid_data(self, long_df, using_polars): - if using_polars: - # This test doesn't pass a DataFrame anyway - return + def test_facetgrid_data(self, long_df): g = displot( data=long_df.to_dict(orient="list"), diff --git a/tests/test_relational.py b/tests/test_relational.py index 53fb0de30b..ca7970d433 100644 --- a/tests/test_relational.py +++ b/tests/test_relational.py @@ -89,7 +89,7 @@ def test_color(self, long_df): class TestRelationalPlotter(Helpers): - def test_wide_df_variables(self, wide_df, using_polars): + def test_wide_df_variables(self, wide_df): p = _RelationalPlotter() p.assign_variables(data=wide_df) @@ -98,10 +98,7 @@ def test_wide_df_variables(self, wide_df, using_polars): assert len(p.plot_data) == np.prod(wide_df.shape) x = p.plot_data["x"] - if using_polars: - expected_x = np.tile(np.arange(len(wide_df)), wide_df.shape[1]) - else: - expected_x = np.tile(wide_df.index, wide_df.shape[1]) + expected_x = np.tile(wide_df.index, wide_df.shape[1]) assert_array_equal(x, expected_x) y = p.plot_data["y"] @@ -109,41 +106,28 @@ def test_wide_df_variables(self, wide_df, using_polars): assert_array_equal(y, expected_y) hue = p.plot_data["hue"] - if using_polars: - expected_hue = np.repeat(wide_df.columns, wide_df.shape[0]) - else: - expected_hue = np.repeat(wide_df.columns.to_numpy(), wide_df.shape[0]) + expected_hue = np.repeat(wide_df.columns.to_numpy(), wide_df.shape[0]) assert_array_equal(hue, expected_hue) style = p.plot_data["style"] expected_style = expected_hue assert_array_equal(style, expected_style) - if using_polars: - assert p.variables["x"] is None - assert p.variables["hue"] is None - assert p.variables["style"] is None - else: - assert p.variables["x"] == wide_df.index.name - assert p.variables["hue"] == wide_df.columns.name - assert p.variables["style"] == wide_df.columns.name + assert p.variables["x"] == wide_df.index.name + assert p.variables["y"] is None + assert p.variables["hue"] == wide_df.columns.name + assert p.variables["style"] == wide_df.columns.name - def test_wide_df_with_nonnumeric_variables(self, long_df, using_polars): + def test_wide_df_with_nonnumeric_variables(self, long_df): p = _RelationalPlotter() p.assign_variables(data=long_df) assert p.input_format == "wide" assert list(p.variables) == ["x", "y", "hue", "style"] - if using_polars: - import polars as pl - numeric_df = long_df.select(pl.col(pl.NUMERIC_DTYPES)) - else: - numeric_df = long_df.select_dtypes("number") + numeric_df = long_df.select_dtypes("number") assert len(p.plot_data) == np.prod(numeric_df.shape) - if using_polars: - numeric_df = numeric_df.to_pandas() x = p.plot_data["x"] expected_x = np.tile(numeric_df.index, numeric_df.shape[1]) @@ -237,7 +221,7 @@ def test_flat_list_variables(self, flat_list): assert p.variables["x"] is None assert p.variables["y"] is None - def test_flat_series_variables(self, flat_series, using_polars): + def test_flat_series_variables(self, flat_series): p = _RelationalPlotter() p.assign_variables(data=flat_series) @@ -246,21 +230,17 @@ def test_flat_series_variables(self, flat_series, using_polars): assert len(p.plot_data) == len(flat_series) x = p.plot_data["x"] - if using_polars: - expected_x = np.arange(len(flat_series)) - else: - expected_x = flat_series.index + expected_x = flat_series.index assert_array_equal(x, expected_x) y = p.plot_data["y"] expected_y = flat_series assert_array_equal(y, expected_y) - if not using_polars: - assert p.variables["x"] is flat_series.index.name - assert p.variables["y"] is flat_series.name + assert p.variables["x"] is flat_series.index.name + assert p.variables["y"] is flat_series.name - def test_wide_list_of_series_variables(self, wide_list_of_series, using_polars): + def test_wide_list_of_series_variables(self, wide_list_of_series): p = _RelationalPlotter() p.assign_variables(data=wide_list_of_series) @@ -272,28 +252,18 @@ def test_wide_list_of_series_variables(self, wide_list_of_series, using_polars): assert len(p.plot_data) == chunks * chunk_size - if using_polars: - index_union = np.unique( - np.concatenate([np.arange(len(s)) for s in wide_list_of_series]) - ) - else: - index_union = np.unique( - np.concatenate([s.index for s in wide_list_of_series]) - ) + index_union = np.unique( + np.concatenate([s.index for s in wide_list_of_series]) + ) x = p.plot_data["x"] expected_x = np.tile(index_union, chunks) assert_array_equal(x, expected_x) y = p.plot_data["y"] - if using_polars: - expected_y = np.concatenate([ - s.to_pandas().reindex(index_union) for s in wide_list_of_series - ]) - else: - expected_y = np.concatenate([ - s.reindex(index_union) for s in wide_list_of_series - ]) + expected_y = np.concatenate([ + s.reindex(index_union) for s in wide_list_of_series + ]) assert_array_equal(y, expected_y) hue = p.plot_data["hue"] @@ -475,7 +445,7 @@ def test_wide_dict_of_lists_variables(self, wide_dict_of_lists): assert p.variables["hue"] is None assert p.variables["style"] is None - def test_relplot_simple(self, long_df, using_polars): + def test_relplot_simple(self, long_df): g = relplot(data=long_df, x="x", y="y", kind="scatter") x, y = g.ax.collections[0].get_offsets().T @@ -483,8 +453,6 @@ def test_relplot_simple(self, long_df, using_polars): assert_array_equal(y, long_df["y"]) g = relplot(data=long_df, x="x", y="y", kind="line") - if using_polars: - long_df = long_df.to_pandas() x, y = g.ax.lines[0].get_xydata().T expected = long_df.groupby("x").y.mean() assert_array_equal(x, expected.index) @@ -493,7 +461,7 @@ def test_relplot_simple(self, long_df, using_polars): with pytest.raises(ValueError): g = relplot(data=long_df, x="x", y="y", kind="not_a_kind") - def test_relplot_complex(self, long_df, using_polars): + def test_relplot_complex(self, long_df): for sem in ["hue", "size", "style"]: g = relplot(data=long_df, x="x", y="y", **{sem: "a"}) @@ -505,10 +473,7 @@ def test_relplot_complex(self, long_df, using_polars): g = relplot( data=long_df, x="x", y="y", col="c", **{sem: "a"} ) - if using_polars: - grouped = long_df.to_pandas().groupby("c") - else: - grouped = long_df.groupby("c") + grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert_array_equal(x, grp_df["x"]) @@ -518,36 +483,25 @@ def test_relplot_complex(self, long_df, using_polars): g = relplot( data=long_df, x="x", y="y", hue="b", col="c", **{sem: "a"} ) - if using_polars: - grouped = long_df.to_pandas().groupby("c") - else: - grouped = long_df.groupby("c") + grouped = long_df.groupby("c") for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert_array_equal(x, grp_df["x"]) assert_array_equal(y, grp_df["y"]) for sem in ["hue", "size", "style"]: - if using_polars: - data = long_df.sort(['c', 'b']) - else: - data = long_df.sort_values(["c", "b"]) - g = relplot( - data=data, + data=long_df.sort_values(["c", "b"]), x="x", y="y", col="b", row="c", **{sem: "a"} ) - if using_polars: - grouped = long_df.to_pandas().groupby(['c', 'b']) - else: - grouped = long_df.groupby(['c', 'b']) + grouped = long_df.groupby(["c", "b"]) for (_, grp_df), ax in zip(grouped, g.axes.flat): x, y = ax.collections[0].get_offsets().T assert_array_equal(x, grp_df["x"]) assert_array_equal(y, grp_df["y"]) @pytest.mark.parametrize("vector_type", ["series", "numpy", "list"]) - def test_relplot_vectors(self, long_df, vector_type, using_polars): + def test_relplot_vectors(self, long_df, vector_type): semantics = dict(x="x", y="y", hue="f", col="c") kws = {key: long_df[val] for key, val in semantics.items()} @@ -556,8 +510,6 @@ def test_relplot_vectors(self, long_df, vector_type, using_polars): elif vector_type == "list": kws = {k: v.to_list() for k, v in kws.items()} g = relplot(data=long_df, **kws) - if using_polars: - long_df = long_df.to_pandas() grouped = long_df.groupby("c") assert len(g.axes_dict) == len(grouped) for (_, grp_df), ax in zip(grouped, g.axes.flat): @@ -572,15 +524,13 @@ def test_relplot_wide(self, wide_df): assert_array_equal(y, wide_df.to_numpy().T.ravel()) assert not g.ax.get_ylabel() - def test_relplot_hues(self, long_df, using_polars): + def test_relplot_hues(self, long_df): palette = ["r", "b", "g"] g = relplot( x="x", y="y", hue="a", style="b", col="c", palette=palette, data=long_df ) - if using_polars: - long_df = long_df.to_pandas() palette = dict(zip(long_df["a"].unique(), palette)) grouped = long_df.groupby("c") @@ -589,7 +539,7 @@ def test_relplot_hues(self, long_df, using_polars): expected_hues = [palette[val] for val in grp_df["a"]] assert same_color(points.get_facecolors(), expected_hues) - def test_relplot_sizes(self, long_df, using_polars): + def test_relplot_sizes(self, long_df): sizes = [5, 12, 7] g = relplot( @@ -597,8 +547,6 @@ def test_relplot_sizes(self, long_df, using_polars): x="x", y="y", size="a", hue="b", col="c", sizes=sizes, ) - if using_polars: - long_df = long_df.to_pandas() sizes = dict(zip(long_df["a"].unique(), sizes)) grouped = long_df.groupby("c") @@ -607,7 +555,7 @@ def test_relplot_sizes(self, long_df, using_polars): expected_sizes = [sizes[val] for val in grp_df["a"]] assert_array_equal(points.get_sizes(), expected_sizes) - def test_relplot_styles(self, long_df, using_polars): + def test_relplot_styles(self, long_df): markers = ["o", "d", "s"] g = relplot( @@ -615,8 +563,6 @@ def test_relplot_styles(self, long_df, using_polars): x="x", y="y", style="a", hue="b", col="c", markers=markers, ) - if using_polars: - long_df = long_df.to_pandas() paths = [] for m in markers: @@ -630,13 +576,9 @@ def test_relplot_styles(self, long_df, using_polars): expected_paths = [paths[val] for val in grp_df["a"]] assert self.paths_equal(points.get_paths(), expected_paths) - def test_relplot_stringy_numerics(self, long_df, using_polars): + def test_relplot_stringy_numerics(self, long_df): - if using_polars: - import polars as pl - long_df = long_df.with_columns(pl.col('x').cast(pl.Utf8).alias('x_str')) - else: - long_df["x_str"] = long_df["x"].astype(str) + long_df["x_str"] = long_df["x"].astype(str) g = relplot(data=long_df, x="x", y="y", hue="x_str") points = g.ax.collections[0] @@ -652,17 +594,14 @@ def test_relplot_stringy_numerics(self, long_df, using_polars): assert not mask.any() assert_array_equal(xys, long_df[["x", "y"]]) - def test_relplot_legend(self, long_df, using_polars): + def test_relplot_legend(self, long_df): g = relplot(data=long_df, x="x", y="y") assert g._legend is None g = relplot(data=long_df, x="x", y="y", hue="a") texts = [t.get_text() for t in g._legend.texts] - if using_polars: - expected_texts = long_df.to_pandas()["a"].unique() - else: - expected_texts = long_df["a"].unique() + expected_texts = long_df["a"].unique() assert_array_equal(texts, expected_texts) g = relplot(data=long_df, x="x", y="y", hue="s", size="s") @@ -674,13 +613,7 @@ def test_relplot_legend(self, long_df, using_polars): palette = color_palette("deep", len(long_df["b"].unique())) a_like_b = dict(zip(long_df["a"].unique(), long_df["b"].unique())) - if using_polars: - import polars as pl - long_df = long_df.with_columns( - pl.col('a').map_dict(a_like_b).alias("a_like_b") - ) - else: - long_df["a_like_b"] = long_df["a"].map(a_like_b) + long_df["a_like_b"] = long_df["a"].map(a_like_b) g = relplot( data=long_df, x="x", y="y", hue="b", style="a_like_b", @@ -707,10 +640,7 @@ def test_relplot_unshared_axis_labels(self, long_df): for ax in g.axes[:, 1:].flat: assert ax.get_ylabel() == "" - def test_relplot_data(self, long_df, using_polars): - if using_polars: - # Test doesn't pass DataFrame - return + def test_relplot_data(self, long_df): g = relplot( data=long_df.to_dict(orient="list"), @@ -724,14 +654,11 @@ def test_relplot_data(self, long_df, using_polars): assert_array_equal(g.data["y_var"], long_df["y"]) assert_array_equal(g.data["_hue_"], long_df["a"]) - def test_facet_variable_collision(self, long_df, using_polars): + def test_facet_variable_collision(self, long_df): # https://github.com/mwaskom/seaborn/issues/2488 col_data = long_df["c"] - if using_polars: - long_df = long_df.with_columns(size=col_data) - else: - long_df = long_df.assign(size=col_data) + long_df = long_df.assign(size=col_data) g = relplot( data=long_df, @@ -961,7 +888,7 @@ def test_legend_data(self, long_df): handles, labels = ax.get_legend_handles_labels() assert labels == expected_levels - def test_plot(self, long_df, repeated_df, using_polars): + def test_plot(self, long_df, repeated_df): f, ax = plt.subplots() @@ -971,8 +898,6 @@ def test_plot(self, long_df, repeated_df, using_polars): sort=False, estimator=None ) - if using_polars: - long_df = long_df.to_pandas() p.plot(ax, {}) line, = ax.lines assert_array_equal(line.get_xdata(), long_df.x.to_numpy()) @@ -1181,30 +1106,19 @@ def test_non_aggregated_data(self): assert_array_equal(line.get_xdata(), x) assert_array_equal(line.get_ydata(), y) - def test_orient(self, long_df, using_polars): + def test_orient(self, long_df): - if using_polars: - long_df = long_df.drop("x").rename({"s": "y", "y": "x"}) - else: - long_df = long_df.drop("x", axis=1).rename(columns={"s": "y", "y": "x"}) + long_df = long_df.drop("x", axis=1).rename(columns={"s": "y", "y": "x"}) ax1 = plt.figure().subplots() lineplot(data=long_df, x="x", y="y", orient="y", errorbar="sd") assert len(ax1.lines) == len(ax1.collections) line, = ax1.lines - if using_polars: - expected = long_df.to_pandas().groupby("y").agg({"x": "mean"}).reset_index() - else: - expected = long_df.groupby("y").agg({"x": "mean"}).reset_index() + expected = long_df.groupby("y").agg({"x": "mean"}).reset_index() assert_array_almost_equal(line.get_xdata(), expected["x"]) assert_array_almost_equal(line.get_ydata(), expected["y"]) ribbon_y = ax1.collections[0].get_paths()[0].vertices[:, 1] - if using_polars: - assert_array_equal( - np.unique(ribbon_y), long_df.to_pandas()["y"].sort_values().unique() - ) - else: - assert_array_equal(np.unique(ribbon_y), long_df["y"].sort_values().unique()) + assert_array_equal(np.unique(ribbon_y), long_df["y"].sort_values().unique()) ax2 = plt.figure().subplots() lineplot( @@ -1351,13 +1265,13 @@ def test_lineplot_smoke( lineplot(x="x", y="y", data=long_df) ax.clear() - lineplot(x=long_df['x'], y=long_df['y']) + lineplot(x=long_df.x, y=long_df.y) ax.clear() - lineplot(x=long_df['x'], y="y", data=long_df) + lineplot(x=long_df.x, y="y", data=long_df) ax.clear() - lineplot(x="x", y=long_df['y'].to_numpy(), data=long_df) + lineplot(x="x", y=long_df.y.to_numpy(), data=long_df) ax.clear() lineplot(x="x", y="t", data=long_df) @@ -1617,7 +1531,7 @@ def test_legend_data(self, long_df): with pytest.raises(ValueError): p.add_legend_data(ax) - def test_plot(self, long_df, repeated_df, using_polars): + def test_plot(self, long_df, repeated_df): f, ax = plt.subplots() @@ -1693,11 +1607,7 @@ def test_plot(self, long_df, repeated_df, using_polars): assert same_color(points.get_facecolors(), expected_colors) assert self.paths_equal(points.get_paths(), expected_paths) - if using_polars: - import polars as pl - x_str = long_df["x"].cast(pl.Utf8) - else: - x_str = long_df["x"].astype(str) + x_str = long_df["x"].astype(str) p = _ScatterPlotter( data=long_df, variables=dict(x="x", y="y", hue=x_str), ) @@ -1912,13 +1822,13 @@ def test_scatterplot_smoke( scatterplot(x="x", y="y", data=long_df) ax.clear() - scatterplot(x=long_df['x'], y=long_df['y']) + scatterplot(x=long_df.x, y=long_df.y) ax.clear() - scatterplot(x=long_df['x'], y="y", data=long_df) + scatterplot(x=long_df.x, y="y", data=long_df) ax.clear() - scatterplot(x="x", y=long_df['y'].to_numpy(), data=long_df) + scatterplot(x="x", y=long_df.y.to_numpy(), data=long_df) ax.clear() scatterplot(x="x", y="y", hue="a", data=long_df) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index 6740345cc9..f8fa444f22 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -499,22 +499,14 @@ def test_bivariate_error(self, x, y): class TestEstimateAggregator: - def test_func_estimator(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_func_estimator(self, long_df): func = np.mean agg = EstimateAggregator(func) out = agg(long_df, "x") - assert out["x"] == func(long_df["x"].to_numpy()) + assert out["x"] == func(long_df["x"]) - def test_name_estimator(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_name_estimator(self, long_df): agg = EstimateAggregator("mean") out = agg(long_df, "x") @@ -529,11 +521,7 @@ def func(x): out = agg(long_df, "x") assert out["x"] == func(long_df["x"]) - def test_se_errorbars(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_se_errorbars(self, long_df): agg = EstimateAggregator("mean", "se") out = agg(long_df, "x") @@ -547,11 +535,7 @@ def test_se_errorbars(self, long_df, using_polars): assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].sem()) assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].sem()) - def test_sd_errorbars(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_sd_errorbars(self, long_df): agg = EstimateAggregator("mean", "sd") out = agg(long_df, "x") @@ -565,11 +549,7 @@ def test_sd_errorbars(self, long_df, using_polars): assert out["xmin"] == (long_df["x"].mean() - 2 * long_df["x"].std()) assert out["xmax"] == (long_df["x"].mean() + 2 * long_df["x"].std()) - def test_pi_errorbars(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_pi_errorbars(self, long_df): agg = EstimateAggregator("mean", "pi") out = agg(long_df, "y") @@ -581,11 +561,7 @@ def test_pi_errorbars(self, long_df, using_polars): assert out["ymin"] == np.percentile(long_df["y"], 25) assert out["ymax"] == np.percentile(long_df["y"], 75) - def test_ci_errorbars(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_ci_errorbars(self, long_df): agg = EstimateAggregator("mean", "ci", n_boot=100000, seed=0) out = agg(long_df, "y") @@ -610,11 +586,7 @@ def test_ci_errorbars(self, long_df, using_polars): out_test = agg_ref(long_df, "y") assert_array_equal(out_orig, out_test) - def test_custom_errorbars(self, long_df, using_polars): - if using_polars: - # Testing internal class which is reached when - # data has already been converted to pandas - return + def test_custom_errorbars(self, long_df): f = lambda x: (x.min(), x.max()) # noqa: E731 agg = EstimateAggregator("mean", f) From 8117fe64c78933f5dc470399ee6d5189f0911d19 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 21 May 2023 17:13:49 +0100 Subject: [PATCH 29/32] convert as soon as possible --- seaborn/_core/data.py | 3 ++- seaborn/_core/plot.py | 10 +++++----- seaborn/_oldcore.py | 3 --- seaborn/axisgrid.py | 6 ++---- seaborn/distributions.py | 1 + seaborn/regression.py | 1 + seaborn/relational.py | 2 ++ 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/seaborn/_core/data.py b/seaborn/_core/data.py index 535fafe83f..4c0e11beb2 100644 --- a/seaborn/_core/data.py +++ b/seaborn/_core/data.py @@ -10,6 +10,7 @@ from pandas import DataFrame from seaborn._core.typing import DataSource, VariableSpec, ColumnName +from seaborn.utils import try_convert_to_pandas class PlotData: @@ -51,7 +52,7 @@ def __init__( data: DataSource, variables: dict[str, VariableSpec], ): - + data = try_convert_to_pandas(data) frame, names, ids = self._assign_variables(data, variables) self.frame = frame diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index 2d25d44289..0b529ba787 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -309,6 +309,7 @@ def __init__( if args: data, variables = self._resolve_positionals(args, data, variables) + data = try_convert_to_pandas(data) unknown = [x for x in variables if x not in PROPERTIES] if unknown: @@ -347,14 +348,13 @@ def _resolve_positionals( # TODO need some clearer way to differentiate data / vector here # (There might be an abstract DataFrame class to use here?) - if isinstance(args[0], (abc.Mapping, pd.DataFrame)): + if ( + isinstance(args[0], (abc.Mapping, pd.DataFrame)) + or hasattr(args[0], '__dataframe__') + ): if data is not None: raise TypeError("`data` given by both name and position.") data, args = args[0], args[1:] - elif hasattr(args[0], '__dataframe__'): - if data is not None: - raise TypeError("`data` given by both name and position.") - data, args = try_convert_to_pandas(args[0]), args[1:] if len(args) == 2: x, y = args diff --git a/seaborn/_oldcore.py b/seaborn/_oldcore.py index 8bb0e97cb3..f427bd3f4b 100644 --- a/seaborn/_oldcore.py +++ b/seaborn/_oldcore.py @@ -23,7 +23,6 @@ desaturate, get_color_cycle, remove_na, - try_convert_to_pandas, ) @@ -895,8 +894,6 @@ def _assign_variables_longform(self, data=None, **kwargs): # Data is optional; all variables can be defined as vectors if data is None: data = {} - else: - data = try_convert_to_pandas(data) # TODO should we try a data.to_dict() or similar here to more # generally accept objects with that interface? diff --git a/seaborn/axisgrid.py b/seaborn/axisgrid.py index bf41fbb69d..e30b6b0268 100644 --- a/seaborn/axisgrid.py +++ b/seaborn/axisgrid.py @@ -372,8 +372,6 @@ def __init__( margin_titles=False, xlim=None, ylim=None, subplot_kws=None, gridspec_kws=None, ): - data = utils.try_convert_to_pandas(data) - super().__init__() # Determine the hue facet layer information @@ -1240,8 +1238,6 @@ def __init__( """ - data = utils.try_convert_to_pandas(data) - super().__init__() # Sort out the variables that define the grid @@ -2090,6 +2086,8 @@ def pairplot( # Avoid circular import from .distributions import histplot, kdeplot + data = utils.try_convert_to_pandas(data) + # Handle deprecations if size is not None: height = size diff --git a/seaborn/distributions.py b/seaborn/distributions.py index a58129042e..febf859624 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -2125,6 +2125,7 @@ def displot( **kwargs, ): + data = try_convert_to_pandas(data) p = _DistributionFacetPlotter( data=data, variables=_DistributionFacetPlotter.get_semantics(locals()) diff --git a/seaborn/regression.py b/seaborn/regression.py index c6b81a1727..8b03c1c9bb 100644 --- a/seaborn/regression.py +++ b/seaborn/regression.py @@ -575,6 +575,7 @@ def lmplot( truncate=True, x_jitter=None, y_jitter=None, scatter_kws=None, line_kws=None, facet_kws=None, ): + data = utils.try_convert_to_pandas(data) if facet_kws is None: facet_kws = {} diff --git a/seaborn/relational.py b/seaborn/relational.py index 8bfc130759..5a2f83626d 100644 --- a/seaborn/relational.py +++ b/seaborn/relational.py @@ -13,6 +13,7 @@ adjust_legend_subtitles, _default_color, _deprecate_ci, + try_convert_to_pandas, ) from ._statistics import EstimateAggregator from .axisgrid import FacetGrid, _facet_docs @@ -799,6 +800,7 @@ def relplot( legend="auto", kind="scatter", height=5, aspect=1, facet_kws=None, **kwargs ): + data = try_convert_to_pandas(data) if kind == "scatter": plotter = _ScatterPlotter From 3812e5f43ceed54004c04e72b962cf98f2e2d86d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 21 May 2023 17:18:27 +0100 Subject: [PATCH 30/32] try convert in facetgrid --- seaborn/axisgrid.py | 1 + 1 file changed, 1 insertion(+) diff --git a/seaborn/axisgrid.py b/seaborn/axisgrid.py index e30b6b0268..e0f1aa1f09 100644 --- a/seaborn/axisgrid.py +++ b/seaborn/axisgrid.py @@ -372,6 +372,7 @@ def __init__( margin_titles=False, xlim=None, ylim=None, subplot_kws=None, gridspec_kws=None, ): + data = utils.try_convert_to_pandas(data) super().__init__() # Determine the hue facet layer information From 6494ef42c668dd5836f07b118cf991fd10bd8e0b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 21 May 2023 17:20:14 +0100 Subject: [PATCH 31/32] convert in pairgrid --- seaborn/axisgrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seaborn/axisgrid.py b/seaborn/axisgrid.py index e0f1aa1f09..9e58641699 100644 --- a/seaborn/axisgrid.py +++ b/seaborn/axisgrid.py @@ -1238,7 +1238,7 @@ def __init__( .. include:: ../docstrings/PairGrid.rst """ - + data = utils.try_convert_to_pandas(data) super().__init__() # Sort out the variables that define the grid From 991c34387610fc0b742d8ee524ab0c204043bc3b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 21 May 2023 19:27:43 +0100 Subject: [PATCH 32/32] remove separate workflow; --- .github/workflows/ci.yaml | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2c8b1bb083..8493ea5be2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -90,43 +90,6 @@ jobs: uses: codecov/codecov-action@v3 if: ${{ success() }} - run-interchange-protocol-tests: - runs-on: ubuntu-latest - - env: - SEABORN_TEST_INTERCHANGE_PROTOCOL: 1 - - strategy: - matrix: - python: ["3.10"] - - steps: - - uses: actions/checkout@v3 - - - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - - - name: Install polars and pandas nightly - run: | - pip install --upgrade pip wheel - # Install pandas nightly (necessary for interchanging - remove once pandas 2.0.2 is released) - pip install --upgrade numpy - pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas - pip install --upgrade polars pyarrow - - - name: Install seaborn - run: | - pip install .[dev] - - - name: Run tests - run: make test - - - name: Upload coverage - uses: codecov/codecov-action@v3 - if: ${{ success() }} - lint: runs-on: ubuntu-latest strategy: