Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Introducing test file for polars support for estimators #370

Merged
merged 27 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
539fd51
create test_polars.py file
julian-fong May 26, 2024
4712da9
updates
julian-fong May 28, 2024
fe5333b
initial commit
julian-fong May 30, 2024
5f578fd
added polars eager table to allowed mtypes in base regressor
julian-fong May 30, 2024
cf8a0d5
added draft version of testing fit and predict in polars dataframe
julian-fong May 30, 2024
9357486
fixed to use skpro check soft dependencies
julian-fong May 30, 2024
1a23ee0
updated tests
julian-fong Jun 2, 2024
89079f6
added test for predict_quantiles
julian-fong Jun 2, 2024
02f699f
fixed naming of pandas datafarmes
julian-fong Jun 2, 2024
c49ed0e
Merge branch 'sktime:main' into polars_support
julian-fong Jun 2, 2024
be084ef
added test for check_polars_table
julian-fong Jun 3, 2024
5c3697e
updates to pr
julian-fong Jun 7, 2024
32e700a
updated estimator to be a pytest fixture for one estimator
julian-fong Jun 10, 2024
0470817
Merge branch 'sktime:main' into polars_support
julian-fong Jun 11, 2024
497e1ef
bug fix
julian-fong Jun 11, 2024
8d3b541
update
julian-fong Jun 11, 2024
782e714
update
julian-fong Jun 11, 2024
39590f7
updates
julian-fong Jun 11, 2024
20643c5
updates
julian-fong Jun 11, 2024
05e96bf
updates
julian-fong Jun 11, 2024
ad697a3
updates
julian-fong Jun 11, 2024
00ac2bf
updates
julian-fong Jun 11, 2024
78d5d46
Merge branch 'sktime:main' into polars_support
julian-fong Jun 13, 2024
f464b7f
Merge branch 'sktime:main' into polars_support
julian-fong Jun 13, 2024
5eba103
Merge branch 'sktime:main' into polars_support
julian-fong Jun 14, 2024
227d623
updates to remove unnecessary skipifs and changed the estimator used …
julian-fong Jun 14, 2024
0b51616
Merge branch 'sktime:main' into polars_support
julian-fong Jun 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion skpro/regression/base/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,22 @@

from skpro.base import BaseEstimator
from skpro.datatypes import check_is_error_msg, check_is_mtype, convert
from skpro.utils.validation._dependencies import _check_estimator_deps
from skpro.utils.validation._dependencies import (
_check_estimator_deps,
_check_soft_dependencies,
)

# mtypes accepted as estimator input
# the base list only names mtypes that rely on core dependencies
ALLOWED_MTYPES = ["pd_DataFrame_Table", "pd_Series_Table", "numpy1D", "numpy2D"]

# the polars eager table mtype is added only if the soft dependency check
# for polars and pyarrow succeeds; severity="none" makes the check return
# a result instead of raising or warning
if _check_soft_dependencies(["polars", "pyarrow"], severity="none"):
    ALLOWED_MTYPES += ["polars_eager_table"]


class BaseProbaRegressor(BaseEstimator):
Expand Down
146 changes: 146 additions & 0 deletions skpro/tests/test_polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""Test file for polars dataframes"""

import pandas as pd
import pytest
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

from skpro.utils.validation._dependencies import _check_soft_dependencies

if _check_soft_dependencies(["polars", "pyarrow"], severity="none"):
import polars as pl

from skpro.datatypes._table._check import check_polars_table
from skpro.datatypes._table._convert import convert_pandas_to_polars_eager

# values passed as ``alpha`` to ``predict_quantiles`` in the quantile test below
TEST_ALPHAS = [0.05, 0.1, 0.25]


@pytest.fixture
def polars_load_diabetes_pandas():
    """Provide a small pandas train/test split of the diabetes dataset.

    Only the first 75 rows are used, split with ``test_size=0.33`` and
    ``random_state=42``.

    Returns
    -------
    list
        ``[X_train, X_test, y_train]``; the test targets are discarded.
    """
    n_rows = 75
    features, target = load_diabetes(return_X_y=True, as_frame=True)
    features = features.iloc[:n_rows]

    # y is typically returned as a pd.Series, so it is wrapped in a DataFrame
    target = pd.DataFrame(target.iloc[:n_rows])

    split = train_test_split(features, target, test_size=0.33, random_state=42)
    features_train, features_test, target_train = split[:3]
    return [features_train, features_test, target_train]


@pytest.fixture
def estimator():
    """Provide a fresh ``ResidualDouble`` built on ``LinearRegression``."""
    # imports are kept local to the fixture, as in the original layout
    from sklearn.linear_model import LinearRegression

    from skpro.regression.residual import ResidualDouble

    return ResidualDouble(LinearRegression())


@pytest.fixture
def polars_load_diabetes_polars(polars_load_diabetes_pandas):
    """Provide the diabetes split converted with the polars eager converter.

    Returns
    -------
    list
        ``[X_train_pl, X_test_pl, y_train_pl]``, in the same order as the
        pandas fixture this one is derived from.
    """
    return [
        convert_pandas_to_polars_eager(frame)
        for frame in polars_load_diabetes_pandas
    ]


@pytest.mark.skipif(
    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
    reason="skip test if polars/pyarrow is not installed in environment",
)
def test_polars_eager_conversion_methods(
    polars_load_diabetes_pandas, polars_load_diabetes_polars
):
    """Check the pandas to polars eager conversion.

    Every converted table must pass ``check_polars_table`` and hold exactly
    the same values as the pandas frame it was converted from.
    """
    pandas_frames = polars_load_diabetes_pandas
    polars_frames = polars_load_diabetes_polars

    # first: each converted object is accepted by the polars table check
    for table in polars_frames:
        assert check_polars_table(table)

    # then: values survive the conversion unchanged, frame by frame
    for frame_pd, frame_pl in zip(pandas_frames, polars_frames):
        assert (frame_pd.values == frame_pl.to_numpy()).all()


@pytest.mark.skipif(
    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
    reason="skip test if polars/pyarrow is not installed in environment",
)
def test_polars_eager_regressor_in_fit_predict(
    estimator, polars_load_diabetes_pandas, polars_load_diabetes_polars
):
    """Check that a regressor can fit and predict on polars dataframes.

    The estimator is fitted on polars input and must return a polars
    ``DataFrame`` with the training target's columns and one row per test row.
    An independent clone is then fitted on the equivalent pandas input and
    its predictions must match the polars-based predictions exactly.

    Parameters
    ----------
    estimator : fixture
        the regression estimator under test
    polars_load_diabetes_pandas : fixture
        ``[X_train, X_test, y_train]`` as pandas objects
    polars_load_diabetes_polars : fixture
        ``[X_train_pl, X_test_pl, y_train_pl]`` as polars objects
    """
    # TODO - expand estimator to include a list of regression models to test
    # take a real, independent copy before fitting: a plain assignment would
    # only alias the same object, so both fits would run on one estimator
    estimator_copy = estimator.clone()
    X_train, X_test, y_train = polars_load_diabetes_pandas
    X_train_pl, X_test_pl, y_train_pl = polars_load_diabetes_polars

    estimator.fit(X_train_pl, y_train_pl)
    y_pred = estimator.predict(X_test_pl)

    assert isinstance(y_pred, pl.DataFrame)
    assert y_pred.columns == y_train_pl.columns
    assert y_pred.shape[0] == X_test_pl.shape[0]

    # code to ensure prediction values match up correctly
    estimator_copy.fit(X_train, y_train)
    y_pred_pd = estimator_copy.predict(X_test)
    assert (y_pred_pd.values == y_pred.to_numpy()).all()


@pytest.mark.skipif(
    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
    reason="skip test if polars/pyarrow is not installed in environment",
)
def test_polars_eager_regressor_in_predict_interval(
    estimator, polars_load_diabetes_polars
):
    """Check ``predict_interval`` output for an estimator fitted on polars data.

    The result must be a pandas ``DataFrame`` whose first two columns are the
    lower and upper bound of the 0.9 interval for ``"target"``.
    """
    X_train_pl, X_test_pl, y_train_pl = polars_load_diabetes_polars
    # TODO - expand estimator to include a list of regression models to test
    estimator.fit(X_train_pl, y_train_pl)
    intervals = estimator.predict_interval(X_test_pl)

    assert isinstance(intervals, pd.DataFrame)

    expected_columns = [("target", 0.9, "lower"), ("target", 0.9, "upper")]
    for position, expected in enumerate(expected_columns):
        assert intervals.columns[position] == expected


@pytest.mark.skipif(
    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
    reason="skip test if polars/pyarrow is not installed in environment",
)
def test_polars_eager_regressor_in_predict_quantiles(
    estimator, polars_load_diabetes_polars
):
    """Check ``predict_quantiles`` output for an estimator fitted on polars data.

    The result must be a pandas ``DataFrame`` whose first three columns are
    the ``"target"`` quantiles at 0.05, 0.1 and 0.25, in that order.
    """
    X_train_pl, X_test_pl, y_train_pl = polars_load_diabetes_polars

    estimator.fit(X_train_pl, y_train_pl)
    quantiles = estimator.predict_quantiles(X_test_pl, alpha=TEST_ALPHAS)

    assert isinstance(quantiles, pd.DataFrame)

    expected_columns = [("target", 0.05), ("target", 0.1), ("target", 0.25)]
    for position, expected in enumerate(expected_columns):
        assert quantiles.columns[position] == expected
Loading