Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade python to 3.7/3.8 and scikit-learn to v1.0.0 #836

Merged
merged 7 commits into from
Jan 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ unsafe.*
/python/**/*.pyc
.sbtopts
.DS_Store
.bsp/
27 changes: 20 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# Use container-based infrastructure
os: linux
dist: xenial
dist: focal

# Set default python env to be 3.7
# because the xgboost-spark 1.5.1 library when running training code, it will
# Set default python env
# because when the xgboost-spark library runs training code, it
# runs a python script (in RabitTracker) which only supports python3
env:
global:
- PATH=/opt/python/3.6.7/bin:$PATH
- PATH=/opt/python/3.8.15/bin:$PATH

addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-4.8
- g++-4.8
- gcc
- g++

services:
- docker
Expand Down Expand Up @@ -52,7 +52,20 @@ jobs:
--create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar
https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar
script:
- make py37_test
- make test_python37

- name: "Python 3.8 tests"
language: python
python: 3.8.15
install:
- pip install tox
before_script:
- >
curl
--create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar
https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar
script:
- make test_python38

- if: (NOT type IN (pull_request)) AND (branch = master)
stage: "Deploy"
Expand Down
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,14 @@ test_xgboost_runtime:
test_xgboost_spark:
$(SBT) "+ mleap-xgboost-spark/test"

.PHONY: py37_test
py37_test:
.PHONY: test_python37
test_python37:
source scripts/scala_classpath_for_python.sh && make -C python py37_test

.PHONY: test_python38
test_python38:
source scripts/scala_classpath_for_python.sh && make -C python py38_test

.PHONY: test
test: test_executor test_benchmark test_xgboost_runtime test_xgboost_spark test_root_sbt_project py37_test
# Aggregate target: runs every Scala test suite plus the Python suites for
# each supported interpreter. The Python prerequisites must name the real
# per-version targets (test_python37 / test_python38); a bare `test_python`
# target is not defined alongside them.
test: test_executor test_benchmark test_xgboost_runtime test_xgboost_spark test_root_sbt_project test_python37 test_python38
	@echo "All tests run successfully"
9 changes: 6 additions & 3 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ $(error SCALA_CLASS_PATH for python tests is not set. Please check out \
the top-level Makefile on how to source scala_classpath_for_python.sh)
endif

.PHONY: help env clean py37_test test build upload
.PHONY: help env clean py37_test py38_test test build upload

help:
@echo " env create a development environment using virtualenv"
Expand All @@ -26,9 +26,12 @@ clean:
find . -name '__pycache__' | xargs -r rm -rf

py37_test:
tox -e py37 -v
tox -e py37

test: py37_test
py38_test:
tox -e py38

test: py37_test py38_test
@echo "All python tests completed"

build: clean
Expand Down
12 changes: 10 additions & 2 deletions python/mleap/sklearn/extensions/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pandas as pd
from mleap.sklearn.preprocessing.data import ImputerSerializer, FeatureExtractor
from sklearn.impute import SimpleImputer as SKLearnImputer
from sklearn.preprocessing.data import BaseEstimator, TransformerMixin
from sklearn.base import BaseEstimator, TransformerMixin


class DefineEstimator(BaseEstimator, TransformerMixin):
Expand Down Expand Up @@ -75,7 +75,15 @@ def __init__(self, input_features, output_features,
self.feature_extractor = FeatureExtractor(input_scalars=[input_features],
output_vector='extracted_' + output_features,
output_vector_items=[output_features])
SKLearnImputer.__init__(self, missing_values, strategy, fill_value, verbose, copy, add_indicator)
SKLearnImputer.__init__(
self,
missing_values=missing_values,
strategy=strategy,
fill_value=fill_value,
verbose=verbose,
copy=copy,
add_indicator=add_indicator,
)

def fit(self, X, y=None):
super(Imputer, self).fit(self.feature_extractor.transform(X))
Expand Down
24 changes: 2 additions & 22 deletions python/mleap/sklearn/preprocessing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,9 @@
import pandas as pd
from mleap.bundle.serialize import MLeapSerializer, MLeapDeserializer, Vector
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Binarizer, PolynomialFeatures
from sklearn.preprocessing.data import BaseEstimator, TransformerMixin
from sklearn.preprocessing.data import OneHotEncoder
from sklearn.preprocessing.label import LabelEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Binarizer, PolynomialFeatures, OneHotEncoder, LabelEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import column_or_1d
from sklearn.utils.fixes import np_version
from sklearn.utils.validation import check_is_fitted


Expand All @@ -49,20 +46,6 @@ def __init__(self):
ops = ops()


def _check_numpy_unicode_bug(labels):
    """Reject unicode labels on NumPy releases older than 1.7.0.

    Those releases do not implement ``searchsorted`` correctly on unicode
    data; the fix landed in master before 1.7.0:

    https://github.com/numpy/numpy/pull/243

    Raises a RuntimeError when the running NumPy is older than 1.7.0 and
    ``labels.dtype.kind`` is ``'U'``; otherwise returns None.
    """
    # Check the NumPy version first so ``labels`` is only inspected on
    # affected releases.
    numpy_affected = np_version[:3] < (1, 7, 0)
    if not numpy_affected:
        return
    if labels.dtype.kind != 'U':
        return
    raise RuntimeError("NumPy < 1.7.0 does not implement searchsorted"
                       " on unicode data correctly. Please upgrade"
                       " NumPy to use LabelEncoder with unicode inputs.")


def serialize_to_bundle(self, path, model_name):
    """Serialize ``self`` by delegating to a fresh ``SimpleSerializer``.

    ``self``, ``path`` and ``model_name`` are forwarded unchanged to
    ``SimpleSerializer.serialize_to_bundle``, and its result is returned.
    """
    return SimpleSerializer().serialize_to_bundle(self, path, model_name)
Expand Down Expand Up @@ -373,7 +356,6 @@ def fit(self, X):
self : returns an instance of self.
"""
X = column_or_1d(X, warn=True)
_check_numpy_unicode_bug(X)
self.classes_ = np.unique(X)
return self

Expand All @@ -390,7 +372,6 @@ def fit_transform(self, X, y=None, **fit_params):
y : array-like of shape [n_samples]
"""
y = column_or_1d(X, warn=True)
_check_numpy_unicode_bug(X)
self.classes_, X = np.unique(X, return_inverse=True)
return X

Expand All @@ -410,7 +391,6 @@ def transform(self, y):
y = column_or_1d(y, warn=True)

classes = np.unique(y)
_check_numpy_unicode_bug(classes)
if len(np.intersect1d(classes, self.classes_)) < len(classes):
diff = np.setdiff1d(classes, self.classes_)
raise ValueError("y contains new labels: %s" % str(diff))
Expand Down
3 changes: 1 addition & 2 deletions python/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
-r requirements.txt
coverage<5.0.0
coverage
ipdb
nose
nose-exclude>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ numpy>=1.8.2
six>=1.10.0
scipy>=0.13.0b1
pandas>=1.0.5
scikit-learn>=0.22.0,<0.23.0
scikit-learn~=1.0.0
gensim<4.1.0
urllib3==1.26.5
16 changes: 5 additions & 11 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@
import sys
from setuptools import setup, find_packages

if sys.version_info < (2, 7):
print("Python versions prior to 2.7 are not supported for pip installed MLeap.",
file=sys.stderr)
exit(-1)

try:
exec(open('mleap/version.py').read())
except IOError:
Expand All @@ -35,14 +30,12 @@

VERSION = version

numpy_version = "1.8.2"

REQUIRED_PACKAGES = [
'numpy >= %s' % numpy_version,
'six >= 1.10.0',
'numpy>=1.8.2',
'six>=1.10.0',
'scipy>=0.13.0b1',
'pandas>=0.18.1',
'scikit-learn>=0.22.0,<0.23.0',
'scikit-learn~=1.0.0',
]

TESTS_REQUIRED_PACKAGES = [
Expand All @@ -59,6 +52,7 @@
zip_safe=False,
install_requires=REQUIRED_PACKAGES,
tests_require=TESTS_REQUIRED_PACKAGES,
python_requires=">=3.7",
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
Expand All @@ -69,7 +63,7 @@
'Topic :: Internet',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
],
)
8 changes: 4 additions & 4 deletions python/tests/sklearn/preprocessing/data_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,15 @@ def test_min_max_scaler_deserializer(self):
def test_min_max_scaler_multi_deserializer(self):

extract_features = ['a', 'b']
feature_extractor = FeatureExtractor(input_scalars=['a', 'b'],
feature_extractor = FeatureExtractor(input_scalars=extract_features,
output_vector='extracted_multi_outputs',
output_vector_items=["{}_out".format(x) for x in extract_features])

scaler = MinMaxScaler()
scaler.mlinit(prior_tf=feature_extractor,
output_features=['a_scaled', 'b_scaled'])

scaler.fit(self.df[['a']])
scaler.fit(self.df[extract_features])

scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

Expand All @@ -295,8 +295,8 @@ def test_min_max_scaler_multi_deserializer(self):
min_max_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name)

# Transform some sample data
res_a = scaler.transform(self.df[['a', 'b']])
res_b = min_max_scaler_tf.transform(self.df[['a', 'b']])
res_a = scaler.transform(self.df[extract_features])
res_b = min_max_scaler_tf.transform(self.df[extract_features])

self.assertEqual(res_a[0][0], res_b[0][0])
self.assertEqual(res_a[0][1], res_b[0][1])
Expand Down
6 changes: 4 additions & 2 deletions python/tox.ini
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
[tox]
envlist = py37
envlist = py37,py38
skipsdist = true

[testenv]
passenv = SCALA_CLASS_PATH
deps = -rrequirements-dev.txt
deps =
-rrequirements-dev.txt
-rrequirements.txt
commands =
nosetests --with-coverage \
--cover-package=mleap \
Expand Down