From df305543fb582bbd7ac86ce7f96b04ef34aee26f Mon Sep 17 00:00:00 2001 From: Jason Sleight Date: Tue, 20 Dec 2022 15:28:06 -0500 Subject: [PATCH 1/5] Upgrade python to 3.7/3.8 and scikit-learn to v1.0.0 --- .gitignore | 1 + .travis.yml | 23 ++++++++++++++---- Makefile | 8 +++---- python/Makefile | 9 ++++--- python/mleap/sklearn/extensions/data.py | 12 ++++++++-- python/mleap/sklearn/preprocessing/data.py | 24 ++----------------- python/requirements-dev.txt | 3 +-- python/requirements.txt | 2 +- python/setup.py | 16 ++++--------- .../tests/sklearn/preprocessing/data_test.py | 8 +++---- python/tox.ini | 6 +++-- 11 files changed, 56 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index c0701d5b9..92ad28cbf 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ unsafe.* /python/**/*.pyc .sbtopts .DS_Store +.bsp/ diff --git a/.travis.yml b/.travis.yml index 6a7524d6a..0eb4131c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,13 @@ # Use container-based infrastructure os: linux -dist: xenial +dist: focal -# Set default python env to be 3.7 -# because the xgboost-spark 1.5.1 library when running training code, it will +# Set default python env +# because the xgboost-spark library when running training code, it will # run a python script (in RabitTracker) which only support python3 env: global: - - PATH=/opt/python/3.6.7/bin:$PATH + - PATH=/opt/python/3.8.16/bin:$PATH addons: apt: @@ -52,7 +52,20 @@ jobs: --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar script: - - make py37_test + - make -C python py37_test + + - name: "Python 3.8 tests" + language: python + python: 3.8.16 + install: + - pip install tox + before_script: + - > + curl + --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar + https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar + script: + - make -C python py38_test - if: (NOT type IN (pull_request)) AND (branch = master) stage: "Deploy" diff --git a/Makefile b/Makefile index ecc19abcc..69415e6c6 100644 --- a/Makefile +++ b/Makefile @@ -23,10 +23,10 @@ test_xgboost_runtime: test_xgboost_spark: $(SBT) "+ mleap-xgboost-spark/test" -.PHONY: py37_test -py37_test: - source scripts/scala_classpath_for_python.sh && make -C python py37_test +.PHONY: test_python +test_python: + source scripts/scala_classpath_for_python.sh && make -C python test .PHONY: test -test: test_executor test_benchmark test_xgboost_runtime test_xgboost_spark test_root_sbt_project py37_test +test: test_executor test_benchmark test_xgboost_runtime test_xgboost_spark test_root_sbt_project test_python @echo "All tests run successfully" diff --git a/python/Makefile b/python/Makefile index 9e6f2059c..a0077f34e 100644 --- a/python/Makefile +++ b/python/Makefile @@ -5,7 +5,7 @@ $(error SCALA_CLASS_PATH for python tests is not set. Please check out \ the top-level Makefile on how to source scala_classpath_for_python.sh) endif -.PHONY: help env clean py37_test test build upload +.PHONY: help env clean py37_test py38_test test build upload help: @echo " env create a development environment using virtualenv" @@ -26,9 +26,12 @@ clean: find . -name '__pycache__' | xargs -r rm -rf py37_test: - tox -e py37 -v + tox -e py37 -test: py37_test +py38_test: + tox -e py38 + +test: py37_test py38_test @echo "All python tests completed" build: clean diff --git a/python/mleap/sklearn/extensions/data.py b/python/mleap/sklearn/extensions/data.py index 7f32c6632..c86c37ba3 100644 --- a/python/mleap/sklearn/extensions/data.py +++ b/python/mleap/sklearn/extensions/data.py @@ -21,7 +21,7 @@ import pandas as pd from mleap.sklearn.preprocessing.data import ImputerSerializer, FeatureExtractor from sklearn.impute import SimpleImputer as SKLearnImputer -from sklearn.preprocessing.data import BaseEstimator, TransformerMixin +from sklearn.base import BaseEstimator, TransformerMixin class DefineEstimator(BaseEstimator, TransformerMixin): @@ -75,7 +75,15 @@ def __init__(self, input_features, output_features, self.feature_extractor = FeatureExtractor(input_scalars=[input_features], output_vector='extracted_' + output_features, output_vector_items=[output_features]) - SKLearnImputer.__init__(self, missing_values, strategy, fill_value, verbose, copy, add_indicator) + SKLearnImputer.__init__( + self, + missing_values=missing_values, + strategy=strategy, + fill_value=fill_value, + verbose=verbose, + copy=copy, + add_indicator=add_indicator, + ) def fit(self, X, y=None): super(Imputer, self).fit(self.feature_extractor.transform(X)) diff --git a/python/mleap/sklearn/preprocessing/data.py b/python/mleap/sklearn/preprocessing/data.py index 85c2cdfb1..dec961e57 100644 --- a/python/mleap/sklearn/preprocessing/data.py +++ b/python/mleap/sklearn/preprocessing/data.py @@ -25,12 +25,9 @@ import pandas as pd from mleap.bundle.serialize import MLeapSerializer, MLeapDeserializer, Vector from sklearn.impute import SimpleImputer -from sklearn.preprocessing import StandardScaler, MinMaxScaler, Binarizer, PolynomialFeatures -from sklearn.preprocessing.data import BaseEstimator, TransformerMixin -from sklearn.preprocessing.data import OneHotEncoder -from sklearn.preprocessing.label import LabelEncoder +from sklearn.preprocessing import StandardScaler, MinMaxScaler, Binarizer, PolynomialFeatures, OneHotEncoder, LabelEncoder +from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import column_or_1d -from sklearn.utils.fixes import np_version from sklearn.utils.validation import check_is_fitted @@ -49,20 +46,6 @@ def __init__(self): ops = ops() -def _check_numpy_unicode_bug(labels): - """Check that user is not subject to an old numpy bug - - Fixed in master before 1.7.0: - - https://github.com/numpy/numpy/pull/243 - - """ - if np_version[:3] < (1, 7, 0) and labels.dtype.kind == 'U': - raise RuntimeError("NumPy < 1.7.0 does not implement searchsorted" - " on unicode data correctly. Please upgrade" - " NumPy to use LabelEncoder with unicode inputs.") - - def serialize_to_bundle(self, path, model_name): serializer = SimpleSerializer() return serializer.serialize_to_bundle(self, path, model_name) @@ -373,7 +356,6 @@ def fit(self, X): self : returns an instance of self. """ X = column_or_1d(X, warn=True) - _check_numpy_unicode_bug(X) self.classes_ = np.unique(X) return self @@ -390,7 +372,6 @@ def fit_transform(self, X, y=None, **fit_params): y : array-like of shape [n_samples] """ y = column_or_1d(X, warn=True) - _check_numpy_unicode_bug(X) self.classes_, X = np.unique(X, return_inverse=True) return X @@ -410,7 +391,6 @@ def transform(self, y): y = column_or_1d(y, warn=True) classes = np.unique(y) - _check_numpy_unicode_bug(classes) if len(np.intersect1d(classes, self.classes_)) < len(classes): diff = np.setdiff1d(classes, self.classes_) raise ValueError("y contains new labels: %s" % str(diff)) diff --git a/python/requirements-dev.txt b/python/requirements-dev.txt index 08edaf16d..3a8c1dc6d 100644 --- a/python/requirements-dev.txt +++ b/python/requirements-dev.txt @@ -1,5 +1,4 @@ --r requirements.txt -coverage<5.0.0 +coverage ipdb nose nose-exclude>=0.5.0 diff --git a/python/requirements.txt b/python/requirements.txt index 2e846a6ed..d9990b75d 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -2,6 +2,6 @@ numpy>=1.8.2 six>=1.10.0 scipy>=0.13.0b1 pandas>=1.0.5 -scikit-learn>=0.22.0,<0.23.0 +scikit-learn~=1.0.0 gensim<4.1.0 urllib3==1.26.5 diff --git a/python/setup.py b/python/setup.py index e7b1bcabe..586a5f683 100755 --- a/python/setup.py +++ b/python/setup.py @@ -21,11 +21,6 @@ import sys from setuptools import setup, find_packages -if sys.version_info < (2, 7): - print("Python versions prior to 2.7 are not supported for pip installed MLeap.", - file=sys.stderr) - exit(-1) - try: exec(open('mleap/version.py').read()) except IOError: @@ -35,14 +30,12 @@ VERSION = version -numpy_version = "1.8.2" - REQUIRED_PACKAGES = [ - 'numpy >= %s' % numpy_version, - 'six >= 1.10.0', + 'numpy>=1.8.2', + 'six>=1.10.0', 'scipy>=0.13.0b1', 'pandas>=0.18.1', - 'scikit-learn>=0.22.0,<0.23.0', + 'scikit-learn~=1.0.0', ] TESTS_REQUIRED_PACKAGES = [ @@ -59,6 +52,7 @@ zip_safe=False, install_requires=REQUIRED_PACKAGES, tests_require=TESTS_REQUIRED_PACKAGES, + python_requires=">=3.7", classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', @@ -69,7 +63,7 @@ 'Topic :: Internet', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], ) diff --git a/python/tests/sklearn/preprocessing/data_test.py b/python/tests/sklearn/preprocessing/data_test.py index b8dccfa70..2a9abbe16 100644 --- a/python/tests/sklearn/preprocessing/data_test.py +++ b/python/tests/sklearn/preprocessing/data_test.py @@ -277,7 +277,7 @@ def test_min_max_scaler_deserializer(self): def test_min_max_scaler_multi_deserializer(self): extract_features = ['a', 'b'] - feature_extractor = FeatureExtractor(input_scalars=['a', 'b'], + feature_extractor = FeatureExtractor(input_scalars=extract_features, output_vector='extracted_multi_outputs', output_vector_items=["{}_out".format(x) for x in extract_features]) @@ -285,7 +285,7 @@ def test_min_max_scaler_multi_deserializer(self): scaler.mlinit(prior_tf=feature_extractor, output_features=['a_scaled', 'b_scaled']) - scaler.fit(self.df[['a']]) + scaler.fit(self.df[extract_features]) scaler.serialize_to_bundle(self.tmp_dir, scaler.name) @@ -295,8 +295,8 @@ def test_min_max_scaler_multi_deserializer(self): min_max_scaler_tf.deserialize_from_bundle(self.tmp_dir, node_name) # Transform some sample data - res_a = scaler.transform(self.df[['a', 'b']]) - res_b = min_max_scaler_tf.transform(self.df[['a', 'b']]) + res_a = scaler.transform(self.df[extract_features]) + res_b = min_max_scaler_tf.transform(self.df[extract_features]) self.assertEqual(res_a[0][0], res_b[0][0]) self.assertEqual(res_a[0][1], res_b[0][1]) diff --git a/python/tox.ini b/python/tox.ini index fbe1ac8a1..eaf3ae872 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -1,10 +1,12 @@ [tox] -envlist = py37 +envlist = py37,py38 skipdist = true [testenv] passenv = SCALA_CLASS_PATH -deps = -rrequirements-dev.txt +deps = + -rrequirements-dev.txt + -rrequirements.txt commands = nosetests --with-coverage \ --cover-package=mleap \ From 380fd178118f4283569935eac319a8d11854a2c1 Mon Sep 17 00:00:00 2001 From: Jason Sleight Date: Tue, 20 Dec 2022 15:53:26 -0500 Subject: [PATCH 2/5] Upgrade travis build environment versions. --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0eb4131c0..98a3b39f6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,15 +7,15 @@ dist: focal # run a python script (in RabitTracker) which only support python3 env: global: - - PATH=/opt/python/3.8.16/bin:$PATH + - PATH=/opt/python/3.8.15/bin:$PATH addons: apt: sources: - ubuntu-toolchain-r-test packages: - - gcc-4.8 - - g++-4.8 + - gcc-9.3 + - g++-9.3 services: - docker @@ -56,7 +56,7 @@ jobs: - name: "Python 3.8 tests" language: python - python: 3.8.16 + python: 3.8.15 install: - pip install tox before_script: From c901158b4f64034874361b5b1bf302f709ac7c7a Mon Sep 17 00:00:00 2001 From: Jason Sleight Date: Tue, 20 Dec 2022 15:57:44 -0500 Subject: [PATCH 3/5] Another attempt at travis gcc configs --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98a3b39f6..8db40fa19 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,8 @@ addons: sources: - ubuntu-toolchain-r-test packages: - - gcc-9.3 - - g++-9.3 + - gcc + - g++ services: - docker From e99a138ee8f2b41bb0f2a1959bacb31493f475ad Mon Sep 17 00:00:00 2001 From: Jason Sleight Date: Tue, 20 Dec 2022 16:14:11 -0500 Subject: [PATCH 4/5] We will make travis work eventually. --- .travis.yml | 4 ++-- Makefile | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8db40fa19..0cfe0f49d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,7 +52,7 @@ jobs: --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar script: - - make -C python py37_test + - make -C python test_python37 - name: "Python 3.8 tests" language: python @@ -65,7 +65,7 @@ jobs: --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar script: - - make -C python py38_test + - make -C python test_python38 - if: (NOT type IN (pull_request)) AND (branch = master) stage: "Deploy" diff --git a/Makefile b/Makefile index 69415e6c6..45cbabecc 100644 --- a/Makefile +++ b/Makefile @@ -23,9 +23,13 @@ test_xgboost_runtime: test_xgboost_spark: $(SBT) "+ mleap-xgboost-spark/test" -.PHONY: test_python -test_python: - source scripts/scala_classpath_for_python.sh && make -C python test +.PHONY: test_python37 +test_python37: + source scripts/scala_classpath_for_python.sh && make -C python py37_test + +.PHONY: test_python38 +test_python38: + source scripts/scala_classpath_for_python.sh && make -C python py38_test .PHONY: test test: test_executor test_benchmark test_xgboost_runtime test_xgboost_spark test_root_sbt_project test_python From 7c0ab052387578a50df3bb2b3af1a65496337a36 Mon Sep 17 00:00:00 2001 From: Jason Sleight Date: Tue, 20 Dec 2022 16:40:29 -0500 Subject: [PATCH 5/5] More travis attempts. --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0cfe0f49d..4fc7007c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,7 +52,7 @@ jobs: --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar script: - - make -C python test_python37 + - make test_python37 - name: "Python 3.8 tests" language: python @@ -65,7 +65,7 @@ jobs: --create-dirs -L -o /home/travis/.sbt/launchers/1.4.9/sbt-launch.jar https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.4.9/sbt-launch-1.4.9.jar script: - - make -C python test_python38 + - make test_python38 - if: (NOT type IN (pull_request)) AND (branch = master) stage: "Deploy"