Skip to content

Commit

Permalink
Project import generated by Copybara. (#121)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 918d1e6be9d4d1489ede49cbd5a4ede9959f90ed

Co-authored-by: Snowflake Authors <[email protected]>
  • Loading branch information
sfc-gh-sanchlia and Snowflake Authors authored Oct 22, 2024
1 parent f54ab9f commit f737798
Show file tree
Hide file tree
Showing 140 changed files with 2,411 additions and 1,007 deletions.
14 changes: 7 additions & 7 deletions .bazelrc
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Common Default

# Wrapper to make sure tests are run.
test --run_under='//bazel:test_wrapper'
# Allow at most 3 hours for eternal tests.
test --run_under='//bazel:test_wrapper' --test_timeout=-1,-1,-1,10800

# Since integration tests are located in different packages than code under test,
# the default instrumentation filter would exclude the code under test. This
Expand All @@ -22,7 +23,6 @@ build:_extended_gpu_oss --platforms //bazel/platforms:extended_conda_gpu_env --h

# Python environment flag, should use in combination with other configs

build:py3.8 --repo_env=BAZEL_CONDA_PYTHON_VERSION=3.8
build:py3.9 --repo_env=BAZEL_CONDA_PYTHON_VERSION=3.9
build:py3.10 --repo_env=BAZEL_CONDA_PYTHON_VERSION=3.10
build:py3.11 --repo_env=BAZEL_CONDA_PYTHON_VERSION=3.11
Expand All @@ -35,15 +35,15 @@ run --config=_sf_only
cquery --config=_sf_only

# Config to sync files
run:pre_build --config=_build --config=py3.8
run:pre_build --config=_build --config=py3.9

# Config to run type check
build:typecheck --aspects @rules_mypy//:mypy.bzl%mypy_aspect --output_groups=mypy --config=_extended --config=py3.8
build:typecheck_oss --aspects @rules_mypy//:mypy.bzl%mypy_aspect --output_groups=mypy --config=_extended_oss --config=py3.8
build:typecheck_gpu_oss --aspects @rules_mypy//:mypy.bzl%mypy_aspect --output_groups=mypy --config=_extended_gpu_oss --config=py3.8
build:typecheck --aspects @rules_mypy//:mypy.bzl%mypy_aspect --output_groups=mypy --config=_extended --config=py3.9
build:typecheck_oss --aspects @rules_mypy//:mypy.bzl%mypy_aspect --output_groups=mypy --config=_extended_oss --config=py3.9
build:typecheck_gpu_oss --aspects @rules_mypy//:mypy.bzl%mypy_aspect --output_groups=mypy --config=_extended_gpu_oss --config=py3.9

# Config to build the doc
build:docs --config=_sf_only --config=py3.8
build:docs --config=_sf_only --config=py3.9

# Public the extended setting

Expand Down
69 changes: 68 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,70 @@
# Release History

## 1.6.4
## 1.7.0

### Behavior Change

- Generic: Require python >= 3.9.
- Data Connector: Update `to_torch_dataset` and `to_torch_datapipe` to add a dimension for scalar data.
This allows for more seamless integration with PyTorch `DataLoader`, which creates batches by stacking inputs of each batch.

Examples:

```python
ds = connector.to_torch_dataset(shuffle=False, batch_size=3)
```

- Input: "col1": [10, 11, 12]
- Previous batch: array([10., 11., 12.]) with shape (3,)
- New batch: array([[10.], [11.], [12.]]) with shape (3, 1)

- Input: "col2": [[0, 100], [1, 110], [2, 200]]
- Previous batch: array([[ 0, 100], [ 1, 110], [ 2, 200]]) with shape (3,2)
- New batch: No change

- Model Registry: External access integrations are optional when creating a model inference service in
Snowflake >= 8.40.0.
- Model Registry: Deprecate `build_external_access_integration` in favor of `build_external_access_integrations` in
`ModelVersion.create_service()`.

### Bug Fixes

- Registry: Updated `log_model` API to accept both signature and sample_input_data parameters.
- Feature Store: ExampleHelper uses fully qualified path for table name. Change weather features aggregation from 1d to 1h.
- Data Connector: Return numpy array with appropriate object type instead of list for multi-dimensional
  data from `to_torch_dataset` and `to_torch_datapipe`.
- Model explainability: Incompatibility between SHAP 0.42.1 and XGB 2.1.1 resolved by using latest SHAP 0.46.0.

### New Features

- Registry: Support passing a variable number of keyword arguments to class ModelContext. Example usage:

```python
mc = custom_model.ModelContext(
config = 'local_model_dir/config.json',
m1 = model1
)

class ExamplePipelineModel(custom_model.CustomModel):
def __init__(self, context: custom_model.ModelContext) -> None:
super().__init__(context)
v = open(self.context['config']).read()
self.bias = json.loads(v)['bias']

@custom_model.inference_api
def predict(self, input: pd.DataFrame) -> pd.DataFrame:
model_output = self.context['m1'].predict(input)
return pd.DataFrame({'output': model_output + self.bias})
```

- Model Development: Upgrade scikit-learn in UDTF backend for log_loss metric. As a result, `eps` argument is now ignored.
- Data Connector: Add the option of passing a `None` sized batch to `to_torch_dataset` for better
interoperability with PyTorch DataLoader.
- Model Registry: Support [pandas.CategoricalDtype](https://pandas.pydata.org/docs/reference/api/pandas.CategoricalDtype.html#pandas-categoricaldtype)
- Registry: It is now possible to pass `signatures` and `sample_input_data` at the same time to capture background
  data for explainability and data lineage.

## 1.6.4 (2024-10-17)

### Bug Fixes

Expand All @@ -18,6 +82,9 @@
- Registry: Fix a bug that `ModelVersion.run` is called in a nested way.
- Registry: Fix an issue that leads to `log_model` failure when local package version contains parts other than
base version.
- Fix issue where `sample_weights` were not being applied to search estimators.
- Model explainability: Fix bug which creates explain as a function instead of table function when enabling by default.
- Model explainability: Update lightgbm binary classification to return non-json values, from customer feedback.

### New Features

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ If you don't have a Snowflake account yet, you can [sign up for a 30-day free tr
Follow the [installation instructions](https://docs.snowflake.com/en/developer-guide/snowpark-ml/index#installing-snowpark-ml)
in the Snowflake documentation.

Python versions 3.8 to 3.11 are supported. You can use [miniconda](https://docs.conda.io/en/latest/miniconda.html) or
Python versions 3.9 to 3.11 are supported. You can use [miniconda](https://docs.conda.io/en/latest/miniconda.html) or
[anaconda](https://www.anaconda.com/) to create a Conda environment (recommended),
or [virtualenv](https://docs.python.org/3/tutorial/venv.html) to create a virtual environment.

Expand Down
2 changes: 1 addition & 1 deletion bazel/environments/conda-env-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies:
- numpy==1.23.5
- packaging==23.0
- ruamel.yaml==0.17.21
- scikit-learn==1.3.0
- scikit-learn==1.5.1
- sphinx==5.0.2
- toml==0.10.2
- types-toml==0.10.8.6
Expand Down
6 changes: 3 additions & 3 deletions bazel/environments/conda-env-snowflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- httpx==0.23.0
- importlib_resources==6.1.1
- inflection==0.5.1
- joblib==1.1.1
- joblib==1.4.2
- jsonschema==3.2.0
- lightgbm==3.3.5
- mlflow==2.3.1
Expand All @@ -46,11 +46,11 @@ dependencies:
- retrying==1.3.3
- ruamel.yaml==0.17.21
- s3fs==2023.3.0
- scikit-learn==1.3.0
- scikit-learn==1.5.1
- scipy==1.9.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- shap==0.42.1
- shap==0.46.0
- snowflake-connector-python==3.10.0
- snowflake-snowpark-python==1.17.0
- sphinx==5.0.2
Expand Down
6 changes: 3 additions & 3 deletions bazel/environments/conda-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- httpx==0.23.0
- importlib_resources==6.1.1
- inflection==0.5.1
- joblib==1.1.1
- joblib==1.4.2
- jsonschema==3.2.0
- lightgbm==3.3.5
- mlflow==2.3.1
Expand All @@ -46,11 +46,11 @@ dependencies:
- retrying==1.3.3
- ruamel.yaml==0.17.21
- s3fs==2023.3.0
- scikit-learn==1.3.0
- scikit-learn==1.5.1
- scipy==1.9.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- shap==0.42.1
- shap==0.46.0
- snowflake-connector-python==3.10.0
- snowflake-snowpark-python==1.17.0
- sphinx==5.0.2
Expand Down
6 changes: 3 additions & 3 deletions bazel/environments/conda-gpu-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- httpx==0.23.0
- importlib_resources==6.1.1
- inflection==0.5.1
- joblib==1.1.1
- joblib==1.4.2
- jsonschema==3.2.0
- lightgbm==3.3.5
- mlflow==2.3.1
Expand All @@ -48,11 +48,11 @@ dependencies:
- retrying==1.3.3
- ruamel.yaml==0.17.21
- s3fs==2023.3.0
- scikit-learn==1.3.0
- scikit-learn==1.5.1
- scipy==1.9.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- shap==0.42.1
- shap==0.46.0
- snowflake-connector-python==3.10.0
- snowflake-snowpark-python==1.17.0
- sphinx==5.0.2
Expand Down
2 changes: 1 addition & 1 deletion bazel/environments/fetch_conda_env_config.bzl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
def _fetch_conda_env_config_impl(rctx):
# read the particular environment variable we are interested in
env_name = rctx.os.environ.get("BAZEL_CONDA_ENV_NAME", "extended").lower()
python_ver = rctx.os.environ.get("BAZEL_CONDA_PYTHON_VERSION", "3.8").lower()
python_ver = rctx.os.environ.get("BAZEL_CONDA_PYTHON_VERSION", "3.9").lower()

# necessary to create empty BUILD file for this rule
# which will be located somewhere in the Bazel build files
Expand Down
12 changes: 11 additions & 1 deletion bazel/requirements/templates/meta.tpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@ requirements:
- python
- bazel==6.3.2
run:
- python>=3.8,<3.12
- python>=3.9,<3.12

test:
imports:
- snowflake.cortex
- snowflake.ml
- snowflake.ml.modeling
commands:
- pip check
requires:
- pip

about:
home: https://github.com/snowflakedb/snowflake-ml-python
Expand Down
3 changes: 1 addition & 2 deletions bazel/requirements/templates/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ classifiers = [
"Intended Audience :: System Administrators",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
Expand All @@ -30,7 +29,7 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Scientific/Engineering :: Information Analysis"
]
requires-python = ">=3.8, <3.12"
requires-python = ">=3.9, <3.12"
dynamic = ["version", "readme"]

[project.urls]
Expand Down
56 changes: 37 additions & 19 deletions ci/RunBazelAction.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,58 +27,65 @@ bazel="bazel"
mode="continuous_run"
target=""
SF_ENV="prod3"
WITH_SPCS_IMAGE=false
PROG=$0

action=$1 && shift

help() {
local exit_code=$1
echo "Usage: ${PROG} <test|coverage> [-b <bazel_path>] [-m merge_gate|continuous_run|quarantined|local_unittest|local_all] [-e <snowflake_env>]"
echo "Usage: ${PROG} <test|coverage> [-b <bazel_path>] [-m merge_gate|continuous_run|quarantined|local_unittest|local_all] [-e <snowflake_env>] [--with-spcs-image]"
exit "${exit_code}"
}

if [[ "${action}" != "test" && "${action}" != "coverage" ]]; then
help 1
fi

while getopts "b:m:t:c:e:h" opt; do
case "${opt}" in
m)
if [[ "${OPTARG}" = "merge_gate" || "${OPTARG}" = "continuous_run" || "${OPTARG}" = "quarantined" || "${OPTARG}" = "release" || "${OPTARG}" = "local_unittest" || "${OPTARG}" = "local_all" ]]; then
mode="${OPTARG}"
while (($#)); do
case $1 in
-m | --mode)
shift
if [[ $1 = "merge_gate" || $1 = "continuous_run" || $1 = "quarantined" || $1 = "release" || $1 = "local_unittest" || $1 = "local_all" ]]; then
mode=$1
if [[ $mode = "release" ]]; then
mode="continuous_run"
fi
else
help 1
fi
;;
b)
bazel="${OPTARG}"
-b | --bazel_path)
shift
bazel=$1
;;
t)
-t | --target)
shift
if [[ "${mode}" = "local_unittest" || "${mode}" = "local_all" ]]; then
target="${OPTARG}"
target=$1
else
help 1
fi
;;
c)
coverage_report_file="${OPTARG}"
-c | --coverage_report)
shift
coverage_report_file=$1
;;
e)
SF_ENV="${OPTARG}"
-e | --snowflake_env)
shift
SF_ENV=$1
;;
h)
help 0
--with-spcs-image)
WITH_SPCS_IMAGE=true
;;
:)
help 1
-h | --help)
help 0
;;
?)
*)
help 1
;;
esac
shift
done

if [[ ("${mode}" = "local_unittest" || "${mode}" = "local_all") ]]; then
Expand All @@ -89,6 +96,13 @@ else
"${bazel}" clean
fi

action_env=()

if [[ "${WITH_SPCS_IMAGE}" = true ]]; then
source model_container_services_deployment/ci/build_and_push_images.sh
action_env=("--action_env=BUILDER_IMAGE_PATH=${BUILDER_IMAGE_PATH}" "--action_env=BASE_CPU_IMAGE_PATH=${BASE_CPU_IMAGE_PATH}" "--action_env=BASE_GPU_IMAGE_PATH=${BASE_GPU_IMAGE_PATH}")
fi

working_dir=$(mktemp -d "/tmp/tmp_XXXXX")
trap 'rm -rf "${working_dir}"' EXIT

Expand Down Expand Up @@ -150,6 +164,7 @@ if [[ "${action}" = "test" ]]; then
"${cache_test_results}" \
--test_output=errors \
--flaky_test_attempts=2 \
${action_env[@]+"${action_env[@]}"} \
"${tag_filter}" \
--target_pattern_file "${sf_only_test_targets_file}"
sf_only_bazel_exit_code=$?
Expand All @@ -160,13 +175,15 @@ if [[ "${action}" = "test" ]]; then
"${cache_test_results}" \
--test_output=errors \
--flaky_test_attempts=2 \
${action_env[@]+"${action_env[@]}"} \
"${tag_filter}" \
--target_pattern_file "${extended_test_targets_file}"
extended_bazel_exit_code=$?
elif [[ "${action}" = "coverage" ]]; then
"${bazel}" coverage \
"${cache_test_results}" \
--combined_report=lcov \
${action_env[@]+"${action_env[@]}"} \
"${tag_filter}" \
--experimental_collect_code_coverage_for_generated_files \
--target_pattern_file "${sf_only_test_targets_file}"
Expand All @@ -180,6 +197,7 @@ elif [[ "${action}" = "coverage" ]]; then
--config=extended \
"${cache_test_results}" \
--combined_report=lcov \
${action_env[@]+"${action_env[@]}"} \
"${tag_filter}" \
--experimental_collect_code_coverage_for_generated_files \
--target_pattern_file "${extended_test_targets_file}"
Expand Down
Loading

0 comments on commit f737798

Please sign in to comment.