From d82ac0dc172d21ac58621bab632622333381e5ab Mon Sep 17 00:00:00 2001 From: Alexey Roytman Date: Tue, 24 Sep 2024 19:27:20 +0300 Subject: [PATCH 1/6] support python 3.12 Signed-off-by: Alexey Roytman --- .make.defaults | 2 +- .make.versions | 2 +- kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml | 2 +- kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml | 2 +- kfp/kfp_support_lib/shared_workflow_support/pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.make.defaults b/.make.defaults index 8d7f454da..f9156db1d 100644 --- a/.make.defaults +++ b/.make.defaults @@ -51,7 +51,7 @@ DOCKER_LOCAL_IMAGE=$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) DOCKER_SPARK_BASE_IMAGE_NAME=data-prep-kit-spark-3.5.2 DOCKER_SPARK_BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) -RAY_BASE_IMAGE?=docker.io/rayproject/ray:${RAY}-py310 +RAY_BASE_IMAGE?=docker.io/rayproject/ray:${RAY}-py312 # Deprecated in favor of DOCKER_REMOTE_IMAGE DOCKER_IMAGE?=$(DOCKER_REMOTE_IMAGE) KIND_CLUSTER_NAME=dataprep diff --git a/.make.versions b/.make.versions index 54e6d8ca1..361d7034c 100644 --- a/.make.versions +++ b/.make.versions @@ -112,7 +112,7 @@ KFP_v2=2.2.0 KFP_v2_SDK=2.8.0 KFP_v1=1.8.5 KFP_v1_SDK=1.8.22 -RAY=2.24.0 +RAY=2.36.1 ifeq ($(KFPv2), 1) WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml index eaea5fb0d..4a6b2505b 100644 --- a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_kfp_v1" version = "0.2.1.dev3" -requires-python = ">=3.10,<3.12" +requires-python = ">=3.10,<=3.12" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml index c5ca32f1a..7568c99db 100644 --- a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_kfp_v2" version = "0.2.1.dev3" -requires-python = ">=3.10,<3.12" +requires-python = ">=3.10,<=3.12" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml index b4f509433..97cf08779 100644 --- a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_kfp_shared" version = "0.2.1.dev3" -requires-python = ">=3.10,<3.12" +requires-python = ">=3.10,<=3.12" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} From 690c23b5999e378158c5afe0f67958988294161b Mon Sep 17 00:00:00 2001 From: Alexey Roytman Date: Tue, 24 Sep 2024 19:41:19 +0300 Subject: [PATCH 2/6] update required ray version Signed-off-by: Alexey Roytman --- data-processing-lib/ray/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-processing-lib/ray/pyproject.toml b/data-processing-lib/ray/pyproject.toml index 3f347cdf4..5b60eaf71 100644 --- a/data-processing-lib/ray/pyproject.toml +++ b/data-processing-lib/ray/pyproject.toml @@ -12,7 +12,7 @@ authors = [ ] dependencies = [ "data-prep-toolkit>=0.2.1.dev3", - "ray[default]==2.24.0", + "ray[default]==2.36.1", # These two are to fix security issues identified by quay.io "fastapi>=0.110.2", "pillow>=10.3.0", From a8139c86d263d85cb154eda96ee524ff1cf0faf6 Mon Sep 17 00:00:00 2001 From: Alexey Roytman Date: Tue, 24 Sep 2024 21:45:07 +0300 Subject: [PATCH 3/6] remove python <=3.12 restriction Signed-off-by: Alexey Roytman --- kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml | 2 +- kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml | 2 +- kfp/kfp_support_lib/shared_workflow_support/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml index 4a6b2505b..15fcf70c4 100644 --- a/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_kfp_v1" version = "0.2.1.dev3" -requires-python = ">=3.10,<=3.12" +requires-python = ">=3.10" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml index 7568c99db..107f2c9d6 100644 --- a/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_kfp_v2" version = "0.2.1.dev3" -requires-python = ">=3.10,<=3.12" +requires-python = ">=3.10" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml index 97cf08779..1f7bc54ca 100644 --- a/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml +++ b/kfp/kfp_support_lib/shared_workflow_support/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_kfp_shared" version = "0.2.1.dev3" -requires-python = ">=3.10,<=3.12" +requires-python = ">=3.10" description = "Data Preparation Kit Library. KFP support" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} From 683197ae4488390cad61581cd6e87b396f300203 Mon Sep 17 00:00:00 2001 From: Alexey Roytman Date: Wed, 25 Sep 2024 23:40:39 +0300 Subject: [PATCH 4/6] set python versions --- .make.defaults | 19 +++++++++++++++++-- .make.versions | 2 ++ data-processing-lib/python/pyproject.toml | 2 +- data-processing-lib/ray/Makefile | 3 +++ data-processing-lib/ray/pyproject.toml | 2 +- data-processing-lib/spark/pyproject.toml | 2 +- kfp/kfp_ray_components/Dockerfile | 2 +- .../code/code2parquet/python/pyproject.toml | 2 +- .../code/code2parquet/ray/pyproject.toml | 2 +- .../code/code_quality/python/pyproject.toml | 2 +- .../code/code_quality/ray/pyproject.toml | 2 +- .../header_cleanser/python/pyproject.toml | 2 +- .../code/header_cleanser/ray/pyproject.toml | 2 +- transforms/code/malware/python/pyproject.toml | 2 +- transforms/code/malware/ray/pyproject.toml | 2 +- .../proglang_select/python/pyproject.toml | 2 +- .../code/proglang_select/ray/pyproject.toml | 2 +- .../repo_level_ordering/ray/pyproject.toml | 2 +- .../language/doc_chunk/python/pyproject.toml | 2 +- .../language/doc_chunk/ray/pyproject.toml | 2 +- .../doc_quality/python/pyproject.toml | 2 +- .../language/doc_quality/ray/pyproject.toml | 2 +- .../language/lang_id/python/pyproject.toml | 2 +- .../language/lang_id/ray/pyproject.toml | 2 +- .../pdf2parquet/python/pyproject.toml | 2 +- .../language/pdf2parquet/ray/pyproject.toml | 2 +- .../pii_redactor/python/pyproject.toml | 2 +- .../language/pii_redactor/ray/pyproject.toml | 2 +- .../text_encoder/python/pyproject.toml | 2 +- .../language/text_encoder/ray/pyproject.toml | 2 +- transforms/packaging/python/pyproject.toml | 2 +- transforms/packaging/ray/pyproject.toml | 2 +- .../universal/doc_id/python/pyproject.toml | 2 +- .../universal/doc_id/ray/pyproject.toml | 2 +- .../universal/doc_id/spark/pyproject.toml | 2 +- .../universal/ededup/python/pyproject.toml | 2 +- .../universal/ededup/ray/pyproject.toml | 2 +- .../universal/fdedup/ray/pyproject.toml | 2 +- .../universal/filter/python/pyproject.toml | 2 +- .../universal/filter/ray/pyproject.toml | 2 +- .../universal/filter/spark/pyproject.toml | 2 +- .../html2parquet/python/pyproject.toml | 2 +- .../universal/noop/python/pyproject.toml | 2 +- transforms/universal/noop/ray/pyproject.toml | 2 +- .../universal/noop/spark/pyproject.toml | 2 +- .../universal/profiler/ray/pyproject.toml | 2 +- .../universal/resize/python/pyproject.toml | 2 +- .../universal/resize/ray/pyproject.toml | 2 +- .../tokenization/python/pyproject.toml | 2 +- .../universal/tokenization/ray/pyproject.toml | 2 +- 50 files changed, 69 insertions(+), 49 deletions(-) diff --git a/.make.defaults b/.make.defaults index f9156db1d..c6c0d9d6a 100644 --- a/.make.defaults +++ b/.make.defaults @@ -170,8 +170,8 @@ __check_defined = \ @version=$$(echo $(PYTHON_VERSION) | sed -e 's/Python[ ]*//'); \ major=$$(echo $$version | awk -F. '{print $$1}'); \ minor=$$(echo $$version | awk -F. '{print $$2}'); \ - if [ $$major -lt 3 -o $$minor -lt 10 -o $$minor -gt 11 ]; then \ - echo Python 3.10 or 3.11 is required, but got $$version; \ + if [ $$major -lt 3 -o $$minor -lt 10 -o $$minor -gt 12 ]; then \ + echo Python 3.10 - 3.12 is required, but got $$version; \ echo Try overriding PYTHON=$(PYTHON). For example, "'"make PYTHON=python3.10" ...'"; \ exit 1; \ fi @@ -553,12 +553,15 @@ MINIO_ADMIN_PWD= localminiosecretkey # Changes the version field of the pyproject.toml file to the given version # and update the referenced library versions as defined in .make.versions. # Expects TOML_VERSION + + .PHONY: .defaults.update-toml .defaults.update-toml: $(call check_defined, TOML_VERSION) if [ -e pyproject.toml ]; then \ $(MAKE) TOML_VERSION=$(TOML_VERSION) .defaults.__set-toml-version; \ $(MAKE) .defaults.__update-toml-lib-dep-versions; \ + $(MAKE) .defaults.__update-toml-python-versions; \ fi # Changes the version field of the pyproject.toml file to the given version @@ -573,6 +576,18 @@ MINIO_ADMIN_PWD= localminiosecretkey mv tt.toml pyproject.toml; \ fi +# Updates the Python supported versions field of the pyproject.toml file to the given versions +# Expects REQUIRED_PYTHON_VERSIONS +.PHONY: .defaults.__update-toml-python-versions +.defaults.__update-toml-python-versions: + @# Help: Set the version= field of pyproject.toml + if [ -e pyproject.toml ]; then \ + cat pyproject.toml | sed -e \ + 's/^requires-python[ ]*=.*"/requires-python = "'${REQUIRED_PYTHON_VERSIONS}'"/' \ + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi + # Updates the versions references to our repo source as defined in .make.versions .PHONY: .defaults.__update-toml-lib-dep-versions .defaults.__update-toml-lib-dep-versions: diff --git a/.make.versions b/.make.versions index 8bdd36cbd..e77220915 100644 --- a/.make.versions +++ b/.make.versions @@ -114,6 +114,8 @@ KFP_v1=1.8.5 KFP_v1_SDK=1.8.22 RAY=2.36.1 +REQUIRED_PYTHON_VERSIONS=">=3.10,<3.13" + ifeq ($(KFPv2), 1) WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support else diff --git a/data-processing-lib/python/pyproject.toml b/data-processing-lib/python/pyproject.toml index fd1f391c5..b29c889e3 100644 --- a/data-processing-lib/python/pyproject.toml +++ b/data-processing-lib/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Library" license = {text = "Apache-2.0"} diff --git a/data-processing-lib/ray/Makefile b/data-processing-lib/ray/Makefile index a16fdf399..051c37838 100644 --- a/data-processing-lib/ray/Makefile +++ b/data-processing-lib/ray/Makefile @@ -17,6 +17,9 @@ setup:: set-versions: .check-env $(MAKE) TOML_VERSION=$(DPK_LIB_VERSION) .defaults.update-toml + cat pyproject.toml | sed -e 's/"ray[default]==.*",/"ray[default]==$(RAY)",/' > tt.toml + mv tt.toml pyproject.toml + build:: build-dist diff --git a/data-processing-lib/ray/pyproject.toml b/data-processing-lib/ray/pyproject.toml index 5a9fa3340..a7f476560 100644 --- a/data-processing-lib/ray/pyproject.toml +++ b/data-processing-lib/ray/pyproject.toml @@ -2,7 +2,7 @@ name = "data_prep_toolkit_ray" version = "0.2.2.dev0" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Data Preparation Toolkit Library for Ray" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/data-processing-lib/spark/pyproject.toml b/data-processing-lib/spark/pyproject.toml index affd271bf..e8d0c8285 100644 --- a/data-processing-lib/spark/pyproject.toml +++ b/data-processing-lib/spark/pyproject.toml @@ -2,7 +2,7 @@ name = "data_prep_toolkit_spark" version = "0.2.2.dev0" keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Data Preparation Toolkit Library for Spark" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/kfp/kfp_ray_components/Dockerfile b/kfp/kfp_ray_components/Dockerfile index 59db5286e..225ba6b09 100644 --- a/kfp/kfp_ray_components/Dockerfile +++ b/kfp/kfp_ray_components/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=docker.io/rayproject/ray:2.24.0-py310 +ARG BASE_IMAGE=docker.io/rayproject/ray:2.36.1-py312 FROM ${BASE_IMAGE} diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml index de79b24a3..34a668bf0 100644 --- a/transforms/code/code2parquet/python/pyproject.toml +++ b/transforms/code/code2parquet/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_code2parquet_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "code2parquet Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index c46bf25ef..3f8808037 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_code2parquet_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "code2parquet Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index ce8bb817b..58e2affa7 100644 --- a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_code_quality_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Code Quality Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 84b0f5cd6..78ded1ce0 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_code_quality_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Code Quality Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/header_cleanser/python/pyproject.toml b/transforms/code/header_cleanser/python/pyproject.toml index d70fe54d8..c4326b4a0 100644 --- a/transforms/code/header_cleanser/python/pyproject.toml +++ b/transforms/code/header_cleanser/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_header_cleanser_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "License and Copyright Removal Transform for Python" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/header_cleanser/ray/pyproject.toml b/transforms/code/header_cleanser/ray/pyproject.toml index e82159bad..7509027a1 100644 --- a/transforms/code/header_cleanser/ray/pyproject.toml +++ b/transforms/code/header_cleanser/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_header_cleanser_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "License and copyright removal Transform for Ray" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index 4a7faa3db..256a10b79 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_malware_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Malware Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index 3671401a1..cf454e856 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_malware_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Malware Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 9963aa5bb..25aa5fdcf 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ b/transforms/code/proglang_select/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_proglang_select_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Programming Language Selection Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index 1bca747a8..1730ab04f 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_proglang_select_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Programming Language Selection Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml b/transforms/code/repo_level_ordering/ray/pyproject.toml index 08c20467d..e87d133e0 100644 --- a/transforms/code/repo_level_ordering/ray/pyproject.toml +++ b/transforms/code/repo_level_ordering/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_repo_level_order_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "repo_level_order Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/doc_chunk/python/pyproject.toml b/transforms/language/doc_chunk/python/pyproject.toml index eb649fd9b..7705779b0 100644 --- a/transforms/language/doc_chunk/python/pyproject.toml +++ b/transforms/language/doc_chunk/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_chunk_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "chunk documents Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/doc_chunk/ray/pyproject.toml b/transforms/language/doc_chunk/ray/pyproject.toml index 762a875f2..6bab175b8 100644 --- a/transforms/language/doc_chunk/ray/pyproject.toml +++ b/transforms/language/doc_chunk/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_chunk_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "chunk documents Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/doc_quality/python/pyproject.toml b/transforms/language/doc_quality/python/pyproject.toml index f9f7f13c5..8ebec8fe3 100644 --- a/transforms/language/doc_quality/python/pyproject.toml +++ b/transforms/language/doc_quality/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_quality_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Document Quality Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml index 7c6e98245..0588c1997 100644 --- a/transforms/language/doc_quality/ray/pyproject.toml +++ b/transforms/language/doc_quality/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_quality_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Document Quality Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index c26527890..54c874a36 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_lang_id_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Language Identification Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index 8ec6501d2..ac4558675 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_lang_id_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Language Identification Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/pdf2parquet/python/pyproject.toml b/transforms/language/pdf2parquet/python/pyproject.toml index 96b318466..926a062ae 100644 --- a/transforms/language/pdf2parquet/python/pyproject.toml +++ b/transforms/language/pdf2parquet/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_pdf2parquet_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "PDF2PARQUET Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/pdf2parquet/ray/pyproject.toml b/transforms/language/pdf2parquet/ray/pyproject.toml index 131e96445..0d11afcdd 100644 --- a/transforms/language/pdf2parquet/ray/pyproject.toml +++ b/transforms/language/pdf2parquet/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_pdf2parquet_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "PDF2PARQUET Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/pii_redactor/python/pyproject.toml b/transforms/language/pii_redactor/python/pyproject.toml index 67c06d532..55d4e8970 100644 --- a/transforms/language/pii_redactor/python/pyproject.toml +++ b/transforms/language/pii_redactor/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_pii_redactor_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "PII redactor Transform for Python" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml index 41493c3b5..4283df428 100644 --- a/transforms/language/pii_redactor/ray/pyproject.toml +++ b/transforms/language/pii_redactor/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_pii_redactor_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "PII Redactor Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/text_encoder/python/pyproject.toml b/transforms/language/text_encoder/python/pyproject.toml index 18d7bbdc9..e9f84fefd 100644 --- a/transforms/language/text_encoder/python/pyproject.toml +++ b/transforms/language/text_encoder/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_text_encoder_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Text Encoder Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/language/text_encoder/ray/pyproject.toml b/transforms/language/text_encoder/ray/pyproject.toml index 65d55e4d7..2735856aa 100644 --- a/transforms/language/text_encoder/ray/pyproject.toml +++ b/transforms/language/text_encoder/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_text_encoder_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Text Encoder Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/packaging/python/pyproject.toml b/transforms/packaging/python/pyproject.toml index 37e4c93da..8d760515a 100644 --- a/transforms/packaging/python/pyproject.toml +++ b/transforms/packaging/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_transforms" version = "0.2.2.dev0" -requires-python = ">=3.10,<3.12" +requires-python = ">=3.10,<3.13" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms" license = {text = "Apache-2.0"} diff --git a/transforms/packaging/ray/pyproject.toml b/transforms/packaging/ray/pyproject.toml index 159a97dc7..2f02d4c51 100644 --- a/transforms/packaging/ray/pyproject.toml +++ b/transforms/packaging/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "data_prep_toolkit_transforms_ray" version = "0.2.2.dev0" -requires-python = ">=3.10,<3.12" +requires-python = ">=3.10,<3.13" keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] description = "Data Preparation Toolkit Transforms using Ray" license = {text = "Apache-2.0"} diff --git a/transforms/universal/doc_id/python/pyproject.toml b/transforms/universal/doc_id/python/pyproject.toml index ff8b710e4..46d3f79f8 100644 --- a/transforms/universal/doc_id/python/pyproject.toml +++ b/transforms/universal/doc_id/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_id_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "ededup Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/doc_id/ray/pyproject.toml b/transforms/universal/doc_id/ray/pyproject.toml index 8fb2e2308..836454098 100644 --- a/transforms/universal/doc_id/ray/pyproject.toml +++ b/transforms/universal/doc_id/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_id_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "docid Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/doc_id/spark/pyproject.toml b/transforms/universal/doc_id/spark/pyproject.toml index 0f8c681b0..485174834 100644 --- a/transforms/universal/doc_id/spark/pyproject.toml +++ b/transforms/universal/doc_id/spark/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_doc_id_transform_spark" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Doc ID Spark Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/ededup/python/pyproject.toml b/transforms/universal/ededup/python/pyproject.toml index 846f0a4e3..59d0d72ee 100644 --- a/transforms/universal/ededup/python/pyproject.toml +++ b/transforms/universal/ededup/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_ededup_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "ededup Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/ededup/ray/pyproject.toml b/transforms/universal/ededup/ray/pyproject.toml index f1cfd13f5..886832947 100644 --- a/transforms/universal/ededup/ray/pyproject.toml +++ b/transforms/universal/ededup/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_ededup_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "ededup Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/fdedup/ray/pyproject.toml b/transforms/universal/fdedup/ray/pyproject.toml index d591bd4ef..3f2c8ba51 100644 --- a/transforms/universal/fdedup/ray/pyproject.toml +++ b/transforms/universal/fdedup/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_fdedup_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "fdedup Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml index db662d433..b9d781573 100644 --- a/transforms/universal/filter/python/pyproject.toml +++ b/transforms/universal/filter/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_filter_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Filter Transform for Python" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index f4797715e..5c63a90ff 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ b/transforms/universal/filter/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_filter_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Filter Transform for Ray" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/filter/spark/pyproject.toml b/transforms/universal/filter/spark/pyproject.toml index 8192dfd67..a8a0174b6 100644 --- a/transforms/universal/filter/spark/pyproject.toml +++ b/transforms/universal/filter/spark/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_filter_transform_spark" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Filter Spark Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/html2parquet/python/pyproject.toml b/transforms/universal/html2parquet/python/pyproject.toml index 9a6de3842..799bd7457 100644 --- a/transforms/universal/html2parquet/python/pyproject.toml +++ b/transforms/universal/html2parquet/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_html2parquet_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "HTML2PARQUET Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml index 4326fce58..9b1675a69 100644 --- a/transforms/universal/noop/python/pyproject.toml +++ b/transforms/universal/noop/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_noop_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "NOOP Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index 9e8c08b77..c4120753f 100644 --- a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_noop_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "NOOP Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml index b61865be8..633ee66bd 100644 --- a/transforms/universal/noop/spark/pyproject.toml +++ b/transforms/universal/noop/spark/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_noop_transform_spark" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "NOOP Spark Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/profiler/ray/pyproject.toml b/transforms/universal/profiler/ray/pyproject.toml index 03ccabc7c..1ea2df95e 100644 --- a/transforms/universal/profiler/ray/pyproject.toml +++ b/transforms/universal/profiler/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_profiler_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "profiler Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/resize/python/pyproject.toml b/transforms/universal/resize/python/pyproject.toml index b6fa082ab..2396e5b23 100644 --- a/transforms/universal/resize/python/pyproject.toml +++ b/transforms/universal/resize/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_resize_transform_python" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "resize Python Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/resize/ray/pyproject.toml b/transforms/universal/resize/ray/pyproject.toml index 347725124..249e40c7d 100644 --- a/transforms/universal/resize/ray/pyproject.toml +++ b/transforms/universal/resize/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_resize_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Resize Ray Transform" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml index 2b050767d..f69787b3d 100644 --- a/transforms/universal/tokenization/python/pyproject.toml +++ b/transforms/universal/tokenization/python/pyproject.toml @@ -2,7 +2,7 @@ name = "dpk_tokenization_transform_python" keywords = ["tokenizer", "data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Tokenization Transform for Python" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index 2cf84e08a..aa109bbc1 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_tokenization_transform_ray" version = "0.2.2.dev0" -requires-python = ">=3.10" +requires-python = ">=3.10,<3.13" description = "Tokenization Transform for Ray" license = {text = "Apache-2.0"} readme = {file = "README.md", content-type = "text/markdown"} From 95115ac3a7850bc90f9c581c4c217feec6dedfa1 Mon Sep 17 00:00:00 2001 From: Alexey Roytman Date: Fri, 27 Sep 2024 22:32:19 +0300 Subject: [PATCH 5/6] return default to Python 3.10 --- .make.defaults | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.make.defaults b/.make.defaults index 5df7bd933..f9f58500f 100644 --- a/.make.defaults +++ b/.make.defaults @@ -51,7 +51,7 @@ DOCKER_LOCAL_IMAGE=$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) DOCKER_SPARK_BASE_IMAGE_NAME=data-prep-kit-spark-$(SPARK_VERSION) DOCKER_SPARK_BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) -RAY_BASE_IMAGE?=docker.io/rayproject/ray:${RAY}-py312 +RAY_BASE_IMAGE?=docker.io/rayproject/ray:${RAY}-py310 # Deprecated in favor of DOCKER_REMOTE_IMAGE DOCKER_IMAGE?=$(DOCKER_REMOTE_IMAGE) KIND_CLUSTER_NAME=dataprep From 09644817358af437f34a485c16c0e9fbd814e845 Mon Sep 17 00:00:00 2001 From: Alexey Roytman Date: Fri, 4 Oct 2024 18:14:37 +0300 Subject: [PATCH 6/6] fix the repo_lvl_store_ray_cpus input parameter Signed-off-by: Alexey Roytman --- .../code/repo_level_ordering/kfp_ray/repo_level_order_wf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py index 42312ef3b..a36cc9c0c 100644 --- a/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py +++ b/transforms/code/repo_level_ordering/kfp_ray/repo_level_order_wf.py @@ -24,7 +24,7 @@ EXEC_SCRIPT_NAME: str = "repo_level_order_transform_ray.py" # components -base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest" +base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing_v2:latest" # path to kfp component specifications files component_spec_path = "../../../../kfp/kfp_ray_components/" @@ -138,7 +138,7 @@ def repo_level_order( repo_lvl_grouping_column: str = "repo_name", repo_lvl_store_type: str = "ray", repo_lvl_store_backend_dir: str = "", - repo_lvl_store_ray_cpus: float = "0.5", + repo_lvl_store_ray_cpus: float = 0.5, repo_lvl_store_ray_nworkers: int = 1, repo_lvl_sorting_enabled: bool = False, repo_lvl_sorting_algo: str = "SORT_BY_PATH",