Skip to content

Commit

Permalink
Merge pull request #765 from IBM/0.2.2.dev2-latest
Browse files Browse the repository at this point in the history
Create new dev2 pre-releases for both transforms and library with latest from docling 2.0
  • Loading branch information
touma-I authored Nov 4, 2024
2 parents 23f907d + 49815ad commit a41c719
Show file tree
Hide file tree
Showing 89 changed files with 177 additions and 163 deletions.
2 changes: 2 additions & 0 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ ifeq ($(USE_REPO_LIB_SRC), 1)
@# Help: Update pyproject.toml to depend on lib versions defined in .make.versions
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed \
-e 's/"data-prep-toolkit\[ray\]\([=><~][=]\).*"/"data-prep-toolkit[ray]\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-ray\([=><~][=]\).*"/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-spark\([=><~][=]\).*"/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-kfp\([=><~][=]\).*"/"data-prep-toolkit-kfp\1$(DPK_LIB_KFP_VERSION)"/' \
Expand All @@ -683,6 +684,7 @@ ifeq ($(USE_REPO_LIB_SRC), 1)
fi
if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/data-prep-toolkit\[ray\]\([=><~][=]\).*/data-prep-toolkit[ray]\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-ray\([=><~][=]\).*/data-prep-toolkit-ray\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-transforms\([=><~][=]\).*/data-prep-toolkit-transforms\1$(DPK_TRANSFORMS_VERSION)/' \
-e 's/data-prep-toolkit-spark\([=><~][=]\).*/data-prep-toolkit-spark\1$(DPK_LIB_VERSION)/' \
Expand Down
2 changes: 1 addition & 1 deletion .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2
DPK_MICRO_VERSION=2
# The suffix is generally always set in the main/development branch and only nulled out when creating release branches.
# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi.
DPK_VERSION_SUFFIX=.dev1
DPK_VERSION_SUFFIX=.dev2

DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX)

Expand Down
2 changes: 1 addition & 1 deletion data-processing-lib/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10,<3.13"
description = "Data Preparation Toolkit Library for Ray and Python"
Expand Down
10 changes: 8 additions & 2 deletions data-processing-lib/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,14 @@ setup::

# Re-pin versions in this directory's packaging files.
#   1. Runs the .defaults.update-toml target with TOML_VERSION set to
#      $(DPK_LIB_VERSION) (that target is defined outside this file).
#   2. Rewrites the `ray[default]==<version>` pin to $(RAY) in pyproject.toml
#      and in requirements.txt; each file is touched only if it exists.
# The brackets of `ray[default]` are escaped on the pattern side of each sed
# expression: left unescaped, `[default]` is a bracket expression that matches
# a single character, so the literal dependency string would never match and
# the pin would silently stay unchanged.
# sed writes to a scratch file that replaces the original only when sed
# succeeded (`&&`), so a failed rewrite cannot truncate the real file.
set-versions: .check-env
	$(MAKE) TOML_VERSION=$(DPK_LIB_VERSION) .defaults.update-toml
	if [ -e pyproject.toml ]; then \
		sed -e 's/"ray\[default\]==.*",/"ray[default]==$(RAY)",/' pyproject.toml > tt.toml && \
		mv tt.toml pyproject.toml; \
	fi
	if [ -e requirements.txt ]; then \
		sed -e 's/ray\[default\]==.*/ray[default]==$(RAY)/' requirements.txt > tt.txt && \
		mv tt.txt requirements.txt; \
	fi


build:: build-dist
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/spark/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_spark"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10,<3.13"
description = "Data Preparation Toolkit Library for Spark"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit==0.2.2.dev1",
"data-prep-toolkit==0.2.2.dev2",
"pyspark>=3.5.2",
"psutil>=6.0.0",
"PyYAML>=6.0.2"
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v1"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -13,7 +13,7 @@ authors = [
]
dependencies = [
"kfp==1.8.22",
"data-prep-toolkit-kfp-shared==0.2.2.dev1",
"data-prep-toolkit-kfp-shared==0.2.2.dev2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v2"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"kfp==2.8.0",
"kfp-kubernetes==1.2.0",
"data-prep-toolkit-kfp-shared==0.2.2.dev1",
"data-prep-toolkit-kfp-shared==0.2.2.dev2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_shared"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"requests",
"kubernetes",
"data-prep-toolkit[ray]==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
8 changes: 2 additions & 6 deletions transforms/.make.transforms
Original file line number Diff line number Diff line change
Expand Up @@ -343,15 +343,11 @@ minio-stop:
$(call check_defined, TRANSFORM_PYTHON_VERSION)
$(MAKE) .defaults.update-toml
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed \
-e 's/\("dpk[_-].*transform[_-]python[=<>~][=]\).*"/\1$(TRANSFORM_PYTHON_VERSION)"/' \
> tt.toml; \
cat pyproject.toml | sed -e 's/\("dpk[_-].*transform[_-]python[=<>~][=]\).*"/\1$(TRANSFORM_PYTHON_VERSION)"/'> tt.toml; \
mv tt.toml pyproject.toml; \
fi
if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/\(dpk[_-].*transform[_-]python[=<>~][=]\).*/\1$(TRANSFORM_PYTHON_VERSION)/' \
> tt.txt; \
cat requirements.txt | sed -e 's/\(dpk[_-].*transform[_-]python[=<>~][=]\).*/\1$(TRANSFORM_PYTHON_VERSION)/' > tt.txt; \
mv tt.txt requirements.txt; \
fi

Expand Down
28 changes: 14 additions & 14 deletions transforms/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,19 +85,19 @@ set-versions::

# Stamp the package version and re-pin its data-prep-toolkit dependencies.
#   - pyproject.toml:       the line starting with `version =` is set to
#                           $(TRANSFORMS_PKG_VERSION).
#   - requirements.txt:     `data-prep-toolkit<op>…` is re-pinned to
#                           $(DPK_VERSION), keeping the two-character
#                           comparison operator (==, >=, <=, ~=) as found.
#   - requirements-ray.txt: same, for `data-prep-toolkit[ray]<op>…`.
# Every file is rewritten only if it exists. sed writes to the scratch file
# `tt`, which replaces the original only when sed succeeded (`&&`), so a
# failed rewrite cannot clobber the real file with partial output.
set-pkg-version:
	@# Help: Set tag for this package and its dependencies
	if [ -e pyproject.toml ]; then \
		sed -e 's/^version[ ]*=.*/version = "'${TRANSFORMS_PKG_VERSION}'"/' pyproject.toml > tt && \
		mv tt pyproject.toml; \
	fi
	if [ -e requirements.txt ]; then \
		sed -e 's/data-prep-toolkit\([=><~][=]\).*/data-prep-toolkit\1$(DPK_VERSION)/' requirements.txt > tt && \
		mv tt requirements.txt; \
	fi
	if [ -e requirements-ray.txt ]; then \
		sed -e 's/data-prep-toolkit\[ray\]\([=><~][=]\).*/data-prep-toolkit\[ray\]\1$(DPK_VERSION)/' requirements-ray.txt > tt && \
		mv tt requirements-ray.txt; \
	fi


build-pkg-dist:
Expand All @@ -122,7 +122,7 @@ test-pkg-dist:
-rm -fr venv
python -m venv venv
source venv/bin/activate && $(PYTHON) -m pip install '$(REPOROOT)/data-processing-lib/dist/data_prep_toolkit-$(DPK_VERSION)-py3-none-any.whl[dev,ray]'
source venv/bin/activate && $(PYTHON) -m pip install 'dist/data_prep_toolkit_transforms-$(DPK_TRANSFORMS_VERSION)-py3-none-any.whl[all]'
source venv/bin/activate && $(PYTHON) -m pip install 'dist/data_prep_toolkit_transforms-$(TRANSFORMS_PKG_VERSION)-py3-none-any.whl[all]'
for T in $(shell find . | grep '[ray| python]/test$$') ; do \
echo "running unit test on: $$T" ; \
source venv/bin/activate && $(PYTEST) $$T; \
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "code2parquet Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
parameterized
pandas
6 changes: 3 additions & 3 deletions transforms/code/code2parquet/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "code2parquet Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code2parquet-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-code2parquet-transform-python==0.2.2.dev2",
"parameterized",
"pandas",
]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/code_profiler/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "dpk_code_profiler_transform_python"
version = "0.2.2.dev0"
requires-python = ">=3.10"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Profiler Python Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_profiler/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
parameterized
pandas
aiolimiter==1.1.0
Expand Down
8 changes: 4 additions & 4 deletions transforms/code/code_profiler/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
[project]
name = "dpk_code_profiler_transform_ray"
version = "0.2.2.dev0"
requires-python = ">=3.10"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Profiler Ray Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
authors = [
{ name = "Pankaj Thorat", email = "[email protected]" },
]
dependencies = [
"dpk-code-profiler-transform-python==0.2.2.dev0",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code-profiler-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Quality Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
bs4==0.0.2
transformers==4.38.2
6 changes: 3 additions & 3 deletions transforms/code/code_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Quality Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Shivdeep Singh", email = "[email protected]" },
]
dependencies = [
"dpk-code-quality-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code-quality-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License and Copyright Removal Transform for Python"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'

6 changes: 3 additions & 3 deletions transforms/code/header_cleanser/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License and copyright removal Transform for Ray"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Yash kalathiya", email = "[email protected]" },
]
dependencies = [
"dpk-header-cleanser-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-header-cleanser-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
"scancode-toolkit==32.1.0",
]

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_license_select_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License Select Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
6 changes: 3 additions & 3 deletions transforms/code/license_select/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_license_select_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License Select Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Mark Lewis", email = "[email protected]" },
]
dependencies = [
"dpk-license-select-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-license-select-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/malware/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_malware_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Malware Python Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,7 +9,7 @@ authors = [
{ name = "Takuya Goto", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit==0.2.2.dev1",
"data-prep-toolkit==0.2.2.dev2",
"clamd==1.0.2",
]

Expand Down
6 changes: 3 additions & 3 deletions transforms/code/malware/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_malware_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Malware Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Takuya Goto", email = "[email protected]" },
]
dependencies = [
"dpk-malware-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-malware-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_proglang_select_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Programming Language Selection Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
Loading

0 comments on commit a41c719

Please sign in to comment.