Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create new dev2 pre-releases for both transforms and library with latest from docling 2.0 #765

Merged
merged 13 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ ifeq ($(USE_REPO_LIB_SRC), 1)
@# Help: Update pyproject.toml to depend on lib versions defined in .make.versions
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed \
-e 's/"data-prep-toolkit\[ray\]\([=><~][=]\).*"/"data-prep-toolkit[ray]\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-ray\([=><~][=]\).*"/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-spark\([=><~][=]\).*"/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-kfp\([=><~][=]\).*"/"data-prep-toolkit-kfp\1$(DPK_LIB_KFP_VERSION)"/' \
Expand All @@ -683,6 +684,7 @@ ifeq ($(USE_REPO_LIB_SRC), 1)
fi
if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/data-prep-toolkit\[ray\]\([=><~][=]\).*/data-prep-toolkit[ray]\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-ray\([=><~][=]\).*/data-prep-toolkit-ray\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-transforms\([=><~][=]\).*/data-prep-toolkit-transforms\1$(DPK_TRANSFORMS_VERSION)/' \
-e 's/data-prep-toolkit-spark\([=><~][=]\).*/data-prep-toolkit-spark\1$(DPK_LIB_VERSION)/' \
Expand Down
2 changes: 1 addition & 1 deletion .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2
DPK_MICRO_VERSION=2
# The suffix is generally always set in the main/development branch and only nulled out when creating release branches.
# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi.
DPK_VERSION_SUFFIX=.dev1
DPK_VERSION_SUFFIX=.dev2

DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX)

Expand Down
2 changes: 1 addition & 1 deletion data-processing-lib/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10,<3.13"
description = "Data Preparation Toolkit Library for Ray and Python"
Expand Down
10 changes: 8 additions & 2 deletions data-processing-lib/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,14 @@ setup::

set-versions: .check-env
$(MAKE) TOML_VERSION=$(DPK_LIB_VERSION) .defaults.update-toml
cat pyproject.toml | sed -e 's/"ray[default]==.*",/"ray[default]==$(RAY)",/' > tt.toml
mv tt.toml pyproject.toml
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed -e 's/"ray[default]==.*",/"ray[default]==$(RAY)",/' > tt.toml; \
mv tt.toml pyproject.toml; \
fi
if [ -e requirements.txt ]; then \
cat requirements.txt | sed -e 's/ray[default]==.*/ray[default]==$(RAY)/' > tt.txt; \
mv tt.txt requirements.txt; \
fi


build:: build-dist
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/spark/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_spark"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10,<3.13"
description = "Data Preparation Toolkit Library for Spark"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit==0.2.2.dev1",
"data-prep-toolkit==0.2.2.dev2",
"pyspark>=3.5.2",
"psutil>=6.0.0",
"PyYAML>=6.0.2"
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v1"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -13,7 +13,7 @@ authors = [
]
dependencies = [
"kfp==1.8.22",
"data-prep-toolkit-kfp-shared==0.2.2.dev1",
"data-prep-toolkit-kfp-shared==0.2.2.dev2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v2"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"kfp==2.8.0",
"kfp-kubernetes==1.2.0",
"data-prep-toolkit-kfp-shared==0.2.2.dev1",
"data-prep-toolkit-kfp-shared==0.2.2.dev2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_shared"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"requests",
"kubernetes",
"data-prep-toolkit[ray]==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
8 changes: 2 additions & 6 deletions transforms/.make.transforms
Original file line number Diff line number Diff line change
Expand Up @@ -343,15 +343,11 @@ minio-stop:
$(call check_defined, TRANSFORM_PYTHON_VERSION)
$(MAKE) .defaults.update-toml
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed \
-e 's/\("dpk[_-].*transform[_-]python[=<>~][=]\).*"/\1$(TRANSFORM_PYTHON_VERSION)"/' \
> tt.toml; \
cat pyproject.toml | sed -e 's/\("dpk[_-].*transform[_-]python[=<>~][=]\).*"/\1$(TRANSFORM_PYTHON_VERSION)"/'> tt.toml; \
mv tt.toml pyproject.toml; \
fi
if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/\(dpk[_-].*transform[_-]python[=<>~][=]\).*/\1$(TRANSFORM_PYTHON_VERSION)/' \
> tt.txt; \
cat requirements.txt | sed -e 's/\(dpk[_-].*transform[_-]python[=<>~][=]\).*/\1$(TRANSFORM_PYTHON_VERSION)/' > tt.txt; \
mv tt.txt requirements.txt; \
fi

Expand Down
28 changes: 14 additions & 14 deletions transforms/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,19 +85,19 @@ set-versions::

set-pkg-version:
@# Help: Set tag for this package and its dependencies
cat pyproject.toml | sed -e \
's/^version[ ]*=.*/version = "'${TRANSFORMS_PKG_VERSION}'"/' \
> tt
mv tt pyproject.toml
echo $(DPK_VERSION)
cat requirements.txt | sed -e \
's/data-prep-toolkit\([=><~][=]\).*/data-prep-toolkit\1$(DPK_VERSION)/' \
> tt
mv tt requirements.txt
cat requirements-ray.txt | sed -e \
's/data-prep-toolkit\[ray\]\([=><~][=]\).*/data-prep-toolkit\[ray\]\1$(DPK_VERSION)/' \
> tt
mv tt requirements-ray.txt
if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed -e 's/^version[ ]*=.*/version = "'${TRANSFORMS_PKG_VERSION}'"/' > tt; \
mv tt pyproject.toml; \
fi
#echo $(DPK_VERSION)
if [ -e requirements.txt ]; then \
cat requirements.txt | sed -e 's/data-prep-toolkit\([=><~][=]\).*/data-prep-toolkit\1$(DPK_VERSION)/' > tt; \
mv tt requirements.txt; \
fi
if [ -e requirements-ray.txt ]; then \
cat requirements-ray.txt | sed -e 's/data-prep-toolkit\[ray\]\([=><~][=]\).*/data-prep-toolkit\[ray\]\1$(DPK_VERSION)/' > tt; \
mv tt requirements-ray.txt; \
fi


build-pkg-dist:
Expand All @@ -122,7 +122,7 @@ test-pkg-dist:
-rm -fr venv
python -m venv venv
source venv/bin/activate && $(PYTHON) -m pip install '$(REPOROOT)/data-processing-lib/dist/data_prep_toolkit-$(DPK_VERSION)-py3-none-any.whl[dev,ray]'
source venv/bin/activate && $(PYTHON) -m pip install 'dist/data_prep_toolkit_transforms-$(DPK_TRANSFORMS_VERSION)-py3-none-any.whl[all]'
source venv/bin/activate && $(PYTHON) -m pip install 'dist/data_prep_toolkit_transforms-$(TRANSFORMS_PKG_VERSION)-py3-none-any.whl[all]'
for T in $(shell find . | grep '[ray| python]/test$$') ; do \
echo "running unit test on: $$T" ; \
source venv/bin/activate && $(PYTEST) $$T; \
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "code2parquet Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
parameterized
pandas
6 changes: 3 additions & 3 deletions transforms/code/code2parquet/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "code2parquet Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code2parquet-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-code2parquet-transform-python==0.2.2.dev2",
"parameterized",
"pandas",
]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/code_profiler/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "dpk_code_profiler_transform_python"
version = "0.2.2.dev0"
requires-python = ">=3.10"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Profiler Python Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_profiler/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
parameterized
pandas
aiolimiter==1.1.0
Expand Down
8 changes: 4 additions & 4 deletions transforms/code/code_profiler/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
[project]
name = "dpk_code_profiler_transform_ray"
version = "0.2.2.dev0"
requires-python = ">=3.10"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Profiler Ray Transform"
license = {text = "Apache-2.0"}
readme = {file = "README.md", content-type = "text/markdown"}
authors = [
{ name = "Pankaj Thorat", email = "[email protected]" },
]
dependencies = [
"dpk-code-profiler-transform-python==0.2.2.dev0",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code-profiler-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Quality Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
bs4==0.0.2
transformers==4.38.2
6 changes: 3 additions & 3 deletions transforms/code/code_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Code Quality Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Shivdeep Singh", email = "[email protected]" },
]
dependencies = [
"dpk-code-quality-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-code-quality-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License and Copyright Removal Transform for Python"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'

6 changes: 3 additions & 3 deletions transforms/code/header_cleanser/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License and copyright removal Transform for Ray"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Yash kalathiya", email = "[email protected]" },
]
dependencies = [
"dpk-header-cleanser-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-header-cleanser-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
"scancode-toolkit==32.1.0",
]

Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_license_select_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License Select Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
6 changes: 3 additions & 3 deletions transforms/code/license_select/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_license_select_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "License Select Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Mark Lewis", email = "[email protected]" },
]
dependencies = [
"dpk-license-select-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-license-select-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/malware/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_malware_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Malware Python Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,7 +9,7 @@ authors = [
{ name = "Takuya Goto", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit==0.2.2.dev1",
"data-prep-toolkit==0.2.2.dev2",
"clamd==1.0.2",
]

Expand Down
6 changes: 3 additions & 3 deletions transforms/code/malware/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_malware_transform_ray"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Malware Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Takuya Goto", email = "[email protected]" },
]
dependencies = [
"dpk-malware-transform-python==0.2.2.dev1",
"data-prep-toolkit[ray]==0.2.2.dev1",
"dpk-malware-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_proglang_select_transform_python"
version = "0.2.2.dev1"
version = "0.2.2.dev2"
requires-python = ">=3.10,<3.13"
description = "Programming Language Selection Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
data-prep-toolkit==0.2.2.dev1
data-prep-toolkit==0.2.2.dev2
Loading