diff --git a/.make.defaults b/.make.defaults
index f9f58500f..e1bd5275a 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -235,6 +235,10 @@ __check_defined = \
cp -p -R ${LIB_PATH}/src ${LIB_NAME}
cp -p -R ${LIB_PATH}/pyproject.toml ${LIB_NAME}
cp -p -R ${LIB_PATH}/README.md ${LIB_NAME}
+ if [ -e ${LIB_PATH}/requirements.txt ]; then \
+ cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \
+ fi
+
# Build and image using the local Dockerfile and make the data-processing-lib/python
# available in the current directory for use by the Dockerfile (i.e. to install the library).
@@ -591,8 +595,9 @@ MINIO_ADMIN_PWD= localminiosecretkey
# Updates the versions references to our repo source as defined in .make.versions
.PHONY: .defaults.__update-toml-lib-dep-versions
.defaults.__update-toml-lib-dep-versions:
+ifeq ($(USE_REPO_LIB_SRC), 1)
@# Help: Update pyproject.toml to depend on lib versions defined in .make.versions
- @if [ -e pyproject.toml ]; then \
+ if [ -e pyproject.toml ]; then \
cat pyproject.toml | sed \
-e 's/"data-prep-toolkit-ray\([=><~][=]\).*"/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)"/' \
-e 's/"data-prep-toolkit-spark\([=><~][=]\).*"/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)"/' \
@@ -603,7 +608,7 @@ MINIO_ADMIN_PWD= localminiosecretkey
> tt.toml; \
mv tt.toml pyproject.toml; \
fi
- @if [ -e requirements.txt ]; then \
+ if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/data-prep-toolkit-ray\([=><~][=]\).*/data-prep-toolkit-ray\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-transforms\([=><~][=]\).*/data-prep-toolkit-transforms\1$(DPK_TRANSFORMS_VERSION)/' \
@@ -615,6 +620,7 @@ MINIO_ADMIN_PWD= localminiosecretkey
> tt.txt; \
mv tt.txt requirements.txt; \
fi
+endif
# Build the distribution, usually in preparation for publishing using ith the .defaults.publish-dist target
.PHONY: .defaults.build-dist
diff --git a/.make.versions b/.make.versions
index dd599aa04..4346291cc 100644
--- a/.make.versions
+++ b/.make.versions
@@ -25,9 +25,9 @@ DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_
# publish docker images with latest tag
ifeq ($(DPK_VERSION_SUFFIX), )
- DOCKER_IMAGE_VERSION=$(DPK_VERSION)
+ DOCKER_IMAGE_VERSION?=$(DPK_VERSION)
else
- DOCKER_IMAGE_VERSION=latest
+ DOCKER_IMAGE_VERSION?=latest
endif
# Data prep lab wheel version
@@ -39,82 +39,6 @@ DPK_LIB_KFP_SHARED=$(DPK_VERSION)
KFP_DOCKER_VERSION=$(DOCKER_IMAGE_VERSION)
KFP_DOCKER_VERSION_v2=$(DOCKER_IMAGE_VERSION)
-# Begin transform versions/tags
-BLOCKLIST_VERSION=$(DPK_VERSION)
-
-DOC_ID_PYTHON_VERSION=$(DPK_VERSION)
-DOC_ID_RAY_VERSION=$(DPK_VERSION)
-DOC_ID_SPARK_VERSION=$(DPK_VERSION)
-
-EDEDUP_PYTHON_VERSION=$(DPK_VERSION)
-EDEDUP_RAY_VERSION=$(DPK_VERSION)
-
-FDEDUP_RAY_VERSION=$(DPK_VERSION)
-
-FILTER_PYTHON_VERSION=$(DPK_VERSION)
-FILTER_RAY_VERSION=$(DPK_VERSION)
-FILTER_SPARK_VERSION=$(DPK_VERSION)
-
-NOOP_PYTHON_VERSION=$(DPK_VERSION)
-NOOP_RAY_VERSION=$(DPK_VERSION)
-NOOP_SPARK_VERSION=$(DPK_VERSION)
-
-PROFILER_PYTHON_VERSION=$(DPK_VERSION)
-PROFILER_RAY_VERSION=$(DPK_VERSION)
-PROFILER_SPARK_VERSION=$(DPK_VERSION)
-
-RESIZE_PYTHON_VERSION=$(DPK_VERSION)
-RESIZE_RAY_VERSION=$(DPK_VERSION)
-RESIZE_SPARK_VERSION=$(DPK_VERSION)
-
-LANG_ID_PYTHON_VERSION=$(DPK_VERSION)
-LANG_ID_RAY_VERSION=$(DPK_VERSION)
-
-TOKENIZATION_RAY_VERSION=$(DPK_VERSION)
-TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION)
-
-MALWARE_RAY_VERSION=$(DPK_VERSION)
-MALWARE_PYTHON_VERSION=$(DPK_VERSION)
-
-PROGLANG_SELECT_PYTHON_VERSION=$(DPK_VERSION)
-PROGLANG_SELECT_RAY_VERSION=$(DPK_VERSION)
-
-DOC_QUALITY_PYTHON_VERSION=$(DPK_VERSION)
-DOC_QUALITY_RAY_VERSION=$(DPK_VERSION)
-
-CODE_QUALITY_RAY_VERSION=$(DPK_VERSION)
-CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION)
-
-CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
-CODE2PARQUET_RAY_VERSION=$(DPK_VERSION)
-INGEST_TO_PARQUET_VERSION=$(DPK_VERSION)
-REPO_LVL_ORDER_RAY_VERSION=$(DPK_VERSION)
-
-PDF2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
-PDF2PARQUET_RAY_VERSION=$(DPK_VERSION)
-
-DOC_CHUNK_PYTHON_VERSION=$(DPK_VERSION)
-DOC_CHUNK_RAY_VERSION=$(DPK_VERSION)
-
-TEXT_ENCODER_PYTHON_VERSION=$(DPK_VERSION)
-TEXT_ENCODER_RAY_VERSION=$(DPK_VERSION)
-
-HEADER_CLEANSER_PYTHON_VERSION=$(DPK_VERSION)
-HEADER_CLEANSER_RAY_VERSION=$(DPK_VERSION)
-
-LICENSE_SELECT_PYTHON_VERSION=$(DPK_VERSION)
-LICENSE_SELECT_RAY_VERSION=$(DPK_VERSION)
-
-PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION)
-
-HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
-
-DPK_TRANSFORMS_VERSION=$(DPK_VERSION)
-
-SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION=$(DPK_VERSION)
-SYNTACTIC_CONCEPT_EXTRACTOR_RAY_VERSION=$(DPK_VERSION)
-
-
################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.
diff --git a/README.md b/README.md
index aeec4ef70..b4d372356 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,8 @@ The goal is to offer high-level APIs for developers to quickly get started in wo
- [Scaling transforms from laptop to cluster](#laptop_cluster)
- [Repository Use and Navigation](doc/repo.md)
- [How to Contribute](CONTRIBUTING.md)
-- [Papers and Talks](#talks_papers)
+- [Talks and Papers](#talks_papers)
+- [Citations](#citations)
## 📖 About
@@ -131,7 +132,7 @@ The matrix below shows the the combination of modules and supported runtimes. Al
| **Data Ingestion** | | | | |
| [Code (from zip) to Parquet](transforms/code/code2parquet/python/README.md) | :white_check_mark: | :white_check_mark: | | :white_check_mark: |
| [PDF to Parquet](transforms/language/pdf2parquet/python/README.md) | :white_check_mark: | :white_check_mark: | | :white_check_mark: |
-| [HTML to Parquet](transforms/universal/html2parquet/python/README.md) | :white_check_mark: | | | |
+| [HTML to Parquet](transforms/language/html2parquet/python/README.md) | :white_check_mark: | :white_check_mark: | | |
| **Universal (Code & Language)** | | | | |
| [Exact dedup filter](transforms/universal/ededup/ray/README.md) | :white_check_mark: | :white_check_mark: | | :white_check_mark: |
| [Fuzzy dedup filter](transforms/universal/fdedup/ray/README.md) | | :white_check_mark: | | :white_check_mark: |
@@ -220,3 +221,23 @@ You can run transforms via docker image or using virtual environments. This [doc
5. Talk on "Hands on session for fine tuning LLMs" [Video](https://www.youtube.com/watch?v=VEHIA3E64DM)
6. Talk on "Build your own data preparation module using data-prep-kit" [Video](https://www.youtube.com/watch?v=0WUMG6HIgMg)
+## Citations
+
+If you use Data Prep Kit in your research, please cite our paper:
+
+```bibtex
+@misc{wood2024dataprepkitgettingdataready,
+ title={Data-Prep-Kit: getting your data ready for LLM application development},
+ author={David Wood and Boris Lublinsky and Alexy Roytman and Shivdeep Singh
+ and Abdulhamid Adebayo and Revital Eres and Mohammad Nassar and Hima Patel
+ and Yousaf Shah and Constantin Adam and Petros Zerfos and Nirmit Desai
+ and Daiki Tsuzuku and Takuya Goto and Michele Dolfi and Saptha Surendran
+ and Paramesvaran Selvam and Sungeun An and Yuan Chi Chang and Dhiraj Joshi
+ and Hajar Emami-Gohari and Xuan-Hong Dang and Yan Koyfman and Shahrokh Daijavad},
+ year={2024},
+ eprint={2409.18164},
+ archivePrefix={arXiv},
+ primaryClass={cs.AI},
+ url={https://arxiv.org/abs/2409.18164},
+}
+```
\ No newline at end of file
diff --git a/transforms/code/code2parquet/kfp_ray/Makefile b/transforms/code/code2parquet/kfp_ray/Makefile
index 6b9e640d1..847a743b8 100644
--- a/transforms/code/code2parquet/kfp_ray/Makefile
+++ b/transforms/code/code2parquet/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/code/code2parquet/python/Makefile b/transforms/code/code2parquet/python/Makefile
index d0403e601..e27e402c7 100644
--- a/transforms/code/code2parquet/python/Makefile
+++ b/transforms/code/code2parquet/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=code2parquet
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/code/code2parquet/ray/Makefile b/transforms/code/code2parquet/ray/Makefile
index bc1580987..42383457f 100644
--- a/transforms/code/code2parquet/ray/Makefile
+++ b/transforms/code/code2parquet/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=code2parquet
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/code/code2parquet/transform.config b/transforms/code/code2parquet/transform.config
new file mode 100644
index 000000000..2049a2261
--- /dev/null
+++ b/transforms/code/code2parquet/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=code2parquet
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
+CODE2PARQUET_RAY_VERSION=$(CODE2PARQUET_PYTHON_VERSION)
+CODE2PARQUET_SPARK_VERSION=$(CODE2PARQUET_PYTHON_VERSION)
+
diff --git a/transforms/code/code_quality/kfp_ray/Makefile b/transforms/code/code_quality/kfp_ray/Makefile
index a22efcf8e..1cab0d878 100644
--- a/transforms/code/code_quality/kfp_ray/Makefile
+++ b/transforms/code/code_quality/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/code/code_quality/python/Makefile b/transforms/code/code_quality/python/Makefile
index 1b50d41b8..cd9811f79 100644
--- a/transforms/code/code_quality/python/Makefile
+++ b/transforms/code/code_quality/python/Makefile
@@ -1,10 +1,21 @@
-
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=code_quality
+# Include the common configuration for this transform
+include ../transform.config
# Use default rule inherited from makefile.common
clean:: .transforms.clean
diff --git a/transforms/code/code_quality/ray/Makefile b/transforms/code/code_quality/ray/Makefile
index 720cf9c00..5a744e861 100644
--- a/transforms/code/code_quality/ray/Makefile
+++ b/transforms/code/code_quality/ray/Makefile
@@ -1,10 +1,21 @@
-
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=code_quality
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
diff --git a/transforms/code/code_quality/transform.config b/transforms/code/code_quality/transform.config
new file mode 100644
index 000000000..4ebec625a
--- /dev/null
+++ b/transforms/code/code_quality/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=code_quality
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION)
+CODE_QUALITY_RAY_VERSION=$(CODE_QUALITY_PYTHON_VERSION)
+CODE_QUALITY_SPARK_VERSION=$(CODE_QUALITY_PYTHON_VERSION)
+
diff --git a/transforms/code/header_cleanser/kfp_ray/Makefile b/transforms/code/header_cleanser/kfp_ray/Makefile.disable-cicd
similarity index 91%
rename from transforms/code/header_cleanser/kfp_ray/Makefile
rename to transforms/code/header_cleanser/kfp_ray/Makefile.disable-cicd
index 05a343384..411cc97f1 100644
--- a/transforms/code/header_cleanser/kfp_ray/Makefile
+++ b/transforms/code/header_cleanser/kfp_ray/Makefile.disable-cicd
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/code/header_cleanser/python/Makefile b/transforms/code/header_cleanser/python/Makefile
index 1e3fa68fd..0a91a14d6 100644
--- a/transforms/code/header_cleanser/python/Makefile
+++ b/transforms/code/header_cleanser/python/Makefile
@@ -1,10 +1,21 @@
-
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=header_cleanser
+# Include the common configuration for this transform
+include ../transform.config
# Use default rule inherited from makefile.common
clean:: .transforms.clean
diff --git a/transforms/code/header_cleanser/ray/Dockerfile b/transforms/code/header_cleanser/ray/Dockerfile
index 6521c8662..16f8cf69c 100644
--- a/transforms/code/header_cleanser/ray/Dockerfile
+++ b/transforms/code/header_cleanser/ray/Dockerfile
@@ -17,9 +17,11 @@ COPY --chown=ray:users pyproject.toml pyproject.toml
RUN pip install --no-cache-dir -e .
# Install system dependencies, including libgomp1
+USER root
RUN sudo apt-get update && sudo apt-get install -y \
libgomp1 \
&& sudo rm -rf /var/lib/apt/lists/*
+USER ray
# copy source data
COPY ./src/header_cleanser_transform_ray.py .
@@ -36,4 +38,4 @@ ENV PYTHONPATH /home/ray
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
-LABEL git-commit=$GIT_COMMIT
\ No newline at end of file
+LABEL git-commit=$GIT_COMMIT
diff --git a/transforms/code/header_cleanser/ray/Makefile b/transforms/code/header_cleanser/ray/Makefile
index d223bc1cb..9d83c71d0 100644
--- a/transforms/code/header_cleanser/ray/Makefile
+++ b/transforms/code/header_cleanser/ray/Makefile
@@ -1,10 +1,21 @@
-
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=header_cleanser
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
diff --git a/transforms/code/header_cleanser/transform.config b/transforms/code/header_cleanser/transform.config
new file mode 100644
index 000000000..e1da13d0c
--- /dev/null
+++ b/transforms/code/header_cleanser/transform.config
@@ -0,0 +1,18 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=header_cleanser
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+HEADER_CLEANSER_PYTHON_VERSION=$(DPK_VERSION)
+HEADER_CLEANSER_RAY_VERSION=$(HEADER_CLEANSER_PYTHON_VERSION)
diff --git a/transforms/code/license_select/kfp_ray/Makefile.disable-cicd b/transforms/code/license_select/kfp_ray/Makefile.disable-cicd
index 9f21f3d58..28e244faa 100644
--- a/transforms/code/license_select/kfp_ray/Makefile.disable-cicd
+++ b/transforms/code/license_select/kfp_ray/Makefile.disable-cicd
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/code/license_select/python/Makefile b/transforms/code/license_select/python/Makefile
index 7077c801f..2f3825fda 100644
--- a/transforms/code/license_select/python/Makefile
+++ b/transforms/code/license_select/python/Makefile
@@ -1,10 +1,22 @@
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=license_select
-# $(REPOROOT)/.make.versions file contains the versions
+# Include the common configuration for this transform
+include ../transform.config
+
DOCKER_IMAGE_VERSION=${LICENSE_SELECT_PYTHON_VERSION}
# Use default rule inherited from makefile.common
diff --git a/transforms/code/license_select/ray/Makefile b/transforms/code/license_select/ray/Makefile
index 25fe6ab8a..d69cf00ca 100644
--- a/transforms/code/license_select/ray/Makefile
+++ b/transforms/code/license_select/ray/Makefile
@@ -1,15 +1,24 @@
-
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
BASE_IMAGE=$(RAY_BASE_IMAGE)
-TRANSFORM_NAME=license_select
-# $(REPOROOT)/.make.versions file contains the versions
+
DOCKER_IMAGE_VERSION=${LICENSE_SELECT_RAY_VERSION}
# Use default rule inherited from makefile.common
diff --git a/transforms/code/license_select/transform.config b/transforms/code/license_select/transform.config
new file mode 100644
index 000000000..bba10d3e5
--- /dev/null
+++ b/transforms/code/license_select/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=license_select
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+LICENSE_SELECT_PYTHON_VERSION=$(DPK_VERSION)
+LICENSE_SELECT_RAY_VERSION=$(LICENSE_SELECT_PYTHON_VERSION)
+LICENSE_SELECT_SPARK_VERSION=$(LICENSE_SELECT_PYTHON_VERSION)
+
diff --git a/transforms/code/malware/kfp_ray/Makefile b/transforms/code/malware/kfp_ray/Makefile
index 7b423d8bd..0446e2d29 100644
--- a/transforms/code/malware/kfp_ray/Makefile
+++ b/transforms/code/malware/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/code/malware/python/Makefile b/transforms/code/malware/python/Makefile
index 99174e9a1..bd523b629 100644
--- a/transforms/code/malware/python/Makefile
+++ b/transforms/code/malware/python/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=malware
+# Include the common configuration for this transform
+include ../transform.config
OS := $(shell uname -s)
ifeq ($(OS),Darwin)
diff --git a/transforms/code/malware/ray/Makefile b/transforms/code/malware/ray/Makefile
index 99515c036..a92cbd529 100644
--- a/transforms/code/malware/ray/Makefile
+++ b/transforms/code/malware/ray/Makefile
@@ -1,12 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=malware
+# Include the common configuration for this transform
+include ../transform.config
+
BASE_IMAGE=${RAY_BASE_IMAGE}
OS := $(shell uname -s)
ifeq ($(OS),Darwin)
diff --git a/transforms/code/malware/transform.config b/transforms/code/malware/transform.config
new file mode 100644
index 000000000..be0b6651d
--- /dev/null
+++ b/transforms/code/malware/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=malware
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+MALWARE_PYTHON_VERSION=$(DPK_VERSION)
+MALWARE_RAY_VERSION=$(MALWARE_PYTHON_VERSION)
+MALWARE_SPARK_VERSION=$(MALWARE_PYTHON_VERSION)
+
diff --git a/transforms/code/proglang_select/kfp_ray/Makefile b/transforms/code/proglang_select/kfp_ray/Makefile
index abbf75c8c..b8a21bca8 100644
--- a/transforms/code/proglang_select/kfp_ray/Makefile
+++ b/transforms/code/proglang_select/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/code/proglang_select/python/Makefile b/transforms/code/proglang_select/python/Makefile
index 2cec4f6db..7d64e0a90 100644
--- a/transforms/code/proglang_select/python/Makefile
+++ b/transforms/code/proglang_select/python/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=proglang_select
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.python-venv
diff --git a/transforms/code/proglang_select/ray/Makefile b/transforms/code/proglang_select/ray/Makefile
index 82db54db7..20315a234 100644
--- a/transforms/code/proglang_select/ray/Makefile
+++ b/transforms/code/proglang_select/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=proglang_select
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/code/proglang_select/transform.config b/transforms/code/proglang_select/transform.config
new file mode 100644
index 000000000..c32cb9775
--- /dev/null
+++ b/transforms/code/proglang_select/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=proglang_select
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+PROGLANG_SELECT_PYTHON_VERSION=$(DPK_VERSION)
+PROGLANG_SELECT_RAY_VERSION=$(PROGLANG_SELECT_PYTHON_VERSION)
+PROGLANG_SELECT_SPARK_VERSION=$(PROGLANG_SELECT_PYTHON_VERSION)
+
diff --git a/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd b/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd
index ef3765e31..5b2425357 100644
--- a/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd
+++ b/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/code/repo_level_ordering/ray/Makefile b/transforms/code/repo_level_ordering/ray/Makefile
index 83f8692de..8d2f784fb 100644
--- a/transforms/code/repo_level_ordering/ray/Makefile
+++ b/transforms/code/repo_level_ordering/ray/Makefile
@@ -1,15 +1,23 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-BASE_IMAGE=$(RAY_BASE_IMAGE)
+# Include the common configuration for this transform
+include ../transform.config
-TRANSFORM_NAME=repo_level_order
+BASE_IMAGE=$(RAY_BASE_IMAGE)
venv:: .transforms.ray-venv
diff --git a/transforms/code/repo_level_ordering/transform.config b/transforms/code/repo_level_ordering/transform.config
new file mode 100644
index 000000000..0d82c6377
--- /dev/null
+++ b/transforms/code/repo_level_ordering/transform.config
@@ -0,0 +1,19 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=repo_level_order
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+REPO_LVL_ORDER_RAY_VERSION=$(DPK_VERSION)
+
+
diff --git a/transforms/language/doc_chunk/kfp_ray/Makefile b/transforms/language/doc_chunk/kfp_ray/Makefile
index 189b36ea5..30e912e33 100644
--- a/transforms/language/doc_chunk/kfp_ray/Makefile
+++ b/transforms/language/doc_chunk/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/language/doc_chunk/python/Makefile b/transforms/language/doc_chunk/python/Makefile
index a6fbe35dc..2f2a7e789 100644
--- a/transforms/language/doc_chunk/python/Makefile
+++ b/transforms/language/doc_chunk/python/Makefile
@@ -1,14 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=doc_chunk
+# Include the common configuration for this transform
+include ../transform.config
LINUX_WITH_CPU_TORCH?=true
OS := $(shell uname -s)
diff --git a/transforms/language/doc_chunk/ray/Makefile b/transforms/language/doc_chunk/ray/Makefile
index 6b9b4ae6a..b4f394f84 100644
--- a/transforms/language/doc_chunk/ray/Makefile
+++ b/transforms/language/doc_chunk/ray/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=doc_chunk
+# Include the common configuration for this transform
+include ../transform.config
LINUX_WITH_CPU_TORCH?=true
OS := $(shell uname -s)
diff --git a/transforms/language/doc_chunk/transform.config b/transforms/language/doc_chunk/transform.config
new file mode 100644
index 000000000..f433f360b
--- /dev/null
+++ b/transforms/language/doc_chunk/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=doc_chunk
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+DOC_CHUNK_PYTHON_VERSION=$(DPK_VERSION)
+DOC_CHUNK_RAY_VERSION=$(DOC_CHUNK_PYTHON_VERSION)
+DOC_CHUNK_SPARK_VERSION=$(DOC_CHUNK_PYTHON_VERSION)
+
diff --git a/transforms/language/doc_quality/kfp_ray/Makefile b/transforms/language/doc_quality/kfp_ray/Makefile
index 004f17616..9f5e93615 100644
--- a/transforms/language/doc_quality/kfp_ray/Makefile
+++ b/transforms/language/doc_quality/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/language/doc_quality/python/Makefile b/transforms/language/doc_quality/python/Makefile
index 684ce47ae..f0f309400 100644
--- a/transforms/language/doc_quality/python/Makefile
+++ b/transforms/language/doc_quality/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=doc_quality
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.test-src test-image
diff --git a/transforms/language/doc_quality/ray/Makefile b/transforms/language/doc_quality/ray/Makefile
index d462543a1..dd278af88 100644
--- a/transforms/language/doc_quality/ray/Makefile
+++ b/transforms/language/doc_quality/ray/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=doc_quality
-# $(REPOROOT)/.make.versions file contains the versions
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/language/doc_quality/transform.config b/transforms/language/doc_quality/transform.config
new file mode 100644
index 000000000..2ece0e071
--- /dev/null
+++ b/transforms/language/doc_quality/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=doc_quality
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+DOC_QUALITY_PYTHON_VERSION=$(DPK_VERSION)
+DOC_QUALITY_RAY_VERSION=$(DOC_QUALITY_PYTHON_VERSION)
+DOC_QUALITY_SPARK_VERSION=$(DOC_QUALITY_PYTHON_VERSION)
+
diff --git a/transforms/language/html2parquet/python/Makefile b/transforms/language/html2parquet/python/Makefile
index 0e552d5be..284bb8e8a 100644
--- a/transforms/language/html2parquet/python/Makefile
+++ b/transforms/language/html2parquet/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME= html2parquet
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/language/html2parquet/ray/Makefile b/transforms/language/html2parquet/ray/Makefile
index 30c908259..1667be8b9 100644
--- a/transforms/language/html2parquet/ray/Makefile
+++ b/transforms/language/html2parquet/ray/Makefile
@@ -1,15 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
+include $(REPOROOT)/transforms/.make.transforms
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME= html2parquet
+# Include the common configuration for this transform
+include ../transform.config
-include $(REPOROOT)/transforms/.make.transforms
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
@@ -59,3 +66,6 @@ kind-load-image:: .transforms.kind-load-image
docker-load-image: .defaults.docker-load-image
docker-save-image: .defaults.docker-save-image
+
+
+
diff --git a/transforms/language/html2parquet/ray/requirements.txt b/transforms/language/html2parquet/ray/requirements.txt
index dc2111e9e..dc796d602 100644
--- a/transforms/language/html2parquet/ray/requirements.txt
+++ b/transforms/language/html2parquet/ray/requirements.txt
@@ -1,3 +1,2 @@
dpk-html2parquet-transform-python==0.2.2.dev0
data-prep-toolkit-ray==0.2.2.dev0
-trafilatura==1.12.0
\ No newline at end of file
diff --git a/transforms/language/html2parquet/transform.config b/transforms/language/html2parquet/transform.config
new file mode 100644
index 000000000..10847c6af
--- /dev/null
+++ b/transforms/language/html2parquet/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=html2parquet
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
+HTML2PARQUET_RAY_VERSION=$(HTML2PARQUET_PYTHON_VERSION)
+HTML2PARQUET_SPARK_VERSION=$(HTML2PARQUET_PYTHON_VERSION)
+
diff --git a/transforms/language/lang_id/kfp_ray/Makefile b/transforms/language/lang_id/kfp_ray/Makefile
index b8f11ffc8..fd2c42d8e 100644
--- a/transforms/language/lang_id/kfp_ray/Makefile
+++ b/transforms/language/lang_id/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/language/lang_id/python/Makefile b/transforms/language/lang_id/python/Makefile
index 441f6093d..972ccb729 100644
--- a/transforms/language/lang_id/python/Makefile
+++ b/transforms/language/lang_id/python/Makefile
@@ -1,13 +1,20 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
+include $(REPOROOT)/transforms/.make.transforms
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=lang_id
+# Include the common configuration for this transform
+include ../transform.config
-include $(REPOROOT)/transforms/.make.transforms
diff --git a/transforms/language/lang_id/ray/Makefile b/transforms/language/lang_id/ray/Makefile
index 6b0e307d7..1339af964 100644
--- a/transforms/language/lang_id/ray/Makefile
+++ b/transforms/language/lang_id/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=lang_id
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/language/lang_id/transform.config b/transforms/language/lang_id/transform.config
new file mode 100644
index 000000000..3a969f41d
--- /dev/null
+++ b/transforms/language/lang_id/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=lang_id
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+LANG_ID_PYTHON_VERSION=$(DPK_VERSION)
+LANG_ID_RAY_VERSION=$(LANG_ID_PYTHON_VERSION)
+LANG_ID_SPARK_VERSION=$(LANG_ID_PYTHON_VERSION)
+
diff --git a/transforms/language/pdf2parquet/kfp_ray/Makefile b/transforms/language/pdf2parquet/kfp_ray/Makefile
index 24154bffa..66edd91fc 100644
--- a/transforms/language/pdf2parquet/kfp_ray/Makefile
+++ b/transforms/language/pdf2parquet/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/language/pdf2parquet/python/Makefile b/transforms/language/pdf2parquet/python/Makefile
index 0e06a5900..b18b068ac 100644
--- a/transforms/language/pdf2parquet/python/Makefile
+++ b/transforms/language/pdf2parquet/python/Makefile
@@ -1,14 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=pdf2parquet
+# Include the common configuration for this transform
+include ../transform.config
RUN_ARGS=" --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" \
--data_files_to_use \"['.pdf','.zip']\" "
diff --git a/transforms/language/pdf2parquet/ray/Makefile b/transforms/language/pdf2parquet/ray/Makefile
index fba43ea15..ced1f45f1 100644
--- a/transforms/language/pdf2parquet/ray/Makefile
+++ b/transforms/language/pdf2parquet/ray/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=pdf2parquet
+# Include the common configuration for this transform
+include ../transform.config
LINUX_WITH_CPU_TORCH?=true
OS := $(shell uname -s)
diff --git a/transforms/language/pdf2parquet/transform.config b/transforms/language/pdf2parquet/transform.config
new file mode 100644
index 000000000..1bda1908e
--- /dev/null
+++ b/transforms/language/pdf2parquet/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=pdf2parquet
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+PDF2PARQUET_PYTHON_VERSION=$(DPK_VERSION)
+PDF2PARQUET_RAY_VERSION=$(PDF2PARQUET_PYTHON_VERSION)
+PDF2PARQUET_SPARK_VERSION=$(PDF2PARQUET_PYTHON_VERSION)
+
diff --git a/transforms/language/pii_redactor/kfp_ray/Makefile b/transforms/language/pii_redactor/kfp_ray/Makefile
index 77844a79e..370f85cb0 100644
--- a/transforms/language/pii_redactor/kfp_ray/Makefile
+++ b/transforms/language/pii_redactor/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/language/pii_redactor/python/Makefile b/transforms/language/pii_redactor/python/Makefile
index 28fd33fff..50161da6e 100644
--- a/transforms/language/pii_redactor/python/Makefile
+++ b/transforms/language/pii_redactor/python/Makefile
@@ -1,16 +1,22 @@
-# Define the root of the local git clone for the common rules to be able
+# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
-# Include a library of common .transform.* targets which most
-# transforms should be able to reuse. However, feel free
-# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-TRANSFORM_NAME=pii_redactor
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+# Include a library of common .transform.* targets which most
+# transforms should be able to reuse. However, feel free
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/language/pii_redactor/ray/Makefile b/transforms/language/pii_redactor/ray/Makefile
index 3a67b90b8..e52494534 100644
--- a/transforms/language/pii_redactor/ray/Makefile
+++ b/transforms/language/pii_redactor/ray/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=pii_redactor
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/language/pii_redactor/transform.config b/transforms/language/pii_redactor/transform.config
new file mode 100644
index 000000000..c06adf82c
--- /dev/null
+++ b/transforms/language/pii_redactor/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform and is used to match against
+# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=pii_redactor
+
+################################################################################
+# This defines the transform's version number as would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION)
+PII_REDACTOR_RAY_VERSION=$(PII_REDACTOR_PYTHON_VERSION)
+PII_REDACTOR_SPARK_VERSION=$(PII_REDACTOR_PYTHON_VERSION)
+
diff --git a/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd b/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd
index 70613cc01..36bd47560 100644
--- a/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd
+++ b/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/language/text_encoder/python/Makefile b/transforms/language/text_encoder/python/Makefile
index c9e8b8c1b..564bb405b 100644
--- a/transforms/language/text_encoder/python/Makefile
+++ b/transforms/language/text_encoder/python/Makefile
@@ -1,14 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=text_encoder
+# Include the common configuration for this transform
+include ../transform.config
LINUX_WITH_CPU_TORCH?=true
OS := $(shell uname -s)
diff --git a/transforms/language/text_encoder/ray/Makefile b/transforms/language/text_encoder/ray/Makefile
index b95b299c4..85cf45cac 100644
--- a/transforms/language/text_encoder/ray/Makefile
+++ b/transforms/language/text_encoder/ray/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=text_encoder
+# Include the common configuration for this transform
+include ../transform.config
LINUX_WITH_CPU_TORCH?=true
OS := $(shell uname -s)
diff --git a/transforms/language/text_encoder/transform.config b/transforms/language/text_encoder/transform.config
new file mode 100644
index 000000000..df5754fb8
--- /dev/null
+++ b/transforms/language/text_encoder/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=text_encoder
+
+################################################################################
+# This defines the transform's version numbers, as used when
+# publishing its wheels. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+TEXT_ENCODER_PYTHON_VERSION=$(DPK_VERSION)
+TEXT_ENCODER_RAY_VERSION=$(TEXT_ENCODER_PYTHON_VERSION)
+TEXT_ENCODER_SPARK_VERSION=$(TEXT_ENCODER_PYTHON_VERSION)
+
diff --git a/transforms/packaging/.make.packaging b/transforms/packaging/.make.packaging
index 5268889d0..29506aaf1 100644
--- a/transforms/packaging/.make.packaging
+++ b/transforms/packaging/.make.packaging
@@ -2,6 +2,11 @@ ifndef T_SET
T_SET=all
endif
+# Defines the version of the wheel for the packaged transforms.
+# If you change this value, you will need to run "make set-versions" to
+# apply the new version number to the toml files.
+DPK_TRANSFORMS_VERSION=$(DPK_VERSION)
+
venv:
$(MAKE) .defaults.create-venv
diff --git a/transforms/universal/doc_id/kfp_ray/Makefile b/transforms/universal/doc_id/kfp_ray/Makefile
index 94fc75145..f170326e2 100644
--- a/transforms/universal/doc_id/kfp_ray/Makefile
+++ b/transforms/universal/doc_id/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/universal/doc_id/python/Makefile b/transforms/universal/doc_id/python/Makefile
index 1f7d0d353..26da1fc8f 100644
--- a/transforms/universal/doc_id/python/Makefile
+++ b/transforms/universal/doc_id/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=doc_id
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile
index d7844f2f9..79787406b 100644
--- a/transforms/universal/doc_id/ray/Makefile
+++ b/transforms/universal/doc_id/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=doc_id
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/universal/doc_id/spark/Makefile b/transforms/universal/doc_id/spark/Makefile
index 954786dac..9303d021f 100644
--- a/transforms/universal/doc_id/spark/Makefile
+++ b/transforms/universal/doc_id/spark/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=doc_id
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.spark-venv
diff --git a/transforms/universal/doc_id/transform.config b/transforms/universal/doc_id/transform.config
new file mode 100644
index 000000000..d3715f3b2
--- /dev/null
+++ b/transforms/universal/doc_id/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=doc_id
+
+################################################################################
+# This defines the transform's version numbers, as used when
+# publishing its wheels. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+DOC_ID_PYTHON_VERSION=$(DPK_VERSION)
+DOC_ID_RAY_VERSION=$(DOC_ID_PYTHON_VERSION)
+DOC_ID_SPARK_VERSION=$(DOC_ID_PYTHON_VERSION)
+
diff --git a/transforms/universal/ededup/kfp_ray/Makefile b/transforms/universal/ededup/kfp_ray/Makefile
index 456cf76d1..f0c5cc217 100644
--- a/transforms/universal/ededup/kfp_ray/Makefile
+++ b/transforms/universal/ededup/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -47,4 +50,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/universal/ededup/python/Makefile b/transforms/universal/ededup/python/Makefile
index 92f3fac27..348edc74d 100644
--- a/transforms/universal/ededup/python/Makefile
+++ b/transforms/universal/ededup/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=ededup
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/universal/ededup/ray/Makefile b/transforms/universal/ededup/ray/Makefile
index f828e107e..1ff055e29 100644
--- a/transforms/universal/ededup/ray/Makefile
+++ b/transforms/universal/ededup/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=ededup
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/universal/ededup/transform.config b/transforms/universal/ededup/transform.config
new file mode 100644
index 000000000..12f5357f1
--- /dev/null
+++ b/transforms/universal/ededup/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=ededup
+
+################################################################################
+# This defines the transform's version numbers, as used when
+# publishing its wheels. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+EDEDUP_PYTHON_VERSION=$(DPK_VERSION)
+EDEDUP_RAY_VERSION=$(EDEDUP_PYTHON_VERSION)
+EDEDUP_SPARK_VERSION=$(EDEDUP_PYTHON_VERSION)
+
diff --git a/transforms/universal/fdedup/kfp_ray/Makefile b/transforms/universal/fdedup/kfp_ray/Makefile
index f6b215984..55f7851f6 100644
--- a/transforms/universal/fdedup/kfp_ray/Makefile
+++ b/transforms/universal/fdedup/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/universal/fdedup/ray/Makefile b/transforms/universal/fdedup/ray/Makefile
index 15173ba00..f5f06c3c3 100644
--- a/transforms/universal/fdedup/ray/Makefile
+++ b/transforms/universal/fdedup/ray/Makefile
@@ -1,14 +1,24 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=fdedup
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
+
venv:: .transforms.ray-venv
test:: .transforms.ray-test
diff --git a/transforms/universal/fdedup/transform.config b/transforms/universal/fdedup/transform.config
new file mode 100644
index 000000000..774716e15
--- /dev/null
+++ b/transforms/universal/fdedup/transform.config
@@ -0,0 +1,18 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=fdedup
+
+################################################################################
+# This defines the transform's version number, as used when
+# publishing its wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version number, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+FDEDUP_RAY_VERSION=$(DPK_VERSION)
+
diff --git a/transforms/universal/filter/kfp_ray/Makefile b/transforms/universal/filter/kfp_ray/Makefile
index bd26792be..c48298d22 100644
--- a/transforms/universal/filter/kfp_ray/Makefile
+++ b/transforms/universal/filter/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/universal/filter/python/Makefile b/transforms/universal/filter/python/Makefile
index 1ea1151ce..9a01deea1 100644
--- a/transforms/universal/filter/python/Makefile
+++ b/transforms/universal/filter/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=filter
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/universal/filter/ray/Makefile b/transforms/universal/filter/ray/Makefile
index 5960a9670..0c0af0004 100644
--- a/transforms/universal/filter/ray/Makefile
+++ b/transforms/universal/filter/ray/Makefile
@@ -1,13 +1,21 @@
-
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=filter
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/universal/filter/spark/Makefile b/transforms/universal/filter/spark/Makefile
index 329da35a2..72bc78a15 100644
--- a/transforms/universal/filter/spark/Makefile
+++ b/transforms/universal/filter/spark/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-# This is included in the image name, if defined
-TRANSFORM_NAME=filter
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.spark-venv
diff --git a/transforms/universal/filter/transform.config b/transforms/universal/filter/transform.config
new file mode 100644
index 000000000..70f2ada5b
--- /dev/null
+++ b/transforms/universal/filter/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=filter
+
+################################################################################
+# This defines the transform's version numbers, as used when
+# publishing its wheels. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+FILTER_PYTHON_VERSION=$(DPK_VERSION)
+FILTER_RAY_VERSION=$(FILTER_PYTHON_VERSION)
+FILTER_SPARK_VERSION=$(FILTER_PYTHON_VERSION)
+
diff --git a/transforms/universal/hap/python/Makefile b/transforms/universal/hap/python/Makefile
index c7c15dba7..2363e51c2 100644
--- a/transforms/universal/hap/python/Makefile
+++ b/transforms/universal/hap/python/Makefile
@@ -1,15 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-#TRANSFORM_RUNTIME_SRC_FILE=hap_transform_python.py
-TRANSFORM_NAME=hap
-
-HAP_PYTHON_VERSION= $(DPK_VERSION)
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.python-venv
diff --git a/transforms/universal/hap/transform.config b/transforms/universal/hap/transform.config
new file mode 100644
index 000000000..6aa7018b3
--- /dev/null
+++ b/transforms/universal/hap/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=hap
+
+################################################################################
+# This defines the transform's version numbers, as used when
+# publishing its wheels. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+HAP_PYTHON_VERSION=$(DPK_VERSION)
+HAP_RAY_VERSION=$(HAP_PYTHON_VERSION)
+HAP_SPARK_VERSION=$(HAP_PYTHON_VERSION)
+
diff --git a/transforms/universal/noop/kfp_ray/Makefile b/transforms/universal/noop/kfp_ray/Makefile
index d1198e5a2..fc541f367 100644
--- a/transforms/universal/noop/kfp_ray/Makefile
+++ b/transforms/universal/noop/kfp_ray/Makefile
@@ -1,7 +1,11 @@
REPOROOT=${CURDIR}/../../../../
+
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/universal/noop/python/Makefile b/transforms/universal/noop/python/Makefile
index 80797bcc9..5e6121b04 100644
--- a/transforms/universal/noop/python/Makefile
+++ b/transforms/universal/noop/python/Makefile
@@ -1,15 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=noop
-
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.python-venv
diff --git a/transforms/universal/noop/ray/Makefile b/transforms/universal/noop/ray/Makefile
index 0b70f6662..ad7ff3320 100644
--- a/transforms/universal/noop/ray/Makefile
+++ b/transforms/universal/noop/ray/Makefile
@@ -1,15 +1,24 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=noop
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
+
venv:: .transforms.ray-venv
test:: .transforms.ray-test
diff --git a/transforms/universal/noop/spark/Makefile b/transforms/universal/noop/spark/Makefile
index 726fd9e6a..ebc72992e 100644
--- a/transforms/universal/noop/spark/Makefile
+++ b/transforms/universal/noop/spark/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=noop
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.spark-venv
diff --git a/transforms/universal/noop/transform.config b/transforms/universal/noop/transform.config
new file mode 100644
index 000000000..49c9b2cbf
--- /dev/null
+++ b/transforms/universal/noop/transform.config
@@ -0,0 +1,21 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=noop
+
+################################################################################
+# This defines the transform's version numbers, as used when
+# publishing its wheels. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update the version numbers across the transform (e.g., pyproject.toml).
+NOOP_PYTHON_VERSION=$(DPK_VERSION)
+NOOP_RAY_VERSION=$(NOOP_PYTHON_VERSION)
+NOOP_SPARK_VERSION=$(NOOP_PYTHON_VERSION)
+
diff --git a/transforms/universal/profiler/kfp_ray/Makefile b/transforms/universal/profiler/kfp_ray/Makefile
index 2fbd17653..e4f6b860b 100644
--- a/transforms/universal/profiler/kfp_ray/Makefile
+++ b/transforms/universal/profiler/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
diff --git a/transforms/universal/profiler/python/Makefile b/transforms/universal/profiler/python/Makefile
index 61c807a23..983250184 100644
--- a/transforms/universal/profiler/python/Makefile
+++ b/transforms/universal/profiler/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=profiler
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/universal/profiler/ray/Makefile b/transforms/universal/profiler/ray/Makefile
index 8cec28968..12d75c4c3 100644
--- a/transforms/universal/profiler/ray/Makefile
+++ b/transforms/universal/profiler/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=profiler
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/universal/profiler/spark/Makefile b/transforms/universal/profiler/spark/Makefile
index cb90b4020..39b16cac6 100644
--- a/transforms/universal/profiler/spark/Makefile
+++ b/transforms/universal/profiler/spark/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transform's DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=profiler
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.spark-venv
diff --git a/transforms/universal/profiler/transform.config b/transforms/universal/profiler/transform.config
new file mode 100644
index 000000000..c86cd6415
--- /dev/null
+++ b/transforms/universal/profiler/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=profiler
+
+################################################################################
+# This defines the transform's version number as it would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+PROFILER_PYTHON_VERSION=$(DPK_VERSION)
+PROFILER_RAY_VERSION=$(PROFILER_PYTHON_VERSION)
+PROFILER_SPARK_VERSION=$(PROFILER_PYTHON_VERSION)
+
diff --git a/transforms/universal/resize/kfp_ray/Makefile b/transforms/universal/resize/kfp_ray/Makefile
index a0e2faf37..8c7e592af 100644
--- a/transforms/universal/resize/kfp_ray/Makefile
+++ b/transforms/universal/resize/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/universal/resize/python/Makefile b/transforms/universal/resize/python/Makefile
index 7de0032e3..66453c846 100644
--- a/transforms/universal/resize/python/Makefile
+++ b/transforms/universal/resize/python/Makefile
@@ -1,16 +1,22 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transforms DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
-# $(REPOROOT)/.make.versions file contains the versions
-
-TRANSFORM_NAME=resize
-
include $(REPOROOT)/transforms/.make.transforms
+# Include the common configuration for this transform
+include ../transform.config
+
venv:: .transforms.python-venv
test:: .transforms.python-test
diff --git a/transforms/universal/resize/ray/Makefile b/transforms/universal/resize/ray/Makefile
index 1a2f2496f..dd229b3f4 100644
--- a/transforms/universal/resize/ray/Makefile
+++ b/transforms/universal/resize/ray/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transforms DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
-# to override/redefine the rules below.
-
+# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=resize
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/universal/resize/spark/Makefile b/transforms/universal/resize/spark/Makefile
index f02e9db3f..18d72d31d 100644
--- a/transforms/universal/resize/spark/Makefile
+++ b/transforms/universal/resize/spark/Makefile
@@ -1,13 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transforms DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
-
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=resize
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.spark-venv
diff --git a/transforms/universal/resize/transform.config b/transforms/universal/resize/transform.config
new file mode 100644
index 000000000..4b7171a4e
--- /dev/null
+++ b/transforms/universal/resize/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=resize
+
+################################################################################
+# This defines the transform's version number as it would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+RESIZE_PYTHON_VERSION=$(DPK_VERSION)
+RESIZE_RAY_VERSION=$(RESIZE_PYTHON_VERSION)
+RESIZE_SPARK_VERSION=$(RESIZE_PYTHON_VERSION)
+
diff --git a/transforms/universal/tokenization/kfp_ray/Makefile b/transforms/universal/tokenization/kfp_ray/Makefile
index 09656297a..c43105ff1 100644
--- a/transforms/universal/tokenization/kfp_ray/Makefile
+++ b/transforms/universal/tokenization/kfp_ray/Makefile
@@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../
WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows
+# Include the common configuration for this transform
+include ../transform.config
+
SRC_DIR=${CURDIR}/../ray/
PYTHON_WF := $(shell find ./ -name '*_wf.py')
@@ -48,4 +51,4 @@ workflow-test: workflow-build
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
- done
\ No newline at end of file
+ done
diff --git a/transforms/universal/tokenization/python/Makefile b/transforms/universal/tokenization/python/Makefile
index d23661983..8f4f7fbf5 100644
--- a/transforms/universal/tokenization/python/Makefile
+++ b/transforms/universal/tokenization/python/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transforms DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=tokenization
+# Include the common configuration for this transform
+include ../transform.config
venv:: .transforms.python-venv
diff --git a/transforms/universal/tokenization/ray/Makefile b/transforms/universal/tokenization/ray/Makefile
index 3d5a46d09..0a4e3a370 100644
--- a/transforms/universal/tokenization/ray/Makefile
+++ b/transforms/universal/tokenization/ray/Makefile
@@ -1,12 +1,21 @@
# Define the root of the local git clone for the common rules to be able
# know where they are running from.
REPOROOT=../../../..
+
+# Set this, before including .make.defaults, to
+# 1 if requirements reference the latest code in the data processing library
+# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transforms DPK dependencies are on wheels published to
+# pypi (e.g. data-prep-toolkit==0.2.1)
+#USE_REPO_LIB_SRC=1
+
# Include a library of common .transform.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.
include $(REPOROOT)/transforms/.make.transforms
-TRANSFORM_NAME=tokenization
+# Include the common configuration for this transform
+include ../transform.config
BASE_IMAGE=${RAY_BASE_IMAGE}
venv:: .transforms.ray-venv
diff --git a/transforms/universal/tokenization/transform.config b/transforms/universal/tokenization/transform.config
new file mode 100644
index 000000000..04f517d42
--- /dev/null
+++ b/transforms/universal/tokenization/transform.config
@@ -0,0 +1,20 @@
+#
+# This is intended to be included across the Makefiles provided within
+# a given transform's directory tree, so must use compatible syntax.
+#
+################################################################################
+# This defines the name of the transform. It is used to match against
+# expected files and to define the transform's image name.
+TRANSFORM_NAME=tokenization
+
+################################################################################
+# This defines the transform's version number as it would be used
+# when publishing the wheel. In general, only the micro version
+# number should be advanced relative to the DPK_VERSION.
+#
+# If you change the version numbers, be sure to run "make set-versions" to
+# update version numbers across the transform (e.g., pyproject.toml).
+TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION)
+TOKENIZATION_RAY_VERSION=$(TOKENIZATION_PYTHON_VERSION)
+TOKENIZATION_SPARK_VERSION=$(TOKENIZATION_PYTHON_VERSION)
+