From fc14c7aac23eccba16a271dc5b8c96b1cedbd372 Mon Sep 17 00:00:00 2001
From: David Wood
Date: Thu, 11 Jul 2024 14:31:09 -0400
Subject: [PATCH 1/3] Add INSTALL_LIB_SRC_INTO_VENV flag to .make.defaults

Signed-off-by: David Wood
---
Notes (not part of the commit message):
  * Adds INSTALL_LIB_SRC_INTO_VENV (default 1). When it is not 1, the
    .defaults.install-{python,ray,spark}-lib-src-venv recipes only echo a
    message instead of uninstalling/reinstalling the data-processing-lib
    source. For ray and spark the adjacent ../python source, if present,
    is installed in either case.
  * pytest and pytest-cov are now installed by the recipe that creates the
    venv rather than by each lib-src install recipe.
  * The editable install in .defaults.install-src-venv gains --force-reinstall.
  * NOTE(review): line breaks and tab indentation in this series were restored
    from a whitespace-collapsed copy. Hunk line counts were re-checked against
    the hunk headers, but the exact spacing inside context lines is assumed;
    confirm against the tree, or apply with --ignore-whitespace.

 .make.defaults | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/.make.defaults b/.make.defaults
index a68d0212d..b984d1667 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -51,6 +51,9 @@ RAY_BASE_IMAGE?=docker.io/rayproject/ray:${RAY}-py310
 DOCKER_IMAGE?=$(DOCKER_REMOTE_IMAGE)
 KIND_CLUSTER_NAME=dataprep
 ARTIFACTS_DIR?=$(REPOROOT)/artifacts
+# Set this to 0 to disable installation of data-processing-lib/* into virtual environments
+# Should cause data-prep-kit dependencies to be loaded from pypi instead.
+INSTALL_LIB_SRC_INTO_VENV?=1
 
 DPK_PYTHON_LIB_DIR=$(REPOROOT)/data-processing-lib/python
 DPK_RAY_LIB_DIR=$(REPOROOT)/data-processing-lib/ray
@@ -284,7 +287,7 @@ __check_defined = \
 	if [ ! -z "$(EXTRA_INDEX_URL)" ]; then \
 		extra_url='--extra-index-url $(EXTRA_INDEX_URL)'; \
 	fi; \
-	pip install $${extra_url} -e $(PYTHON_PROJECT_DIR);
+	pip install --force-reinstall $${extra_url} -e $(PYTHON_PROJECT_DIR);
 	@echo Installed python project source in $(PYTHON_PROJECT_DIR) into venv
 
 # Install local requirements last as it generally includes our lib source
@@ -308,13 +311,17 @@ __check_defined = \
 # Install all source from the repo for a python runtime transform into an existing venv
 .PHONY: .defaults.install-python-lib-src-venv
 .defaults.install-python-lib-src-venv::
+ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
 	@# Help: Install Python data processing library source into existing venv
 	@echo Installing Python data processing library source to existing venv
 	@source venv/bin/activate; \
-	pip install pytest; \
 	$(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \
 	$(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \
 	echo Installed source from Python processing library for `which $(PYTHON)`
+else
+	@# Help: DO NOT install Python data processing library source into existing venv
+	@echo INSTALL_LIB_SRC_INTO_VENV!=1 so do NOT installing Python data processing library source into existing venv
+endif
 
 # Install local requirements last as it generally includes our lib source
 .PHONY: .defaults.ray-lib-src-venv
@@ -325,17 +332,21 @@ __check_defined = \
 # And if there is an adjacent python dir (as for transforms), then also install that source
 .PHONY: .defaults.install-ray-lib-src-venv
 .defaults.install-ray-lib-src-venv::
+ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
 	@# Help: Install Ray and Python data processing library source into existing venv
 	@echo Installing Ray and Python data processing library source to existing venv
 	@source venv/bin/activate; \
-	pip install pytest; \
-	pip install pytest pytest-cov; \
 	$(MAKE) PIP_TARGET=data-prep-toolkit-ray .defaults.pip-uninstall; \
 	$(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \
 	$(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \
 	$(MAKE) PYTHON_PROJECT_DIR=$(DPK_RAY_LIB_DIR) .defaults.install-src-venv; \
-	echo Installed source from Ray data processing library for `which $(PYTHON)`; \
-	if [ -d ../python ]; then \
+	echo Installed source from Python and Ray data processing libraries for `which $(PYTHON)`
+else
+	@# Help: DO NOT install Python or Ray data processing library source into existing venv
+	@echo INSTALL_LIB_SRC_INTO_VENV!=1 so do NOT installing Python or Ray data processing library source into existing venv
+endif
+	@if [ -d ../python ]; then \
+		source venv/bin/activate; \
 		$(MAKE) PYTHON_PROJECT_DIR=../python .defaults.install-src-venv; \
 	fi
 
@@ -347,18 +358,23 @@ __check_defined = \
 # Install the python-based lib BEFORE spark assuming spark depends on the same version as python source.
 .PHONY: .defaults.install-spark-lib-src-venv
 .defaults.install-spark-lib-src-venv::
+ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
 	@# Help: Install Spark and Python data processing library source into existing venv
 	@echo Installing Spark and Python data processing library source to existing venv
 	@source venv/bin/activate; \
-	pip install pytest; \
 	$(MAKE) PIP_TARGET=data-prep-toolkit-spark .defaults.pip-uninstall; \
 	$(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \
 	$(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \
 	$(MAKE) PYTHON_PROJECT_DIR=$(DPK_SPARK_LIB_DIR) .defaults.install-src-venv; \
-	if [ -d ../python ]; then \
-		$(MAKE) PYTHON_PROJECT_DIR=../python .defaults.install-src-venv; \
+	echo Installed source from Python and Spark processing libraries for `which $(PYTHON)`
+else
+	@# Help: DO NOT install Python or Spark data processing library source into existing venv
+	@echo INSTALL_LIB_SRC_INTO_VENV!=1 so do NOT installing Python or Spark data processing library source into existing venv
+endif
+	if [ -d ../python ]; then \
+		source venv/bin/activate; \
+		$(MAKE) PYTHON_PROJECT_DIR=../python .defaults.install-src-venv; \
 	fi
-	echo Installed source from Spark processing library for `which $(PYTHON)`
 
 # Run tests in test directory from that dir after adding ../src to PYTHONPATH
 # Assumes a Makefile target of venv to create the venv
@@ -434,7 +450,7 @@ __check_defined = \
 	$(PYTHON) -m venv venv
 	@source venv/bin/activate; \
 	pip install --upgrade pip; \
-	pip install wheel; \
+	pip install wheel pytest pytest-cov;
 
 # Install requirements defined in the current directory into an existing venv
 .PHONY: .defaults.install-local-requirements-venv

From 002d71dcec26078fa6076140aa901eba5f15dfaf Mon Sep 17 00:00:00 2001
From: David Wood
Date: Thu, 11 Jul 2024 14:41:39 -0400
Subject: [PATCH 2/3] more logging in .make.defaults

Signed-off-by: David Wood
---
Notes (not part of the commit message):
  * Adds "Begin installing" / "Done installing" echo lines around the editable
    install in .defaults.install-src-venv.
  * Rewords the two echo messages printed before installing from
    requirements.txt or pyproject.toml. No command other than echo changes.

 .make.defaults | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.make.defaults b/.make.defaults
index b984d1667..a5f6e7271 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -283,12 +283,13 @@ __check_defined = \
 # PYTHON_PROJECT_DIR is expected to have src and pyproject.toml
 .PHONY: .defaults.install-src-venv
 .defaults.install-src-venv::
+	@echo Begin installing source from $(PYTHON_PROJECT_DIR) into venv
 	@source venv/bin/activate; \
 	if [ ! -z "$(EXTRA_INDEX_URL)" ]; then \
 		extra_url='--extra-index-url $(EXTRA_INDEX_URL)'; \
 	fi; \
 	pip install --force-reinstall $${extra_url} -e $(PYTHON_PROJECT_DIR);
-	@echo Installed python project source in $(PYTHON_PROJECT_DIR) into venv
+	@echo Done installing source from $(PYTHON_PROJECT_DIR) into venv
 
 # Install local requirements last as it generally includes our lib source
 .PHONY: .defaults.python-lib-src-venv
@@ -460,10 +461,10 @@ endif
 		extra_url='--extra-index-url $(EXTRA_INDEX_URL)'; \
 	fi; \
 	if [ -e requirements.txt ]; then \
-		echo Install requirements from requirements.txt; \
+		echo Installing requirements from requirements.txt; \
 		pip install $$extra_url -r requirements.txt; \
 	elif [ -e pyproject.toml ]; then \
-		echo Install requirements using pyproject.toml; \
+		echo Installing from pyproject.toml; \
 		pip install $$extra_url -e .; \
 	fi
 

From ef16e7f3298d8b8042085c714b053f5415b99494 Mon Sep 17 00:00:00 2001
From: David Wood
Date: Fri, 12 Jul 2024 07:47:27 -0400
Subject: [PATCH 3/3] also don't copy lib source for image building

Signed-off-by: David Wood
---
Notes (not part of the commit message):
  * Renames INSTALL_LIB_SRC_INTO_VENV to USE_REPO_LIB_SRC (default still 1)
    and updates the three install-*-lib-src-venv conditionals and their
    echo messages to the new name.
  * The python/ray/spark *-lib-src-image recipes now run the
    data-processing-lib .defaults.copy-lib steps only when USE_REPO_LIB_SRC
    is 1; the ../python copy for ray and spark stays unconditional.
  * The trailing "rm -rf" cleanup lines gain a "-" prefix so make ignores
    their exit status.
  * NOTE(review): the comment above the variable still describes only virtual
    environments although the flag now also gates the image copy steps;
    consider updating it in a follow-up.

 .make.defaults | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/.make.defaults b/.make.defaults
index a5f6e7271..26a13cb90 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -53,7 +53,7 @@ KIND_CLUSTER_NAME=dataprep
 ARTIFACTS_DIR?=$(REPOROOT)/artifacts
 # Set this to 0 to disable installation of data-processing-lib/* into virtual environments
 # Should cause data-prep-kit dependencies to be loaded from pypi instead.
-INSTALL_LIB_SRC_INTO_VENV?=1
+USE_REPO_LIB_SRC?=1
 
 DPK_PYTHON_LIB_DIR=$(REPOROOT)/data-processing-lib/python
 DPK_RAY_LIB_DIR=$(REPOROOT)/data-processing-lib/ray
@@ -235,9 +235,11 @@ __check_defined = \
 .PHONY: .defaults.python-lib-src-image
 .defaults.python-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
 	@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-lib/python source
+ifeq ($(USE_REPO_LIB_SRC), 1)
 	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
+endif
 	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-	rm -rf data-processing-lib-python
+	-rm -rf data-processing-lib-python
 
 # Build an image using the local Dockerfile and make the data-processing-lib/ray
 # available in the current directory for use by the Dockerfile (i.e. to install the library).
@@ -246,15 +248,17 @@ __check_defined = \
 .PHONY: .defaults.ray-lib-src-image
 .defaults.ray-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
 	@# Help: Build the Ray $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
+ifeq ($(USE_REPO_LIB_SRC), 1)
 	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
 	$(MAKE) LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray .defaults.copy-lib
+endif
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
 	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
-	rm -rf data-processing-lib-python
-	rm -rf data-processing-lib-ray
-	rm -rf python-transform
+	-rm -rf data-processing-lib-python
+	-rm -rf data-processing-lib-ray
+	-rm -rf python-transform
 
 # Build the base spark image used by spark-based transforms
 .PHONY: .defaults.spark-lib-base-image
@@ -268,15 +272,17 @@ __check_defined = \
 .defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
 	@# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
 	$(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
+ifeq ($(USE_REPO_LIB_SRC), 1)
 	$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
 	$(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
+endif
 	if [ -e ../python ]; then \
 		$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
 	fi
 	$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
-	rm -rf data-processing-lib-python
-	rm -rf data-processing-lib-spark
-	rm -rf python-transform
+	-rm -rf data-processing-lib-python
+	-rm -rf data-processing-lib-spark
+	-rm -rf python-transform
 
 # Install the source from the given directory into an existing venv
 # Expected PYTHON_PROJECT_DIR and uses EXTRA_INDEX_URL if set.
@@ -312,7 +318,7 @@ __check_defined = \
 # Install all source from the repo for a python runtime transform into an existing venv
 .PHONY: .defaults.install-python-lib-src-venv
 .defaults.install-python-lib-src-venv::
-ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
+ifeq ($(USE_REPO_LIB_SRC), 1)
 	@# Help: Install Python data processing library source into existing venv
 	@echo Installing Python data processing library source to existing venv
 	@source venv/bin/activate; \
@@ -321,7 +327,7 @@ ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
 	echo Installed source from Python processing library for `which $(PYTHON)`
 else
 	@# Help: DO NOT install Python data processing library source into existing venv
-	@echo INSTALL_LIB_SRC_INTO_VENV!=1 so do NOT installing Python data processing library source into existing venv
+	@echo USE_REPO_LIB_SRC!=1 so do NOT installing Python data processing library source into existing venv
 endif
 
 # Install local requirements last as it generally includes our lib source
@@ -333,7 +339,7 @@ endif
 # And if there is an adjacent python dir (as for transforms), then also install that source
 .PHONY: .defaults.install-ray-lib-src-venv
 .defaults.install-ray-lib-src-venv::
-ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
+ifeq ($(USE_REPO_LIB_SRC), 1)
 	@# Help: Install Ray and Python data processing library source into existing venv
 	@echo Installing Ray and Python data processing library source to existing venv
 	@source venv/bin/activate; \
@@ -344,7 +350,7 @@ ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
 	echo Installed source from Python and Ray data processing libraries for `which $(PYTHON)`
 else
 	@# Help: DO NOT install Python or Ray data processing library source into existing venv
-	@echo INSTALL_LIB_SRC_INTO_VENV!=1 so do NOT installing Python or Ray data processing library source into existing venv
+	@echo USE_REPO_LIB_SRC!=1 so do NOT installing Python or Ray data processing library source into existing venv
 endif
 	@if [ -d ../python ]; then \
 		source venv/bin/activate; \
@@ -359,7 +365,7 @@ endif
 # Install the python-based lib BEFORE spark assuming spark depends on the same version as python source.
 .PHONY: .defaults.install-spark-lib-src-venv
 .defaults.install-spark-lib-src-venv::
-ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
+ifeq ($(USE_REPO_LIB_SRC), 1)
 	@# Help: Install Spark and Python data processing library source into existing venv
 	@echo Installing Spark and Python data processing library source to existing venv
 	@source venv/bin/activate; \
@@ -370,7 +376,7 @@ ifeq ($(INSTALL_LIB_SRC_INTO_VENV), 1)
 	echo Installed source from Python and Spark processing libraries for `which $(PYTHON)`
 else
 	@# Help: DO NOT install Python or Spark data processing library source into existing venv
-	@echo INSTALL_LIB_SRC_INTO_VENV!=1 so do NOT installing Python or Spark data processing library source into existing venv
+	@echo USE_REPO_LIB_SRC!=1 so do NOT installing Python or Spark data processing library source into existing venv
 endif
 	if [ -d ../python ]; then \
 		source venv/bin/activate; \