From 5c95b28af6d89b1e40a5f1e0d63f76a7e78c8f46 Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Wed, 23 Oct 2024 21:40:57 +0000 Subject: [PATCH 01/12] Enable Python distroless container image variants --- .../org/apache/beam/gradle/BeamModulePlugin.groovy | 4 ++++ sdks/python/container/Dockerfile | 11 +++++++++-- sdks/python/container/common.gradle | 9 ++++++++- sdks/python/container/run_validatescontainer.sh | 3 ++- sdks/python/test-suites/dataflow/common.gradle | 3 +++ 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 8a094fd56217..e69e7149ebac 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -576,6 +576,10 @@ class BeamModulePlugin implements Plugin { return project.containerArchitectures() != [project.nativeArchitecture()] } + project.ext.containerBuildTarget = { + return project.findProperty('container-build-target') + } + /** ***********************************************************************************************/ // Define and export a map dependencies shared across multiple sub-projects. // diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 7bea6229668f..d770c34b452c 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -82,7 +82,7 @@ RUN \ # Remove pip cache. rm -rf /root/.cache/pip -ENTRYPOINT ["/opt/apache/beam/boot"] +#ENTRYPOINT ["/opt/apache/beam/boot"] #### # Pull and add third party licenses to the image if pull_licenses is true. @@ -103,9 +103,16 @@ RUN if [ "$pull_licenses" = "true" ] ; then \ python /tmp/license_scripts/pull_licenses_py.py ; \ fi -FROM beam +FROM beam as base ARG pull_licenses COPY --from=third_party_licenses /opt/apache/beam/third_party_licenses /opt/apache/beam/third_party_licenses RUN if [ "$pull_licenses" != "true" ] ; then \ rm -rf /opt/apache/beam/third_party_licenses ; \ fi + +FROM gcr.io/distroless/python3-debian12:latest-${TARGETARCH} as distroless +COPY --from=base /usr/local/bin /usr/local/bin +COPY --from=base /usr/local/lib/python${pyversion} /usr/local/lib/python${pyversion} +COPY --from=base /root /root +COPY --from=base /opt /opt +ENV PATH "$PATH:/usr/local/bin" diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 0175778a6301..eca4bc784626 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,10 +71,16 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") +var baseTarget = 'base' +var buildTarget = project.containerBuildTarget() ?: baseTarget +var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" +if (buildTarget != baseTarget) { + imageName += "_${buildTarget}" +} docker { name containerImageName( - name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk", + name: imageName, root: project.rootProject.hasProperty(["docker-repository-root"]) ? project.rootProject["docker-repository-root"] : project.docker_image_default_repo_root, @@ -90,6 +96,7 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers + target buildTarget } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/container/run_validatescontainer.sh b/sdks/python/container/run_validatescontainer.sh index 5ee3342a1efa..6957f2787a5e 100755 --- a/sdks/python/container/run_validatescontainer.sh +++ b/sdks/python/container/run_validatescontainer.sh @@ -56,7 +56,8 @@ IMAGE_PREFIX="$(grep 'docker_image_default_repo_prefix' gradle.properties | cut SDK_VERSION="$(grep 'sdk_version' gradle.properties | cut -d'=' -f2)" PY_VERSION=$1 ARCH=${3:-"x86"} -IMAGE_NAME="${IMAGE_PREFIX}python${PY_VERSION}_sdk" +IMAGE_SUFFIX=${IMAGE_SUFFIX:-_sdk} +IMAGE_NAME="${IMAGE_PREFIX}python${PY_VERSION}${IMAGE_SUFFIX}" CONTAINER_PROJECT="sdks:python:container:py${PY_VERSION//.}" # Note: we substitute away the dot in the version. PY_INTERPRETER="python${PY_VERSION}" MACHINE_TYPE_ARGS="" diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 6bca904c1a64..b41b424b9fa6 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -370,9 +370,12 @@ task validatesContainer() { dependsOn 'initializeForDataflowJob' dependsOn ":sdks:python:container:py${pyversion}:docker" def runScriptsPath = "${rootDir}/sdks/python/container/run_validatescontainer.sh" + var containerBuildTarget = project.containerBuildTarget() ?: '' + var imageSuffix = containerBuildTarget != '' ? "_sdk_${containerBuildTarget}": '_sdk' doLast { exec { executable 'sh' + environment 'IMAGE_SUFFIX', imageSuffix args '-c', ". ${envdir}/bin/activate && cd ${rootDir} && ${runScriptsPath} " + "${project.ext.pythonVersion} " + "${project.ext.sdkLocation}" From 060f3b60a4f52405e814e14428c3a387c557b6ee Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Thu, 24 Oct 2024 16:10:55 -0700 Subject: [PATCH 02/12] Fix missing entrypoint --- sdks/python/container/Dockerfile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index d770c34b452c..46f3486eb5c9 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -82,7 +82,7 @@ RUN \ # Remove pip cache. rm -rf /root/.cache/pip -#ENTRYPOINT ["/opt/apache/beam/boot"] +ENTRYPOINT ["/opt/apache/beam/boot"] #### # Pull and add third party licenses to the image if pull_licenses is true. @@ -110,9 +110,17 @@ RUN if [ "$pull_licenses" != "true" ] ; then \ rm -rf /opt/apache/beam/third_party_licenses ; \ fi +ARG TARGETARCH FROM gcr.io/distroless/python3-debian12:latest-${TARGETARCH} as distroless +ARG py_version +COPY --from=base /usr/local/include /usr/local/include COPY --from=base /usr/local/bin /usr/local/bin -COPY --from=base /usr/local/lib/python${pyversion} /usr/local/lib/python${pyversion} -COPY --from=base /root /root +COPY --from=base /usr/local/lib /usr/local/lib +# Python standard library modules: +COPY --from=base /usr/lib/python${py_version} /usr/lib/python${py_version} +# Contains the boot entrypoint: COPY --from=base /opt /opt ENV PATH "$PATH:/usr/local/bin" + +# Despite the ENTRYPOINT set above, need to reset since deriving the layer from a different image: +ENTRYPOINT ["/opt/apache/beam/boot"] From 4167525c88878fb2af1c611fc494b87378ce2da0 Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Fri, 25 Oct 2024 14:46:09 -0700 Subject: [PATCH 03/12] Revert testing using validatescontainer.sh --- sdks/python/container/run_validatescontainer.sh | 3 +-- sdks/python/test-suites/dataflow/common.gradle | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/sdks/python/container/run_validatescontainer.sh b/sdks/python/container/run_validatescontainer.sh index 6957f2787a5e..5ee3342a1efa 100755 --- a/sdks/python/container/run_validatescontainer.sh +++ b/sdks/python/container/run_validatescontainer.sh @@ -56,8 +56,7 @@ IMAGE_PREFIX="$(grep 'docker_image_default_repo_prefix' gradle.properties | cut SDK_VERSION="$(grep 'sdk_version' gradle.properties | cut -d'=' -f2)" PY_VERSION=$1 ARCH=${3:-"x86"} -IMAGE_SUFFIX=${IMAGE_SUFFIX:-_sdk} -IMAGE_NAME="${IMAGE_PREFIX}python${PY_VERSION}${IMAGE_SUFFIX}" +IMAGE_NAME="${IMAGE_PREFIX}python${PY_VERSION}_sdk" CONTAINER_PROJECT="sdks:python:container:py${PY_VERSION//.}" # Note: we substitute away the dot in the version. PY_INTERPRETER="python${PY_VERSION}" MACHINE_TYPE_ARGS="" diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index b41b424b9fa6..6bca904c1a64 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -370,12 +370,9 @@ task validatesContainer() { dependsOn 'initializeForDataflowJob' dependsOn ":sdks:python:container:py${pyversion}:docker" def runScriptsPath = "${rootDir}/sdks/python/container/run_validatescontainer.sh" - var containerBuildTarget = project.containerBuildTarget() ?: '' - var imageSuffix = containerBuildTarget != '' ? "_sdk_${containerBuildTarget}": '_sdk' doLast { exec { executable 'sh' - environment 'IMAGE_SUFFIX', imageSuffix args '-c', ". ${envdir}/bin/activate && cd ${rootDir} && ${runScriptsPath} " + "${project.ext.pythonVersion} " + "${project.ext.sdkLocation}" From de91c8ed75f4352f8d90ba76e5c7c538028b1fec Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Mon, 28 Oct 2024 17:34:09 -0700 Subject: [PATCH 04/12] Create validateDistrolessContainerTests --- sdks/python/test-suites/dataflow/build.gradle | 6 +++ .../python/test-suites/dataflow/common.gradle | 44 +++++++++++++++++++ sdks/python/test-suites/gradle.properties | 3 ++ 3 files changed, 53 insertions(+) diff --git a/sdks/python/test-suites/dataflow/build.gradle b/sdks/python/test-suites/dataflow/build.gradle index 04a79683fd36..f924059230b2 100644 --- a/sdks/python/test-suites/dataflow/build.gradle +++ b/sdks/python/test-suites/dataflow/build.gradle @@ -60,6 +60,12 @@ task validatesContainerTests { } } +task validateDistrolessContainerTests { + getVersionsAsList('distroless_python_versions').each { + dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:validatesDistrolessContainer") + } +} + task examplesPostCommit { getVersionsAsList('dataflow_examples_postcommit_py_versions').each { dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:examples") diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 6bca904c1a64..bafd9f2a10bd 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -380,6 +380,50 @@ task validatesContainer() { } } +task validatesDistrolessContainer() { + def pyversion = "${project.ext.pythonVersion.replace('.', '')}" + dependsOn 'initializeForDataflowJob' + def pyProject = project(":sdks:python:container:py${pyversion}") + pyProject.ext['container-build-target'] = 'distroless' + rootProject.ext['docker-repository-root'] = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" + rootProject.ext['docker-tag'] = System.currentTimeMillis() + rootProject.ext['push-containers'] = true + dependsOn ":sdks:python:container:py${pyversion}:docker" + + // TODO(damondouglas): the following is duplicate from container/common.gradle; attempts to single source via ext failed + var baseTarget = 'base' + var buildTarget = project.containerBuildTarget() ?: baseTarget + var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" + if (buildTarget != baseTarget) { + imageName += "_${buildTarget}" + } + var imageURI = containerImageName( + name: imageName, + root: project.rootProject["docker-repository-root"], + tag: project.rootProject["docker-tag"]) + + var testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" + + def argMap = [ + "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", + "project": "apache-beam-testing", + "region": "us-central1", + "runner": "TestDataflowRunner", + "sdk_container_image": imageURI, + "sdk_location": "container", + "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it", + "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", + ] + def cmdArgs = mapToArgString(argMap) + doLast { + exec { + workingDir = "${rootDir}/sdks/python" + executable 'sh' + args '-c', ". ${envdir}/bin/activate && pytest ${testTarget} --test-pipeline-options=\"${cmdArgs}\"" + } + } +} + task validatesContainerARM() { def pyversion = "${project.ext.pythonVersion.replace('.', '')}" dependsOn 'initializeForDataflowJob' diff --git a/sdks/python/test-suites/gradle.properties b/sdks/python/test-suites/gradle.properties index d027cd3144d3..08266c4b0dd5 100644 --- a/sdks/python/test-suites/gradle.properties +++ b/sdks/python/test-suites/gradle.properties @@ -54,3 +54,6 @@ prism_examples_postcommit_py_versions=3.9,3.12 # cross language postcommit python test suites cross_language_validates_py_versions=3.9,3.12 + +# Python versions to support distroless variants +distroless_python_versions=3.9,3.10,3.11,3.12 From c244b1bac2c9177d26fd7f53a61d47be77f3fda6 Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Wed, 30 Oct 2024 09:25:16 -0700 Subject: [PATCH 05/12] Refactor for reusable gradle methods --- .../beam/gradle/BeamModulePlugin.groovy | 13 +++++++++++ sdks/python/container/Dockerfile | 15 ++++++++++--- sdks/python/container/common.gradle | 15 ++----------- .../python/test-suites/dataflow/common.gradle | 22 +++++-------------- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index e69e7149ebac..a13662b68864 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -3219,6 +3219,19 @@ class BeamModulePlugin implements Plugin { return project.getProperty(propertyName).split(',') } } + // Reports the container image URL i.e. /:tag based on gradle properties state and + // ext.containerBuildTarget. + project.ext.containerImageURL = { + var baseTarget = 'base' + var buildTarget = project.containerBuildTarget() ?: baseTarget + var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" + if (buildTarget != baseTarget) { + imageName += "_${buildTarget}" + } + var root = project.findProperty('docker-repository-root') ?: project.docker_image_default_repo_root + var tag = project.findProperty('docker-tag') ?: project.sdk_version + return project.containerImageName( name: imageName, root: root, tag: tag) + } } private void setAutomaticModuleNameHeader(JavaNatureConfiguration configuration, Project project) { diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 46f3486eb5c9..f3d22a4b5bc6 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -113,14 +113,23 @@ RUN if [ "$pull_licenses" != "true" ] ; then \ ARG TARGETARCH FROM gcr.io/distroless/python3-debian12:latest-${TARGETARCH} as distroless ARG py_version + +# Contains header files needed by the Python interpreter. COPY --from=base /usr/local/include /usr/local/include + +# Contains the Python interpreter executables. COPY --from=base /usr/local/bin /usr/local/bin + +# Contains the Python library dependencies. COPY --from=base /usr/local/lib /usr/local/lib -# Python standard library modules: + +# Python standard library modules. COPY --from=base /usr/lib/python${py_version} /usr/lib/python${py_version} -# Contains the boot entrypoint: + +# Contains the boot entrypoint and related files such as licenses. COPY --from=base /opt /opt + ENV PATH "$PATH:/usr/local/bin" -# Despite the ENTRYPOINT set above, need to reset since deriving the layer from a different image: +# Despite the ENTRYPOINT set above, need to reset since deriving the layer derives from a different image. ENTRYPOINT ["/opt/apache/beam/boot"] diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index eca4bc784626..01ee7b0756eb 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,21 +71,10 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") -var baseTarget = 'base' -var buildTarget = project.containerBuildTarget() ?: baseTarget -var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" -if (buildTarget != baseTarget) { - imageName += "_${buildTarget}" -} +var buildTarget = project.containerBuildTarget() ?: 'base' docker { - name containerImageName( - name: imageName, - root: project.rootProject.hasProperty(["docker-repository-root"]) ? - project.rootProject["docker-repository-root"] : - project.docker_image_default_repo_root, - tag: project.rootProject.hasProperty(["docker-tag"]) ? - project.rootProject["docker-tag"] : project.sdk_version) + name project.containerImageURL() // tags used by dockerTag task tags containerImageTags() files "../Dockerfile", "./build" diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index bafd9f2a10bd..c891b55932bc 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -383,33 +383,21 @@ task validatesContainer() { task validatesDistrolessContainer() { def pyversion = "${project.ext.pythonVersion.replace('.', '')}" dependsOn 'initializeForDataflowJob' - def pyProject = project(":sdks:python:container:py${pyversion}") - pyProject.ext['container-build-target'] = 'distroless' + rootProject.ext['container-build-target'] = 'distroless' rootProject.ext['docker-repository-root'] = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" - rootProject.ext['docker-tag'] = System.currentTimeMillis() - rootProject.ext['push-containers'] = true + rootProject.ext['docker-tag'] = java.time.Instant.now().getEpochSecond() dependsOn ":sdks:python:container:py${pyversion}:docker" - - // TODO(damondouglas): the following is duplicate from container/common.gradle; attempts to single source via ext failed - var baseTarget = 'base' - var buildTarget = project.containerBuildTarget() ?: baseTarget - var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" - if (buildTarget != baseTarget) { - imageName += "_${buildTarget}" - } - var imageURI = containerImageName( - name: imageName, - root: project.rootProject["docker-repository-root"], - tag: project.rootProject["docker-tag"]) + dependsOn ":sdks:python:container:py${pyversion}:dockerPush" var testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" + var imageURL = project.containerImageURL() def argMap = [ "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", "project": "apache-beam-testing", "region": "us-central1", "runner": "TestDataflowRunner", - "sdk_container_image": imageURI, + "sdk_container_image": imageURL, "sdk_location": "container", "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it", "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", From d9b955e0476e5d58ffe807f888ca9605fb22627f Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Wed, 30 Oct 2024 11:21:57 -0700 Subject: [PATCH 06/12] Revert back --- .../beam/gradle/BeamModulePlugin.groovy | 17 ---------- sdks/python/container/common.gradle | 11 +++++-- sdks/python/test-suites/dataflow/build.gradle | 6 ---- .../python/test-suites/dataflow/common.gradle | 32 ------------------- sdks/python/test-suites/gradle.properties | 3 -- 5 files changed, 8 insertions(+), 61 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index a13662b68864..8a094fd56217 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -576,10 +576,6 @@ class BeamModulePlugin implements Plugin { return project.containerArchitectures() != [project.nativeArchitecture()] } - project.ext.containerBuildTarget = { - return project.findProperty('container-build-target') - } - /** ***********************************************************************************************/ // Define and export a map dependencies shared across multiple sub-projects. // @@ -3219,19 +3215,6 @@ class BeamModulePlugin implements Plugin { return project.getProperty(propertyName).split(',') } } - // Reports the container image URL i.e. /:tag based on gradle properties state and - // ext.containerBuildTarget. - project.ext.containerImageURL = { - var baseTarget = 'base' - var buildTarget = project.containerBuildTarget() ?: baseTarget - var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" - if (buildTarget != baseTarget) { - imageName += "_${buildTarget}" - } - var root = project.findProperty('docker-repository-root') ?: project.docker_image_default_repo_root - var tag = project.findProperty('docker-tag') ?: project.sdk_version - return project.containerImageName( name: imageName, root: root, tag: tag) - } } private void setAutomaticModuleNameHeader(JavaNatureConfiguration configuration, Project project) { diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 01ee7b0756eb..56540b76c231 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,10 +71,15 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") -var buildTarget = project.containerBuildTarget() ?: 'base' docker { - name project.containerImageURL() + name containerImageName( + name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk", + root: project.rootProject.hasProperty(["docker-repository-root"]) ? + project.rootProject["docker-repository-root"] : + project.docker_image_default_repo_root, + tag: project.rootProject.hasProperty(["docker-tag"]) ? + project.rootProject["docker-tag"] : project.sdk_version) // tags used by dockerTag task tags containerImageTags() files "../Dockerfile", "./build" @@ -85,7 +90,7 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers - target buildTarget + target project.findProperty('container-build-target') ?: 'base' } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/test-suites/dataflow/build.gradle b/sdks/python/test-suites/dataflow/build.gradle index f924059230b2..04a79683fd36 100644 --- a/sdks/python/test-suites/dataflow/build.gradle +++ b/sdks/python/test-suites/dataflow/build.gradle @@ -60,12 +60,6 @@ task validatesContainerTests { } } -task validateDistrolessContainerTests { - getVersionsAsList('distroless_python_versions').each { - dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:validatesDistrolessContainer") - } -} - task examplesPostCommit { getVersionsAsList('dataflow_examples_postcommit_py_versions').each { dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:examples") diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index c891b55932bc..6bca904c1a64 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -380,38 +380,6 @@ task validatesContainer() { } } -task validatesDistrolessContainer() { - def pyversion = "${project.ext.pythonVersion.replace('.', '')}" - dependsOn 'initializeForDataflowJob' - rootProject.ext['container-build-target'] = 'distroless' - rootProject.ext['docker-repository-root'] = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" - rootProject.ext['docker-tag'] = java.time.Instant.now().getEpochSecond() - dependsOn ":sdks:python:container:py${pyversion}:docker" - dependsOn ":sdks:python:container:py${pyversion}:dockerPush" - - var testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" - var imageURL = project.containerImageURL() - - def argMap = [ - "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", - "project": "apache-beam-testing", - "region": "us-central1", - "runner": "TestDataflowRunner", - "sdk_container_image": imageURL, - "sdk_location": "container", - "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it", - "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", - ] - def cmdArgs = mapToArgString(argMap) - doLast { - exec { - workingDir = "${rootDir}/sdks/python" - executable 'sh' - args '-c', ". ${envdir}/bin/activate && pytest ${testTarget} --test-pipeline-options=\"${cmdArgs}\"" - } - } -} - task validatesContainerARM() { def pyversion = "${project.ext.pythonVersion.replace('.', '')}" dependsOn 'initializeForDataflowJob' diff --git a/sdks/python/test-suites/gradle.properties b/sdks/python/test-suites/gradle.properties index 08266c4b0dd5..d027cd3144d3 100644 --- a/sdks/python/test-suites/gradle.properties +++ b/sdks/python/test-suites/gradle.properties @@ -54,6 +54,3 @@ prism_examples_postcommit_py_versions=3.9,3.12 # cross language postcommit python test suites cross_language_validates_py_versions=3.9,3.12 - -# Python versions to support distroless variants -distroless_python_versions=3.9,3.10,3.11,3.12 From 2ebb919672b79dc5cb9d79a4b852916633624b14 Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Wed, 30 Oct 2024 18:06:54 -0700 Subject: [PATCH 07/12] Finalize gradle --- sdks/python/container/common.gradle | 3 +- sdks/python/test-suites/dataflow/build.gradle | 6 +++ .../python/test-suites/dataflow/common.gradle | 38 +++++++++++++++++++ sdks/python/test-suites/gradle.properties | 3 ++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 56540b76c231..fe0fce2821c6 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,6 +71,7 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") +def baseBuildTarget = 'base' docker { name containerImageName( @@ -90,7 +91,7 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers - target project.findProperty('container-build-target') ?: 'base' + target project.rootProject.findProperty('container-build-target') ?: baseBuildTarget } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/test-suites/dataflow/build.gradle b/sdks/python/test-suites/dataflow/build.gradle index 04a79683fd36..4500b395b0a6 100644 --- a/sdks/python/test-suites/dataflow/build.gradle +++ b/sdks/python/test-suites/dataflow/build.gradle @@ -60,6 +60,12 @@ task validatesContainerTests { } } +task validatesDistrolessContainerTests { + getVersionsAsList('distroless_python_versions').each { + dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:validatesDistrolessContainer") + } +} + task examplesPostCommit { getVersionsAsList('dataflow_examples_postcommit_py_versions').each { dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:examples") diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 6bca904c1a64..faf2519e921c 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -380,6 +380,44 @@ task validatesContainer() { } } +task validatesDistrolessContainer() { + def pyversion = "${project.ext.pythonVersion.replace('.', '')}" + def repository = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" + def buildTarget = 'distroless' + def tag = java.time.Instant.now().getEpochSecond() + project.rootProject.ext['docker-tag'] = "${tag}-${buildTarget}" + rootProject.ext['container-build-target'] = buildTarget + rootProject.ext['docker-repository-root'] = repository + if (project.hasProperty("testRCDependencies")) { + // Generate a requirements file with pre-release versions for the docker task + // if testing with pre-release dependencies. + dependsOn ":sdks:python:container:py${pyversion}:generatePythonRequirements" + mustRunAfter ":sdks:python:container:py${pyversion}:generatePythonRequirements" + } + def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk" + dependsOn 'initializeForDataflowJob' + dependsOn ":sdks:python:container:py${pyversion}:docker" + dependsOn ":sdks:python:container:py${pyversion}:dockerPush" + def argMap = [ + "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", + "project": "apache-beam-testing", + "region": "us-central1", + "runner": "TestDataflowRunner", + "sdk_container_image": imageURL, + "sdk_location": "container", + "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it", + "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", + ] + def cmdArgs = mapToArgString(argMap) + doLast { + exec { + workingDir = "${rootDir}/sdks/python" + executable 'sh' + args '-c', ". ${envdir}/bin/activate && pytest ${testTarget} --test-pipeline-options=\"${cmdArgs}\"" + } + } +} + task validatesContainerARM() { def pyversion = "${project.ext.pythonVersion.replace('.', '')}" dependsOn 'initializeForDataflowJob' diff --git a/sdks/python/test-suites/gradle.properties b/sdks/python/test-suites/gradle.properties index d027cd3144d3..08266c4b0dd5 100644 --- a/sdks/python/test-suites/gradle.properties +++ b/sdks/python/test-suites/gradle.properties @@ -54,3 +54,6 @@ prism_examples_postcommit_py_versions=3.9,3.12 # cross language postcommit python test suites cross_language_validates_py_versions=3.9,3.12 + +# Python versions to support distroless variants +distroless_python_versions=3.9,3.10,3.11,3.12 From 41b4a404cd8a6e15d4a59c9b386f44b15bc9e887 Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Thu, 31 Oct 2024 10:10:05 -0700 Subject: [PATCH 08/12] Migrate distroless build to its own gradle task --- sdks/python/container/common.gradle | 26 +++++++++++++++++-- .../python/test-suites/dataflow/common.gradle | 9 +++---- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index fe0fce2821c6..495b84b03062 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,7 +71,6 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") -def baseBuildTarget = 'base' docker { name containerImageName( @@ -91,7 +90,30 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers - target project.rootProject.findProperty('container-build-target') ?: baseBuildTarget + target 'base' +} + +task dockerDistroless { + group = 'docker' + docker{ + name containerImageName( + name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk_distroless", + root: project.rootProject.hasProperty(["docker-repository-root"]) ? + project.rootProject["docker-repository-root"] : + project.docker_image_default_repo_root, + tag: project.rootProject.hasProperty(["docker-tag"]) ? + project.rootProject["docker-tag"] : project.sdk_version) + tags containerImageTags() + files "../Dockerfile", "./build" + buildArgs(['py_version': "${project.ext.pythonVersion}", + 'pull_licenses': project.rootProject.hasProperty(["docker-pull-licenses"]) || + project.rootProject.hasProperty(["isRelease"])]) + buildx project.useBuildx() + platform(*project.containerPlatforms()) + load project.useBuildx() && !pushContainers + push pushContainers + target 'distroless' + } } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index faf2519e921c..8ceedbaacc5d 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -383,10 +383,8 @@ task validatesContainer() { task validatesDistrolessContainer() { def pyversion = "${project.ext.pythonVersion.replace('.', '')}" def repository = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" - def buildTarget = 'distroless' def tag = java.time.Instant.now().getEpochSecond() - project.rootProject.ext['docker-tag'] = "${tag}-${buildTarget}" - rootProject.ext['container-build-target'] = buildTarget + project.rootProject.ext['docker-tag'] = tag rootProject.ext['docker-repository-root'] = repository if (project.hasProperty("testRCDependencies")) { // Generate a requirements file with pre-release versions for the docker task @@ -394,10 +392,11 @@ task validatesDistrolessContainer() { dependsOn ":sdks:python:container:py${pyversion}:generatePythonRequirements" mustRunAfter ":sdks:python:container:py${pyversion}:generatePythonRequirements" } - def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk" + def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_distroless:${tag}" dependsOn 'initializeForDataflowJob' - dependsOn ":sdks:python:container:py${pyversion}:docker" + dependsOn ":sdks:python:container:py${pyversion}:dockerDistroless" dependsOn ":sdks:python:container:py${pyversion}:dockerPush" + def testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" def argMap = [ "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", "project": "apache-beam-testing", From 658d9b36038f7e9ee3f99e2b928ade807af096ad Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Thu, 31 Oct 2024 17:35:16 -0700 Subject: [PATCH 09/12] Remove gradle distroless build task --- sdks/python/container/common.gradle | 24 ----------------- .../python/test-suites/dataflow/common.gradle | 27 +++++++++++++++---- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 495b84b03062..0175778a6301 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -90,30 +90,6 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers - target 'base' -} - -task dockerDistroless { - group = 'docker' - docker{ - name containerImageName( - name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk_distroless", - root: project.rootProject.hasProperty(["docker-repository-root"]) ? - project.rootProject["docker-repository-root"] : - project.docker_image_default_repo_root, - tag: project.rootProject.hasProperty(["docker-tag"]) ? - project.rootProject["docker-tag"] : project.sdk_version) - tags containerImageTags() - files "../Dockerfile", "./build" - buildArgs(['py_version': "${project.ext.pythonVersion}", - 'pull_licenses': project.rootProject.hasProperty(["docker-pull-licenses"]) || - project.rootProject.hasProperty(["isRelease"])]) - buildx project.useBuildx() - platform(*project.containerPlatforms()) - load project.useBuildx() && !pushContainers - push pushContainers - target 'distroless' - } } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 8ceedbaacc5d..b93f76a6ffd9 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -380,22 +380,28 @@ task validatesContainer() { } } +/** + * Validates the distroless (https://github.com/GoogleContainerTools/distroless)variant of the Python SDK container + * image (sdks/python/container/Dockerfile). + * To test a single version of Python: + * ./gradlew :sdks:python:test-suites:dataflow:py311:validatesDistrolessContainer \ + * -PpythonVersion=3 -Pjava11Home=$JAVA_HOME -PtestJavaVersion=11 + * See https://cwiki.apache.org/confluence/display/BEAM/Python+Tips#PythonTips-VirtualEnvironmentSetup + * for more information on setting up different Python versions. + */ task validatesDistrolessContainer() { + def buildTarget = 'distroless' def pyversion = "${project.ext.pythonVersion.replace('.', '')}" def repository = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" def tag = java.time.Instant.now().getEpochSecond() - project.rootProject.ext['docker-tag'] = tag - rootProject.ext['docker-repository-root'] = repository if (project.hasProperty("testRCDependencies")) { // Generate a requirements file with pre-release versions for the docker task // if testing with pre-release dependencies. dependsOn ":sdks:python:container:py${pyversion}:generatePythonRequirements" mustRunAfter ":sdks:python:container:py${pyversion}:generatePythonRequirements" } - def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_distroless:${tag}" + def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_${buildTarget}:${tag}" dependsOn 'initializeForDataflowJob' - dependsOn ":sdks:python:container:py${pyversion}:dockerDistroless" - dependsOn ":sdks:python:container:py${pyversion}:dockerPush" def testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" def argMap = [ "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", @@ -408,6 +414,17 @@ task validatesDistrolessContainer() { "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", ] def cmdArgs = mapToArgString(argMap) + doFirst { + exec { + workingDir = "${rootDir}/sdks/python/container" + executable 'sh' + args '-c', "docker build -t ${imageURL} --target=${buildTarget} ." + } + exec { + executable 'sh' + args '-c', "docker push ${imageURL}" + } + } doLast { exec { workingDir = "${rootDir}/sdks/python" From a3c1d4c6e3753ef307ae2d81241e9aa55398345c Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Thu, 31 Oct 2024 17:37:07 -0700 Subject: [PATCH 10/12] Add base target --- sdks/python/container/common.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index 0175778a6301..f162673d257a 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -90,6 +90,7 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers + target 'base' } dockerPrepare.dependsOn copyLauncherDependencies From 96d64de912c0e894819fa2206da3d2ae8e72ff43 Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Thu, 31 Oct 2024 17:56:17 -0700 Subject: [PATCH 11/12] Build docker image directly in test --- sdks/python/test-suites/dataflow/common.gradle | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index b93f76a6ffd9..c78d81946d49 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -390,16 +390,10 @@ task validatesContainer() { * for more information on setting up different Python versions. */ task validatesDistrolessContainer() { - def buildTarget = 'distroless' def pyversion = "${project.ext.pythonVersion.replace('.', '')}" + def buildTarget = 'distroless' def repository = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" def tag = java.time.Instant.now().getEpochSecond() - if (project.hasProperty("testRCDependencies")) { - // Generate a requirements file with pre-release versions for the docker task - // if testing with pre-release dependencies. - dependsOn ":sdks:python:container:py${pyversion}:generatePythonRequirements" - mustRunAfter ":sdks:python:container:py${pyversion}:generatePythonRequirements" - } def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_${buildTarget}:${tag}" dependsOn 'initializeForDataflowJob' def testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" @@ -416,9 +410,9 @@ task validatesDistrolessContainer() { def cmdArgs = mapToArgString(argMap) doFirst { exec { - workingDir = "${rootDir}/sdks/python/container" + workingDir = "${rootDir}/sdks/python/container/${pyversion}" executable 'sh' - args '-c', "docker build -t ${imageURL} --target=${buildTarget} ." + args '-c', "docker build -t ${imageURL} --target=${buildTarget} -f ../Dockerfile --build-arg=py_version=${project.ext.pythonVersion} ." } exec { executable 'sh' From 5c6720cf60e0abcf9d64a07178159259fba7565f Mon Sep 17 00:00:00 2001 From: Damon Douglas Date: Fri, 1 Nov 2024 17:04:58 -0700 Subject: [PATCH 12/12] Revert back to using plugin --- sdks/python/container/common.gradle | 10 +++++-- .../python/test-suites/dataflow/common.gradle | 27 +++++++++---------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/sdks/python/container/common.gradle b/sdks/python/container/common.gradle index f162673d257a..885662362894 100644 --- a/sdks/python/container/common.gradle +++ b/sdks/python/container/common.gradle @@ -71,10 +71,16 @@ def copyLauncherDependencies = tasks.register("copyLauncherDependencies", Copy) } def pushContainers = project.rootProject.hasProperty(["isRelease"]) || project.rootProject.hasProperty("push-containers") +def baseBuildTarget = 'base' +def buildTarget = project.findProperty('container-build-target') ?: 'base' +var imageName = project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk" +if (buildTarget != baseBuildTarget) { + imageName += "_${buildTarget}" +} docker { name containerImageName( - name: project.docker_image_default_repo_prefix + "python${project.ext.pythonVersion}_sdk", + name: imageName, root: project.rootProject.hasProperty(["docker-repository-root"]) ? project.rootProject["docker-repository-root"] : project.docker_image_default_repo_root, @@ -90,7 +96,7 @@ docker { platform(*project.containerPlatforms()) load project.useBuildx() && !pushContainers push pushContainers - target 'base' + target buildTarget } dockerPrepare.dependsOn copyLauncherDependencies diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index c78d81946d49..3b48d7033233 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -381,11 +381,10 @@ task validatesContainer() { } /** - * Validates the distroless (https://github.com/GoogleContainerTools/distroless)variant of the Python SDK container + * Validates the distroless (https://github.com/GoogleContainerTools/distroless) variant of the Python SDK container * image (sdks/python/container/Dockerfile). * To test a single version of Python: - * ./gradlew :sdks:python:test-suites:dataflow:py311:validatesDistrolessContainer \ - * -PpythonVersion=3 -Pjava11Home=$JAVA_HOME -PtestJavaVersion=11 + * ./gradlew :sdks:python:test-suites:dataflow:py311:validatesDistrolessContainer * See https://cwiki.apache.org/confluence/display/BEAM/Python+Tips#PythonTips-VirtualEnvironmentSetup * for more information on setting up different Python versions. */ @@ -395,30 +394,28 @@ task validatesDistrolessContainer() { def repository = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}" def tag = java.time.Instant.now().getEpochSecond() def imageURL = "${repository}/beam_python${project.ext.pythonVersion}_sdk_${buildTarget}:${tag}" + project.rootProject.ext['docker-repository-root'] = repository + project.rootProject.ext['container-build-target'] = buildTarget + project.rootProject.ext['docker-tag'] = tag + if (project.rootProject.hasProperty('dry-run')) { + println "Running in dry run mode: imageURL: ${imageURL}, pyversion: ${pyversion}, buildTarget: ${buildTarget}, repository: ${repository}, tag: ${tag}, envdir: ${envdir}" + return + } dependsOn 'initializeForDataflowJob' + dependsOn ":sdks:python:container:py${pyversion}:docker" + dependsOn ":sdks:python:container:py${pyversion}:dockerPush" def testTarget = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it" def argMap = [ "output": "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output", "project": "apache-beam-testing", "region": "us-central1", "runner": "TestDataflowRunner", - "sdk_container_image": imageURL, + "sdk_container_image": "${imageURL}", "sdk_location": "container", "staging_location": "gs://temp-storage-for-end-to-end-tests/staging-it", "temp_location": "gs://temp-storage-for-end-to-end-tests/temp-it", ] def cmdArgs = mapToArgString(argMap) - doFirst { - exec { - workingDir = "${rootDir}/sdks/python/container/${pyversion}" - executable 'sh' - args '-c', "docker build -t ${imageURL} --target=${buildTarget} -f ../Dockerfile --build-arg=py_version=${project.ext.pythonVersion} ." - } - exec { - executable 'sh' - args '-c', "docker push ${imageURL}" - } - } doLast { exec { workingDir = "${rootDir}/sdks/python"