diff --git a/.github/workflows/beam_PreCommit_Python.yml b/.github/workflows/beam_PreCommit_Python.yml index c891a79cefd0..35e7b937068d 100644 --- a/.github/workflows/beam_PreCommit_Python.yml +++ b/.github/workflows/beam_PreCommit_Python.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs="--ignore=apache_beam/dataframe/ --ignore=apache_beam/examples/ --ignore=apache_beam/runners/ --ignore=apache_beam/transforms/" \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml index 33be9644d34a..4b274d643e07 100644 --- a/.github/workflows/beam_PreCommit_Python_Coverage.yml +++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml @@ -86,6 +86,8 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :sdks:python:test-suites:tox:py38:preCommitPyCoverage + arguments: | + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Dataframes.yml b/.github/workflows/beam_PreCommit_Python_Dataframes.yml index 2862d7d5936c..f03716d06795 100644 --- a/.github/workflows/beam_PreCommit_Python_Dataframes.yml +++ b/.github/workflows/beam_PreCommit_Python_Dataframes.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/dataframe/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Examples.yml b/.github/workflows/beam_PreCommit_Python_Examples.yml index 7f980885180a..d629ee09b725 100644 --- a/.github/workflows/beam_PreCommit_Python_Examples.yml +++ b/.github/workflows/beam_PreCommit_Python_Examples.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/examples/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Runners.yml b/.github/workflows/beam_PreCommit_Python_Runners.yml index b0c5ab4fa34a..f823112e23dd 100644 --- a/.github/workflows/beam_PreCommit_Python_Runners.yml +++ b/.github/workflows/beam_PreCommit_Python_Runners.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/runners/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/beam_PreCommit_Python_Transforms.yml b/.github/workflows/beam_PreCommit_Python_Transforms.yml index 88ac59c9de96..7374af7f38c9 100644 --- a/.github/workflows/beam_PreCommit_Python_Transforms.yml +++ b/.github/workflows/beam_PreCommit_Python_Transforms.yml @@ -97,6 +97,7 @@ jobs: arguments: | -Pposargs=apache_beam/transforms/ \ -PpythonVersion=${{ matrix.python_version }} \ + -PuseWheelDistribution - name: Archive code coverage results uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/build_release_candidate.yml b/.github/workflows/build_release_candidate.yml index 3956551431c6..12f1537dac18 100644 --- a/.github/workflows/build_release_candidate.yml +++ b/.github/workflows/build_release_candidate.yml @@ -314,7 +314,7 @@ jobs: pip install -U pip pip install tox # TODO(https://github.com/apache/beam/issues/20209): Don't hardcode py version in this file. 
- pip install -r build-requirements.txt && tox -e py38-docs + tox -e py38-docs rm -rf target/docs/_build/.doctrees - name: Build Typescript Docs working-directory: beam/sdks/typescript diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 94248be0c008..f4ccf368bacb 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -92,11 +92,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 - - name: Get build dependencies - working-directory: ./sdks/python - run: python -m pip install -r build-requirements.txt - - name: Install wheels - run: python -m pip install wheel - name: Get tag id: get_tag run: | @@ -117,15 +112,15 @@ echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_OUTPUT - name: Build source working-directory: ./sdks/python - run: python setup.py sdist --formats=zip + run: pip install -U build && python -m build --sdist - name: Add checksums working-directory: ./sdks/python/dist run: | - file=$(ls | grep .zip | head -n 1) + file=$(ls | grep .tar.gz | head -n 1) sha512sum $file > ${file}.sha512 - name: Unzip source working-directory: ./sdks/python - run: unzip dist/$(ls dist | grep .zip | head -n 1) + run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1) - name: Rename source directory working-directory: ./sdks/python run: mv $(ls | grep apache-beam) apache-beam-source @@ -155,17 +150,17 @@ - name: Build RC source if: steps.is_rc.outputs.is_rc == 1 working-directory: ./sdks/python - run: python setup.py sdist --formats=zip + run: pip install -U build && python -m build --sdist - name: Add RC checksums if: steps.is_rc.outputs.is_rc == 1 working-directory: ./sdks/python/dist run: | - file=$(ls | grep .zip | head -n 1) + file=$(ls | grep .tar.gz | head -n 1) sha512sum $file > ${file}.sha512 - name: Unzip RC source if: steps.is_rc.outputs.is_rc == 1 working-directory: ./sdks/python - run: unzip dist/$(ls dist | grep .zip | head -n 1) + run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1) - name: Rename RC source directory if: steps.is_rc.outputs.is_rc == 1 working-directory: ./sdks/python diff --git a/.github/workflows/dask_runner_tests.yml b/.github/workflows/dask_runner_tests.yml index 423a304db825..35c320086992 100644 --- a/.github/workflows/dask_runner_tests.yml +++ b/.github/workflows/dask_runner_tests.yml @@ -44,12 +44,9 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt - name: Build source working-directory: ./sdks/python - run: python setup.py sdist + run: pip install -U build && python -m build --sdist - name: Rename source file working-directory: ./sdks/python/dist run: mv $(ls | grep "apache-beam.*tar\.gz") apache-beam-source.tar.gz @@ -78,9 +75,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.params.py_ver }} - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install tox run: pip install tox - name: Install SDK with dask diff --git a/.github/workflows/python_dependency_tests.yml b/.github/workflows/python_dependency_tests.yml index 6fd865bda754..166899df90cb 100644 --- a/.github/workflows/python_dependency_tests.yml +++ b/.github/workflows/python_dependency_tests.yml @@ -7,7 +7,7 @@ on: branches: ['master', 'release-*'] tags: 'v*' # paths where Beam Python's dependencies are configured.
- paths: ['sdks/python/setup.py', 'sdks/python/build-requirements.txt', 'sdks/python/container/base_image_requirements_manual.txt'] + paths: ['sdks/python/setup.py', 'sdks/python/pyproject.toml', 'sdks/python/container/base_image_requirements_manual.txt'] # This allows a subsequently queued workflow run to interrupt previous runs concurrency: @@ -38,9 +38,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.params.py_ver }} - - name: Install Build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install base_image_requirements.txt working-directory: ./sdks/python run: pip install --no-deps -r container/${{ matrix.params.py_env }}/base_image_requirements.txt diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 57ec895c2431..0309329e84e1 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -78,12 +78,9 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt - name: Build source working-directory: ./sdks/python - run: python setup.py sdist + run: pip install -U build && python -m build --sdist - name: Rename source file working-directory: ./sdks/python/dist run: mv $(ls | grep "apache-beam.*tar\.gz") apache-beam-source.tar.gz @@ -99,7 +96,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [macos-latest, windows-latest] params: [ {"py_ver": "3.8", "tox_env": "py38"}, {"py_ver": "3.9", "tox_env": "py39"}, @@ -113,9 +110,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.params.py_ver }} - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt --use-pep517 - name: Install tox run: pip install tox - name: Run tests basic unix @@ -148,9 +142,6 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install requirements working-directory: ./sdks/python run: pip install setuptools --upgrade && pip install -e . 
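Note: across the workflow changes above, `python setup.py sdist` becomes a PEP 517 build via the `build` package, and the explicit `build-requirements.txt` install steps disappear because build-time dependencies now come from `pyproject.toml`. A minimal Python sketch of the equivalent flow (the paths and the use of `subprocess` are illustrative, not part of the change):

```python
# Sketch of the sdist flow the updated workflows use; assumes the `build`
# package is installable and an sdks/python checkout exists.
import subprocess
import sys

subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-U', 'build'])
# PEP 517 isolated build: the backend installs grpcio-tools, Cython, numpy
# headers, etc. from pyproject.toml's [build-system] requires on its own,
# so no separate "Get build dependencies" step is needed beforehand.
subprocess.check_call(
    [sys.executable, '-m', 'build', '--sdist', '--outdir', 'dist/'],
    cwd='sdks/python')
```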
@@ -192,9 +183,6 @@ jobs: service_account_key: ${{ secrets.GCP_SA_KEY }} project_id: ${{ secrets.GCP_PROJECT_ID }} export_default_credentials: true - - name: Get build dependencies - working-directory: ./sdks/python - run: pip install -r build-requirements.txt - name: Install requirements working-directory: ./sdks/python run: pip install setuptools --upgrade && pip install -e ".[gcp]" diff --git a/.github/workflows/run_perf_alert_tool.yml b/.github/workflows/run_perf_alert_tool.yml index 1bd8d525c2fb..c61665c1bc7c 100644 --- a/.github/workflows/run_perf_alert_tool.yml +++ b/.github/workflows/run_perf_alert_tool.yml @@ -46,9 +46,6 @@ jobs: with: service_account_key: ${{ secrets.GCP_SA_KEY }} export_default_credentials: true - - name: Get Apache Beam Build dependencies - working-directory: ./sdks/python - run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt - name: Install Apache Beam working-directory: ./sdks/python run: pip install -e .[gcp,test] diff --git a/.github/workflows/typescript_tests.yml b/.github/workflows/typescript_tests.yml index a4aa14c42efd..edbe8399e7d8 100644 --- a/.github/workflows/typescript_tests.yml +++ b/.github/workflows/typescript_tests.yml @@ -89,10 +89,8 @@ jobs: - name: Setup Beam Python working-directory: ./sdks/python run: | - pip install pip setuptools --upgrade - pip install -r build-requirements.txt pip install 'pandas>=1.0,<1.5' - python setup.py develop + pip install -e . - run: npm ci working-directory: ./sdks/typescript - run: npm run build @@ -146,10 +144,7 @@ jobs: - name: Setup Beam Python working-directory: ./sdks/python run: | - pip install pip setuptools --upgrade - pip install -r build-requirements.txt pip install 'pandas>=1.0,<1.5' - python setup.py develop pip install -e ".[gcp]" - name: Authenticate on GCP uses: google-github-actions/setup-gcloud@v0 diff --git a/.test-infra/jenkins/job_PreCommit_Python.groovy b/.test-infra/jenkins/job_PreCommit_Python.groovy index 0e439d788877..9c9740e3c97e 100644 --- a/.test-infra/jenkins/job_PreCommit_Python.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python.groovy @@ -29,6 +29,7 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( '^release/.*$', ], gradleSwitches: [ + '-PuseWheelDistribution', '-Pposargs=\"--ignore=apache_beam/dataframe/ --ignore=apache_beam/examples/ --ignore=apache_beam/runners/ --ignore=apache_beam/transforms/\"' // All these tests are covered by different jobs. 
], numBuildsToRetain: 40 diff --git a/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy b/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy index c0cb48cf6231..43a204fd7cfc 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Coverage.groovy @@ -22,6 +22,9 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( scope: this, nameBase: 'Python_Coverage', gradleTask: ':sdks:python:test-suites:tox:py38:preCommitPyCoverage', + gradleSwitches: [ + '-PuseWheelDistribution' + ], timeoutMins: 180, triggerPathPatterns: [ '^model/.*$', diff --git a/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy b/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy index e2914e9bdb8e..dea034f613a5 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Dataframes.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Dataframes', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/dataframe/' + '-Pposargs=apache_beam/dataframe/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy b/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy index f4ef9f51d7fb..3dd7bf6f6f47 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Examples.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Examples', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/examples/' + '-Pposargs=apache_beam/examples/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy b/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy index e80dba6cf5cd..4ae1d283b7a9 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Runners.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Runners', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/runners/' + '-Pposargs=apache_beam/runners/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy b/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy index dd16d48b1731..ccd3f08b78ab 100644 --- a/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_Transforms.groovy @@ -23,7 +23,8 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder( nameBase: 'Python_Transforms', gradleTask: ':pythonPreCommit', gradleSwitches: [ - '-Pposargs=apache_beam/transforms/' + '-Pposargs=apache_beam/transforms/', + '-PuseWheelDistribution' ], timeoutMins: 180, triggerPathPatterns: [ diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index c32717aae725..6fa5ff0ee5f3 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -2998,7 +2998,7 @@ class BeamModulePlugin implements Plugin { executable 'sh' args '-c', ". 
${project.ext.envdir}/bin/activate && " + "pip install --pre --retries 10 --upgrade pip && " + - "pip install --pre --retries 10 --upgrade tox -r ${project.rootDir}/sdks/python/build-requirements.txt" + "pip install --pre --retries 10 --upgrade tox" } } // Gradle will delete outputs whenever it thinks they are stale. Putting a @@ -3081,30 +3081,40 @@ class BeamModulePlugin implements Plugin { } return argList.join(' ') } - project.ext.toxTask = { name, tox_env, posargs='' -> project.tasks.register(name) { dependsOn setupVirtualenv dependsOn ':sdks:python:sdist' - - doLast { - // Python source directory is also tox execution workspace, We want - // to isolate them per tox suite to avoid conflict when running - // multiple tox suites in parallel. - project.copy { from project.pythonSdkDeps; into copiedSrcRoot } - - def copiedPyRoot = "${copiedSrcRoot}/sdks/python" - def distTarBall = "${pythonRootDir}/build/apache-beam.tar.gz" - project.exec { - executable 'sh' - args '-c', ". ${project.ext.envdir}/bin/activate && cd ${copiedPyRoot} && scripts/run_tox.sh $tox_env $distTarBall '$posargs'" + if (project.hasProperty('useWheelDistribution')) { + def pythonVersionNumber = project.ext.pythonVersion.replace('.', '') + dependsOn ":sdks:python:bdistPy${pythonVersionNumber}linux" + doLast { + project.copy { from project.pythonSdkDeps; into copiedSrcRoot } + def copiedPyRoot = "${copiedSrcRoot}/sdks/python" + def collection = project.fileTree(project.project(':sdks:python').buildDir){ + include "**/apache_beam-*cp${pythonVersionNumber}*manylinux*.whl" + } + String packageFilename = collection.singleFile.toString() + project.exec { + executable 'sh' + args '-c', ". ${project.ext.envdir}/bin/activate && cd ${copiedPyRoot} && scripts/run_tox.sh $tox_env ${packageFilename} '$posargs' " + } + } + } else { + // tox task will run in editable mode, which is configured in the tox.ini file. + doLast { + project.copy { from project.pythonSdkDeps; into copiedSrcRoot } + def copiedPyRoot = "${copiedSrcRoot}/sdks/python" + project.exec { + executable 'sh' + args '-c', ". ${project.ext.envdir}/bin/activate && cd ${copiedPyRoot} && scripts/run_tox.sh $tox_env '$posargs'" + } } } inputs.files project.pythonSdkDeps outputs.files project.fileTree(dir: "${pythonRootDir}/target/.tox/${tox_env}/log/") } } - // Run single or a set of integration tests with provided test options and pipeline options. project.ext.enablePythonPerformanceTest = { diff --git a/release/src/main/scripts/build_release_candidate.sh b/release/src/main/scripts/build_release_candidate.sh index 057a38833adc..d0e6310f50aa 100755 --- a/release/src/main/scripts/build_release_candidate.sh +++ b/release/src/main/scripts/build_release_candidate.sh @@ -346,7 +346,7 @@ if [[ $confirmation = "y" ]]; then cd ${BEAM_ROOT_DIR} RELEASE_COMMIT=$(git rev-list -n 1 "tags/${RC_TAG}") # TODO(https://github.com/apache/beam/issues/20209): Don't hardcode py version in this file. 
- cd sdks/python && pip install -r build-requirements.txt && tox -e py38-docs + cd sdks/python && tox -e py38-docs GENERATED_PYDOC=~/${LOCAL_WEBSITE_UPDATE_DIR}/${LOCAL_PYTHON_DOC}/${BEAM_ROOT_DIR}/sdks/python/target/docs/_build rm -rf ${GENERATED_PYDOC}/.doctrees diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py b/sdks/python/apache_beam/coders/slow_coders_test.py index fe1c707a62e5..7915116a19a3 100644 --- a/sdks/python/apache_beam/coders/slow_coders_test.py +++ b/sdks/python/apache_beam/coders/slow_coders_test.py @@ -25,6 +25,9 @@ from apache_beam.coders.coders_test_common import * +@unittest.skip( + 'Remove non-cython tests.' + 'https://github.com/apache/beam/issues/28307') class SlowCoders(unittest.TestCase): def test_using_slow_impl(self): try: diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md index 19262dead586..cd92d9c127ee 100644 --- a/sdks/python/apache_beam/examples/inference/README.md +++ b/sdks/python/apache_beam/examples/inference/README.md @@ -29,7 +29,6 @@ Some examples are also used in [our benchmarks](http://s.apache.org/beam-communi You must have the latest (possibly unreleased) `apache-beam` or greater installed from the Beam repo in order to run these pipelines, because some examples rely on the latest features that are actively in development. To install Beam, run the following from the `sdks/python` directory: ``` -pip install -r build-requirements.txt pip install -e .[gcp] ``` diff --git a/sdks/python/apache_beam/examples/kafkataxi/README.md b/sdks/python/apache_beam/examples/kafkataxi/README.md index c4e808cad8b4..72a8d8f85c03 100644 --- a/sdks/python/apache_beam/examples/kafkataxi/README.md +++ b/sdks/python/apache_beam/examples/kafkataxi/README.md @@ -157,9 +157,9 @@ Install Beam and dependencies and build a Beam distribution. ```sh cd beam/sdks/python -pip install -r build-requirements.txt pip install -e '.[gcp]' -python setup.py sdist +pip install -q build +python -m build --sdist ``` Run the Beam pipeline. You can either use the default Kafka topic name or specify diff --git a/sdks/python/apache_beam/io/azure/integration_test/Dockerfile b/sdks/python/apache_beam/io/azure/integration_test/Dockerfile index e9ac396b8e17..257fa72cb668 100644 --- a/sdks/python/apache_beam/io/azure/integration_test/Dockerfile +++ b/sdks/python/apache_beam/io/azure/integration_test/Dockerfile @@ -32,7 +32,7 @@ COPY sdks/python /app/sdks/python COPY model /app/model # This step should look like setupVirtualenv minus virtualenv creation. -RUN pip install --no-cache-dir tox -r sdks/python/build-requirements.txt +RUN pip install --no-cache-dir tox # Add Azurite's self-signed cert to the global CA cert store. 
COPY cert.pem /usr/local/share/ca-certificates/azurite.crt diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py index 7e9c1e634748..95b6c2a5fa60 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py @@ -49,14 +49,12 @@ from apache_beam.io.gcp.bigquery import TableRowJsonCoder from apache_beam.io.gcp.bigquery import WriteToBigQuery from apache_beam.io.gcp.bigquery import _StreamToBigQuery -from apache_beam.io.gcp.bigquery_file_loads_test import _ELEMENTS from apache_beam.io.gcp.bigquery_read_internal import _JsonToDictCoder from apache_beam.io.gcp.bigquery_read_internal import bigquery_export_destination_uri from apache_beam.io.gcp.bigquery_tools import JSON_COMPLIANCE_ERROR from apache_beam.io.gcp.bigquery_tools import BigQueryWrapper from apache_beam.io.gcp.bigquery_tools import RetryStrategy from apache_beam.io.gcp.internal.clients import bigquery -from apache_beam.io.gcp.internal.clients.bigquery import bigquery_v2_client from apache_beam.io.gcp.pubsub import ReadFromPubSub from apache_beam.io.gcp.tests import utils from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryFullResultMatcher @@ -82,6 +80,7 @@ # pylint: disable=wrong-import-order, wrong-import-position try: + from apache_beam.io.gcp.internal.clients.bigquery import bigquery_v2_client from apitools.base.py.exceptions import HttpError from google.cloud import bigquery as gcp_bigquery from google.api_core import exceptions @@ -93,6 +92,42 @@ _LOGGER = logging.getLogger(__name__) +_ELEMENTS = [ + { + 'name': 'beam', 'language': 'py' + }, + { + 'name': 'beam', 'language': 'java' + }, + { + 'name': 'beam', 'language': 'go' + }, + { + 'name': 'flink', 'language': 'java' + }, + { + 'name': 'flink', 'language': 'scala' + }, + { + 'name': 'spark', 'language': 'scala' + }, + { + 'name': 'spark', 'language': 'py' + }, + { + 'name': 'spark', 'language': 'scala' + }, + { + 'name': 'beam', 'foundation': 'apache' + }, + { + 'name': 'flink', 'foundation': 'apache' + }, + { + 'name': 'spark', 'foundation': 'apache' + }, +] + def _load_or_default(filename): try: diff --git a/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile b/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile index 487d5c3487ab..ab7940563394 100644 --- a/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile +++ b/sdks/python/apache_beam/io/hdfs_integration_test/Dockerfile @@ -30,7 +30,7 @@ COPY sdks/python /app/sdks/python COPY model /app/model # This step should look like setupVirtualenv minus virtualenv creation. -RUN pip install --no-cache-dir tox -r sdks/python/build-requirements.txt +RUN pip install --no-cache-dir tox # Run wordcount, and write results to HDFS. CMD cd sdks/python && tox -e hdfs_integration_test diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py index bad7443d0d94..891726cb2688 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py @@ -20,11 +20,7 @@ import unittest -import mock - -import apache_beam as beam from apache_beam.metrics import MetricsFilter -from apache_beam.testing.test_pipeline import TestPipeline # Protect against environments where Google Cloud Natural Language client # is not available. 
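Note: the `bigquery_test.py` hunk above moves the `bigquery_v2_client` import inside the try/except block that already guards the other GCP client imports, so the module still imports when GCP extras are absent. A minimal sketch of that guard pattern, with an illustrative test class and skip message:

```python
import unittest

# Optional-dependency guard: resolve the import once, then gate tests on it.
try:
    from apitools.base.py.exceptions import HttpError
except ImportError:
    HttpError = None  # GCP extras not installed


@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
class BigQueryClientTest(unittest.TestCase):
    def test_guard_resolved(self):
        # Runs only when the optional client libraries are importable.
        self.assertIsNotNone(HttpError)
```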
@@ -60,21 +56,6 @@ def test_document_source(self): self.assertFalse('content' in dict_) self.assertTrue('gcs_content_uri' in dict_) - def test_annotate_test_called(self): - with mock.patch('apache_beam.ml.gcp.naturallanguageml._AnnotateTextFn' - '._get_api_client'): - p = TestPipeline() - features = [ - naturallanguageml.language_v1.AnnotateTextRequest.Features( - extract_syntax=True) - ] - _ = ( - p | beam.Create([naturallanguageml.Document('Hello, world!')]) - | naturallanguageml.AnnotateText(features)) - result = p.run() - result.wait_until_finish() - self.assertCounterEqual(result, 'api_calls', 1) - if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index 99cd26cc4098..ed0dc2d9a0c1 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -765,6 +765,7 @@ def __init__(self, # Try to prepare all the arguments that can just be filled in # without any additional work. in the process function. # Also cache all the placeholders needed in the process function. + input_args = list(input_args) ( self.placeholders_for_process, self.args_for_process, @@ -1437,7 +1438,8 @@ def process(self, windowed_value): return [] def _maybe_sample_exception( - self, exn: BaseException, windowed_value: WindowedValue) -> None: + self, exn: BaseException, + windowed_value: Optional[WindowedValue]) -> None: if self.execution_context is None: return diff --git a/sdks/python/apache_beam/runners/portability/stager.py b/sdks/python/apache_beam/runners/portability/stager.py index ace573de0a62..4afe5eaaa370 100644 --- a/sdks/python/apache_beam/runners/portability/stager.py +++ b/sdks/python/apache_beam/runners/portability/stager.py @@ -49,6 +49,7 @@ import glob import hashlib +import importlib.util import logging import os import shutil @@ -771,13 +772,26 @@ def _build_setup_package(setup_file, # type: str try: os.chdir(os.path.dirname(setup_file)) if build_setup_args is None: - build_setup_args = [ - Stager._get_python_executable(), - os.path.basename(setup_file), - 'sdist', - '--dist-dir', - temp_dir - ] + # if build is installed in the user env, use it to + # build the sdist else fallback to legacy setup.py sdist call. + if importlib.util.find_spec('build'): + build_setup_args = [ + Stager._get_python_executable(), + '-m', + 'build', + '--sdist', + '--outdir', + temp_dir, + os.path.dirname(setup_file), + ] + else: + build_setup_args = [ + Stager._get_python_executable(), + os.path.basename(setup_file), + 'sdist', + '--dist-dir', + temp_dir + ] _LOGGER.info('Executing command: %s', build_setup_args) processes.check_output(build_setup_args) output_files = glob.glob(os.path.join(temp_dir, '*.tar.gz')) diff --git a/sdks/python/build-requirements.txt b/sdks/python/build-requirements.txt deleted file mode 100644 index 4fe47079d8d0..000000000000 --- a/sdks/python/build-requirements.txt +++ /dev/null @@ -1,28 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# TODO(https://github.com/apache/beam/issues/20051): Consider PEP-517/PEP-518 instead of this file. - -setuptools -wheel>=0.36.0 -grpcio-tools==1.53.0 -mypy-protobuf==3.5.0 -# Avoid https://github.com/pypa/virtualenv/issues/2006 -distlib==0.3.7 - -# Numpy headers -numpy>=1.14.3,<1.26 diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 762bed268d63..7795e77e3963 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -30,7 +30,8 @@ def buildPython = tasks.register("buildPython") { logger.info('Building Python Dependencies') exec { executable 'sh' - args '-c', ". ${envdir}/bin/activate && python setup.py build --build-base ${buildDir}" + // args '-c', ". ${envdir}/bin/activate && python setup.py build --build-base ${buildDir}" + args '-c', ". ${envdir}/bin/activate && pip install -e ." } } } @@ -46,7 +47,7 @@ def sdist = tasks.register("sdist") { // Build artifact exec { executable 'sh' - args '-c', ". ${envdir}/bin/activate && python setup.py -q sdist --formats zip,gztar --dist-dir ${buildDir}" + args '-c', ". ${envdir}/bin/activate && pip install -U build && python -m build --sdist --outdir=${buildDir}" } def collection = fileTree(buildDir){ include "**/*${project.sdk_version}*.tar.gz" exclude 'srcs/**'} @@ -96,7 +97,6 @@ platform_identifiers_map.each { platform, idsuffix -> exec { environment CIBW_BUILD: "cp${pyversion}-${idsuffix}" environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib" - environment CIBW_BEFORE_BUILD: "pip install cython==0.29.36 numpy --config-settings=setup-args='-Dallow-noblas=true' && pip install --upgrade setuptools" // note: sync cibuildwheel version with GitHub Action // .github/workflow/build_wheel.yml:build_wheels "Install cibuildwheel" step executable 'sh' @@ -110,6 +110,7 @@ platform_identifiers_map.each { platform, idsuffix -> } } + /*************************************************************************************************/ // Non-testing builds and analysis tasks diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 73d83343e033..a49933ee6604 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -45,7 +45,7 @@ RUN \ && \ rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade setuptools && \ + pip install --upgrade pip setuptools wheel && \ # Install required packages for Beam Python SDK and common dependencies used by users. # use --no-deps to ensure the list includes all transitive dependencies. diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index e952b2126604..f2f3ea44b44c 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -43,3 +43,4 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy;python_version<"3.11" # Optimizes execution of some Beam codepaths. scipy scikit-learn +build>=1.0,<2 # tool to build sdist from setup.py in stager. 
\ No newline at end of file diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 94d80c8d263b..2b488af0afb5 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -18,7 +18,7 @@ """ Generates Python proto modules and grpc stubs for Beam protos. """ - +import argparse import contextlib import glob import inspect @@ -27,9 +27,7 @@ import platform import re import shutil -import subprocess import sys -import time from collections import defaultdict from importlib import import_module @@ -60,7 +58,7 @@ NO_PROMISES_NOTICE = """ \"\"\" For internal use only; no backwards-compatibility guarantees. -Automatically generated when running setup.py sdist or build[_py]. +Automatically generated when running python -m build. \"\"\" """ @@ -321,43 +319,6 @@ def find_by_ext(root_dir, ext): if file.endswith(ext): yield clean_path(os.path.join(root, file)) - -def ensure_grpcio_exists(): - try: - from grpc_tools import protoc # pylint: disable=unused-import - except ImportError: - return _install_grpcio_tools() - - -def _install_grpcio_tools(): - """ - Though wheels are available for grpcio-tools, setup_requires uses - easy_install which doesn't understand them. This means that it is - compiled from scratch (which is expensive as it compiles the full - protoc compiler). Instead, we attempt to install a wheel in a temporary - directory and add it to the path as needed. - See https://github.com/pypa/setuptools/issues/377 - """ - install_path = os.path.join(PYTHON_SDK_ROOT, '.eggs', 'grpcio-wheels') - logging.warning('Installing grpcio-tools into %s', install_path) - start = time.time() - subprocess.check_call([ - sys.executable, - '-m', - 'pip', - 'install', - '--target', - install_path, - '--upgrade', - '-r', - os.path.join(PYTHON_SDK_ROOT, 'build-requirements.txt') - ]) - logging.warning( - 'Installing grpcio-tools took %0.2f seconds.', time.time() - start) - - return install_path - - def build_relative_import(root_path, import_path, start_file_path): tail_path = import_path.replace('.', os.path.sep) source_path = os.path.join(root_path, tail_path) @@ -511,33 +472,31 @@ def generate_proto_files(force=False): if not os.path.exists(PYTHON_OUTPUT_PATH): os.mkdir(PYTHON_OUTPUT_PATH) - grpcio_install_loc = ensure_grpcio_exists() protoc_gen_mypy = _find_protoc_gen_mypy() - with PythonPath(grpcio_install_loc): - from grpc_tools import protoc - builtin_protos = pkg_resources.resource_filename('grpc_tools', '_proto') - args = ( - [sys.executable] + # expecting to be called from command line - ['--proto_path=%s' % builtin_protos] + - ['--proto_path=%s' % d - for d in proto_dirs] + ['--python_out=%s' % PYTHON_OUTPUT_PATH] + - ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + - # new version of mypy-protobuf converts None to zero default value - # and remove Optional from the param type annotation. This causes - # some mypy errors. So to mitigate and fall back to old behavior, - # use `relax_strict_optional_primitives` flag. more at - # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-too-long - ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH - ] + - # TODO(robertwb): Remove the prefix once it's the default. - ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + proto_files) - - LOG.info('Regenerating Python proto definitions (%s).' 
% regenerate_reason) - ret_code = protoc.main(args) - if ret_code: - raise RuntimeError( - 'Protoc returned non-zero status (see logs for details): ' - '%s' % ret_code) + from grpc_tools import protoc + builtin_protos = pkg_resources.resource_filename('grpc_tools', '_proto') + args = ( + [sys.executable] + # expecting to be called from command line + ['--proto_path=%s' % builtin_protos] + + ['--proto_path=%s' % d + for d in proto_dirs] + ['--python_out=%s' % PYTHON_OUTPUT_PATH] + + ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + + # new version of mypy-protobuf converts None to zero default value + # and remove Optional from the param type annotation. This causes + # some mypy errors. So to mitigate and fall back to old behavior, + # use `relax_strict_optional_primitives` flag. more at + # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-too-long + ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH + ] + + # TODO(robertwb): Remove the prefix once it's the default. + ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + proto_files) + + LOG.info('Regenerating Python proto definitions (%s).' % regenerate_reason) + ret_code = protoc.main(args) + if ret_code: + raise RuntimeError( + 'Protoc returned non-zero status (see logs for details): ' + '%s' % ret_code) # copy resource files for path in MODEL_RESOURCES: @@ -548,7 +507,7 @@ def generate_proto_files(force=False): # force relative import paths for proto files compiled_import_re = re.compile('^from (.*) import (.*)$') for file_path in find_by_ext(PYTHON_OUTPUT_PATH, - ('_pb2.py', '_pb2_grpc.py', '_pb2.pyi')): + ('_pb2.py', '_pb2_grpc.py', '_pb2.pyi')): proto_packages.add(os.path.dirname(file_path)) lines = [] with open(file_path, encoding='utf-8') as f: @@ -566,12 +525,14 @@ def generate_proto_files(force=False): f.writelines(lines) generate_init_files_lite(PYTHON_OUTPUT_PATH) - with PythonPath(grpcio_install_loc): - for proto_package in proto_packages: - generate_urn_files(proto_package, PYTHON_OUTPUT_PATH) + for proto_package in proto_packages: + generate_urn_files(proto_package, PYTHON_OUTPUT_PATH) generate_init_files_full(PYTHON_OUTPUT_PATH) if __name__ == '__main__': - generate_proto_files(force=True) + parser = argparse.ArgumentParser() + parser.add_argument('--no-force', dest='force', action='store_false') + args = parser.parse_args() + generate_proto_files(force=args.force) diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml new file mode 100644 index 000000000000..d185c45f6191 --- /dev/null +++ b/sdks/python/pyproject.toml @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# since we rely on setuptools and according to https://peps.python.org/pep-0518/#build-system-table +# these are the minimum requirements for the build system to execute. +[build-system] +requires = [ + "setuptools", + "wheel>=0.36.0", + "grpcio-tools==1.53.0", + "mypy-protobuf==3.5.0", + # Avoid https://github.com/pypa/virtualenv/issues/2006 + "distlib==0.3.7", + # Numpy headers + "numpy>=1.14.3,<1.25", # Update setup.py as well. + # having cython here will create wheels that are platform dependent. + "cython==0.29.36", +] + + +# legacy installation is needed to generate `apache_beam.portability.api` package. +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/sdks/python/scripts/run_pytest.sh b/sdks/python/scripts/run_pytest.sh index 01f2318164c4..ad35b48972b6 100755 --- a/sdks/python/scripts/run_pytest.sh +++ b/sdks/python/scripts/run_pytest.sh @@ -42,10 +42,10 @@ echo "posargs: $posargs" # Run with pytest-xdist and without. pytest -o junit_suite_name=${envname} \ - --junitxml=pytest_${envname}.xml -m 'not no_xdist' -n 6 ${pytest_args} --pyargs ${posargs} + --junitxml=pytest_${envname}.xml -m 'not no_xdist' -n 6 --import-mode=importlib ${pytest_args} --pyargs ${posargs} status1=$? pytest -o junit_suite_name=${envname}_no_xdist \ - --junitxml=pytest_${envname}_no_xdist.xml -m 'no_xdist' ${pytest_args} --pyargs ${posargs} + --junitxml=pytest_${envname}_no_xdist.xml -m 'no_xdist' --import-mode=importlib ${pytest_args} --pyargs ${posargs} status2=$? # Exit with error if no tests were run in either suite (status code 5). diff --git a/sdks/python/scripts/run_tox.sh b/sdks/python/scripts/run_tox.sh index ebbacf5494ea..ac60f26b32ba 100755 --- a/sdks/python/scripts/run_tox.sh +++ b/sdks/python/scripts/run_tox.sh @@ -53,12 +53,21 @@ if [[ "$JENKINS_HOME" != "" ]]; then export PY_COLORS=1 fi -if [[ ! -z $2 ]]; then +# Determine if the second argument is SDK_LOCATION or posargs +if [[ -f "$1" ]]; then # Check if the argument corresponds to a file SDK_LOCATION="$1" - shift; - tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" --installpkg "$SDK_LOCATION" -- "$@" -else - tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" + shift +fi + +# If SDK_LOCATION is identified and there are still arguments left, those are posargs. +if [[ ! -z "$SDK_LOCATION" ]]; then + if [[ $# -gt 0 ]]; then # There are posargs + tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" --installpkg "$SDK_LOCATION" -- "$@" + else + tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" --installpkg "$SDK_LOCATION" + fi +else # No SDK_LOCATION; all arguments are posargs + tox -c tox.ini run --recreate -e "$TOX_ENVIRONMENT" -- "$@" fi exit_code=$? diff --git a/sdks/python/setup.py b/sdks/python/setup.py index ca585ccf7167..4a05544526fc 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -18,6 +18,7 @@ """Apache Beam SDK for Python setup file.""" import os +import subprocess import sys import warnings # Pylint and isort disagree here. @@ -62,7 +63,6 @@ def get_project_path(self): return os.path.join(project_path, to_filename(ei_cmd.egg_name)) def run(self): - import subprocess args = ['mypy', self.get_project_path()] result = subprocess.call(args) if result != 0: @@ -155,12 +155,18 @@ def cythonize(*args, **kwargs): # We must generate protos after setup_requires are installed.
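Note: the `run_tox.sh` rewrite above distinguishes an SDK artifact from pytest posargs by checking whether the leading argument names an existing file, so the same script now accepts an sdist tarball, a wheel, or no artifact at all. A Python sketch of that dispatch logic (the helper name is hypothetical; the real script is shell):

```python
import os
import subprocess


def run_tox(tox_env, *args):
    # Hypothetical helper mirroring scripts/run_tox.sh: a leading argument
    # that names an existing file is treated as the SDK tarball/wheel to
    # install via --installpkg; anything left over is pytest posargs.
    cmd = ['tox', '-c', 'tox.ini', 'run', '--recreate', '-e', tox_env]
    rest = list(args)
    if rest and os.path.isfile(rest[0]):
        cmd += ['--installpkg', rest.pop(0)]
    if rest:
        cmd += ['--'] + rest
    subprocess.check_call(cmd)
```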
def generate_protos_first(): try: - # pylint: disable=wrong-import-position - import gen_protos - gen_protos.generate_proto_files() - - except ImportError: - warnings.warn("Could not import gen_protos, skipping proto generation.") + # Pyproject.toml builds happen in isolated environments. In those envs, + # gen_protos cannot be imported, so we run a subprocess call instead. + cwd = os.path.abspath(os.path.dirname(__file__)) + out = subprocess.run([ + sys.executable, + os.path.join(cwd, 'gen_protos.py'), + '--no-force' + ], capture_output=True, check=True) + print(out.stdout) + except subprocess.CalledProcessError as err: + raise RuntimeError( + 'Could not generate protos due to error: %s' % err.stderr) def get_portability_package_data(): @@ -188,6 +194,27 @@ def get_portability_package_data(): # structure must exist before the call to setuptools.find_packages() # executes below. generate_protos_first() + + # generate cythonize extensions only if we are building a wheel or + # building an extension or running in editable mode. + cythonize_cmds = ('bdist_wheel', 'build_ext', 'editable_wheel') + if any(cmd in sys.argv for cmd in cythonize_cmds): + extensions = cythonize([ + 'apache_beam/**/*.pyx', + 'apache_beam/coders/coder_impl.py', + 'apache_beam/metrics/cells.py', + 'apache_beam/metrics/execution.py', + 'apache_beam/runners/common.py', + 'apache_beam/runners/worker/logger.py', + 'apache_beam/runners/worker/opcounters.py', + 'apache_beam/runners/worker/operations.py', + 'apache_beam/transforms/cy_combiners.py', + 'apache_beam/transforms/stats.py', + 'apache_beam/utils/counters.py', + 'apache_beam/utils/windowed_value.py', + ]) + else: + extensions = [] # Keep all dependencies inlined in the setup call, otherwise Dependabot won't # be able to parse it. setuptools.setup( @@ -213,21 +240,7 @@ def get_portability_package_data(): *get_portability_package_data() ] }, - ext_modules=cythonize([ - 'apache_beam/**/*.pyx', - 'apache_beam/coders/coder_impl.py', - 'apache_beam/metrics/cells.py', - 'apache_beam/metrics/execution.py', - 'apache_beam/runners/common.py', - 'apache_beam/runners/worker/logger.py', - 'apache_beam/runners/worker/opcounters.py', - 'apache_beam/runners/worker/operations.py', - 'apache_beam/transforms/cy_combiners.py', - 'apache_beam/transforms/stats.py', - 'apache_beam/utils/counters.py', - 'apache_beam/utils/windowed_value.py', - ], - language_level=3), + ext_modules=extensions, install_requires=[ 'crcmod>=1.7,<2.0', 'orjson>=3.9.7,<4', @@ -250,7 +263,7 @@ def get_portability_package_data(): 'js2py>=0.74,<1', # numpy can have breaking changes in minor versions. # Use a strict upper bound. - 'numpy>=1.14.3,<1.25.0', # Update build-requirements.txt as well. + 'numpy>=1.14.3,<1.25.0', # Update pyproject.toml as well.
'objsize>=0.6.1,<0.7.0', 'packaging>=22.0', 'pymongo>=3.8.0,<5.0.0', diff --git a/sdks/python/test-suites/tox/common.gradle b/sdks/python/test-suites/tox/common.gradle index ee183dff4064..3fdd0c0c553b 100644 --- a/sdks/python/test-suites/tox/common.gradle +++ b/sdks/python/test-suites/tox/common.gradle @@ -29,18 +29,12 @@ test.dependsOn "testPy${pythonVersionSuffix}Cloud" // toxTask "testPy${pythonVersionSuffix}Dask", "py${pythonVersionSuffix}-dask", "${posargs}" // test.dependsOn "testPy${pythonVersionSuffix}Dask" -toxTask "testPy${pythonVersionSuffix}Cython", "py${pythonVersionSuffix}-cython", "${posargs}" -test.dependsOn "testPy${pythonVersionSuffix}Cython" toxTask "testPy38CloudCoverage", "py38-cloudcoverage", "${posargs}" test.dependsOn "testPy38CloudCoverage" project.tasks.register("preCommitPy${pythonVersionSuffix}") { - // Since codecoverage reports will always be generated for py38, - // all tests will be exercised. - if (pythonVersionSuffix.equals('38')) { - dependsOn = ["testPy38Cython"] - } else { - dependsOn = ["testPy${pythonVersionSuffix}Cloud", "testPy${pythonVersionSuffix}Cython"] - } + // Since codecoverage reports will always be generated for py38, + // all tests will be exercised. + dependsOn = ["testPy${pythonVersionSuffix}Cloud", "testPython${pythonVersionSuffix}"] } \ No newline at end of file diff --git a/sdks/python/test-suites/tox/py310/build.gradle b/sdks/python/test-suites/tox/py310/build.gradle index ea10fde831c6..f1e40a17951f 100644 --- a/sdks/python/test-suites/tox/py310/build.gradle +++ b/sdks/python/test-suites/tox/py310/build.gradle @@ -28,5 +28,3 @@ pythonVersion = '3.10' apply from: "../common.gradle" -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. -testPy310Cython.mustRunAfter testPython310, testPy310Cloud diff --git a/sdks/python/test-suites/tox/py311/build.gradle b/sdks/python/test-suites/tox/py311/build.gradle index 1bb3766500bb..fabf9fd4365a 100644 --- a/sdks/python/test-suites/tox/py311/build.gradle +++ b/sdks/python/test-suites/tox/py311/build.gradle @@ -28,5 +28,3 @@ pythonVersion = '3.11' apply from: "../common.gradle" -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. -testPy311Cython.mustRunAfter testPython311, testPy311Cloud diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index bc4aa99c79b4..b1ed5f88c7c9 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -43,8 +43,6 @@ lint.dependsOn mypyPy38 apply from: "../common.gradle" -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. -testPy38Cython.mustRunAfter testPython38, testPy38CloudCoverage // PyCoverage Precommit runs test suites that evaluate test coverage and compatibility of diff --git a/sdks/python/test-suites/tox/py39/build.gradle b/sdks/python/test-suites/tox/py39/build.gradle index 380cc1486daa..5bb73b60a5d2 100644 --- a/sdks/python/test-suites/tox/py39/build.gradle +++ b/sdks/python/test-suites/tox/py39/build.gradle @@ -27,6 +27,3 @@ applyPythonNature() pythonVersion = '3.9' apply from: "../common.gradle" - -// TODO(https://github.com/apache/beam/issues/20051): Remove this once tox uses isolated builds. 
-testPy39Cython.mustRunAfter testPython39, testPy39Cloud diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 1caf25caf080..1e797d96074f 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -17,7 +17,7 @@ [tox] # new environments will be excluded by default unless explicitly added to envlist. -envlist = py38,py39,py310,py311,py38-{cloud,cython,docs,lint,mypy,cloudcoverage,dask},py39-{cloud,cython},py310-{cloud,cython,dask},py311-{cloud,cython,dask},whitespacelint +envlist = py38,py39,py310,py311,py38-{cloud,docs,lint,mypy,cloudcoverage,dask},py39-{cloud},py310-{cloud,dask},py311-{cloud,dask},whitespacelint toxworkdir = {toxinidir}/target/{env:ENV_NAME:.tox} [pycodestyle] @@ -44,9 +44,6 @@ allowlist_externals = curl ./codecov chmod -deps = - cython: cython==0.29.33 - -r build-requirements.txt setenv = RUN_SKIPPED_PY3_TESTS=0 # Use an isolated tmp dir for tests that get slowed down by scanning /tmp. @@ -67,6 +64,7 @@ commands_pre = bash {toxinidir}/scripts/run_tox_cleanup.sh commands_post = bash {toxinidir}/scripts/run_tox_cleanup.sh + commands = false {envname} is misconfigured [testenv:py{38,39,310,311}] @@ -81,28 +79,18 @@ commands = install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages} list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze -[testenv:py{38,39,310,311}-cython] -# cython tests are only expected to work in linux (2.x and 3.x) -# If we want to add other platforms in the future, it should be: -# `platform = linux2|darwin|...` -# See https://docs.python.org/2/library/sys.html#sys.platform for platform codes -platform = linux -commands = - # TODO(https://github.com/apache/beam/issues/20051): Remove this build_ext invocation once local source no longer - # shadows the installed apache_beam. - python setup.py build_ext --inplace - python apache_beam/examples/complete/autocomplete_test.py - bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" - [testenv:py{38,39,310,311}-cloud] +; extras = test,gcp,interactive,dataframe,aws,azure extras = test,gcp,interactive,dataframe,aws,azure commands = + python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{38,39,310,311}-dask] extras = test,dask commands = bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" + [testenv:py38-cloudcoverage] deps = pytest-cov==3.0.0 @@ -124,7 +112,6 @@ commands = setenv = # keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml deps = - -r build-requirements.txt astroid<2.17.0,>=2.15.6 pycodestyle==2.8.0 pylint==2.17.5 @@ -143,7 +130,6 @@ commands = [testenv:py38-mypy] deps = - -r build-requirements.txt mypy==0.790 dask==2022.01.0 distributed==2022.01.0 @@ -173,7 +159,6 @@ commands = # Used by hdfs_integration_test.sh. Do not run this directly, as it depends on # nodes defined in hdfs_integration_test/docker-compose.yml. deps = - -r build-requirements.txt holdup==1.8.0 extras = gcp @@ -206,7 +191,6 @@ commands_pre = # Do not run this directly, as it depends on nodes defined in # azure/integration_test/docker-compose.yml. 
deps = - -r build-requirements.txt extras = azure passenv = REQUESTS_CA_BUNDLE @@ -335,7 +319,6 @@ commands = [testenv:py{38,39,310,311}-pytorch-{19,110,111,112,113}] deps = - -r build-requirements.txt 19: torch>=1.9.0,<1.10.0 110: torch>=1.10.0,<1.11.0 111: torch>=1.11.0,<1.12.0 @@ -353,7 +336,6 @@ commands = [testenv:py{38,39,310}-pytorch-200] deps = - -r build-requirements.txt 200: torch>=2.0.0,<2.1.0 extras = test,gcp # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests. @@ -387,7 +369,6 @@ commands = [testenv:py{38,39,310}-tensorflow-212] deps = - -r build-requirements.txt 212: tensorflow>=2.12rc1,<2.13 extras = test,gcp commands = @@ -399,7 +380,6 @@ commands = [testenv:py{38,39,310}-xgboost-{160,170}] deps = - -r build-requirements.txt 160: xgboost>=1.6.0,<1.7.0 datatable==1.0.0 @@ -416,7 +396,6 @@ commands = [testenv:py{38,39,310,311}-transformers-{428,429,430}] deps = - -r build-requirements.txt 428: transformers>=4.28.0,<4.29.0 429: transformers>=4.29.0,<4.30.0 430: transformers>=4.30.0,<4.31.0 @@ -434,7 +413,6 @@ commands = [testenv:py{38,311}-vertex-ai] deps = - -r build-requirements.txt tensorflow==2.12.0 extras = test,gcp commands = diff --git a/website/www/site/content/en/documentation/ml/multi-language-inference.md b/website/www/site/content/en/documentation/ml/multi-language-inference.md index 0d7a972e0765..1480b37ab484 100644 --- a/website/www/site/content/en/documentation/ml/multi-language-inference.md +++ b/website/www/site/content/en/documentation/ml/multi-language-inference.md @@ -99,7 +99,7 @@ Finally, we postprocess the model predictions in the `Postprocess` DoFn. The `Po The custom Python code needs to be written in a local package and be compiled as a tarball. This package can then be used by the Java pipeline. The following example shows how to compile the Python package into a tarball: ```bash - python setup.py sdist + pip install --upgrade build && python -m build --sdist ``` In order to run this, a `setup.py` is required. The path to the tarball will be used as an argument in the pipeline options of the Java pipeline. diff --git a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md index 378032ab6b58..c99c0b9c7cf8 100644 --- a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md @@ -66,16 +66,17 @@ If your pipeline uses packages that are not available publicly (e.g. packages th This command lists all packages that are installed on your machine, regardless of where they were installed from. -2. Run your pipeline with the following command-line option: + 1. Run your pipeline with the following command-line option: - --extra_package /path/to/package/package-name + --extra_package /path/to/package/package-name - where package-name is the package's tarball. If you have the `setup.py` for that - package then you can build the tarball with the following command: + where package-name is the package's tarball. You can build the package tarball using a command line tool called [build](https://setuptools.pypa.io/en/latest/userguide/quickstart.html#install-build). - python setup.py sdist + # Install build using pip + pip install --upgrade build + python -m build --sdist - See the [sdist documentation](https://docs.python.org/3/distutils/sourcedist.html) for more details on this command. 
+ See the [build documentation](https://pypa-build.readthedocs.io/en/latest/index.html) for more details on this command. ## Multiple File Dependencies
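Note: with the docs above pointing users at `python -m build --sdist`, the resulting tarball is consumed exactly as before. A hypothetical end-to-end usage of the documented `--extra_package` option (the package name and path are made up for illustration):

```python
# Illustrative only: after `python -m build --sdist` produces
# dist/mypackage-1.0.tar.gz, hand it to Beam at pipeline submission.
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(['--extra_package=dist/mypackage-1.0.tar.gz'])
with beam.Pipeline(options=options) as p:
    # The staged tarball is installed on remote workers before execution.
    _ = p | beam.Create(['a', 'b']) | beam.Map(print)
```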