From 75764072f9c88de497b793bd40749330bd7d86cf Mon Sep 17 00:00:00 2001
From: iefgnoix
Date: Thu, 23 May 2024 23:12:43 +0000
Subject: [PATCH] merge the test script

Fold _test_requiring_torch_cuda.yml into _test.yml so that a single
reusable workflow serves both test flavours.

* _test.yml gains a boolean input `run-tests-requiring-torch-cuda`
  (default: false). When it is true, the workflow additionally downloads
  the `torch-with-cuda-xla-with-cuda-wheels` artifact into /tmp/wheels/
  and runs test_operations.py and test_dynamo.py with PJRT_DEVICE=CUDA
  instead of run_tests.sh.
* The regular `Test` step is guarded by the negated input. The negation
  is written inside the expression, `${{ !inputs... }}`, because a `!`
  placed in front of `${{ }}` is read by YAML as a tag indicator and is
  not applied to the condition.
* build_and_test.yml points `test-cuda-with-pytorch-cuda-enabled` at
  _test.yml, adds `build-torch-xla` to its `needs`, and sets
  `install-cuda-plugin` and `run-tests-requiring-torch-cuda` to true.
* _test_requiring_torch_cuda.yml is deleted.
---
 .github/workflows/_test.yml                   |  22 ++++
 .../workflows/_test_requiring_torch_cuda.yml  | 118 ------------------
 .github/workflows/build_and_test.yml          |   6 +-
 3 files changed, 26 insertions(+), 120 deletions(-)
 delete mode 100644 .github/workflows/_test_requiring_torch_cuda.yml

diff --git a/.github/workflows/_test.yml b/.github/workflows/_test.yml
index 8a454cc075bc..59a86db74d99 100644
--- a/.github/workflows/_test.yml
+++ b/.github/workflows/_test.yml
@@ -28,6 +28,11 @@ on:
         type: boolean
         default: false
         description: Whether to install CUDA plugin package
+      run-tests-requiring-torch-cuda:
+        required: false
+        type: boolean
+        default: false
+        description: Whether to run tests that requires torch with CUDA enabled
 
     secrets:
       gcloud-service-key:
@@ -87,6 +92,14 @@ jobs:
         with:
           name: torch-xla-wheels
           path: /tmp/wheels/
+      # The step below will overwrite the torch wheel
+      # if run-tests-requiring-torch-cuda is true.
+      - name: Fetch torch CUDA wheel
+        uses: actions/download-artifact@v4
+        with:
+          name: torch-with-cuda-xla-with-cuda-wheels
+          path: /tmp/wheels/
+        if: ${{ inputs.run-tests-requiring-torch-cuda }}
       - name: Fetch CPP test binaries
         uses: actions/download-artifact@v4
         with:
@@ -159,6 +172,15 @@ jobs:
       - name: Test
         shell: bash
         run: pytorch/xla/.github/scripts/run_tests.sh pytorch/ pytorch/xla/ $USE_COVERAGE
+        # Keep the negation inside ${{ }}: a bare leading `!` is YAML tag syntax.
+        if: ${{ !inputs.run-tests-requiring-torch-cuda }}
+      - name: Test that requires torch with CUDA enabled
+        shell: bash
+        run: |
+          set -xue
+          PJRT_DEVICE=CUDA python pytorch/xla/test/test_operations.py -v
+          PJRT_DEVICE=CUDA python pytorch/xla/test/dynamo/test_dynamo.py -v
+        if: ${{ inputs.run-tests-requiring-torch-cuda }}
       - name: Upload coverage results
         if: ${{ inputs.collect-coverage }}
         shell: bash
diff --git a/.github/workflows/_test_requiring_torch_cuda.yml b/.github/workflows/_test_requiring_torch_cuda.yml
deleted file mode 100644
index 40f1205c7ba6..000000000000
--- a/.github/workflows/_test_requiring_torch_cuda.yml
+++ /dev/null
@@ -1,118 +0,0 @@
-name: xla-test-requiring-torch-cuda
-on:
-  workflow_call:
-    inputs:
-      dev-image:
-        required: true
-        type: string
-        description: Base image for builds
-      runner:
-        required: false
-        type: string
-        description: Runner type for the test
-        default: linux.12xlarge
-      collect-coverage:
-        required: false
-        type: boolean
-        description: Set to true to collect coverage information
-        default: false
-      timeout-minutes:
-        required: false
-        type: number
-        default: 270
-        description: |
-          Set the maximum (in minutes) how long the workflow should take to finish
-            timeout-minutes:
-
-    secrets:
-      gcloud-service-key:
-        required: true
-        description: Secret to access Bazel build cache
-jobs:
-  test:
-    runs-on: ${{ inputs.runner }}
-    container:
-      image: ${{ inputs.dev-image }}
-      options: "--gpus all --shm-size 16g"
-    timeout-minutes: ${{ inputs.timeout-minutes }}
-    env:
-      GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
-      GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json
-      USE_COVERAGE: ${{ inputs.collect-coverage && '1' || '0' }}
-      BAZEL_JOBS: 16
-      BAZEL_REMOTE_CACHE: 1
-    steps:
-      # See https://github.com/actions/checkout/issues/1014#issuecomment-1906802802
-      - name: Clean up workspace
-        run: |
-          ls -la
-          rm -rvf ${GITHUB_WORKSPACE}/*
-      - name: Setup gcloud
-        shell: bash
-        run: |
-          echo "${GCLOUD_SERVICE_KEY}" > $GOOGLE_APPLICATION_CREDENTIALS
-      - name: Fetch torch/torch_xla/torchvision wheels with CUDA enabled
-        uses: actions/download-artifact@v4
-        with:
-          name: torch-with-cuda-xla-with-cuda-wheels
-          path: /tmp/wheels/
-          pattern: torch-*.whl
-      - name: Fetch CUDA plugin
-        uses: actions/download-artifact@v4
-        with:
-          name: cuda-plugin
-          path: /tmp/wheels/
-      - name: Setup CUDA environment
-        shell: bash
-        run: |
-          # TODO: Make PJRT_DEVICE=CPU work with XLA_REGISTER_INSTALLED_PLUGINS=1
-          echo "XLA_REGISTER_INSTALLED_PLUGINS=1" >> $GITHUB_ENV
-
-          echo "PATH=$PATH:/usr/local/cuda-12.1/bin" >> $GITHUB_ENV
-          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64" >> $GITHUB_ENV
-      - name: Check GPU
-        run: nvidia-smi
-      - name: Install wheels
-        shell: bash
-        run: |
-          pip install /tmp/wheels/*.whl
-          # TODO: Add these in setup.py
-          pip install fsspec
-          pip install rich
-
-          echo "Import check..."
-          python -c "import torch, torch_xla, torchvision"
-          echo "Import check done."
-          echo "Check if CUDA is available for PyTorch..."
-          python -c "import torch; assert torch.cuda.is_available()"
-          echo "CUDA is available for PyTorch."
-      - name: Record PyTorch commit
-        run: |
-          # Don't just pipe output in shell because imports may do extra logging
-          python -c "
-          import torch_xla.version
-          with open('$GITHUB_ENV', 'a') as f:
-            f.write(f'PYTORCH_COMMIT={torch_xla.version.__torch_gitrev__}\n')
-          "
-      - name: Checkout PyTorch Repo
-        uses: actions/checkout@v4
-        with:
-          repository: pytorch/pytorch
-          path: pytorch
-          ref: ${{ env.PYTORCH_COMMIT }}
-      - name: Checkout PyTorch/XLA Repo
-        uses: actions/checkout@v4
-        with:
-          path: pytorch/xla
-      - name: Extra CI deps
-        shell: bash
-        run: |
-          set -x
-
-          pip install expecttest unittest-xml-reporting
-      - name: Test
-        shell: bash
-        run: |
-          set -xue
-          PJRT_DEVICE=CUDA python pytorch/xla/test/test_operations.py -v
-          PJRT_DEVICE=CUDA python pytorch/xla/test/dynamo/test_dynamo.py -v
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 71b86985911e..f0c0545f3602 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -71,13 +71,15 @@ jobs:
 
   test-cuda-with-pytorch-cuda-enabled:
     name: "GPU tests with PyTorch CUDA enabled"
-    uses: ./.github/workflows/_test_requiring_torch_cuda.yml
-    needs: [build-torch-with-cuda-xla-with-cuda, build-cuda-plugin]
+    uses: ./.github/workflows/_test.yml
+    needs: [build-torch-with-cuda-xla-with-cuda, build-torch-xla, build-cuda-plugin]
     with:
       dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
       runner: linux.8xlarge.nvidia.gpu
       timeout-minutes: 300
       collect-coverage: false
+      install-cuda-plugin: true
+      run-tests-requiring-torch-cuda: true
     secrets:
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
 