Skip to content

Commit

Permalink
Create a separate CI tab for triton tests
Browse files Browse the repository at this point in the history
  • Loading branch information
bhavya01 committed Apr 16, 2024
1 parent ade444d commit cb0bb85
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 14 deletions.
24 changes: 21 additions & 3 deletions .circleci/triton.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,34 @@

# Build PyTorch and PyTorch/XLA with CUDA enabled ahead of the Triton tests.
#
# NOTE(review): this text comes from a diff view whose +/- markers were lost,
# so removed and added lines may be interleaved. XLA_DIR is assigned twice
# with the same value and TORCH_CUDA_ARCH_LIST is exported twice; confirm
# which copy of each pair belongs in the final script.

# -e: abort on the first failing command; -x: echo every command as it runs.
set -ex

# Load the environment file and the shared CI helpers. The helper functions
# called below (clone_pytorch, checkout_torch_pin_if_available,
# install_deps_pytorch_xla, apply_patches, build_torch_xla) are not defined
# in this file; presumably they come from common.sh -- confirm.
source ./xla_env
source .circleci/common.sh

# Checkout locations: PyTorch under /tmp, with the XLA tree nested inside it.
PYTORCH_DIR=/tmp/pytorch
XLA_DIR=$PYTORCH_DIR/xla
clone_pytorch $PYTORCH_DIR $XLA_DIR

# Use bazel cache
USE_CACHE=1

# Everything from here on runs from the PyTorch checkout (no matching popd is
# visible in this hunk).
pushd $PYTORCH_DIR
# NOTE(review): "8.6" is presumably the compute capability of the CI GPU --
# confirm against the runner type.
export TORCH_CUDA_ARCH_LIST="8.6"
checkout_torch_pin_if_available

# Explicit failure exit when dependency installation reports an error.
if ! install_deps_pytorch_xla $XLA_DIR $USE_CACHE; then
exit 1
fi

apply_patches

# Reset the file status flags on stdout (fd 1) to 0, which clears O_NONBLOCK
# among others. Presumably this keeps long build logs from failing on a
# non-blocking stdout -- confirm.
python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)"

# Put the CUDA 12.1 binaries and libraries on the search paths, then build and
# install PyTorch with CUDA support.
export PATH=$PATH:/usr/local/cuda-12.1/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.1/lib64
export USE_CUDA=1
export TORCH_CUDA_ARCH_LIST='8.6'
# Expected to run from $PYTORCH_DIR (see pushd above), assuming the helpers
# called in between leave the working directory unchanged -- confirm.
python setup.py install

# Build torch_xla with CUDA enabled. "compute_86" mirrors the 8.6 value used
# for TORCH_CUDA_ARCH_LIST above.
XLA_DIR=$PYTORCH_DIR/xla
export TF_CUDA_COMPUTE_CAPABILITIES="compute_86"
export XLA_CUDA=1
build_torch_xla $XLA_DIR

# Path of the gcloud credentials file inside the XLA checkout; exported for
# whatever consumes it later in the script (not visible in this hunk).
export GCLOUD_SERVICE_KEY_FILE="$XLA_DIR/default_credentials.json"
Expand Down
11 changes: 0 additions & 11 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,6 @@ jobs:
collect-coverage: false # TODO(yeounoh) separate from CPU coverage metrics
secrets:
gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

# Triton test job: calls the reusable workflow ./.github/workflows/_triton.yml
# once the `build` job has finished, passing on the docker image that `build`
# published as an output. The commit summary above reports this hunk as 11
# deletions, so this job is the part being removed from build_and_test.yml.
test-triton:
name: "Triton tests"
uses: ./.github/workflows/_triton.yml
needs: build
with:
docker-image: ${{ needs.build.outputs.docker-image }}
runner: linux.g5.4xlarge.nvidia.gpu
timeout-minutes: 300
secrets:
gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

test-tpu:
name: "TPU tests"
Expand Down
57 changes: 57 additions & 0 deletions .github/workflows/triton.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Standalone workflow for the Triton tests.
#
# Runs only when the Triton integration sources change (see `paths` below), or
# when started manually through workflow_dispatch. The job starts a CUDA 12.1
# base image on a GPU runner, copies the checkout into the container and runs
# .circleci/triton.sh there.
on:
  pull_request:
    branches:
      - master
      - r[0-9]+.[0-9]+
    paths:
      - 'torch_xla/experimental/torch_triton.py'
      - 'torch_xla/csrc/triton/**'
  push:
    branches:
      - master
      - r[0-9]+.[0-9]+
    paths:
      - 'torch_xla/experimental/torch_triton.py'
      - 'torch_xla/csrc/triton/**'
  workflow_dispatch:

# One active run per pull request (or per commit SHA when there is no PR);
# a newer run cancels the one in progress. This workflow defines no `schedule`
# trigger, so the last component of the group key is always `false`.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  test-triton:
    runs-on: linux.g5.4xlarge.nvidia.gpu
    timeout-minutes: 300
    env:
      # Image the build and tests run in, and the directory inside the
      # container that receives the checkout.
      DOCKER_IMAGE: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1
      WORKDIR: /triton_dir
    steps:
      - name: Setup Linux
        uses: pytorch/test-infra/.github/actions/setup-linux@main
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
            Tests are done inside the container, to start an interactive session run:
            docker exec -it $(docker container ps --format '{{.ID}}') bash
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Download docker image from GCR
        shell: bash
        run: docker pull "${DOCKER_IMAGE}"
      # Starts a detached container and stores its ID in the job environment
      # under the name `pid` (a container ID, not a process ID) so that later
      # steps can address it.
      # NOTE(review): the chown runs before `docker cp`, so the copied files
      # may end up owned by root rather than jenkins -- confirm this is
      # intended.
      - name: Start the container
        shell: bash
        run: |
          pid=$(docker run --privileged --shm-size=16g --net=host --gpus all -it -d -w "${WORKDIR}" "${DOCKER_IMAGE}")
          docker exec -u jenkins "${pid}" sudo chown -R jenkins "${WORKDIR}"
          docker cp "${GITHUB_WORKSPACE}/." "$pid:$WORKDIR"
          echo "pid=${pid}" >> "${GITHUB_ENV}"
      # `pid` is read back from the job environment written by the previous
      # step. The script path is relative to the container working directory
      # set with `-w` above.
      - name: Build and Test
        shell: bash
        run: |
          docker exec --privileged -u jenkins "${pid}" bash -c ".circleci/triton.sh"
      # Always runs, including after a failed build or test step.
      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()

0 comments on commit cb0bb85

Please sign in to comment.