diff --git a/.gitmodules b/.gitmodules
index 1a6ef12df7..40fdd49a0f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "submodules/lit-llama"]
 	path = submodules/lit-llama
 	url = https://github.com/Lightning-AI/lit-llama.git
+[submodule "submodules/FBGEMM"]
+	path = submodules/FBGEMM
+	url = https://github.com/pytorch/FBGEMM.git
diff --git a/docker/build-torchbench-nightly-docker.sh b/docker/build-torchbench-nightly-docker.sh
index 5194affc4c..8f53442dd9 100644
--- a/docker/build-torchbench-nightly-docker.sh
+++ b/docker/build-torchbench-nightly-docker.sh
@@ -1 +1,4 @@
-docker build . -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest
+TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main}
+
+docker build . -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest \
+    --build-arg TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH}
diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile
index 2d67106417..98f012def7 100644
--- a/docker/gcp-a100-runner-dind.dockerfile
+++ b/docker/gcp-a100-runner-dind.dockerfile
@@ -28,9 +28,9 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace
 
 # We assume that the host NVIDIA driver binaries and libraries are mapped to the docker filesystem
 # Use the CUDA installation scripts from pytorch/builder
+# Install CUDA 12.4 only to reduce docker size
 RUN cd /workspace; git clone https://github.com/pytorch/builder.git
-RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_118; prune_118'
-RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_121; prune_121'
+RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_124; prune_124'
 
 # Install miniconda
 RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh
diff --git a/docker/torchbench-nightly.dockerfile b/docker/torchbench-nightly.dockerfile
index c45f33a2e3..f6f574af8e 100644
--- a/docker/torchbench-nightly.dockerfile
+++ b/docker/torchbench-nightly.dockerfile
@@ -8,10 +8,11 @@ ENV SETUP_SCRIPT=/workspace/setup_instance.sh
 ARG TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main}
 ARG FORCE_DATE=${FORCE_DATE}
 
-# Setup Conda env and CUDA
-RUN git clone -b "${TORCHBENCH_BRANCH}" --single-branch \
-    https://github.com/pytorch/benchmark /workspace/benchmark
+# Checkout Torchbench and submodules
+RUN git clone --recurse-submodules -b "${TORCHBENCH_BRANCH}" --single-branch \
+    https://github.com/pytorch/benchmark /workspace/benchmark
 
+# Setup conda env and CUDA
 RUN cd /workspace/benchmark && \
     . ${SETUP_SCRIPT} && \
     python ./utils/python_utils.py --create-conda-env ${CONDA_ENV} && \
@@ -45,6 +46,12 @@ RUN cd /workspace/benchmark && \
     . ${SETUP_SCRIPT} && \
     python utils/cuda_utils.py --install-torchbench-deps
 
+# Install FBGEMM GENAI
+RUN cd /workspace/benchmark && \
+    . ${SETUP_SCRIPT} && \
+    python install.py --userbenchmark triton --fbgemm
+
+# Install Torchbench models
 RUN cd /workspace/benchmark && \
     . ${SETUP_SCRIPT} && \
     python install.py
diff --git a/install.py b/install.py
index 7486c22209..ea825e6f7c 100644
--- a/install.py
+++ b/install.py
@@ -46,7 +46,7 @@ def pip_install_requirements(requirements_txt="requirements.txt"):
         choices=list_userbenchmarks(),
         help="Install requirements for optional components.",
     )
-    args = parser.parse_args()
+    args, extra_args = parser.parse_known_args()
 
     os.chdir(os.path.realpath(os.path.dirname(__file__)))
 
@@ -68,9 +68,11 @@ def pip_install_requirements(requirements_txt="requirements.txt"):
     if args.userbenchmark:
         # Install userbenchmark dependencies if exists
        userbenchmark_dir = REPO_ROOT.joinpath("userbenchmark", args.userbenchmark)
+        cmd = [sys.executable, "install.py"]
+        cmd.extend(extra_args)
         if userbenchmark_dir.joinpath("install.py").is_file():
             subprocess.check_call(
-                [sys.executable, "install.py"], cwd=userbenchmark_dir.absolute()
+                cmd, cwd=userbenchmark_dir.absolute()
             )
         sys.exit(0)
 
diff --git a/submodules/FBGEMM b/submodules/FBGEMM
new file mode 160000
index 0000000000..182f3a3d09
--- /dev/null
+++ b/submodules/FBGEMM
@@ -0,0 +1 @@
+Subproject commit 182f3a3d09434da305186325ab7fe5e691ba45b1
diff --git a/userbenchmark/triton/install.py b/userbenchmark/triton/install.py
new file mode 100644
index 0000000000..d5faff068c
--- /dev/null
+++ b/userbenchmark/triton/install.py
@@ -0,0 +1,27 @@
+import argparse
+import subprocess
+import sys
+import os
+from pathlib import Path
+
+REPO_PATH = Path(os.path.abspath(__file__)).parent.parent.parent
+FBGEMM_PATH = REPO_PATH.joinpath("submodules", "FBGEMM", "fbgemm_gpu")
+
+def install_fbgemm():
+    cmd = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
+    subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve()))
+    # Build target A100 (8.0) or H100 (9.0)
+    cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai", "-DTORCH_CUDA_ARCH_LIST=8.0;9.0"]
+    subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve()))
+
+def test_fbgemm():
+    cmd = [sys.executable, "-c", "import fbgemm_gpu.experimental.gen_ai"]
+    subprocess.check_call(cmd)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fbgemm", action="store_true", help="Install FBGEMM GPU")
+    args = parser.parse_args()
+    if args.fbgemm:
+        install_fbgemm()
+        test_fbgemm()
diff --git a/utils/cuda_utils.py b/utils/cuda_utils.py
index 798f15dcde..80f677af1e 100644
--- a/utils/cuda_utils.py
+++ b/utils/cuda_utils.py
@@ -8,16 +8,12 @@ from typing import Optional
 
 
 # defines the default CUDA version to compile against
-DEFAULT_CUDA_VERSION = "12.1"
+DEFAULT_CUDA_VERSION = "12.4"
 
 CUDA_VERSION_MAP = {
-    "11.8": {
-        "pytorch_url": "cu118",
-        "magma_version": "magma-cuda118",
-    },
-    "12.1": {
-        "pytorch_url": "cu121",
-        "magma_version": "magma-cuda121",
+    "12.4": {
+        "pytorch_url": "cu124",
+        "magma_version": "magma-cuda124",
     },
 }
 PIN_CMAKE_VERSION = "3.22.*"
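Note (not part of the diff): the install.py change works because argparse's parse_known_args() returns the recognized namespace plus a list of leftover tokens instead of erroring on unknown flags, so a flag like --fbgemm passes through the top-level dispatcher untouched and reaches userbenchmark/triton/install.py. A minimal sketch of that forwarding pattern, with the subprocess call stubbed out:

    import argparse
    import subprocess
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("--userbenchmark")
    # parse_known_args() returns (namespace, leftover_tokens) rather than
    # rejecting flags this parser does not define.
    args, extra_args = parser.parse_known_args(["--userbenchmark", "triton", "--fbgemm"])
    assert args.userbenchmark == "triton"
    assert extra_args == ["--fbgemm"]

    # The leftover flags are appended to the per-benchmark install command,
    # mirroring the install.py hunk above:
    cmd = [sys.executable, "install.py"] + extra_args
    print(cmd)  # [<python path>, 'install.py', '--fbgemm']
    # subprocess.check_call(cmd, cwd="userbenchmark/triton")  # as in the diff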