From cfa3f7cd259ec204d1ee653ab1fad7083be1a791 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 11 Jun 2024 19:32:51 -0400 Subject: [PATCH 1/7] Add FBGEMM submodule --- .gitmodules | 3 +++ submodules/FBGEMM | 1 + 2 files changed, 4 insertions(+) create mode 160000 submodules/FBGEMM diff --git a/.gitmodules b/.gitmodules index 1a6ef12df7..40fdd49a0f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "submodules/lit-llama"] path = submodules/lit-llama url = https://github.com/Lightning-AI/lit-llama.git +[submodule "submodules/FBGEMM"] + path = submodules/FBGEMM + url = https://github.com/pytorch/FBGEMM.git diff --git a/submodules/FBGEMM b/submodules/FBGEMM new file mode 160000 index 0000000000..182f3a3d09 --- /dev/null +++ b/submodules/FBGEMM @@ -0,0 +1 @@ +Subproject commit 182f3a3d09434da305186325ab7fe5e691ba45b1 From 89f5e4094a4becfd68beb19d703f2186baf5d2b6 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 11 Jun 2024 23:27:56 -0400 Subject: [PATCH 2/7] Add fbgemm genai to the nightly docker --- docker/gcp-a100-runner-dind.dockerfile | 4 ++-- docker/torchbench-nightly.dockerfile | 5 +++++ install.py | 6 ++++-- userbenchmark/triton/install.py | 22 ++++++++++++++++++++++ 4 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 userbenchmark/triton/install.py diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 2d67106417..98f012def7 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -28,9 +28,9 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace # We assume that the host NVIDIA driver binaries and libraries are mapped to the docker filesystem # Use the CUDA installation scripts from pytorch/builder +# Install CUDA 12.4 only to reduce docker size RUN cd /workspace; git clone https://github.com/pytorch/builder.git -RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_118; prune_118' -RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_121; prune_121' +RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_124; prune_124' # Install miniconda RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh diff --git a/docker/torchbench-nightly.dockerfile b/docker/torchbench-nightly.dockerfile index c45f33a2e3..fe0e744484 100644 --- a/docker/torchbench-nightly.dockerfile +++ b/docker/torchbench-nightly.dockerfile @@ -48,3 +48,8 @@ RUN cd /workspace/benchmark && \ RUN cd /workspace/benchmark && \ . ${SETUP_SCRIPT} && \ python install.py + +# Install FBGEMM GENAI +RUN cd /workspace/benchmark && \ + . ${SETUP_SCRIPT} && \ + python install.py --userbenchmark triton --fbgemm diff --git a/install.py b/install.py index 7486c22209..ea825e6f7c 100644 --- a/install.py +++ b/install.py @@ -46,7 +46,7 @@ def pip_install_requirements(requirements_txt="requirements.txt"): choices=list_userbenchmarks(), help="Install requirements for optional components.", ) - args = parser.parse_args() + args, extra_args = parser.parse_known_args() os.chdir(os.path.realpath(os.path.dirname(__file__))) @@ -68,9 +68,11 @@ def pip_install_requirements(requirements_txt="requirements.txt"): if args.userbenchmark: # Install userbenchmark dependencies if exists userbenchmark_dir = REPO_ROOT.joinpath("userbenchmark", args.userbenchmark) + cmd = [sys.executable, "install.py"] + cmd.extend(extra_args) if userbenchmark_dir.joinpath("install.py").is_file(): subprocess.check_call( - [sys.executable, "install.py"], cwd=userbenchmark_dir.absolute() + cmd, cwd=userbenchmark_dir.absolute() ) sys.exit(0) diff --git a/userbenchmark/triton/install.py b/userbenchmark/triton/install.py new file mode 100644 index 0000000000..ca9a8542d4 --- /dev/null +++ b/userbenchmark/triton/install.py @@ -0,0 +1,22 @@ +import argparse +import subprocess +import sys + +from torchbenchmark import REPO_PATH +FBGEMM_PATH = REPO_PATH.joinpath("submodules", "FBGEMM", "fbgemm_gpu") + +def install_fbgemm(): + cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai"] + subprocess.check_call(cmd, cwd=FBGEMM_PATH) + +def test_fbgemm(): + cmd = [sys.executable, "-c", '"import fbgemm_gpu.experimental.gen_ai"'] + subprocess.check_call(cmd) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--fbgemm", action="store_true", help="Install FBGEMM GPU") + args = parser.parse_args() + if args.fbgemm: + install_fbgemm() + test_fbgemm() From 1d2830a46edf3d08be92b52feaf543edb75d51b6 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 11 Jun 2024 23:30:06 -0400 Subject: [PATCH 3/7] Update the default version in cuda utils --- utils/cuda_utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/utils/cuda_utils.py b/utils/cuda_utils.py index 798f15dcde..80f677af1e 100644 --- a/utils/cuda_utils.py +++ b/utils/cuda_utils.py @@ -8,16 +8,12 @@ from typing import Optional # defines the default CUDA version to compile against -DEFAULT_CUDA_VERSION = "12.1" +DEFAULT_CUDA_VERSION = "12.4" CUDA_VERSION_MAP = { - "11.8": { - "pytorch_url": "cu118", - "magma_version": "magma-cuda118", - }, - "12.1": { - "pytorch_url": "cu121", - "magma_version": "magma-cuda121", + "12.4": { + "pytorch_url": "cu124", + "magma_version": "magma-cuda124", }, } PIN_CMAKE_VERSION = "3.22.*" From bacbff4981bde94907a01b1be201332bc1c2dc15 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 12 Jun 2024 09:55:43 -0400 Subject: [PATCH 4/7] Install fbgemm --- docker/torchbench-nightly.dockerfile | 7 ++++--- userbenchmark/triton/install.py | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docker/torchbench-nightly.dockerfile b/docker/torchbench-nightly.dockerfile index fe0e744484..c48436f47f 100644 --- a/docker/torchbench-nightly.dockerfile +++ b/docker/torchbench-nightly.dockerfile @@ -45,11 +45,12 @@ RUN cd /workspace/benchmark && \ . ${SETUP_SCRIPT} && \ python utils/cuda_utils.py --install-torchbench-deps +# Install FBGEMM GENAI RUN cd /workspace/benchmark && \ . ${SETUP_SCRIPT} && \ - python install.py + python install.py --userbenchmark triton --fbgemm -# Install FBGEMM GENAI +# Install Torchbench models RUN cd /workspace/benchmark && \ . ${SETUP_SCRIPT} && \ - python install.py --userbenchmark triton --fbgemm + python install.py diff --git a/userbenchmark/triton/install.py b/userbenchmark/triton/install.py index ca9a8542d4..99051367e2 100644 --- a/userbenchmark/triton/install.py +++ b/userbenchmark/triton/install.py @@ -1,8 +1,10 @@ import argparse import subprocess import sys +import os +from pathlib import Path -from torchbenchmark import REPO_PATH +REPO_PATH = Path(os.path.abspath(__file__)).parent.parent.parent FBGEMM_PATH = REPO_PATH.joinpath("submodules", "FBGEMM", "fbgemm_gpu") def install_fbgemm(): From aeaaa0d850471dddc600a6f73e1a333d4864263a Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 12 Jun 2024 10:15:42 -0400 Subject: [PATCH 5/7] Fix dockerfile issues --- docker/build-torchbench-nightly-docker.sh | 5 ++++- docker/torchbench-nightly.dockerfile | 7 +++++-- userbenchmark/triton/install.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docker/build-torchbench-nightly-docker.sh b/docker/build-torchbench-nightly-docker.sh index 5194affc4c..8f53442dd9 100644 --- a/docker/build-torchbench-nightly-docker.sh +++ b/docker/build-torchbench-nightly-docker.sh @@ -1 +1,4 @@ -docker build . -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest +TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main} + +docker build . -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest \ + --build-arg TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH} diff --git a/docker/torchbench-nightly.dockerfile b/docker/torchbench-nightly.dockerfile index c48436f47f..a35a3dda20 100644 --- a/docker/torchbench-nightly.dockerfile +++ b/docker/torchbench-nightly.dockerfile @@ -8,10 +8,13 @@ ENV SETUP_SCRIPT=/workspace/setup_instance.sh ARG TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main} ARG FORCE_DATE=${FORCE_DATE} -# Setup Conda env and CUDA +# Checkout Torchbench and submodules RUN git clone -b "${TORCHBENCH_BRANCH}" --single-branch \ - https://github.com/pytorch/benchmark /workspace/benchmark + https://github.com/pytorch/benchmark /workspace/benchmark +RUN cd /workspace/benchmark \ + git submodule update --init --recursive +# Setup conda env and CUDA RUN cd /workspace/benchmark && \ . ${SETUP_SCRIPT} && \ python ./utils/python_utils.py --create-conda-env ${CONDA_ENV} && \ diff --git a/userbenchmark/triton/install.py b/userbenchmark/triton/install.py index 99051367e2..360806c3d2 100644 --- a/userbenchmark/triton/install.py +++ b/userbenchmark/triton/install.py @@ -9,7 +9,7 @@ def install_fbgemm(): cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai"] - subprocess.check_call(cmd, cwd=FBGEMM_PATH) + subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve())) def test_fbgemm(): cmd = [sys.executable, "-c", '"import fbgemm_gpu.experimental.gen_ai"'] From 7e6c79df0efad75f3ae9c428202ad1b890f16e97 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 12 Jun 2024 10:31:01 -0400 Subject: [PATCH 6/7] Another fix --- docker/torchbench-nightly.dockerfile | 4 +--- userbenchmark/triton/install.py | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/torchbench-nightly.dockerfile b/docker/torchbench-nightly.dockerfile index a35a3dda20..f6f574af8e 100644 --- a/docker/torchbench-nightly.dockerfile +++ b/docker/torchbench-nightly.dockerfile @@ -9,10 +9,8 @@ ARG TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main} ARG FORCE_DATE=${FORCE_DATE} # Checkout Torchbench and submodules -RUN git clone -b "${TORCHBENCH_BRANCH}" --single-branch \ +RUN git clone --recurse-submodules -b "${TORCHBENCH_BRANCH}" --single-branch \ https://github.com/pytorch/benchmark /workspace/benchmark -RUN cd /workspace/benchmark \ - git submodule update --init --recursive # Setup conda env and CUDA RUN cd /workspace/benchmark && \ diff --git a/userbenchmark/triton/install.py b/userbenchmark/triton/install.py index 360806c3d2..21c61dc802 100644 --- a/userbenchmark/triton/install.py +++ b/userbenchmark/triton/install.py @@ -8,6 +8,8 @@ FBGEMM_PATH = REPO_PATH.joinpath("submodules", "FBGEMM", "fbgemm_gpu") def install_fbgemm(): + cmd = ["pip", "install", "-r", "requirements.txt"] + subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve())) cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai"] subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve())) From b156359533cddb038fadbc40a3cc5c6a823bd4fa Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 12 Jun 2024 11:01:34 -0400 Subject: [PATCH 7/7] Limit build target to A100/H100 --- userbenchmark/triton/install.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/userbenchmark/triton/install.py b/userbenchmark/triton/install.py index 21c61dc802..d5faff068c 100644 --- a/userbenchmark/triton/install.py +++ b/userbenchmark/triton/install.py @@ -10,7 +10,8 @@ def install_fbgemm(): cmd = ["pip", "install", "-r", "requirements.txt"] subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve())) - cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai"] + # Build target A100(8.0) or H100(9.0) + cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai", "-DTORCH_CUDA_ARCH_LIST=8.0;9.0"] subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve())) def test_fbgemm():