Use ExecuTorch prebuilt library in pip package to build custom kernels (#1059)

* Use ExecuTorch prebuilt library in pip package to build custom kernels

As titled. The biggest change is this line:

```cmake
find_package(ExecuTorch REQUIRED HINTS ${CMAKE_PREFIX_PATH}/executorch/share/cmake)
```
This exposes `EXECUTORCH_INCLUDE_DIRS` for the ExecuTorch headers and `EXECUTORCH_LIBRARIES` for the prebuilt libraries that custom kernels can depend on, as sketched below.

* Address comments
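
For illustration, a downstream CMake project could consume these variables roughly as follows. This is a minimal sketch, assuming `CMAKE_PREFIX_PATH` points at the Python site-packages directory where the ExecuTorch wheel is installed; the `my_custom_kernels` target and `my_kernel.cpp` source are hypothetical and not part of this change:

```cmake
cmake_minimum_required(VERSION 3.19)
project(my_custom_kernels)

# The pip-installed ExecuTorch package ships its CMake config under
# <site-packages>/executorch/share/cmake, so point find_package at it.
# CMAKE_PREFIX_PATH is assumed to be the site-packages directory.
find_package(ExecuTorch REQUIRED HINTS ${CMAKE_PREFIX_PATH}/executorch/share/cmake)

# Hypothetical custom-kernel library; target and source names are illustrative.
add_library(my_custom_kernels STATIC my_kernel.cpp)
target_include_directories(my_custom_kernels PRIVATE ${EXECUTORCH_INCLUDE_DIRS})
target_link_libraries(my_custom_kernels PRIVATE ${EXECUTORCH_LIBRARIES})
```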
larryliu0820 authored Oct 11, 2024
1 parent d4b2f33 commit 9cd0da6
Showing 7 changed files with 117 additions and 81 deletions.
65 changes: 25 additions & 40 deletions torchao/experimental/CMakeLists.txt
@@ -3,70 +3,55 @@
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
cmake_minimum_required(VERSION 3.19)

project(torchao)

cmake_minimum_required(VERSION 3.19)

set(CMAKE_CXX_STANDARD 17)

if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

option(TORCHAO_OP_EXECUTORCH_BUILD "Building torchao ops for ExecuTorch." OFF)

# Source root directory for torchao/experimental
if(NOT TORCHAO_ROOT)
set(TORCHAO_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
endif()

if(NOT TORCHAO_INCLUDE_DIRS)
set(TORCHAO_INCLUDE_DIRS ${TORCHAO_ROOT}/../..)
set(TORCHAO_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

if (NOT TORCHAO_OP_TARGET)
message(FATAL_ERROR "TORCHAO_OP_TARGET is not set. Set it to aten or executorch.")
endif()

if (NOT TORCHAO_PARALLEL_BACKEND)
if (TORCHAO_OP_TARGET STREQUAL "aten")
set(TORCHAO_PARALLEL_BACKEND "aten_openmp")
elseif(TORCHAO_OP_TARGET STREQUAL "executorch")
set(TORCHAO_PARALLEL_BACKEND "executorch")
else()
message(FATAL_ERROR "TORCHAO_PARALLEL_BACKEND is not set. Please set it directly or set TORCHAO_OP_TARGET to get a default.")
endif()
endif()

include(CMakePrintHelpers)

add_compile_options("-Wall" "-Werror")
add_compile_options("-Wall" "-Werror" "-Wno-deprecated")

include(CMakePrintHelpers)
message("TORCHAO_INCLUDE_DIRS: ${TORCHAO_INCLUDE_DIRS}")
include_directories(${TORCHAO_INCLUDE_DIRS})

if(TORCHAO_OP_TARGET STREQUAL "aten")
add_library(torchao_ops_${TORCHAO_OP_TARGET} SHARED)
elseif(TORCHAO_OP_TARGET STREQUAL "executorch")
add_library(torchao_ops_${TORCHAO_OP_TARGET} STATIC)
add_compile_options("-Wno-error=deprecated")
else()
message(FATAL_ERROR "Unknown TORCHAO_OP_TARGET: ${TORCHAO_OP_TARGET}. Please choose one of: aten, executorch.")
endif()

if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
# Defines target torchao_kernels_aarch64
add_subdirectory(${TORCHAO_ROOT}/kernels/cpu/aarch64)
add_subdirectory(${TORCHAO_ROOT}/ops/linear_8bit_act_xbit_weight)
add_subdirectory(kernels/cpu/aarch64)
add_subdirectory(ops/linear_8bit_act_xbit_weight)

add_library(torchao_ops_aten SHARED)
target_link_libraries(
torchao_ops_${TORCHAO_OP_TARGET} PRIVATE
torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET}
torchao_ops_aten PRIVATE
torchao_ops_linear_8bit_act_xbit_weight_aten
)
install(
TARGETS torchao_ops_aten
DESTINATION lib
)
if(TORCHAO_OP_EXECUTORCH_BUILD)
add_library(torchao_ops_executorch STATIC)
target_link_libraries(torchao_ops_executorch PRIVATE torchao_ops_linear_8bit_act_xbit_weight_executorch)

install(
TARGETS torchao_ops_executorch
DESTINATION lib
)
endif()
else()
message(FATAL_ERROR "Torchao experimental ops can only be built on arm64 CPUs.")
endif()

install(
TARGETS torchao_ops_${TORCHAO_OP_TARGET}
DESTINATION lib
)
14 changes: 7 additions & 7 deletions torchao/experimental/Utils.cmake
@@ -24,12 +24,12 @@ function(target_link_torchao_parallel_backend target_name torchao_parallel_backe
target_link_libraries(${target_name} PRIVATE ${TORCH_INSTALL_PREFIX}/lib/libomp${CMAKE_SHARED_LIBRARY_SUFFIX})

elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "EXECUTORCH")
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=TORCHAO_PARALLEL_EXECUTORCH")
message(STATUS "EXECUTORCH_INCLUDE_DIRS: ${EXECUTORCH_INCLUDE_DIRS}")
message(STATUS "EXECUTORCH_LIBRARIES: ${EXECUTORCH_LIBRARIES}")
target_include_directories(${target_name} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_link_libraries(${target_name} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_EXECUTORCH=1)
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=TORCHAO_PARALLEL_EXECUTORCH")
message(STATUS "EXECUTORCH_INCLUDE_DIRS: ${EXECUTORCH_INCLUDE_DIRS}")
message(STATUS "EXECUTORCH_LIBRARIES: ${EXECUTORCH_LIBRARIES}")
target_include_directories(${target_name} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_link_libraries(${target_name} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_EXECUTORCH=1)

elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "OPENMP")
message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=OPENMP. You must set the CMake variable OpenMP_ROOT to the OMP library location before compiling. Do not use this option if Torch was built with OPENMP; use ATEN_OPENMP instead.")
@@ -59,6 +59,6 @@ function(target_link_torchao_parallel_backend target_name torchao_parallel_backe
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_TEST_DUMMY=1)

else()
message(FATAL_ERROR "Unknown TORCHAO_PARALLEL_BACKEND: ${TORCHAO_PARALLEL_BACKEND}. Please choose one of: aten_openmp, openmp, pthreadpool, single_threaded.")
message(FATAL_ERROR "Unknown TORCHAO_PARALLEL_BACKEND: ${TORCHAO_PARALLEL_BACKEND}. Please choose one of: aten_openmp, executorch, openmp, pthreadpool, single_threaded.")
endif()
endfunction()
13 changes: 8 additions & 5 deletions torchao/experimental/build_torchao_ops.sh
@@ -10,14 +10,17 @@ if [[ $# -ne 1 ]]; then
exit 1;
fi
TARGET="${1}"
export CMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')"
export CMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
echo "CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}"
export CMAKE_OUT=/tmp/cmake-out/torchao
if [[ $TARGET == "executorch" ]]; then
TORCHAO_OP_EXECUTORCH_BUILD=ON
else
TORCHAO_OP_EXECUTORCH_BUILD=OFF
fi
export CMAKE_OUT=cmake-out/torchao
cmake -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_OUT} \
-DTORCHAO_OP_TARGET="${TARGET}" \
-DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
-DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
-DTORCHAO_OP_EXECUTORCH_BUILD="${TORCHAO_OP_EXECUTORCH_BUILD}" \
-S . \
-B ${CMAKE_OUT}
cmake --build ${CMAKE_OUT} --target install --config Release
15 changes: 15 additions & 0 deletions torchao/experimental/install_requirements.sh
@@ -0,0 +1,15 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Install requirements for experimental torchao ops.
if [[ -z $PIP ]];
then
PIP=pip
fi

NIGHTLY_VERSION="dev20241011"
$PIP install "executorch==0.5.0.$NIGHTLY_VERSION" --extra-index-url https://download.pytorch.org/whl/nightly/cpu
8 changes: 4 additions & 4 deletions torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt
@@ -23,10 +23,10 @@ CMAKE_DEPENDENT_OPTION(BUILD_KLEIDI "Download, build, and link against Arm Kleid
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
add_library(
torchao_kernels_aarch64
${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/reduction/find_min_and_max.cpp
${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/reduction/compute_sum.cpp
${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/quantization/quantize.cpp
${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/valpacking/interleave.cpp
${CMAKE_CURRENT_SOURCE_DIR}/reduction/find_min_and_max.cpp
${CMAKE_CURRENT_SOURCE_DIR}/reduction/compute_sum.cpp
${CMAKE_CURRENT_SOURCE_DIR}/quantization/quantize.cpp
${CMAKE_CURRENT_SOURCE_DIR}/valpacking/interleave.cpp
)
if (BUILD_KLEIDI)
# Temporarily exposing this to the parent scope until we wire
44 changes: 21 additions & 23 deletions torchao/experimental/ops/linear_8bit_act_xbit_weight/CMakeLists.txt
@@ -6,24 +6,22 @@

cmake_minimum_required(VERSION 3.19)

include(${TORCHAO_ROOT}/Utils.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/../../Utils.cmake)

find_package(Torch REQUIRED)
add_library(torchao_ops_linear_8bit_act_xbit_weight_aten OBJECT
linear_8bit_act_xbit_weight.cpp
op_linear_8bit_act_xbit_weight_aten.cpp
)
target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_aten aten_openmp)
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE torchao_kernels_aarch64)
target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE "${TORCH_INCLUDE_DIRS}")
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE "${TORCH_LIBRARIES}")
target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE USE_ATEN=1)

if(TORCHAO_OP_TARGET STREQUAL "aten")
message(STATUS "Building with TORCHAO_OP_TARGET=aten")
find_package(Torch REQUIRED)
add_library(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} OBJECT
linear_8bit_act_xbit_weight.cpp
op_linear_8bit_act_xbit_weight_aten.cpp
)
target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} "${TORCHAO_PARALLEL_BACKEND}")
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE torchao_kernels_aarch64)
target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${TORCH_INCLUDE_DIRS}")
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${TORCH_LIBRARIES}")
target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE USE_ATEN=1)
elseif(TORCHAO_OP_TARGET STREQUAL "executorch")
message(STATUS "Building with TORCHAO_OP_TARGET=executorch")
add_library(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} OBJECT
if(TORCHAO_OP_EXECUTORCH_BUILD)
find_package(ExecuTorch REQUIRED HINTS ${CMAKE_PREFIX_PATH}/executorch/share/cmake)
add_library(torchao_ops_linear_8bit_act_xbit_weight_executorch OBJECT
linear_8bit_act_xbit_weight.cpp
op_linear_8bit_act_xbit_weight_executorch/w2s.cpp
op_linear_8bit_act_xbit_weight_executorch/w2sz.cpp
@@ -33,12 +31,12 @@ elseif(TORCHAO_OP_TARGET STREQUAL "executorch")
op_linear_8bit_act_xbit_weight_executorch/w4sz.cpp
op_linear_8bit_act_xbit_weight_executorch/w5s.cpp
op_linear_8bit_act_xbit_weight_executorch/w5sz.cpp
op_linear_8bit_act_xbit_weight_executorch/w6s.cpp
op_linear_8bit_act_xbit_weight_executorch/w6sz.cpp
)
target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} "${TORCHAO_PARALLEL_BACKEND}")
target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE USE_EXECUTORCH=1)
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_LIBRARIES}")
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE torchao_kernels_aarch64)
else()
message(FATAL_ERROR "Unknown TORCHAO_OP_TARGET: ${TORCHAO_OP_TARGET}. Please choose one of: aten, executorch.")
target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_executorch executorch)
target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE "${EXECUTORCH_INCLUDE_DIRS}")
target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE USE_EXECUTORCH=1)
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE "${EXECUTORCH_LIBRARIES}")
target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE torchao_kernels_aarch64)
endif()
@@ -8,6 +8,7 @@

import glob
import os
import subprocess

import sys
import tempfile
@@ -21,7 +22,36 @@
Int8DynActIntxWeightQuantizer,
)

libs = glob.glob("/tmp/cmake-out/torchao/lib/libtorchao_ops_aten.*")

def cmake_build_torchao_ops(temp_build_dir):
from distutils.sysconfig import get_python_lib

print("Building torchao ops for ATen target")
cmake_prefix_path = get_python_lib()
dir_path = os.path.dirname(os.path.realpath(__file__))
subprocess.run(
[
"cmake",
"-DCMAKE_PREFIX_PATH=" + cmake_prefix_path,
"-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name,
"-S " + dir_path + "/../",
"-B " + temp_build_dir.name,
]
)
subprocess.run(
[
"cmake",
"--build",
temp_build_dir.name,
"--target install",
"--config Release",
]
)


temp_build_dir = tempfile.TemporaryDirectory()
cmake_build_torchao_ops(temp_build_dir)
libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*")
libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
if len(libs) == 0:
print(
@@ -30,6 +60,7 @@
else:
torch.ops.load_library(libs[0])


class TestInt8DynActIntxWeightQuantizer(unittest.TestCase):
def test_accuracy(self):
group_size = 128
@@ -81,7 +112,11 @@ def test_export_compile_aoti(self):
k3 = 1024
nbit = 4
has_weight_zeros = False
layers = [torch.nn.Linear(k0, k1, bias=False), torch.nn.Linear(k1, k2, bias=False), torch.nn.Linear(k2, k3, bias=False)]
layers = [
torch.nn.Linear(k0, k1, bias=False),
torch.nn.Linear(k1, k2, bias=False),
torch.nn.Linear(k2, k3, bias=False),
]
model = torch.nn.Sequential(*layers)

activations = torch.randn(m, k0, dtype=torch.float32)
