diff --git a/torchao/experimental/CMakeLists.txt b/torchao/experimental/CMakeLists.txt index 2c75b9e1d..8d63eac7f 100644 --- a/torchao/experimental/CMakeLists.txt +++ b/torchao/experimental/CMakeLists.txt @@ -3,70 +3,55 @@ # # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +cmake_minimum_required(VERSION 3.19) project(torchao) -cmake_minimum_required(VERSION 3.19) - set(CMAKE_CXX_STANDARD 17) if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +option(TORCHAO_OP_EXECUTORCH_BUILD "Building torchao ops for ExecuTorch." OFF) -# Source root directory for torchao/experimental -if(NOT TORCHAO_ROOT) - set(TORCHAO_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -endif() if(NOT TORCHAO_INCLUDE_DIRS) - set(TORCHAO_INCLUDE_DIRS ${TORCHAO_ROOT}/../..) + set(TORCHAO_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../..) endif() -if (NOT TORCHAO_OP_TARGET) - message(FATAL_ERROR "TORCHAO_OP_TARGET is not set. Set it to aten or executorch.") -endif() - -if (NOT TORCHAO_PARALLEL_BACKEND) - if (TORCHAO_OP_TARGET STREQUAL "aten") - set(TORCHAO_PARALLEL_BACKEND "aten_openmp") - elseif(TORCHAO_OP_TARGET STREQUAL "executorch") - set(TORCHAO_PARALLEL_BACKEND "executorch") - else() - message(TORCHAO_PARALLEL_BACKEND "TORCHAO_PARALLEL_BACKEND is not set. Please set it directly or set TORCHAO_OP_TARGET to get a default.") - endif() -endif() include(CMakePrintHelpers) -add_compile_options("-Wall" "-Werror") +add_compile_options("-Wall" "-Werror" "-Wno-deprecated") include(CMakePrintHelpers) message("TORCHAO_INCLUDE_DIRS: ${TORCHAO_INCLUDE_DIRS}") include_directories(${TORCHAO_INCLUDE_DIRS}) -if(TORCHAO_OP_TARGET STREQUAL "aten") - add_library(torchao_ops_${TORCHAO_OP_TARGET} SHARED) -elseif(TORCHAO_OP_TARGET STREQUAL "executorch") - add_library(torchao_ops_${TORCHAO_OP_TARGET} STATIC) - add_compile_options("-Wno-error=deprecated") -else() - message(FATAL_ERROR "Unknown TORCHAO_OP_TARGET: ${TORCHAO_OP_TARGET}. 
Please choose one of: aten, executorch.") -endif() - -if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") # Defines target torchao_kernels_aarch64 - add_subdirectory(${TORCHAO_ROOT}/kernels/cpu/aarch64) - add_subdirectory(${TORCHAO_ROOT}/ops/linear_8bit_act_xbit_weight) + add_subdirectory(kernels/cpu/aarch64) + add_subdirectory(ops/linear_8bit_act_xbit_weight) + add_library(torchao_ops_aten SHARED) target_link_libraries( - torchao_ops_${TORCHAO_OP_TARGET} PRIVATE - torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} + torchao_ops_aten PRIVATE + torchao_ops_linear_8bit_act_xbit_weight_aten + ) + install( + TARGETS torchao_ops_aten + DESTINATION lib ) + if(TORCHAO_OP_EXECUTORCH_BUILD) + add_library(torchao_ops_executorch STATIC) + target_link_libraries(torchao_ops_executorch PRIVATE torchao_ops_linear_8bit_act_xbit_weight_executorch) + + install( + TARGETS torchao_ops_executorch + DESTINATION lib + ) + endif() +else() + message(FATAL_ERROR "Torchao experimental ops can only be built on arm64 CPUs.") endif() - -install( - TARGETS torchao_ops_${TORCHAO_OP_TARGET} - DESTINATION lib -) diff --git a/torchao/experimental/Utils.cmake b/torchao/experimental/Utils.cmake index d6e6254de..984c90006 100644 --- a/torchao/experimental/Utils.cmake +++ b/torchao/experimental/Utils.cmake @@ -24,12 +24,12 @@ function(target_link_torchao_parallel_backend target_name torchao_parallel_backe target_link_libraries(${target_name} PRIVATE ${TORCH_INSTALL_PREFIX}/lib/libomp${CMAKE_SHARED_LIBRARY_SUFFIX}) elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "EXECUTORCH") - message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=TORCHAO_PARALLEL_EXECUTORCH") - message(STATUS "EXECUTORCH_INCLUDE_DIRS: ${EXECUTORCH_INCLUDE_DIRS}") - message(STATUS "EXECUTORCH_LIBRARIES: ${EXECUTORCH_LIBRARIES}") - target_include_directories(${target_name} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}") - target_link_libraries(${target_name} PRIVATE "${EXECUTORCH_LIBRARIES}") - 
target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_EXECUTORCH=1) + message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=TORCHAO_PARALLEL_EXECUTORCH") + message(STATUS "EXECUTORCH_INCLUDE_DIRS: ${EXECUTORCH_INCLUDE_DIRS}") + message(STATUS "EXECUTORCH_LIBRARIES: ${EXECUTORCH_LIBRARIES}") + target_include_directories(${target_name} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}") + target_link_libraries(${target_name} PRIVATE "${EXECUTORCH_LIBRARIES}") + target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_EXECUTORCH=1) elseif(TORCHAO_PARALLEL_BACKEND_TOUPPER STREQUAL "OPENMP") message(STATUS "Building with TORCHAO_PARALLEL_BACKEND=OPENMP. You must set the CMake variable OpenMP_ROOT to the OMP library location before compiling. Do not use this option if Torch was built with OPENMP; use ATEN_OPENMP instead.") @@ -59,6 +59,6 @@ function(target_link_torchao_parallel_backend target_name torchao_parallel_backe target_compile_definitions(${target_name} PRIVATE TORCHAO_PARALLEL_TEST_DUMMY=1) else() - message(FATAL_ERROR "Unknown TORCHAO_PARALLEL_BACKEND: ${TORCHAO_PARALLEL_BACKEND}. Please choose one of: aten_openmp, openmp, pthreadpool, single_threaded.") + message(FATAL_ERROR "Unknown TORCHAO_PARALLEL_BACKEND: ${TORCHAO_PARALLEL_BACKEND}. 
Please choose one of: aten_openmp, executorch, openmp, pthreadpool, single_threaded.") endif() endfunction() diff --git a/torchao/experimental/build_torchao_ops.sh b/torchao/experimental/build_torchao_ops.sh index 1f13f36c7..9d77a0647 100644 --- a/torchao/experimental/build_torchao_ops.sh +++ b/torchao/experimental/build_torchao_ops.sh @@ -10,14 +10,17 @@ if [[ $# -ne 1 ]]; then exit 1; fi TARGET="${1}" -export CMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" +export CMAKE_PREFIX_PATH="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" echo "CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" -export CMAKE_OUT=/tmp/cmake-out/torchao +if [[ $TARGET == "executorch" ]]; then + TORCHAO_OP_EXECUTORCH_BUILD=ON +else + TORCHAO_OP_EXECUTORCH_BUILD=OFF +fi +export CMAKE_OUT=cmake-out/torchao cmake -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT} \ - -DTORCHAO_OP_TARGET="${TARGET}" \ - -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ - -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ + -DTORCHAO_OP_EXECUTORCH_BUILD="${TORCHAO_OP_EXECUTORCH_BUILD}" \ -S . \ -B ${CMAKE_OUT} cmake --build ${CMAKE_OUT} --target install --config Release diff --git a/torchao/experimental/install_requirements.sh b/torchao/experimental/install_requirements.sh new file mode 100644 index 000000000..96c70cfc8 --- /dev/null +++ b/torchao/experimental/install_requirements.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Install requirements for experimental torchao ops. 
+if [[ -z $PIP ]]; +then + PIP=pip +fi + +NIGHTLY_VERSION="dev20241011" +$PIP install "executorch==0.5.0.$NIGHTLY_VERSION" --extra-index-url https://download.pytorch.org/whl/nightly/cpu diff --git a/torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt b/torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt index 6541d7fda..eb023ec2c 100644 --- a/torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt +++ b/torchao/experimental/kernels/cpu/aarch64/CMakeLists.txt @@ -23,10 +23,10 @@ CMAKE_DEPENDENT_OPTION(BUILD_KLEIDI "Download, build, and link against Arm Kleid if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") add_library( torchao_kernels_aarch64 - ${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/reduction/find_min_and_max.cpp - ${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/reduction/compute_sum.cpp - ${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/quantization/quantize.cpp - ${TORCHAO_INCLUDE_DIRS}/torchao/experimental/kernels/cpu/aarch64/valpacking/interleave.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/reduction/find_min_and_max.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/reduction/compute_sum.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/quantization/quantize.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/valpacking/interleave.cpp ) if (BUILD_KLEIDI) # Temporarily exposing this to the parent scope until we wire diff --git a/torchao/experimental/ops/linear_8bit_act_xbit_weight/CMakeLists.txt b/torchao/experimental/ops/linear_8bit_act_xbit_weight/CMakeLists.txt index 203afb632..a1490e830 100644 --- a/torchao/experimental/ops/linear_8bit_act_xbit_weight/CMakeLists.txt +++ b/torchao/experimental/ops/linear_8bit_act_xbit_weight/CMakeLists.txt @@ -6,24 +6,22 @@ cmake_minimum_required(VERSION 3.19) -include(${TORCHAO_ROOT}/Utils.cmake) +include(${CMAKE_CURRENT_SOURCE_DIR}/../../Utils.cmake) +find_package(Torch REQUIRED) +add_library(torchao_ops_linear_8bit_act_xbit_weight_aten OBJECT + linear_8bit_act_xbit_weight.cpp + op_linear_8bit_act_xbit_weight_aten.cpp +) 
+target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_aten aten_openmp) +target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE torchao_kernels_aarch64) +target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE "${TORCH_INCLUDE_DIRS}") +target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE "${TORCH_LIBRARIES}") +target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_aten PRIVATE USE_ATEN=1) -if(TORCHAO_OP_TARGET STREQUAL "aten") - message(STATUS "Building with TORCHAO_OP_TARGET=aten") - find_package(Torch REQUIRED) - add_library(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} OBJECT - linear_8bit_act_xbit_weight.cpp - op_linear_8bit_act_xbit_weight_aten.cpp - ) - target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} "${TORCHAO_PARALLEL_BACKEND}") - target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE torchao_kernels_aarch64) - target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${TORCH_INCLUDE_DIRS}") - target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${TORCH_LIBRARIES}") - target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE USE_ATEN=1) -elseif(TORCHAO_OP_TARGET STREQUAL "executorch") - message(STATUS "Building with TORCHAO_OP_TARGET=executorch") - add_library(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} OBJECT +if(TORCHAO_OP_EXECUTORCH_BUILD) + find_package(ExecuTorch REQUIRED HINTS ${CMAKE_PREFIX_PATH}/executorch/share/cmake) + add_library(torchao_ops_linear_8bit_act_xbit_weight_executorch OBJECT linear_8bit_act_xbit_weight.cpp op_linear_8bit_act_xbit_weight_executorch/w2s.cpp op_linear_8bit_act_xbit_weight_executorch/w2sz.cpp @@ -33,12 +31,12 @@ elseif(TORCHAO_OP_TARGET STREQUAL "executorch") 
op_linear_8bit_act_xbit_weight_executorch/w4sz.cpp op_linear_8bit_act_xbit_weight_executorch/w5s.cpp op_linear_8bit_act_xbit_weight_executorch/w5sz.cpp + op_linear_8bit_act_xbit_weight_executorch/w6s.cpp + op_linear_8bit_act_xbit_weight_executorch/w6sz.cpp ) - target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} "${TORCHAO_PARALLEL_BACKEND}") - target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_INCLUDE_DIRS}") - target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE USE_EXECUTORCH=1) - target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE "${EXECUTORCH_LIBRARIES}") - target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_${TORCHAO_OP_TARGET} PRIVATE torchao_kernels_aarch64) -else() - message(FATAL_ERROR "Unknown TORCHAO_OP_TARGET: ${TORCHAO_OP_TARGET}. Please choose one of: aten, executorch.") + target_link_torchao_parallel_backend(torchao_ops_linear_8bit_act_xbit_weight_executorch executorch) + target_include_directories(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE "${EXECUTORCH_INCLUDE_DIRS}") + target_compile_definitions(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE USE_EXECUTORCH=1) + target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE "${EXECUTORCH_LIBRARIES}") + target_link_libraries(torchao_ops_linear_8bit_act_xbit_weight_executorch PRIVATE torchao_kernels_aarch64) endif() diff --git a/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py b/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py index 45dce490a..631f18200 100644 --- a/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py +++ b/torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py @@ -8,6 +8,7 @@ import glob import os +import subprocess import sys import tempfile @@ -21,7 +22,36 
@@ Int8DynActIntxWeightQuantizer, ) -libs = glob.glob("/tmp/cmake-out/torchao/lib/libtorchao_ops_aten.*") + +def cmake_build_torchao_ops(temp_build_dir): + from distutils.sysconfig import get_python_lib + + print("Building torchao ops for ATen target") + cmake_prefix_path = get_python_lib() + dir_path = os.path.dirname(os.path.realpath(__file__)) + subprocess.run( + [ + "cmake", + "-DCMAKE_PREFIX_PATH=" + cmake_prefix_path, + "-DCMAKE_INSTALL_PREFIX=" + temp_build_dir.name, + "-S", dir_path + "/../", + "-B", temp_build_dir.name, + ] + ) + subprocess.run( + [ + "cmake", + "--build", + temp_build_dir.name, + "--target", "install", + "--config", "Release", + ] + ) + + +temp_build_dir = tempfile.TemporaryDirectory() +cmake_build_torchao_ops(temp_build_dir) +libs = glob.glob(f"{temp_build_dir.name}/lib/libtorchao_ops_aten.*") libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) if len(libs) == 0: print( @@ -30,6 +60,7 @@ else: torch.ops.load_library(libs[0]) + class TestInt8DynActIntxWeightQuantizer(unittest.TestCase): def test_accuracy(self): group_size = 128 @@ -81,7 +112,11 @@ def test_export_compile_aoti(self): k3 = 1024 nbit = 4 has_weight_zeros = False - layers = [torch.nn.Linear(k0, k1, bias=False), torch.nn.Linear(k1, k2, bias=False), torch.nn.Linear(k2, k3, bias=False)] + layers = [ + torch.nn.Linear(k0, k1, bias=False), + torch.nn.Linear(k1, k2, bias=False), + torch.nn.Linear(k2, k3, bias=False), + ] model = torch.nn.Sequential(*layers) activations = torch.randn(m, k0, dtype=torch.float32)