Merge pull request #110 from cyndwith/main
Updates to documentation and iGPU example
savitha-srinivasan authored Aug 8, 2024
2 parents 2a3c4af + 8772b2a commit 38daa3a
Showing 13 changed files with 1,575 additions and 29 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -6,6 +6,8 @@ AMD Ryzen™ AI Software includes the tools and runtime libraries for optimizing

This repository contains the demos, examples and tutorials, demonstrating usage and capabilities of the Ryzen™ AI Software. It is a subset of the Ryzen™ AI Software release.

Follow the instructions at [Ryzen™ AI Software](https://ryzenai.docs.amd.com/en/latest/inst.html) for installation.

## Git LFS and Instructions to clone:

Due to the presence of large files in some examples/tutorials, Git Large File Storage (LFS) has been configured in this repository. Follow the instructions below to ensure Git LFS is properly set up:
5 changes: 3 additions & 2 deletions example/transformers/models/llm/docs/README.md
@@ -37,6 +37,7 @@ The above list is just a representative collection of models supported using the
Create conda environment:
```powershell
cd <transformers>
set TRANSFORMERS_ROOT=%CD%
conda env create --file=env.yaml
conda activate ryzenai-transformers
build_dependencies.bat
@@ -45,7 +46,7 @@ build_dependencies.bat
AWQ Model zoo has precomputed scales, clips and zeros for various LLMs including OPT, Llama. Get the precomputed results:
```powershell
git lfs install
cd <transformers>\ext
cd %TRANSFORMERS_ROOT%\ext
git clone https://huggingface.co/datasets/mit-han-lab/awq-model-zoo awq_cache
```
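The entries in `awq_cache` hold precomputed per-group scales, clips, and zero points. As a rough illustration of what such parameters encode (this is NOT the awq_cache file format, and the function names here are hypothetical), group-wise asymmetric quantization works like this:

```python
# Illustrative sketch of group-wise asymmetric 4-bit quantization, the kind of
# scheme that precomputed scales/zeros parameterize. Not the actual AWQ format.

def quantize_group(weights, n_bits=4):
    """Quantize a group of float weights to unsigned n-bit integers."""
    qmax = (1 << n_bits) - 1
    w_min, w_max = min(weights), max(weights)
    scale = (w_max - w_min) / qmax or 1.0  # avoid zero scale for constant groups
    zero = round(-w_min / scale)
    q = [max(0, min(qmax, round(w / scale) + zero)) for w in weights]
    return q, scale, zero

def dequantize_group(q, scale, zero):
    """Recover approximate float weights from quantized values."""
    return [(v - zero) * scale for v in q]

group = [-0.5, -0.1, 0.0, 0.2, 0.7]
q, scale, zero = quantize_group(group)
recovered = dequantize_group(q, scale, zero)
```

The reconstruction error per weight is bounded by the group's scale, which is why good per-group scales (the thing AWQ precomputes) matter for accuracy.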

@@ -100,7 +101,7 @@ pip install ops\torch_cpp --force-reinstall
When using locally downloaded weights, pass the model directory name as the `model_name` argument. Only certain model names are supported by default, so make sure the model directory name matches a supported model name.

```powershell
cd <transformers>\models\llm
cd %TRANSFORMERS_ROOT%\models\llm
```
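The directory-name check described above can be sketched as follows; the supported-name set here is a hypothetical, shortened stand-in for illustration:

```python
# Hypothetical sketch: a local weights folder must match a supported model name.
SUPPORTED_MODELS = {"facebook/opt-6.7b", "llama-2-7b", "llama-2-7b-chat"}  # illustrative subset

def resolve_model_name(model_dir: str) -> str:
    """Return the directory's base name if it matches a supported model, else raise."""
    # Take the last path component, tolerating both / and \ separators.
    name = model_dir.rstrip("/\\").split("/")[-1].split("\\")[-1]
    candidates = {m.split("/")[-1] for m in SUPPORTED_MODELS}
    if name not in candidates:
        raise ValueError(f"unsupported model directory name: {name!r}")
    return name
```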

### Recipe 1: Smoothquant with w8a8/w8a16
36 changes: 18 additions & 18 deletions example/transformers/models/llm_assisted_generation/README.md
@@ -10,13 +10,13 @@ Complete the setup instructions for [LLMs on RyzenAI with Pytorch](../llm/docs/R
Generate the quantized checkpoints by running ```models/llm/run_awq.py``` for target models.

* To generate the quantized target model for OPT-6.7b, run the command below from the ```models/llm``` directory

```
python run_awq.py --model_name facebook/opt-6.7b --task quantize
```
* To generate the quantized target model for llama-2-7b, run the command below from the ```models/llm``` directory

```
python run_awq.py --model_name llama-2-7b --task quantize
```
## Step 2: Get draft models
* For OPT-6.7b, ```facebook/opt-125m``` is used as the draft model.
* For llama-2-7b, ```JackFram/llama-160m``` is used as the draft model.
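Assisted (speculative) generation pairs a small draft model with the large target model: the draft proposes several tokens cheaply, the target verifies them in one pass, and the longest agreeing prefix is kept. A toy, greedy sketch with deterministic stand-in "models" (plain functions, purely for illustration — the real implementation operates on model logits):

```python
# Toy sketch of greedy assisted generation; draft/target are hypothetical
# stand-in functions that map a token context to the next token.

def assisted_generate(target, draft, prompt, max_new=8, k=4):
    tokens = list(prompt)
    while len(tokens) - len(prompt) < max_new:
        # Draft proposes k tokens autoregressively (cheap).
        proposal, ctx = [], tokens[:]
        for _ in range(k):
            t = draft(ctx)
            proposal.append(t)
            ctx.append(t)
        # Target verifies: keep the longest prefix it agrees with.
        kept = 0
        for i, t in enumerate(proposal):
            if target(tokens + proposal[:i]) == t:
                kept += 1
            else:
                break
        tokens += proposal[:kept]
        # The target always contributes the next token after the kept prefix.
        tokens.append(target(tokens))
    return tokens[len(prompt):]

# Stand-ins: the target counts up from the last token; the draft is usually right.
target_model = lambda ctx: ctx[-1] + 1
draft_model = lambda ctx: ctx[-1] + 1 if ctx[-1] % 3 else ctx[-1] + 2
out = assisted_generate(target_model, draft_model, [0])
```

The key property, preserved even in this toy version, is that the output is identical to what greedy decoding of the target alone would produce; the draft only changes how many target calls are needed.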
@@ -31,22 +31,22 @@ Generate the quantized checkpoints by running ```models/llm/run_awq.py``` for ta
# Instructions to run the LLM models
## 1. OPT-6.7b
### a. OPT-6.7b without assisted generation

```
python assisted_generation.py --model_name opt-6.7b --task benchmark
```
### b. OPT-6.7b with OPT-125M assistant model

```
python assisted_generation.py --model_name opt-6.7b --task benchmark --assisted_generation
```
## 2. llama-2-7b
### a. llama-2-7b without assisted generation, with fast attention

```
python assisted_generation.py --model_name llama-2-7b --task benchmark --fast_attention
```
### b. llama-2-7b with llama-160m assistant model, with fast attention

```
python assisted_generation.py --model_name llama-2-7b --task benchmark --assisted_generation --fast_attention
```
**Note:**
- The `fast_attention` argument is only supported with the llama-2-7b and llama-2-7b-chat models in this release.
- A known issue related to the kernel driver shows up when using assisted generation with llama-2-7b.
20 changes: 19 additions & 1 deletion example/transformers/models/llm_gguf/docs/README.md
@@ -13,14 +13,32 @@ Assumes Windows CMD shell
### Activate the ryzenai-transformers conda environment
```console
cd <transformers>
set TRANSFORMERS_ROOT=%CD%
conda env create --file=env.yaml
conda activate ryzenai-transformers
```

⚠️ **Warning:** Windows has a path length limit that you may hit when building the project or installing the wheels, resulting in cryptic errors.
To work around it, use a virtual drive to shorten the path the repository is cloned to:

*On Command Prompt*
```batch
@REM use any unused drive letter, Z: for example
subst Z: %cd%
@REM switch to the Z: drive
Z:
```

You can remove the virtual drive with:

*On Command Prompt*
```batch
subst /d Z:
```
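Whether a given clone location is at risk of hitting the classic 260-character MAX_PATH limit can be estimated with a quick sketch (260 is the historical Windows default; enabling long-path support changes it, and the directory names below are hypothetical examples):

```python
# Rough check: how much headroom is left before paths under a clone root
# hit the historical Windows MAX_PATH limit?
MAX_PATH = 260  # default Windows limit without long-path support

def path_headroom(clone_root: str, deepest_relative: str) -> int:
    """Characters left before clone_root\\deepest_relative reaches MAX_PATH."""
    return MAX_PATH - (len(clone_root.rstrip("\\")) + 1 + len(deepest_relative))

deep = r"example\transformers\models\llm_gguf\docs\README.md"
# A long user-profile path leaves little headroom...
before = path_headroom(r"C:\Users\someuser\Documents\projects\checkouts\RyzenAI-SW", deep)
# ...while a subst'd drive letter leaves much more.
after = path_headroom(r"Z:", deep)
```

This is why the `subst Z:` trick above helps: it shortens the prefix every build artifact and wheel path inherits.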

### Build and Install RyzenAI
```console
REM use setup_stx.bat instead on STX platforms
setup_phx.bat
set TRANSFORMERS_ROOT=%PYTORCH_AIE_PATH%

cd %TRANSFORMERS_ROOT%\ops\cpp
cmake -B build\ -DCMAKE_INSTALL_PREFIX=%CONDA_PREFIX%
16 changes: 11 additions & 5 deletions example/transformers/models/llm_onnx/docs/README.md
@@ -17,9 +17,15 @@ and accept [Huggingface license](https://huggingface.co/meta-llama/Llama-2-7b-hf

:pushpin: Conda environment with python 3.10

Set up the environment variable:
```powershell
cd <transformers>
set TRANSFORMERS_ROOT=%CD%
```

Create conda environment:
```powershell
cd <transformers/models/llm_onnx>
cd %TRANSFORMERS_ROOT%\models\llm_onnx
conda update -n base -c defaults conda -y
conda env create --file=env.yaml
conda activate llm_onnx
@@ -40,13 +46,13 @@ pip install numpy==1.26.4

##### For PHX
```
cd <transformers>
cd %TRANSFORMERS_ROOT%
.\setup_phx.bat
```

##### For STX
```
cd <transformers>
cd %TRANSFORMERS_ROOT%
.\setup_stx.bat
```

@@ -58,6 +64,7 @@ Use the `prepare_model.py` script to export, optimize and quantize the LLMs. You can

Check script usage
```powershell
cd %TRANSFORMERS_ROOT%\models\llm_onnx
python prepare_model.py --help
usage: prepare_model.py [-h]
@@ -86,7 +93,6 @@ optional arguments:
#### Export, Optimize and quantize the model

```powershell
python .\prepare_model.py --model_name <model_name> --output_model_dir <output directory> --export --optimize --quantize
```
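The flag combination above maps naturally onto argparse. A minimal sketch, assuming only the flags shown in this README (defaults and help text here are assumptions, not the script's actual implementation):

```python
# Minimal sketch of the flag handling shown above; argument names are taken
# from the README, everything else is an assumption for illustration.
import argparse

def build_parser() -> argparse.ArgumentParser:
    p = argparse.ArgumentParser(prog="prepare_model.py")
    p.add_argument("--model_name", required=True, help="model id or local weights path")
    p.add_argument("--output_model_dir", required=True, help="where to write ONNX artifacts")
    p.add_argument("--export", action="store_true", help="export the model to ONNX")
    p.add_argument("--optimize", action="store_true", help="run the optimizer pass")
    p.add_argument("--quantize", action="store_true", help="quantize the optimized model")
    return p

args = build_parser().parse_args(
    ["--model_name", "llama-2-7b", "--output_model_dir", "out",
     "--export", "--optimize", "--quantize"]
)
```

Because the three stage flags are independent booleans, the same script can export only, or pick up an already exported model and just optimize and quantize it, as the next section describes.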
#### Optimize and quantize existing model
@@ -149,7 +155,7 @@ as `float32` model. The quantizer used is `MatMul4BitsQuantizer` from onnxruntim
> As for the optimizer, the ORT optimizer is used.
### Using ONNX Runtime Interface

**Note:** Copy the 'model.onnx.data' file from output_model_dir to the models/llm_onnx/ folder.

**Note:** Each run generates a log file in `./logs` directory with name `log_<model_name>.log`.
```powershell
51 changes: 48 additions & 3 deletions iGPU/getting_started/README.md
@@ -4,10 +4,10 @@ This is an example showing how to run the ResNet50 model from PyTorch on AMD's i

## Activate Ryzen AI conda environment

Activate the conda environment created by the MSI installer:

```powershell
conda activate ryzen-ai-1.2.0
```

## Install Olive
@@ -33,8 +33,53 @@ The optimized models will be available in `./torch_to_onnx-float16_conversion-pe

## Run the generated model on the iGPU

### Deployment in Python

```powershell
python predict.py
```

**_NOTE:_** In `predict.py` (line 15), the iGPU device ID is set to 0. On PCs with multiple GPUs, adjust `device_id` to target a specific iGPU.
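Device selection is passed to ONNX Runtime through the providers list. A sketch of the relevant fragment (the session line is commented out because it requires the onnxruntime-directml package and a model file; `resnet50.onnx` is a placeholder name):

```python
# Selecting a specific iGPU for the DirectML execution provider.
# device_id 0 is the default; raise it on multi-GPU systems.
device_id = 0
providers = [("DmlExecutionProvider", {"device_id": device_id})]

# import onnxruntime as ort  # requires the onnxruntime-directml package
# session = ort.InferenceSession("resnet50.onnx", providers=providers)
```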

### Deployment in C++

#### Prerequisites

1. Visual Studio 2022 Community Edition, with the "Desktop Development with C++" workload installed
2. CMake (version >= 3.26)
3. OpenCV (version 4.6.0), required for the ResNet50 example

#### Install OpenCV from source

It is recommended to build OpenCV from source as a static build. The following instructions install OpenCV to "C:\\opencv" as an example; this location can be changed by modifying `CMAKE_INSTALL_PREFIX` (and the matching `CMAKE_PREFIX_PATH`) in the cmake command below. First change to the directory where you want to clone the OpenCV repository.

```powershell
git clone https://github.com/opencv/opencv.git -b 4.6.0
cd opencv
cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -G "Visual Studio 17 2022" "-DCMAKE_INSTALL_PREFIX=C:\opencv" "-DCMAKE_PREFIX_PATH=C:\opencv" -DCMAKE_BUILD_TYPE=Release -DBUILD_opencv_python2=OFF -DBUILD_opencv_python3=OFF -DBUILD_WITH_STATIC_CRT=OFF -B build
cmake --build build --config Release
cmake --install build --config Release
```
The build files will be written to ``build\``.

#### Run Olive-optimized ResNet50 model on the iGPU

Build the given ResNet50 C++ example:

```powershell
cd cpp
compile.bat "path/to/your/opencv/build"
```

Run inference:

```powershell
run.bat
```

20 changes: 20 additions & 0 deletions iGPU/getting_started/cpp/compile.bat
@@ -0,0 +1,20 @@
@echo off

if "%RYZEN_AI_INSTALLATION_PATH%" == "" echo RYZEN_AI_INSTALLATION_PATH not set. This script requires the RYZEN_AI_INSTALLATION_PATH env var to be set to the RyzenAI 1.2 installation folder. & goto :error

REM Check if the first argument is provided
if "%1"=="" (
echo Usage: %0 [OpenCV_DIR]
exit /b 1
)

set "OpenCV_DIR=%~1"

echo OpenCV_DIR is set to: %OpenCV_DIR%

cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CONFIGURATION_TYPES=Release -A x64 -T host=x64 -DCMAKE_INSTALL_PREFIX=. -DCMAKE_PREFIX_PATH=. -B build -S resnet50 -DOpenCV_DIR="%OpenCV_DIR%" -G "Visual Studio 17 2022"

cmake --build .\build --config Release --target ALL_BUILD

:error
exit /b %errorlevel%
38 changes: 38 additions & 0 deletions iGPU/getting_started/cpp/resnet50/CMakeLists.txt
@@ -0,0 +1,38 @@
cmake_minimum_required(VERSION 3.5)

project(resnet50 VERSION 1.0.0 LANGUAGES C CXX)
set(CMAKE_CXX_STANDARD 17)
find_package(Python 3.10 EXACT COMPONENTS Interpreter Development)

set (RYZEN_AI_INSTALLATION_PATH $ENV{RYZEN_AI_INSTALLATION_PATH})


set(ONNXRUNTIME_ROOTDIR "${RYZEN_AI_INSTALLATION_PATH}/onnxruntime")

include_directories("${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session")

link_directories("${ONNXRUNTIME_ROOTDIR}/lib")

link_directories("${CMAKE_INSTALL_PREFIX}/lib")



find_package(OpenCV COMPONENTS core highgui imgproc REQUIRED)

add_executable(resnet50 resnet50.cpp util/getopt.c)
target_include_directories(resnet50
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/util
)
target_link_libraries(resnet50 ${ORT_LIBRARY} ${OpenCV_LIBS} onnxruntime)
install(TARGETS resnet50 RUNTIME DESTINATION bin)

add_custom_command(
TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy
${RYZEN_AI_INSTALLATION_PATH}/onnxruntime/bin/DirectML.dll
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>/DirectML.dll)

add_custom_command(
TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy
${RYZEN_AI_INSTALLATION_PATH}/onnxruntime/bin/onnxruntime.dll
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>/onnxruntime.dll)
