MPS CI runs #162

Merged 17 commits on Apr 15, 2024
6 changes: 3 additions & 3 deletions .github/workflows/compile-bf16.yml
@@ -44,9 +44,9 @@ jobs:
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
for DTYPE in bfloat16 float16 float32; do
-if [ $(uname -s) == Darwin ]; then
-  export DTYPE=float16
-fi
+# if [ $(uname -s) == Darwin ]; then
+#   export DTYPE=float16
+# fi
python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
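With the Darwin override commented out, macOS runners now exercise bfloat16 alongside float16 and float32. The hunk is truncated before any comparison step is shown, but a minimal sketch of an exact-match check between the two captured outputs, assuming byte-identical generation is the invariant under test, could look like:

    if ! diff ./output_eager ./output_compiled; then
      echo "eager vs compiled outputs differ for DTYPE=${DTYPE}" >&2
      exit 1
    fi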
65 changes: 65 additions & 0 deletions .github/workflows/test_mps-dtype.yml
@@ -0,0 +1,65 @@
name: Run eager tests on MPS with dtypes

on:
pull_request:
push:
branches:
- main
workflow_dispatch:

jobs:
test-mps:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable
script: |
set -eou pipefail

echo "::group::Print machine info"
uname -a
if [ $(uname -s) == Darwin ]; then
sysctl machdep.cpu.brand_string
sysctl machdep.cpu.core_count
fi
echo "::endgroup::"

echo "::group::Install requirements"
# Install requirements
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
ls -la
pwd
pip install -r requirements.txt
echo "::endgroup::"

echo "::group::Download checkpoints"
(
mkdir -p checkpoints/stories15M
pushd checkpoints/stories15M
curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
popd
)
echo "::endgroup::"

echo "::group::Run inference"
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
for DTYPE in float16 float32; do
# if [ $(uname -s) == Darwin ]; then
# export DTYPE=float16
# fi

python generate.py --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
done
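The five quantized generate.py invocations above differ only in the --quant JSON payload. A hypothetical consolidation (not part of this PR) that drives the same matrix from an array, with the config strings copied verbatim from the workflow:

    QUANT_CONFIGS=(
      '{"embedding" : {"bitwidth": 8, "group_size": 0}}'
      '{"embedding" : {"bitwidth": 8, "group_size": 8}}'
      '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}'
      '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}'
      '{"linear:int4" : {"group_size": 32}}'
    )
    for QUANT in "${QUANT_CONFIGS[@]}"; do
      python generate.py --dtype ${DTYPE} --device mps --quant "${QUANT}" --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
      cat ./output_eager
    done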
14 changes: 13 additions & 1 deletion .github/workflows/test_mps.yml
@@ -1,4 +1,4 @@
-name: Run compile tests on MPS
+name: Run eager tests on MPS

on:
pull_request:
@@ -45,5 +45,17 @@ jobs:
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp

python generate.py --device mps --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --device mps --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
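Since both workflows hard-code --device mps, a quick sanity check that the runner's MPS backend is actually usable, via torch's public API, could precede the inference steps (a suggested addition, not part of this PR):

    python -c "import torch; assert torch.backends.mps.is_available(), 'MPS backend not available'"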
