MPS CI runs (#162)
* MPS quantization

* mps dtypes

* updates

* fix names

* typo

* no bfloat16 for older macOS

* fix typo

* remove failing embedding quantization from MPS runs

* bfloat16 -> current model precision

* typo

* missed bfloat16 to switch to default precision

* remove int8 quantization on mps

* enable cpu fallback for mps on int4

* hack int4pack_mm for torch.float

* typo

* disable int4 because fp16 int4pack_mm not working for float16
mikekgfb authored and malfet committed Jul 17, 2024
1 parent 76c330e commit a3cda44
Showing 5 changed files with 104 additions and 17 deletions.
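The "bfloat16 -> current model precision" and "missed bfloat16 to switch to default precision" bullets share one idea: quantization buffers should be allocated in whatever dtype the model is currently running in, rather than hard-coded torch.bfloat16, which is unavailable on older macOS/MPS. Below is a minimal sketch of the precision plumbing the quantize.py hunk further down assumes; the repo's real get_precision() may be implemented differently.

import torch

# Hedged sketch (assumed implementation): a module-level default precision that
# quantize.py queries via get_precision() when allocating scale buffers.
_precision: torch.dtype = torch.float32

def set_precision(dtype: torch.dtype) -> None:
    # e.g. set_precision(torch.float16) on MPS runners without bfloat16 support
    global _precision
    _precision = dtype

def get_precision() -> torch.dtype:
    # dtype used for quantization buffers such as per-group scales
    return _precision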
6 changes: 3 additions & 3 deletions .github/workflows/compile-bf16.yml
@@ -44,9 +44,9 @@ jobs:
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
for DTYPE in bfloat16 float16 float32; do
-  if [ $(uname -s) == Darwin ]; then
-    export DTYPE=float16
-  fi
+  # if [ $(uname -s) == Darwin ]; then
+  #   export DTYPE=float16
+  # fi
  python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
  cat ./output_eager
  python generate.py --dtype ${DTYPE} --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
65 changes: 65 additions & 0 deletions .github/workflows/test_mps-dtype.yml
@@ -0,0 +1,65 @@
name: Run eager tests on MPS with dtypes

on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  test-mps:
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-m1-stable
      script: |
        set -eou pipefail
        echo "::group::Print machine info"
        uname -a
        if [ $(uname -s) == Darwin ]; then
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
        fi
        echo "::endgroup::"
        echo "::group::Install requirements"
        # Install requirements
        pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
        ls -la
        pwd
        pip install -r requirements.txt
        echo "::endgroup::"
        echo "::group::Download checkpoints"
        (
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
        )
        echo "::endgroup::"
        echo "::group::Run inference"
        export MODEL_PATH=checkpoints/stories15M/stories15M.pt
        export MODEL_NAME=stories15M
        export MODEL_DIR=/tmp
        for DTYPE in float16 float32; do
          # if [ $(uname -s) == Darwin ]; then
          #   export DTYPE=float16
          # fi
          python generate.py --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          cat ./output_eager
          # python generate.py --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          # cat ./output_eager
          # python generate.py --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          # cat ./output_eager
          # python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          # cat ./output_eager
          # python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          # cat ./output_eager
          # PYTORCH_ENABLE_MPS_FALLBACK=1 python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
          # cat ./output_eager
        done
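The dtype matrix above covers only float16 and float32 because bfloat16 support on MPS depends on the macOS version (the "no bfloat16 for older macOS" bullet). A hedged probe, not part of the commit, that picks a dtype at runtime might look like this:

import torch

def pick_mps_dtype() -> torch.dtype:
    # Stay on CPU defaults if MPS is not available at all.
    if not torch.backends.mps.is_available():
        return torch.float32
    try:
        # Allocating a bfloat16 tensor fails on macOS releases without bf16 support.
        torch.zeros(1, dtype=torch.bfloat16, device="mps")
        return torch.bfloat16
    except (TypeError, RuntimeError):
        return torch.float16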
14 changes: 13 additions & 1 deletion .github/workflows/test_mps.yml
@@ -1,4 +1,4 @@
-name: Run compile tests on MPS
+name: Run eager tests on MPS

on:
  pull_request:
@@ -45,5 +45,17 @@ jobs:
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
python generate.py --device mps --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
+# python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+# cat ./output_eager
+# python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+# cat ./output_eager
+# python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+# cat ./output_eager
+# python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+# cat ./output_eager
+# PYTORCH_ENABLE_MPS_FALLBACK=1 python generate.py --device mps --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+# cat ./output_eager
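The commented-out int4 invocation relies on PYTORCH_ENABLE_MPS_FALLBACK=1, which makes PyTorch route ops that have no MPS kernel to the CPU instead of raising (the "enable cpu fallback for mps on int4" bullet). A small sketch of setting it in-process; it is normally exported in the shell before launching Python so the MPS backend sees it:

import os

# Set before the first `import torch` so the MPS backend picks it up.
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")

import torch

device = "mps" if torch.backends.mps.is_available() else "cpu"
x = torch.randn(4, 4, device=device)
print(x.device)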
30 changes: 20 additions & 10 deletions quantize.py
@@ -465,11 +465,12 @@ def __init__(
        self.register_buffer(
            "weight", torch.empty((out_features, in_features), dtype=torch.int8)
        )
-       if groupsize is None or (groupsize == 0):
-           self.register_buffer("scales", torch.ones(out_features, dtype=torch.bfloat16))
+       dtype=get_precision()
+       if group_size is None or (group_size == 0):
+           self.register_buffer("scales", torch.ones(out_features, dtype=dtype))
        else:
-           groups = (in_features + groupsize - 1) // groupsize
-           self.register_buffer("scales", torch.ones(out_features, groups, dtype=torch.bfloat16))
+           groups = (in_features + group_size - 1) // group_size
+           self.register_buffer("scales", torch.ones(out_features, groups, dtype=dtype))

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        scales = self.scales
@@ -683,12 +684,21 @@ def _int4_calc_padded_size(k, groupsize=1, innner_k_tiles=1):
def linear_forward_int4(x, weight_int4pack, scales_and_zeros, out_features, groupsize):
    origin_x_size = x.size()
    x = x.reshape(-1, origin_x_size[-1])
-   c = torch.ops.aten._weight_int4pack_mm(
-       x.to(dtype=torch.bfloat16),
-       weight_int4pack,
-       groupsize,
-       scales_and_zeros.to(dtype=torch.bfloat16)
-   ).to(dtype=x.dtype)
+   if x.dtype == torch.float:
+       # work around missing int4pack_mm for torch.float
+       c = torch.ops.aten._weight_int4pack_mm(
+           x.to(torch.float16),
+           weight_int4pack,
+           groupsize,
+           scales_and_zeros.to(torch.float16),
+       ).to(torch.float)
+   else:
+       c = torch.ops.aten._weight_int4pack_mm(
+           x,
+           weight_int4pack,
+           groupsize,
+           scales_and_zeros,
+       )
    new_shape = origin_x_size[:-1] + (out_features,)
    c = c.reshape(new_shape)
    return c
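The if/else added above is the "hack int4pack_mm for torch.float" bullet: float32 activations are round-tripped through float16 because _weight_int4pack_mm lacks a float32 kernel on this path. A sketch of the same pattern in isolation, with a hypothetical fp16_only_mm standing in for the packed-int4 matmul:

import torch

def fp16_only_mm(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
    # Stand-in for a kernel that only accepts float16 inputs.
    assert x.dtype == torch.float16
    return x @ w.to(torch.float16)

def forward_with_fp16_roundtrip(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
    if x.dtype == torch.float:
        # Round-trip through float16 and cast the result back. Per the last commit
        # bullet, the float16 kernel itself was still problematic, so the int4
        # invocations in the workflows above remain commented out.
        return fp16_only_mm(x.to(torch.float16), w).to(torch.float)
    return fp16_only_mm(x, w)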
6 changes: 3 additions & 3 deletions quantized_ops.py
@@ -120,11 +120,11 @@ def linear_int4(
    origin_input_size = input.size()
    input = input.reshape(-1, origin_input_size[-1])
    c = torch.ops.aten._weight_int4pack_mm(
-       input.to(dtype=torch.bfloat16),
+       input,
        weight_int4pack,
        groupsize,
-       scales_and_zeros.to(dtype=torch.bfloat16)
-   ).to(dtype=input.dtype)
+       scales_and_zeros,
+   )
    new_shape = origin_input_size[:-1] + (out_features,)
    c = c.reshape(new_shape)
    return c
