
MPS CI runs #162

Merged (17 commits) on Apr 15, 2024
6 changes: 3 additions & 3 deletions .github/workflows/compile-bf16.yml
@@ -44,9 +44,9 @@ jobs:
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
for DTYPE in bfloat16 float16 float32; do
- if [ $(uname -s) == Darwin ]; then
- export DTYPE=float16
- fi
+ # if [ $(uname -s) == Darwin ]; then
+ # export DTYPE=float16
+ # fi
python generate.py --dtype ${DTYPE} --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
python generate.py --dtype ${DTYPE} --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
65 changes: 65 additions & 0 deletions .github/workflows/test_mps-dtype.yml
@@ -0,0 +1,65 @@
name: Run eager tests on MPS with dtypes

on:
pull_request:
push:
branches:
- main
workflow_dispatch:

jobs:
test-mps:
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable
script: |
set -eou pipefail

echo "::group::Print machine info"
uname -a
if [ $(uname -s) == Darwin ]; then
sysctl machdep.cpu.brand_string
sysctl machdep.cpu.core_count
fi
echo "::endgroup::"

echo "::group::Install requirements"
# Install requirements
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
ls -la
pwd
pip install -r requirements.txt
echo "::endgroup::"

echo "::group::Download checkpoints"
(
mkdir -p checkpoints/stories15M
pushd checkpoints/stories15M
curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
popd
)
echo "::endgroup::"

echo "::group::Run inference"
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
for DTYPE in float16 float32; do
# if [ $(uname -s) == Darwin ]; then
# export DTYPE=float16
# fi

python generate.py --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
# python generate.py --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# python generate.py --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# PYTORCH_ENABLE_MPS_FALLBACK=1 python generate.py --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
done
14 changes: 13 additions & 1 deletion .github/workflows/test_mps.yml
@@ -1,4 +1,4 @@
- name: Run compile tests on MPS
+ name: Run eager tests on MPS

on:
pull_request:
@@ -45,5 +45,17 @@ jobs:
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp

python generate.py --device mps --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
cat ./output_eager
# python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# python generate.py --device mps --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# python generate.py --device mps --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
# PYTORCH_ENABLE_MPS_FALLBACK=1 python generate.py --device mps --quant '{"linear:int4" : {"group_size": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
# cat ./output_eager
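
For reference, the commented-out commands in both MPS workflows pass a quantization recipe to generate.py as inline JSON via --quant. Below is a minimal sketch that collects those same recipes as Python dicts and prints the shell-quoted form, handy when re-enabling them locally; the schema is taken verbatim from the commands above, and nothing beyond that is assumed.

import json
import shlex

# Quantization recipes exercised (currently commented out) by the MPS workflows above.
quant_configs = [
    {"embedding": {"bitwidth": 8, "group_size": 0}},
    {"embedding": {"bitwidth": 8, "group_size": 8}},
    {"linear:int8": {"bitwidth": 8, "group_size": 0}},
    {"linear:int8": {"bitwidth": 8, "group_size": 8}},
    {"linear:int4": {"group_size": 32}},  # the workflow also sets PYTORCH_ENABLE_MPS_FALLBACK=1 for this one
]

for cfg in quant_configs:
    # Prints e.g.: --quant '{"embedding": {"bitwidth": 8, "group_size": 0}}'
    print("--quant", shlex.quote(json.dumps(cfg)))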

30 changes: 20 additions & 10 deletions quantize.py
@@ -465,11 +465,12 @@ def __init__(
self.register_buffer(
"weight", torch.empty((out_features, in_features), dtype=torch.int8)
)
- if groupsize is None or (groupsize == 0):
- self.register_buffer("scales", torch.ones(out_features, dtype=torch.bfloat16))
+ dtype=get_precision()
+ if group_size is None or (group_size == 0):
+ self.register_buffer("scales", torch.ones(out_features, dtype=dtype))
else:
- groups = (in_features + groupsize - 1) // groupsize
- self.register_buffer("scales", torch.ones(out_features, groups, dtype=torch.bfloat16))
+ groups = (in_features + group_size - 1) // group_size
+ self.register_buffer("scales", torch.ones(out_features, groups, dtype=dtype))

def forward(self, input: torch.Tensor) -> torch.Tensor:
scales = self.scales
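
As an aside, the group count in the hunk above uses ceiling division so that a trailing partial group still gets its own scale. A quick illustrative sketch of that arithmetic (the helper and values are made up for illustration):

def num_groups(in_features: int, group_size: int) -> int:
    # Ceiling division: leftover columns form one extra, partial group.
    return (in_features + group_size - 1) // group_size

assert num_groups(4096, 32) == 128  # divides evenly
assert num_groups(4100, 32) == 129  # 4 leftover columns -> one more group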
@@ -683,12 +684,21 @@ def _int4_calc_padded_size(k, groupsize=1, innner_k_tiles=1):
def linear_forward_int4(x, weight_int4pack, scales_and_zeros, out_features, groupsize):
origin_x_size = x.size()
x = x.reshape(-1, origin_x_size[-1])
- c = torch.ops.aten._weight_int4pack_mm(
- x.to(dtype=torch.bfloat16),
- weight_int4pack,
- groupsize,
- scales_and_zeros.to(dtype=torch.bfloat16)
- ).to(dtype=x.dtype)
+ if x.dtype == torch.float:
+ # work around missing int4pack_mm for torch.float
+ c = torch.ops.aten._weight_int4pack_mm(
+ x.to(torch.float16),
+ weight_int4pack,
+ groupsize,
+ scales_and_zeros.to(torch.float16),
+ ).to(torch.float)
+ else:
+ c = torch.ops.aten._weight_int4pack_mm(
+ x,
+ weight_int4pack,
+ groupsize,
+ scales_and_zeros,
+ )
new_shape = origin_x_size[:-1] + (out_features,)
c = c.reshape(new_shape)
return c
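
The branching above is there because _weight_int4pack_mm may lack a float32 kernel on some backends, so float32 activations are routed through a half-precision call and cast back afterwards. A minimal sketch of that general cast-around-the-op pattern, with op standing in for any kernel that only accepts reduced-precision inputs (the helper name is illustrative, not part of this PR):

import torch

def call_with_dtype_workaround(op, x: torch.Tensor, *args, work_dtype=torch.float16):
    # If the kernel has no float32 implementation, cast the input down,
    # run the op, then cast the result back so callers still see float32.
    if x.dtype == torch.float:
        return op(x.to(work_dtype), *args).to(torch.float)
    return op(x, *args)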
6 changes: 3 additions & 3 deletions quantized_ops.py
@@ -120,11 +120,11 @@ def linear_int4(
origin_input_size = input.size()
input = input.reshape(-1, origin_input_size[-1])
c = torch.ops.aten._weight_int4pack_mm(
- input.to(dtype=torch.bfloat16),
+ input,
weight_int4pack,
groupsize,
- scales_and_zeros.to(dtype=torch.bfloat16)
- ).to(dtype=input.dtype)
+ scales_and_zeros,
+ )
new_shape = origin_input_size[:-1] + (out_features,)
c = c.reshape(new_shape)
return c