Skip to content

Commit

Permalink
Merge branch 'main' into sd-logging
Browse files — browse the repository at this point in the history
  • Loading branch information
monorimet authored Nov 13, 2024
2 parents e2b23f7 + 7bd3253 commit 71855bc
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-sharktank.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
# Update to the latest iree packages.
pip install -f https://iree.dev/pip-release-links.html --upgrade \
iree-compiler iree-runtime --src deps \
iree-base-compiler iree-base-runtime --src deps \
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
- name: Run sharktank tests
Expand Down
26 changes: 26 additions & 0 deletions shortfin/python/shortfin/interop/support/device_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import shortfin as sf


def get_selected_devices(sb: "sf.SystemBuilder", device_ids=None):
    """Resolve requested device ids against the devices a builder reports.

    Args:
        sb: System builder; only its ``available_devices`` attribute is read.
        device_ids: Optional sequence of requested devices. Each entry may be
            a device id contained in ``sb.available_devices``, an integer
            index into that list, or a string holding such an index
            (e.g. ``"0"``). ``None`` selects every available device.

    Returns:
        A list of the selected device ids in the order they were requested.
        When ``device_ids`` is ``None`` the value read from
        ``sb.available_devices`` is returned as-is.

    Raises:
        ValueError: If more ids are requested than are available, if an
            integer index is out of range, or if an entry is neither a known
            device id nor an index.
    """
    available = sb.available_devices
    if device_ids is None:
        return available

    if len(device_ids) > len(available):
        raise ValueError(
            f"Requested more device ids ({device_ids}) than available ({available})."
        )

    selected = []
    for did in device_ids:
        if isinstance(did, str):
            # Numeric strings are treated as indices; anything else is kept
            # as a string and matched against the available ids below.
            try:
                did = int(did)
            except ValueError:
                pass
        if did in available:
            selected.append(did)
        elif isinstance(did, int):
            # Report a bad index the same way as any other bad id instead of
            # leaking a context-free IndexError to the caller.
            try:
                selected.append(available[did])
            except IndexError as exc:
                raise ValueError(
                    f"Device index {did} is out of range for available "
                    f"devices ({available})."
                ) from exc
        else:
            raise ValueError(f"Device id {did} could not be parsed.")
    return selected
15 changes: 11 additions & 4 deletions shortfin/python/shortfin_apps/llm/components/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,23 @@
import threading

import shortfin as sf
from shortfin.interop.support.device_setup import get_selected_devices

logger = logging.getLogger(__name__)


class SystemManager:
def __init__(self, device="local-task"):
if device == "local-task":
def __init__(self, device="local-task", device_ids=None, async_allocs=True):
if any(x in device for x in ["local-task", "cpu"]):
self.ls = sf.host.CPUSystemBuilder().create_system()
elif device == "hip":
self.ls = sf.amdgpu.SystemBuilder().create_system()
elif any(x in device for x in ["hip", "amdgpu"]):
sb = sf.SystemBuilder(
system_type="amdgpu", amdgpu_async_allocations=async_allocs
)
if device_ids:
sb.visible_devices = sb.available_devices
sb.visible_devices = get_selected_devices(sb, device_ids)
self.ls = sb.create_system()
logger.info(f"Created local system with {self.ls.device_names} devices")
# TODO: Come up with an easier bootstrap thing than manually
# running a thread.
Expand Down
21 changes: 19 additions & 2 deletions shortfin/python/shortfin_apps/llm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def get_eos_from_tokenizer_config(json_path):

def configure(args) -> SystemManager:
# Setup system (configure devices, etc).
sysman = SystemManager(device=args.device)
sysman = SystemManager(
device=args.device,
device_ids=args.device_ids,
async_allocs=args.amdgpu_async_allocations,
)

# Setup each service we are hosting.
eos_token = get_eos_from_tokenizer_config(args.tokenizer_config_json)
Expand Down Expand Up @@ -155,16 +159,29 @@ def main(argv, log_config=uvicorn.config.LOGGING_CONFIG):
parser.add_argument(
"--device",
type=str,
default="local-task",
required=True,
choices=["local-task", "hip", "amdgpu"],
help="Device to serve on; e.g. local-task, hip. Same options as `iree-run-module --device` ",
)
parser.add_argument(
"--device_ids",
type=str,
nargs="*",
default=None,
help="Device IDs visible to the system builder. Defaults to None (full visibility). Can be an index or a sf device id like amdgpu:0:0@0",
)
parser.add_argument(
"--isolation",
type=str,
default="per_call",
choices=[isolation.name.lower() for isolation in ProgramIsolation],
help="Concurrency control -- How to isolate programs.",
)
parser.add_argument(
"--amdgpu_async_allocations",
action="store_true",
help="Enable asynchronous allocations for amdgpu device contexts.",
)
args = parser.parse_args(argv)

if args.tokenizer_config_json is None:
Expand Down
26 changes: 1 addition & 25 deletions shortfin/python/shortfin_apps/sd/components/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,11 @@
import threading

import shortfin as sf
from shortfin.interop.support.device_setup import get_selected_devices

logger = logging.getLogger("shortfin-sd.manager")


def get_selected_devices(sb: "sf.SystemBuilder", device_ids=None):
    """Resolve requested device ids against the devices a builder reports.

    Args:
        sb: System builder; only its ``available_devices`` attribute is read.
        device_ids: Optional sequence of requested devices. Each entry may be
            a device id contained in ``sb.available_devices``, an integer
            index into that list, or a string holding such an index
            (e.g. ``"0"``). ``None`` selects every available device.

    Returns:
        A list of the selected device ids in the order they were requested.
        When ``device_ids`` is ``None`` the value read from
        ``sb.available_devices`` is returned as-is.

    Raises:
        ValueError: If more ids are requested than are available, if an
            integer index is out of range, or if an entry is neither a known
            device id nor an index.
    """
    available = sb.available_devices
    if device_ids is None:
        return available

    # Strictly greater: asking for exactly as many devices as exist is valid.
    if len(device_ids) > len(available):
        raise ValueError(
            f"Requested more device ids ({device_ids}) than available ({available})."
        )

    selected = []
    for did in device_ids:
        if isinstance(did, str):
            # Numeric strings are treated as indices; anything else is kept
            # as a string and matched against the available ids below.
            try:
                did = int(did)
            except ValueError:
                pass
        if did in available:
            selected.append(did)
        elif isinstance(did, int):
            # Report a bad index the same way as any other bad id instead of
            # leaking a context-free IndexError to the caller.
            try:
                selected.append(available[did])
            except IndexError as exc:
                raise ValueError(
                    f"Device index {did} is out of range for available "
                    f"devices ({available})."
                ) from exc
        else:
            raise ValueError(f"Device id {did} could not be parsed.")
    return selected


class SystemManager:
def __init__(self, device="local-task", device_ids=None, async_allocs=True):
if any(x in device for x in ["local-task", "cpu"]):
Expand Down
32 changes: 15 additions & 17 deletions shortfin/python/shortfin_apps/sd/components/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,24 +186,22 @@ def __repr__(self):
params = [
f" {key} : {value}" for key, value in self.inference_parameters.items()
]
mod_string = '\n'.join(modules)
params_string = '\n'.join(params)
# For python 3.11 since we can't have \ in the f"" expression.
new_line = "\n"
return (
f"ServiceManager(" +
"\n INFERENCE DEVICES : \n" +
f" {self.sysman.ls.devices}\n" +
"\n MODEL PARAMS : \n" +
f"{self.model_params}" +
"\n SERVICE PARAMS : \n" +
f" fibers per device : {self.fibers_per_device}" +
"\n" +
f" program isolation mode : {self.prog_isolation}" +
"\n" +
"\n INFERENCE MODULES : \n" +
mod_string +
"\n INFERENCE PARAMETERS : \n" +
params_string +
")"
f"ServiceManager("
f"\n INFERENCE DEVICES : \n"
f" {self.sysman.ls.devices}\n"
f"\n MODEL PARAMS : \n"
f"{self.model_params}"
f"\n SERVICE PARAMS : \n"
f" fibers per device : {self.fibers_per_device}\n"
f" program isolation mode : {self.prog_isolation}\n"
f"\n INFERENCE MODULES : \n"
f"{new_line.join(modules)}\n"
f"\n INFERENCE PARAMETERS : \n"
f"{new_line.join(params)}\n"
f")"
)


Expand Down

0 comments on commit 71855bc

Please sign in to comment.