From 66a4043f41b896c00127605e706f11a100fa41db Mon Sep 17 00:00:00 2001 From: Stephen Baione <109226581+stbaione@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:24:09 -0500 Subject: [PATCH] Add Patch for LLM Server (#379) Small patch reflecting some recent changes in `sf.Program` and `sf.ProgramFunction`. This change was originally included as part of the following PR, which adds an integration test to shortfin LLM serving: https://github.com/nod-ai/SHARK-Platform/pull/373 It is split out here, since that PR may take a little more time to adjust and to add a workflow file. Without it, you get the following error when trying to launch the server: ```text [2024-10-30 11:59:09.939] [info] [manager.py:40] System manager command processor stopped [2024-10-30 11:59:09.991] [error] [on.py:121] Traceback (most recent call last): File "/home/amd/stephen/repos/forks/SHARK-Platform/.venv/lib/python3.12/site-packages/starlette/routing.py", line 693, in lifespan async with self.lifespan_context(app) as maybe_state: File "/home/amd/.pyenv/versions/3.12.5/lib/python3.12/contextlib.py", line 210, in __aenter__ return await anext(self.gen) ^^^^^^^^^^^^^^^^^^^^^ File "/home/amd/stephen/repos/forks/SHARK-Platform/.venv/lib/python3.12/site-packages/shortfin_apps/llm/server.py", line 42, in lifespan service.start() File "/home/amd/stephen/repos/forks/SHARK-Platform/.venv/lib/python3.12/site-packages/shortfin_apps/llm/components/service.py", line 69, in start self.inference_program = sf.Program( ^^^^^^^^^^^ TypeError: __new__(): incompatible function arguments. The following argument types are supported: 1. 
__new__(cls: object, modules: collections.abc.Sequence[_shortfin_default.lib.local.ProgramModule], *, devices: collections.abc.Sequence[_shortfin_default.lib.local.Device], trace_execution: bool = False, isolation: _shortfin_default.lib.local.ProgramIsolation = ProgramIsolation.PER_FIBER) -> _shortfin_default.lib.local.Program Invoked with types: nanobind.nb_type_0, kwargs = { modules: list, fiber: _shortfin_default.lib.local.Fiber, trace_execution: bool } [2024-10-30 11:59:09.991] [error] [on.py:59] Application startup failed. Exiting. ``` With it, you're able to start server, send requests, and receive responses. --- shortfin/python/shortfin_apps/llm/components/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shortfin/python/shortfin_apps/llm/components/service.py b/shortfin/python/shortfin_apps/llm/components/service.py index 1e6245d53..646d186f8 100644 --- a/shortfin/python/shortfin_apps/llm/components/service.py +++ b/shortfin/python/shortfin_apps/llm/components/service.py @@ -73,7 +73,7 @@ def start(self): ) ] + self.inference_modules, - fiber=self.main_fiber, + devices=self.sysman.ls.devices, trace_execution=False, ) # Resolve prefill entrypoints. @@ -393,7 +393,7 @@ async def run(self): "".join([f"\n {i}: {ary.shape}" for i, ary in enumerate(args)]), ) # Invoke. Logits are of shape [bs, bsl, d]. - (logits,) = await fn(*args) + (logits,) = await fn(*args, fiber=self.fiber) # Return results. for i in range(req_count):