diff --git a/.github/workflows/build_packages.yml b/.github/workflows/build_packages.yml index 4a332b6f5..8f138d973 100644 --- a/.github/workflows/build_packages.yml +++ b/.github/workflows/build_packages.yml @@ -37,15 +37,15 @@ jobs: - name: Generate release candidate versions id: version_rc run: | - sharktank_package_version=$(python3 build_tools/gen_version_info_rc.py sharktank) - shortfin_package_version=$(python3 build_tools/gen_version_info_rc.py shortfin) - - name: Upload version_info_rc.json + sharktank_package_version=$(python3 build_tools/python_deploy/compute_local_version.py sharktank) + shortfin_package_version=$(python3 build_tools/python_deploy/compute_local_version.py shortfin) + - name: Upload version_local.json uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: version_info_rc + name: version_local path: | - sharktank/version_info_rc.json - shortfin/version_info_rc.json + sharktank/version_local.json + shortfin/version_local.json build_packages: name: "${{ matrix.package }} :: ${{ matrix.platform }} :: ${{ matrix.python-version }}" @@ -91,10 +91,10 @@ jobs: path: "c" # Windows can hit path length limits, so use a short path. submodules: false - - name: Download version_info_rc.json + - name: Download version_local.json uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: version_info_rc + name: version_local path: ./c/ merge-multiple: true diff --git a/.github/workflows/ci-sharktank.yml b/.github/workflows/ci-sharktank.yml index 6f359077a..4c660e6ee 100644 --- a/.github/workflows/ci-sharktank.yml +++ b/.github/workflows/ci-sharktank.yml @@ -63,7 +63,7 @@ jobs: # Update to the latest iree packages. pip install -f https://iree.dev/pip-release-links.html --upgrade \ - iree-compiler iree-runtime --src deps \ + iree-base-compiler iree-base-runtime --src deps \ -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - name: Run sharktank tests diff --git a/.gitignore b/.gitignore index 6474e6a8c..bdb0b5387 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,9 @@ wheelhouse *.whl *.venv +# Local-only config options +version_local.json + #Model artifacts *.pt *.safetensors diff --git a/build_tools/python_deploy/compute_common_version.py b/build_tools/python_deploy/compute_common_version.py index ba5e653fb..6aea7f254 100644 --- a/build_tools/python_deploy/compute_common_version.py +++ b/build_tools/python_deploy/compute_common_version.py @@ -36,8 +36,8 @@ THIS_DIR = Path(__file__).parent.resolve() REPO_ROOT = THIS_DIR.parent.parent -VERSION_FILE_SHARKTANK = REPO_ROOT / "sharktank/version_info.json" -VERSION_FILE_SHORTFIN = REPO_ROOT / "shortfin/version_info.json" +VERSION_FILE_SHARKTANK = REPO_ROOT / "sharktank/version.json" +VERSION_FILE_SHORTFIN = REPO_ROOT / "shortfin/version.json" VERSION_FILE_LOCAL = REPO_ROOT / "shark-ai/version_local.json" diff --git a/build_tools/gen_version_info_rc.py b/build_tools/python_deploy/compute_local_version.py similarity index 59% rename from build_tools/gen_version_info_rc.py rename to build_tools/python_deploy/compute_local_version.py index 9399053b0..46d18d0ed 100644 --- a/build_tools/gen_version_info_rc.py +++ b/build_tools/python_deploy/compute_local_version.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # Copyright 2024 Advanced Micro Devices, Inc. # # Licensed under the Apache License v2.0 with LLVM Exceptions. @@ -5,8 +6,8 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # This scripts grabs the X.Y.Z[.dev]` version identifier from a -# `version_info.json` and writes the corresponding -# `X.Y.ZrcYYYYMMDD` version identifier to `version_rc_info.json`. +# `version.json` and writes the corresponding +# `X.Y.ZrcYYYYMMDD` version identifier to `version_local.json`. import argparse from pathlib import Path @@ -20,18 +21,18 @@ parser.add_argument("path", type=Path) args = parser.parse_args() -VERSION_INFO_FILE = args.path / "version_info.json" -VERSION_INFO_RC_FILE = args.path / "version_info_rc.json" +VERSION_FILE = args.path / "version.json" +VERSION_FILE_LOCAL = args.path / "version_local.json" def load_version_info(): - with open(VERSION_INFO_FILE, "rt") as f: + with open(VERSION_FILE, "rt") as f: return json.load(f) def write_version_info(): - with open(VERSION_INFO_RC_FILE, "w") as f: - json.dump(version_info_rc, f, indent=2) + with open(VERSION_FILE_LOCAL, "w") as f: + json.dump(version_local, f, indent=2) f.write("\n") @@ -39,10 +40,12 @@ def write_version_info(): PACKAGE_VERSION = version_info.get("package-version") PACKAGE_BASE_VERSION = Version(PACKAGE_VERSION).base_version -PACKAGE_RC_VERSION = PACKAGE_BASE_VERSION + "rc" + datetime.today().strftime("%Y%m%d") +PACKAGE_LOCAL_VERSION = ( + PACKAGE_BASE_VERSION + "rc" + datetime.today().strftime("%Y%m%d") +) -version_info_rc = {"package-version": PACKAGE_RC_VERSION} +version_local = {"package-version": PACKAGE_LOCAL_VERSION} write_version_info() -print(PACKAGE_RC_VERSION) +print(PACKAGE_LOCAL_VERSION) diff --git a/build_tools/python_deploy/write_requirements.py b/build_tools/python_deploy/write_requirements.py index 6ad7c10f5..a89b74dfe 100644 --- a/build_tools/python_deploy/write_requirements.py +++ b/build_tools/python_deploy/write_requirements.py @@ -33,8 +33,8 @@ THIS_DIR = Path(__file__).parent.resolve() REPO_ROOT = THIS_DIR.parent.parent -VERSION_FILE_SHARKTANK = REPO_ROOT / "sharktank/version_info.json" -VERSION_FILE_SHORTFIN = REPO_ROOT / "shortfin/version_info.json" +VERSION_FILE_SHARKTANK = REPO_ROOT / "sharktank/version_local.json" +VERSION_FILE_SHORTFIN = REPO_ROOT / "shortfin/version_local.json" VERSION_FILE_LOCAL = REPO_ROOT / "shark-ai/version_local.json" REQUIREMENTS_TXT = REPO_ROOT / "shark-ai/requirements.txt" @@ -44,18 +44,9 @@ def load_version_info(version_file): return json.load(f) -def write_requirements(package_list, package_version): +def write_requirements(requirements): with open(REQUIREMENTS_TXT, "w") as f: - for package in package_list: - PINNED_PACKAGE = package + "==" + package_version - f.write("%s\n" % PINNED_PACKAGE) - - -def append_requirements(package_list, package_version): - with open(REQUIREMENTS_TXT, "a") as f: - for package in package_list: - PINNED_PACKAGE = package + "==" + package_version - f.write("%s\n" % PINNED_PACKAGE) + f.write("%s\n" % requirements) metapackage_version = load_version_info(VERSION_FILE_LOCAL) @@ -70,20 +61,34 @@ def append_requirements(package_list, package_version): stable_packages_list = ["iree-base-compiler", "iree-base-runtime", "iree-turbine"] if Version(PACKAGE_VERSION).is_prerelease: - write_requirements( - ["sharktank"], - Version(SHARKTANK_PACKAGE_VERSION).base_version + "rc" + args.version_suffix, + requirements = ( + "sharktank==" + + Version(SHARKTANK_PACKAGE_VERSION).base_version + + "rc" + + args.version_suffix + + "\n" ) - append_requirements( - ["shortfin"], - Version(SHORTFIN_PACKAGE_VERSION).base_version + "rc" + args.version_suffix, + requirements += ( + "shortfin==" + + Version(SHORTFIN_PACKAGE_VERSION).base_version + + "rc" + + args.version_suffix ) + + write_requirements(requirements) + else: MAJOR_VERSION = Version(PACKAGE_VERSION).major MINOR_VERSION = Version(PACKAGE_VERSION).minor - write_requirements( - stable_packages_list, str(MAJOR_VERSION) + "." + str(MINOR_VERSION) + ".*" + STABLE_VERSION_TO_PIN = str(MAJOR_VERSION) + "." + str(MINOR_VERSION) + ".*" + + requirements = "" + for package in stable_packages_list: + requirements += package + "==" + STABLE_VERSION_TO_PIN + "\n" + requirements += ( + "sharktank==" + Version(SHARKTANK_PACKAGE_VERSION).base_version + "\n" ) - append_requirements(["sharktank"], Version(SHARKTANK_PACKAGE_VERSION).base_version) - append_requirements(["shortfin"], Version(SHORTFIN_PACKAGE_VERSION).base_version) + requirements += "shortfin==" + Version(SHORTFIN_PACKAGE_VERSION).base_version + + write_requirements(requirements) diff --git a/shark-ai/.gitignore b/shark-ai/.gitignore index 8e68ab1b5..80bf001b8 100644 --- a/shark-ai/.gitignore +++ b/shark-ai/.gitignore @@ -1,3 +1,2 @@ # Local-only config options -version_local.json requirements.txt diff --git a/sharktank/setup.py b/sharktank/setup.py index aca5c63d0..182f94abc 100644 --- a/sharktank/setup.py +++ b/sharktank/setup.py @@ -13,8 +13,8 @@ SETUPPY_DIR = os.path.realpath(os.path.dirname(__file__)) # Setup and get version information. -VERSION_INFO_FILE = os.path.join(SETUPPY_DIR, "version_info.json") -VERSION_INFO_RC_FILE = os.path.join(SETUPPY_DIR, "version_info_rc.json") +VERSION_FILE = os.path.join(SETUPPY_DIR, "version.json") +VERSION_FILE_LOCAL = os.path.join(SETUPPY_DIR, "version_local.json") def load_version_info(version_file): @@ -23,10 +23,10 @@ def load_version_info(version_file): try: - version_info = load_version_info(VERSION_INFO_RC_FILE) + version_info = load_version_info(VERSION_FILE_LOCAL) except FileNotFoundError: - print("version_info_rc.json not found. Default to dev build") - version_info = load_version_info(VERSION_INFO_FILE) + print("version_local.json not found. Default to dev build") + version_info = load_version_info(VERSION_FILE) PACKAGE_VERSION = version_info.get("package-version") print(f"Using PACKAGE_VERSION: '{PACKAGE_VERSION}'") diff --git a/sharktank/version_info.json b/sharktank/version.json similarity index 100% rename from sharktank/version_info.json rename to sharktank/version.json diff --git a/shortfin/CMakeLists.txt b/shortfin/CMakeLists.txt index 85113ce00..11982202d 100644 --- a/shortfin/CMakeLists.txt +++ b/shortfin/CMakeLists.txt @@ -14,7 +14,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) endif() # Get version number from file -file(READ ${CMAKE_CURRENT_SOURCE_DIR}/version_info.json VERSION_JSON_STRING) +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/version.json VERSION_JSON_STRING) string(JSON PACKAGE_VERSION GET ${VERSION_JSON_STRING} package-version) string(REGEX MATCH "(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*" BASE_VERSION ${PACKAGE_VERSION}) diff --git a/shortfin/README.md b/shortfin/README.md index 3e7901342..9818e05d3 100644 --- a/shortfin/README.md +++ b/shortfin/README.md @@ -1,4 +1,4 @@ -# shortfin - SHARK C++ inference library +# shortfin - SHARK inference library and serving engine ## Simple User Installation diff --git a/shortfin/pyproject.toml b/shortfin/pyproject.toml index 47cde6775..15bd68732 100644 --- a/shortfin/pyproject.toml +++ b/shortfin/pyproject.toml @@ -8,6 +8,32 @@ requires = [ ] build-backend = "setuptools.build_meta" +[project] +name = "shortfin" +authors = [ + {name = "SHARK Authors"}, +] +description = "SHARK inference library and serving engine" +readme = "README.md" +license = {text = "Apache-2.0"} +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +requires-python = ">= 3.10" + +# Version is set via the `setup.py`. +dynamic = ["version"] + +[project.urls] +Repository = "https://github.com/nod-ai/SHARK-Platform" +Documentation = "https://shortfin.readthedocs.io/en/latest/" + [tool.pytest.ini_options] addopts = [ "-ra", diff --git a/shortfin/python/shortfin/interop/support/device_setup.py b/shortfin/python/shortfin/interop/support/device_setup.py new file mode 100644 index 000000000..afe6ca695 --- /dev/null +++ b/shortfin/python/shortfin/interop/support/device_setup.py @@ -0,0 +1,26 @@ +import shortfin as sf + + +def get_selected_devices(sb: sf.SystemBuilder, device_ids=None): + available = sb.available_devices + selected = [] + if device_ids is not None: + if len(device_ids) > len(available): + raise ValueError( + f"Requested more device ids ({device_ids}) than available ({available})." + ) + for did in device_ids: + if isinstance(did, str): + try: + did = int(did) + except ValueError: + did = did + if did in available: + selected.append(did) + elif isinstance(did, int): + selected.append(available[did]) + else: + raise ValueError(f"Device id {did} could not be parsed.") + else: + selected = available + return selected diff --git a/shortfin/python/shortfin_apps/llm/components/manager.py b/shortfin/python/shortfin_apps/llm/components/manager.py index e3057de22..b44116b39 100644 --- a/shortfin/python/shortfin_apps/llm/components/manager.py +++ b/shortfin/python/shortfin_apps/llm/components/manager.py @@ -8,16 +8,23 @@ import threading import shortfin as sf +from shortfin.interop.support.device_setup import get_selected_devices logger = logging.getLogger(__name__) class SystemManager: - def __init__(self, device="local-task"): - if device == "local-task": + def __init__(self, device="local-task", device_ids=None, async_allocs=True): + if any(x in device for x in ["local-task", "cpu"]): self.ls = sf.host.CPUSystemBuilder().create_system() - elif device == "hip": - self.ls = sf.amdgpu.SystemBuilder().create_system() + elif any(x in device for x in ["hip", "amdgpu"]): + sb = sf.SystemBuilder( + system_type="amdgpu", amdgpu_async_allocations=async_allocs + ) + if device_ids: + sb.visible_devices = sb.available_devices + sb.visible_devices = get_selected_devices(sb, device_ids) + self.ls = sb.create_system() logger.info(f"Created local system with {self.ls.device_names} devices") # TODO: Come up with an easier bootstrap thing than manually # running a thread. diff --git a/shortfin/python/shortfin_apps/llm/server.py b/shortfin/python/shortfin_apps/llm/server.py index 5b51a9a7f..2ab7a1b96 100644 --- a/shortfin/python/shortfin_apps/llm/server.py +++ b/shortfin/python/shortfin_apps/llm/server.py @@ -86,7 +86,11 @@ def get_eos_from_tokenizer_config(json_path): def configure(args) -> SystemManager: # Setup system (configure devices, etc). - sysman = SystemManager(device=args.device) + sysman = SystemManager( + device=args.device, + device_ids=args.device_ids, + async_allocs=args.amdgpu_async_allocations, + ) # Setup each service we are hosting. eos_token = get_eos_from_tokenizer_config(args.tokenizer_config_json) @@ -155,9 +159,17 @@ def main(argv, log_config=uvicorn.config.LOGGING_CONFIG): parser.add_argument( "--device", type=str, - default="local-task", + required=True, + choices=["local-task", "hip", "amdgpu"], help="Device to serve on; e.g. local-task, hip. Same options as `iree-run-module --device` ", ) + parser.add_argument( + "--device_ids", + type=str, + nargs="*", + default=None, + help="Device IDs visible to the system builder. Defaults to None (full visibility). Can be an index or a sf device id like amdgpu:0:0@0", + ) parser.add_argument( "--isolation", type=str, @@ -165,6 +177,11 @@ def main(argv, log_config=uvicorn.config.LOGGING_CONFIG): choices=[isolation.name.lower() for isolation in ProgramIsolation], help="Concurrency control -- How to isolate programs.", ) + parser.add_argument( + "--amdgpu_async_allocations", + action="store_true", + help="Enable asynchronous allocations for amdgpu device contexts.", + ) args = parser.parse_args(argv) if args.tokenizer_config_json is None: diff --git a/shortfin/python/shortfin_apps/sd/components/manager.py b/shortfin/python/shortfin_apps/sd/components/manager.py index 846c4ced6..b44116b39 100644 --- a/shortfin/python/shortfin_apps/sd/components/manager.py +++ b/shortfin/python/shortfin_apps/sd/components/manager.py @@ -8,35 +8,11 @@ import threading import shortfin as sf +from shortfin.interop.support.device_setup import get_selected_devices logger = logging.getLogger(__name__) -def get_selected_devices(sb: sf.SystemBuilder, device_ids=None): - available = sb.available_devices - selected = [] - if device_ids is not None: - if len(device_ids) >= len(available): - raise ValueError( - f"Requested more device ids ({device_ids}) than available ({available})." - ) - for did in device_ids: - if isinstance(did, str): - try: - did = int(did) - except ValueError: - did = did - if did in available: - selected.append(did) - elif isinstance(did, int): - selected.append(available[did]) - else: - raise ValueError(f"Device id {did} could not be parsed.") - else: - selected = available - return selected - - class SystemManager: def __init__(self, device="local-task", device_ids=None, async_allocs=True): if any(x in device for x in ["local-task", "cpu"]): diff --git a/shortfin/python/shortfin_apps/sd/components/service.py b/shortfin/python/shortfin_apps/sd/components/service.py index a64013db0..1ee11569a 100644 --- a/shortfin/python/shortfin_apps/sd/components/service.py +++ b/shortfin/python/shortfin_apps/sd/components/service.py @@ -183,6 +183,8 @@ def __repr__(self): params = [ f" {key} : {value}" for key, value in self.inference_parameters.items() ] + # For python 3.11 since we can't have \ in the f"" expression. + new_line = "\n" return ( f"ServiceManager(" f"\n INFERENCE DEVICES : \n" @@ -193,9 +195,9 @@ def __repr__(self): f" fibers per device : {self.fibers_per_device}\n" f" program isolation mode : {self.prog_isolation}\n" f"\n INFERENCE MODULES : \n" - f"{'\n'.join(modules)}\n" + f"{new_line.join(modules)}\n" f"\n INFERENCE PARAMETERS : \n" - f"{'\n'.join(params)}\n" + f"{new_line.join(params)}\n" f")" ) diff --git a/shortfin/setup.py b/shortfin/setup.py index 94aae4a55..cf3762950 100644 --- a/shortfin/setup.py +++ b/shortfin/setup.py @@ -141,8 +141,8 @@ def copy_extensions_to_source(self, *args, **kwargs): # Setup and get version information. -VERSION_INFO_FILE = os.path.join(REL_SOURCE_DIR, "version_info.json") -VERSION_INFO_RC_FILE = os.path.join(REL_SOURCE_DIR, "version_info_rc.json") +VERSION_FILE = os.path.join(REL_SOURCE_DIR, "version.json") +VERSION_FILE_LOCAL = os.path.join(REL_SOURCE_DIR, "version_local.json") def load_version_info(version_file): @@ -151,10 +151,10 @@ def load_version_info(version_file): try: - version_info = load_version_info(VERSION_INFO_RC_FILE) + version_info = load_version_info(VERSION_FILE_LOCAL) except FileNotFoundError: - print("version_info_rc.json not found. Default to dev build") - version_info = load_version_info(VERSION_INFO_FILE) + print("version_local.json not found. Default to dev build") + version_info = load_version_info(VERSION_FILE) PACKAGE_VERSION = version_info.get("package-version") print(f"Using PACKAGE_VERSION: '{PACKAGE_VERSION}'") @@ -359,10 +359,7 @@ def populate_built_package(abs_dir): print(f"Found shortfin packages: {packages}") setup( - name="shortfin", version=f"{PACKAGE_VERSION}", - description="Shortfin native library implementation", - author="SHARK Authors", packages=packages, zip_safe=False, package_dir=combine_dicts( diff --git a/shortfin/version_info.json b/shortfin/version.json similarity index 100% rename from shortfin/version_info.json rename to shortfin/version.json