From c47083adf51e5cc8b2ba23b0ad85ac1d65a5a726 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Mon, 4 Nov 2024 18:45:23 -0800 Subject: [PATCH 1/2] [shortfin] Make SystemBuilder fully configuration/environment driven. * Now `shortfin.SystemBuilder` (which was an abstract class) has a constructor. When used (vs one of the concrete subclasses), the `system_type=` keyword (or SHORTFIN_SYSTEM_TYPE env var, or `SystemBuilder.default_system_type` property) is used to drive subclass selection. * This feature was already in the C++ API: this patch merely exposes it to Python in an ergonomic way. * Pytest configuration and fixtures were updated to support configurable system independence where possible/straightforward. The result is that some tests now run for `--system amdgpu`. * Pytest `--compile-flags=` is now available to also provide explicit compilation flags for tests that need to compile a binary. * Both such tests are currently non functional on amdgpu, which needs further triage. This is ok to land because no one is passing `--compile-flags` yet, leaving time to triage these tests. --- shortfin/README.md | 7 +- shortfin/python/array_binding.cc | 1 + shortfin/python/lib_ext.cc | 109 +++++++++++++++--- shortfin/tests/api/array_ops_test.py | 3 + shortfin/tests/api/array_storage_test.py | 10 +- shortfin/tests/api/array_test.py | 3 + shortfin/tests/api/nputils_test.py | 3 + shortfin/tests/conftest.py | 39 ++++++- shortfin/tests/invocation/conftest.py | 4 +- .../invocation/mobilenet_program_test.py | 10 +- .../invocation/vmfb_buffer_access_test.py | 8 +- 11 files changed, 160 insertions(+), 37 deletions(-) diff --git a/shortfin/README.md b/shortfin/README.md index 957658400..463846840 100644 --- a/shortfin/README.md +++ b/shortfin/README.md @@ -111,10 +111,13 @@ Run platform independent tests only: pytest tests/ ``` -Run tests including for a specific platform: +Run tests including for a specific platform (in this example, a gfx1100 AMDGPU): + +(note that not all tests are system aware yet and some may only run on the CPU) ```bash -pytest tests/ --system amdgpu +pytest tests/ --system amdgpu \ + --compile-flags="--iree-hal-target-backends=rocm --iree-hip-target=gfx1100" ``` # Production Library Building diff --git a/shortfin/python/array_binding.cc b/shortfin/python/array_binding.cc index 7fdf20d38..da7197b14 100644 --- a/shortfin/python/array_binding.cc +++ b/shortfin/python/array_binding.cc @@ -428,6 +428,7 @@ void BindArray(py::module_ &m) { py::kw_only(), py::arg("read") = false, py::arg("write") = false, py::arg("discard") = false, DOCSTRING_STORAGE_MAP) .def(py::self == py::self) + .def("__len__", &storage::byte_length) .def("__repr__", &storage::to_s); // mapping diff --git a/shortfin/python/lib_ext.cc b/shortfin/python/lib_ext.cc index f04e29bba..1270a8f28 100644 --- a/shortfin/python/lib_ext.cc +++ b/shortfin/python/lib_ext.cc @@ -34,6 +34,32 @@ SystemBuilder classes. This API is a shorthand for creating a SystemBuilder and calling create_system() on it. )"; +static const char DOCSTRING_SYSTEM_BUILDER_CTOR[] = + R"(Constructs a system with environment dependent configuration. + +Most configuration is done by way of key/value arguments. Arguments are meant +to be derived from flags or config files and are expected to be simple strings +or integer values: + + * "system_type": A supported system type, corresponding to a subclass of + SystemBuilder. Typically available values include "hostcpu", "amdgpu", etc. + * Other keywords are passed to the concrete SystemBuilder subclass. + +Resolution for the `system_type` is special and is checked in three steps, with +the first defined winning: + +1. `kwargs["system_type"]` +2. Environment `SHORTFIN_SYSTEM_TYPE` variable (or as controlled by `env_prefix`) +3. `SystemBuilder.default_system_type` as a process-wide default. + +Args: + env_prefix: Controls how options are looked up in the environment. By default, + the prefix is "SHORTFIN_" and upper-cased options are appended to it. Any + option not explicitly specified but in the environment will be used. Pass + None to disable environment lookup. + **kwargs: Key/value arguments for controlling setup of the system. +)"; + static const char DOCSTRING_HOSTCPU_SYSTEM_BUILDER_CTOR[] = R"(Constructs a system with CPU based devices. @@ -90,8 +116,19 @@ class Refs { return lazy_PyWorkerEventLoop_; } + std::optional default_system_type() { + iree::slim_mutex_lock_guard g(mu_); + return default_system_type_; + } + void set_default_system_type(std::optional st) { + iree::slim_mutex_lock_guard g(mu_); + default_system_type_ = st; + } + private: + iree::slim_mutex mu_; py::object lazy_PyWorkerEventLoop_; + std::optional default_system_type_; }; // We need a fair bit of accounting additions in order to make Workers usable @@ -453,6 +490,44 @@ void BindLocal(py::module_ &m) { .export_values(); py::class_(m, "SystemBuilder") + .def("__init__", [](py::args, py::kwargs) {}) + .def_static( + "__new__", + [refs](py::handle cls, std::optional env_prefix, + bool validate_undef, py::kwargs kwargs) { + auto options = + CreateConfigOptions(env_prefix, kwargs, validate_undef); + std::optional system_type = + options.GetOption("system_type"); + std::optional default_system_type; + if (!system_type) { + default_system_type = refs->default_system_type(); + if (!default_system_type) { + throw std::invalid_argument( + "In order to construct a generic SystemBuilder, a " + "`system_type=` keyword (or appropriate environment " + "variable) must be specified. Alternatively, a default can " + "be set process wide via " + "`SystemBuilder.default_system_type =`"); + } + system_type = *default_system_type; + } + return local::SystemBuilder::ForSystem( + iree_allocator_system(), *system_type, std::move(options)); + }, + // Note that for some reason, no-arg construction passes no arguments + // to __new__. We allow the single positional argument to be none, + // which satisfies this case in practice. + py::arg("cls") = py::none(), py::kw_only(), + py::arg("env_prefix").none() = "SHORTFIN_", + py::arg("validate_undef") = true, py::arg("kwargs"), + DOCSTRING_SYSTEM_BUILDER_CTOR) + .def_prop_rw_static( + "default_system_type", + [refs](py::handle /*unused*/) { return refs->default_system_type(); }, + [refs](py::handle /*unused*/, std::optional st) { + refs->set_default_system_type(std::move(st)); + }) .def("create_system", [live_system_refs, worker_initializer](local::SystemBuilder &self) { auto system_ptr = self.CreateSystem(); @@ -1123,17 +1198,20 @@ void BindHostSystem(py::module_ &global_m) { m, "SystemBuilder"); py::class_(m, "CPUSystemBuilder") - .def( - "__init__", - [](local::systems::HostCPUSystemBuilder *self, - std::optional env_prefix, bool validate_undef, - py::kwargs kwargs) { + .def_static( + "__new__", + [](py::handle cls, std::optional env_prefix, + bool validate_undef, py::kwargs kwargs) { auto options = CreateConfigOptions(env_prefix, kwargs, validate_undef); - new (self) local::systems::HostCPUSystemBuilder( + return std::make_unique( iree_allocator_system(), std::move(options)); }, - py::kw_only(), py::arg("env_prefix").none() = "SHORTFIN_", + // Note that for some reason, no-arg construction passes no arguments + // to __new__. We allow the single positional argument to be none, + // which satisfies this case in practice. + py::arg("cls") = py::none(), py::kw_only(), + py::arg("env_prefix").none() = "SHORTFIN_", py::arg("validate_undef") = true, py::arg("kwargs"), DOCSTRING_HOSTCPU_SYSTEM_BUILDER_CTOR); py::class_(m, "HostCPUDevice"); @@ -1223,17 +1301,20 @@ void BindAMDGPUSystem(py::module_ &global_m) { auto m = global_m.def_submodule("amdgpu", "AMDGPU system config"); py::class_(m, "SystemBuilder") - .def( - "__init__", - [](local::systems::AMDGPUSystemBuilder *self, - std::optional env_prefix, bool validate_undef, - py::kwargs kwargs) { + .def_static( + "__new__", + [](py::handle cls, std::optional env_prefix, + bool validate_undef, py::kwargs kwargs) { auto options = CreateConfigOptions(env_prefix, kwargs, validate_undef); - new (self) local::systems::AMDGPUSystemBuilder( + return std::make_unique( iree_allocator_system(), std::move(options)); }, - py::kw_only(), py::arg("env_prefix").none() = "SHORTFIN_", + // Note that for some reason, no-arg construction passes no arguments + // to __new__. We allow the single positional argument to be none, + // which satisfies this case in practice. + py::arg("cls") = py::none(), py::kw_only(), + py::arg("env_prefix").none() = "SHORTFIN_", py::arg("validate_undef") = true, py::arg("kwargs"), DOCSTRING_AMDGPU_SYSTEM_BUILDER_CTOR) .def_prop_ro( diff --git a/shortfin/tests/api/array_ops_test.py b/shortfin/tests/api/array_ops_test.py index e6d1af3cd..69d21e929 100644 --- a/shortfin/tests/api/array_ops_test.py +++ b/shortfin/tests/api/array_ops_test.py @@ -14,6 +14,9 @@ @pytest.fixture def lsys(): + # TODO: Port this test to use memory type independent access. It currently + # presumes unified memory. + # sc = sf.SystemBuilder() sc = sf.host.CPUSystemBuilder() lsys = sc.create_system() yield lsys diff --git a/shortfin/tests/api/array_storage_test.py b/shortfin/tests/api/array_storage_test.py index d65452013..5ecf9bdc9 100644 --- a/shortfin/tests/api/array_storage_test.py +++ b/shortfin/tests/api/array_storage_test.py @@ -13,7 +13,7 @@ @pytest.fixture def lsys(): - sc = sf.host.CPUSystemBuilder() + sc = sf.SystemBuilder() lsys = sc.create_system() yield lsys lsys.shutdown() @@ -30,13 +30,13 @@ def device(fiber): def test_allocate_host(device): - s = sfnp.storage.allocate_host(device, 32) - assert len(bytes(s.map(read=True))) == 32 + h = sfnp.storage.allocate_host(device, 32) + assert len(h) == 32 def test_allocate_device(device): - s = sfnp.storage.allocate_device(device, 64) - assert len(bytes(s.map(read=True))) == 64 + d = sfnp.storage.allocate_device(device, 64) + assert len(d) == 64 def test_fill1(lsys, device): diff --git a/shortfin/tests/api/array_test.py b/shortfin/tests/api/array_test.py index 75a2c48d8..be10c05b7 100644 --- a/shortfin/tests/api/array_test.py +++ b/shortfin/tests/api/array_test.py @@ -14,6 +14,9 @@ @pytest.fixture def lsys(): + # TODO: Port this test to use memory type independent access. It currently + # presumes unified memory. + # sc = sf.SystemBuilder() sc = sf.host.CPUSystemBuilder() lsys = sc.create_system() yield lsys diff --git a/shortfin/tests/api/nputils_test.py b/shortfin/tests/api/nputils_test.py index 18f35d8f7..fdb650054 100644 --- a/shortfin/tests/api/nputils_test.py +++ b/shortfin/tests/api/nputils_test.py @@ -22,6 +22,9 @@ @pytest.fixture def lsys(): + # TODO: Port this test to use memory type independent access. It currently + # presumes unified memory. + # sc = sf.SystemBuilder() sc = sf.host.CPUSystemBuilder() lsys = sc.create_system() yield lsys diff --git a/shortfin/tests/conftest.py b/shortfin/tests/conftest.py index 247876ad5..04ee36d8a 100644 --- a/shortfin/tests/conftest.py +++ b/shortfin/tests/conftest.py @@ -6,6 +6,7 @@ import os import pytest +import shlex import shortfin as sf @@ -15,8 +16,14 @@ def pytest_addoption(parser): "--system", action="store", metavar="NAME", - nargs="*", - help="Enable tests for system name ('amdgpu', ...)", + default="hostcpu", + help="Enable tests for system name ('hostcpu', 'amdgpu', ...)", + ) + parser.addoption( + "--compile-flags", + action="store", + metavar="FLAGS", + help="Compile flags to run test on the --system (required if it cannot be inferred)", ) @@ -27,14 +34,17 @@ def pytest_configure(config): def pytest_runtest_setup(item): + system_type = item.config.getoption("--system") + # Filter tests based on system mark. required_system_names = [mark.args[0] for mark in item.iter_markers("system")] if required_system_names: - available_system_names = item.config.getoption("--system") or [] - if not all(name in available_system_names for name in required_system_names): + if not all(name == system_type for name in required_system_names): pytest.skip( f"test requires system in {required_system_names!r} but has " - f"{available_system_names!r} (set with --system arg)" + f"{system_type!r} (set with --system arg)" ) + # Set the default. + sf.SystemBuilder.default_system_type = system_type # Keys that will be cleaned project wide prior to and after each test run. @@ -42,9 +52,28 @@ def pytest_runtest_setup(item): CLEAN_ENV_KEYS = [ "SHORTFIN_HOSTCPU_TOPOLOGY_NODES", "SHORTFIN_HOSTCPU_TOPOLOGY_MAX_GROUP_COUNT", + "SHORTFIN_SYSTEM_TYPE", ] +@pytest.fixture(scope="session") +def compile_flags(pytestconfig) -> list[str]: + compile_flags = pytestconfig.getoption("--compile-flags") + if compile_flags is not None: + return shlex.split(compile_flags) + # Try to figure it out from the system. + system_type = pytestconfig.getoption("--system") + if system_type == "hostcpu": + return [ + "--iree-hal-target-device=llvm-cpu", + "--iree-llvmcpu-target-cpu=host", + ] + pytest.skip( + reason="Test needs to compile a binary and no --compile-flags set (or " + "could not be inferred)" + ) + + @pytest.fixture(autouse=True) def clean_env(): def kill(): diff --git a/shortfin/tests/invocation/conftest.py b/shortfin/tests/invocation/conftest.py index 148ae064d..e62373eb5 100644 --- a/shortfin/tests/invocation/conftest.py +++ b/shortfin/tests/invocation/conftest.py @@ -36,7 +36,7 @@ def mobilenet_onnx_path(tmp_path_factory): @pytest.fixture(scope="session") -def mobilenet_compiled_cpu_path(mobilenet_onnx_path): +def mobilenet_compiled_path(mobilenet_onnx_path, compile_flags): try: import iree.compiler.tools as tools import iree.compiler.tools.import_onnx.__main__ as import_onnx @@ -53,7 +53,7 @@ def mobilenet_compiled_cpu_path(mobilenet_onnx_path): tools.compile_file( str(mlir_path), output_file=str(vmfb_path), - target_backends=["llvm-cpu"], input_type="onnx", + extra_args=compile_flags, ) return vmfb_path diff --git a/shortfin/tests/invocation/mobilenet_program_test.py b/shortfin/tests/invocation/mobilenet_program_test.py index a0f209219..ff7b9bbf2 100644 --- a/shortfin/tests/invocation/mobilenet_program_test.py +++ b/shortfin/tests/invocation/mobilenet_program_test.py @@ -16,7 +16,7 @@ @pytest.fixture def lsys(): - sc = sf.host.CPUSystemBuilder() + sc = sf.SystemBuilder() lsys = sc.create_system() yield lsys lsys.shutdown() @@ -34,9 +34,9 @@ def device(fiber0): @pytest.fixture def mobilenet_program_function( - lsys, mobilenet_compiled_cpu_path + lsys, mobilenet_compiled_path ) -> tuple[sf.ProgramFunction]: - program_module = lsys.load_module(mobilenet_compiled_cpu_path) + program_module = lsys.load_module(mobilenet_compiled_path) program = sf.Program([program_module], devices=lsys.devices) main_function = program["module.torch-jit-export"] return main_function @@ -44,9 +44,9 @@ def mobilenet_program_function( @pytest.fixture def mobilenet_program_function_per_call( - lsys, mobilenet_compiled_cpu_path + lsys, mobilenet_compiled_path ) -> tuple[sf.ProgramFunction]: - program_module = lsys.load_module(mobilenet_compiled_cpu_path) + program_module = lsys.load_module(mobilenet_compiled_path) program = sf.Program( [program_module], devices=lsys.devices, isolation=sf.ProgramIsolation.PER_CALL ) diff --git a/shortfin/tests/invocation/vmfb_buffer_access_test.py b/shortfin/tests/invocation/vmfb_buffer_access_test.py index 9d464dbd4..d86a58822 100644 --- a/shortfin/tests/invocation/vmfb_buffer_access_test.py +++ b/shortfin/tests/invocation/vmfb_buffer_access_test.py @@ -11,8 +11,8 @@ import random -@pytest.fixture(scope="session") -def kvcache_compiled_cpu_path(tmp_path_factory): +@pytest.fixture +def kvcache_compiled_cpu_path(tmp_path_factory, compile_flags): try: import iree.compiler.tools as tools except ModuleNotFoundError: @@ -100,8 +100,8 @@ def kvcache_compiled_cpu_path(tmp_path_factory): tools.compile_file( str(mlir_path), output_file=str(vmfb_path), - target_backends=["llvm-cpu"], input_type="AUTO", + extra_args=compile_flags, ) return vmfb_path @@ -109,7 +109,7 @@ def kvcache_compiled_cpu_path(tmp_path_factory): @pytest.fixture def lsys(): - sc = sf.host.CPUSystemBuilder() + sc = sf.SystemBuilder() lsys = sc.create_system() yield lsys lsys.shutdown() From 22d28ac67d13d2a2e03151920cc3633a40c075c3 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Mon, 4 Nov 2024 19:03:54 -0800 Subject: [PATCH 2/2] Try to fix 3.11 issue --- shortfin/python/lib_ext.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shortfin/python/lib_ext.cc b/shortfin/python/lib_ext.cc index 1270a8f28..b6ec7ee07 100644 --- a/shortfin/python/lib_ext.cc +++ b/shortfin/python/lib_ext.cc @@ -1198,6 +1198,7 @@ void BindHostSystem(py::module_ &global_m) { m, "SystemBuilder"); py::class_(m, "CPUSystemBuilder") + .def("__init__", [](py::args, py::kwargs) {}) .def_static( "__new__", [](py::handle cls, std::optional env_prefix, @@ -1301,6 +1302,7 @@ void BindAMDGPUSystem(py::module_ &global_m) { auto m = global_m.def_submodule("amdgpu", "AMDGPU system config"); py::class_(m, "SystemBuilder") + .def("__init__", [](py::args, py::kwargs) {}) .def_static( "__new__", [](py::handle cls, std::optional env_prefix,