Skip to content

Commit

Permalink
[shortfin] Make SystemBuilder fully configuration/environment driven. (
Browse files Browse the repository at this point in the history
…#420)

* Now `shortfin.SystemBuilder` (which was an abstract class) has a
constructor. When used (vs one of the concrete subclasses), the
`system_type=` keyword (or SHORTFIN_SYSTEM_TYPE env var, or
`SystemBuilder.default_system_type` property) is used to drive subclass
selection.
* This feature was already in the C++ API: this patch merely exposes it
to Python in an ergonomic way.
* Pytest configuration and fixtures were updated to support configurable
system independence where possible/straightforward. The result is that
some tests now run for `--system amdgpu`.
* Pytest `--compile-flags=` is now available to also provide explicit
compilation flags for tests that need to compile a binary.
* Both such tests are currently non functional on amdgpu, which needs
further triage. This is ok to land because no one is passing
`--compile-flags` yet, leaving time to triage these tests.
  • Loading branch information
stellaraccident authored Nov 5, 2024
1 parent e282fbc commit 120851c
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 37 deletions.
7 changes: 5 additions & 2 deletions shortfin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,13 @@ Run platform independent tests only:
pytest tests/
```

Run tests including for a specific platform:
Run tests including for a specific platform (in this example, a gfx1100 AMDGPU):

(note that not all tests are system aware yet and some may only run on the CPU)

```bash
pytest tests/ --system amdgpu
pytest tests/ --system amdgpu \
--compile-flags="--iree-hal-target-backends=rocm --iree-hip-target=gfx1100"
```

# Production Library Building
Expand Down
1 change: 1 addition & 0 deletions shortfin/python/array_binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ void BindArray(py::module_ &m) {
py::kw_only(), py::arg("read") = false, py::arg("write") = false,
py::arg("discard") = false, DOCSTRING_STORAGE_MAP)
.def(py::self == py::self)
.def("__len__", &storage::byte_length)
.def("__repr__", &storage::to_s);

// mapping
Expand Down
111 changes: 97 additions & 14 deletions shortfin/python/lib_ext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,32 @@ SystemBuilder classes. This API is a shorthand for creating a SystemBuilder
and calling create_system() on it.
)";

static const char DOCSTRING_SYSTEM_BUILDER_CTOR[] =
R"(Constructs a system with environment dependent configuration.
Most configuration is done by way of key/value arguments. Arguments are meant
to be derived from flags or config files and are expected to be simple strings
or integer values:
* "system_type": A supported system type, corresponding to a subclass of
SystemBuilder. Typically available values include "hostcpu", "amdgpu", etc.
* Other keywords are passed to the concrete SystemBuilder subclass.
Resolution for the `system_type` is special and is checked in three steps, with
the first defined winning:
1. `kwargs["system_type"]`
2. Environment `SHORTFIN_SYSTEM_TYPE` variable (or as controlled by `env_prefix`)
3. `SystemBuilder.default_system_type` as a process-wide default.
Args:
env_prefix: Controls how options are looked up in the environment. By default,
the prefix is "SHORTFIN_" and upper-cased options are appended to it. Any
option not explicitly specified but in the environment will be used. Pass
None to disable environment lookup.
**kwargs: Key/value arguments for controlling setup of the system.
)";

static const char DOCSTRING_HOSTCPU_SYSTEM_BUILDER_CTOR[] =
R"(Constructs a system with CPU based devices.
Expand Down Expand Up @@ -90,8 +116,19 @@ class Refs {
return lazy_PyWorkerEventLoop_;
}

std::optional<std::string> default_system_type() {
iree::slim_mutex_lock_guard g(mu_);
return default_system_type_;
}
void set_default_system_type(std::optional<std::string> st) {
iree::slim_mutex_lock_guard g(mu_);
default_system_type_ = st;
}

private:
iree::slim_mutex mu_;
py::object lazy_PyWorkerEventLoop_;
std::optional<std::string> default_system_type_;
};

// We need a fair bit of accounting additions in order to make Workers usable
Expand Down Expand Up @@ -453,6 +490,44 @@ void BindLocal(py::module_ &m) {
.export_values();

py::class_<local::SystemBuilder>(m, "SystemBuilder")
.def("__init__", [](py::args, py::kwargs) {})
.def_static(
"__new__",
[refs](py::handle cls, std::optional<std::string> env_prefix,
bool validate_undef, py::kwargs kwargs) {
auto options =
CreateConfigOptions(env_prefix, kwargs, validate_undef);
std::optional<std::string_view> system_type =
options.GetOption("system_type");
std::optional<std::string> default_system_type;
if (!system_type) {
default_system_type = refs->default_system_type();
if (!default_system_type) {
throw std::invalid_argument(
"In order to construct a generic SystemBuilder, a "
"`system_type=` keyword (or appropriate environment "
"variable) must be specified. Alternatively, a default can "
"be set process wide via "
"`SystemBuilder.default_system_type =`");
}
system_type = *default_system_type;
}
return local::SystemBuilder::ForSystem(
iree_allocator_system(), *system_type, std::move(options));
},
// Note that for some reason, no-arg construction passes no arguments
// to __new__. We allow the single positional argument to be none,
// which satisfies this case in practice.
py::arg("cls") = py::none(), py::kw_only(),
py::arg("env_prefix").none() = "SHORTFIN_",
py::arg("validate_undef") = true, py::arg("kwargs"),
DOCSTRING_SYSTEM_BUILDER_CTOR)
.def_prop_rw_static(
"default_system_type",
[refs](py::handle /*unused*/) { return refs->default_system_type(); },
[refs](py::handle /*unused*/, std::optional<std::string> st) {
refs->set_default_system_type(std::move(st));
})
.def("create_system", [live_system_refs,
worker_initializer](local::SystemBuilder &self) {
auto system_ptr = self.CreateSystem();
Expand Down Expand Up @@ -1123,17 +1198,21 @@ void BindHostSystem(py::module_ &global_m) {
m, "SystemBuilder");
py::class_<local::systems::HostCPUSystemBuilder,
local::systems::HostSystemBuilder>(m, "CPUSystemBuilder")
.def(
"__init__",
[](local::systems::HostCPUSystemBuilder *self,
std::optional<std::string> env_prefix, bool validate_undef,
py::kwargs kwargs) {
.def("__init__", [](py::args, py::kwargs) {})
.def_static(
"__new__",
[](py::handle cls, std::optional<std::string> env_prefix,
bool validate_undef, py::kwargs kwargs) {
auto options =
CreateConfigOptions(env_prefix, kwargs, validate_undef);
new (self) local::systems::HostCPUSystemBuilder(
return std::make_unique<local::systems::HostCPUSystemBuilder>(
iree_allocator_system(), std::move(options));
},
py::kw_only(), py::arg("env_prefix").none() = "SHORTFIN_",
// Note that for some reason, no-arg construction passes no arguments
// to __new__. We allow the single positional argument to be none,
// which satisfies this case in practice.
py::arg("cls") = py::none(), py::kw_only(),
py::arg("env_prefix").none() = "SHORTFIN_",
py::arg("validate_undef") = true, py::arg("kwargs"),
DOCSTRING_HOSTCPU_SYSTEM_BUILDER_CTOR);
py::class_<local::systems::HostCPUDevice, local::Device>(m, "HostCPUDevice");
Expand Down Expand Up @@ -1223,17 +1302,21 @@ void BindAMDGPUSystem(py::module_ &global_m) {
auto m = global_m.def_submodule("amdgpu", "AMDGPU system config");
py::class_<local::systems::AMDGPUSystemBuilder,
local::systems::HostCPUSystemBuilder>(m, "SystemBuilder")
.def(
"__init__",
[](local::systems::AMDGPUSystemBuilder *self,
std::optional<std::string> env_prefix, bool validate_undef,
py::kwargs kwargs) {
.def("__init__", [](py::args, py::kwargs) {})
.def_static(
"__new__",
[](py::handle cls, std::optional<std::string> env_prefix,
bool validate_undef, py::kwargs kwargs) {
auto options =
CreateConfigOptions(env_prefix, kwargs, validate_undef);
new (self) local::systems::AMDGPUSystemBuilder(
return std::make_unique<local::systems::AMDGPUSystemBuilder>(
iree_allocator_system(), std::move(options));
},
py::kw_only(), py::arg("env_prefix").none() = "SHORTFIN_",
// Note that for some reason, no-arg construction passes no arguments
// to __new__. We allow the single positional argument to be none,
// which satisfies this case in practice.
py::arg("cls") = py::none(), py::kw_only(),
py::arg("env_prefix").none() = "SHORTFIN_",
py::arg("validate_undef") = true, py::arg("kwargs"),
DOCSTRING_AMDGPU_SYSTEM_BUILDER_CTOR)
.def_prop_ro(
Expand Down
3 changes: 3 additions & 0 deletions shortfin/tests/api/array_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

@pytest.fixture
def lsys():
# TODO: Port this test to use memory type independent access. It currently
# presumes unified memory.
# sc = sf.SystemBuilder()
sc = sf.host.CPUSystemBuilder()
lsys = sc.create_system()
yield lsys
Expand Down
10 changes: 5 additions & 5 deletions shortfin/tests/api/array_storage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@pytest.fixture
def lsys():
sc = sf.host.CPUSystemBuilder()
sc = sf.SystemBuilder()
lsys = sc.create_system()
yield lsys
lsys.shutdown()
Expand All @@ -30,13 +30,13 @@ def device(fiber):


def test_allocate_host(device):
s = sfnp.storage.allocate_host(device, 32)
assert len(bytes(s.map(read=True))) == 32
h = sfnp.storage.allocate_host(device, 32)
assert len(h) == 32


def test_allocate_device(device):
s = sfnp.storage.allocate_device(device, 64)
assert len(bytes(s.map(read=True))) == 64
d = sfnp.storage.allocate_device(device, 64)
assert len(d) == 64


def test_fill1(lsys, device):
Expand Down
3 changes: 3 additions & 0 deletions shortfin/tests/api/array_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

@pytest.fixture
def lsys():
# TODO: Port this test to use memory type independent access. It currently
# presumes unified memory.
# sc = sf.SystemBuilder()
sc = sf.host.CPUSystemBuilder()
lsys = sc.create_system()
yield lsys
Expand Down
3 changes: 3 additions & 0 deletions shortfin/tests/api/nputils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

@pytest.fixture
def lsys():
# TODO: Port this test to use memory type independent access. It currently
# presumes unified memory.
# sc = sf.SystemBuilder()
sc = sf.host.CPUSystemBuilder()
lsys = sc.create_system()
yield lsys
Expand Down
39 changes: 34 additions & 5 deletions shortfin/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import os
import pytest
import shlex

import shortfin as sf

Expand All @@ -15,8 +16,14 @@ def pytest_addoption(parser):
"--system",
action="store",
metavar="NAME",
nargs="*",
help="Enable tests for system name ('amdgpu', ...)",
default="hostcpu",
help="Enable tests for system name ('hostcpu', 'amdgpu', ...)",
)
parser.addoption(
"--compile-flags",
action="store",
metavar="FLAGS",
help="Compile flags to run test on the --system (required if it cannot be inferred)",
)


Expand All @@ -30,24 +37,46 @@ def pytest_configure(config):


def pytest_runtest_setup(item):
system_type = item.config.getoption("--system")
# Filter tests based on system mark.
required_system_names = [mark.args[0] for mark in item.iter_markers("system")]
if required_system_names:
available_system_names = item.config.getoption("--system") or []
if not all(name in available_system_names for name in required_system_names):
if not all(name == system_type for name in required_system_names):
pytest.skip(
f"test requires system in {required_system_names!r} but has "
f"{available_system_names!r} (set with --system arg)"
f"{system_type!r} (set with --system arg)"
)
# Set the default.
sf.SystemBuilder.default_system_type = system_type


# Keys that will be cleaned project wide prior to and after each test run.
# Test code can freely modify these.
CLEAN_ENV_KEYS = [
"SHORTFIN_HOSTCPU_TOPOLOGY_NODES",
"SHORTFIN_HOSTCPU_TOPOLOGY_MAX_GROUP_COUNT",
"SHORTFIN_SYSTEM_TYPE",
]


@pytest.fixture(scope="session")
def compile_flags(pytestconfig) -> list[str]:
compile_flags = pytestconfig.getoption("--compile-flags")
if compile_flags is not None:
return shlex.split(compile_flags)
# Try to figure it out from the system.
system_type = pytestconfig.getoption("--system")
if system_type == "hostcpu":
return [
"--iree-hal-target-device=llvm-cpu",
"--iree-llvmcpu-target-cpu=host",
]
pytest.skip(
reason="Test needs to compile a binary and no --compile-flags set (or "
"could not be inferred)"
)


@pytest.fixture(autouse=True)
def clean_env():
def kill():
Expand Down
4 changes: 2 additions & 2 deletions shortfin/tests/invocation/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def mobilenet_onnx_path(tmp_path_factory):


@pytest.fixture(scope="session")
def mobilenet_compiled_cpu_path(mobilenet_onnx_path):
def mobilenet_compiled_path(mobilenet_onnx_path, compile_flags):
try:
import iree.compiler.tools as tools
import iree.compiler.tools.import_onnx.__main__ as import_onnx
Expand All @@ -53,7 +53,7 @@ def mobilenet_compiled_cpu_path(mobilenet_onnx_path):
tools.compile_file(
str(mlir_path),
output_file=str(vmfb_path),
target_backends=["llvm-cpu"],
input_type="onnx",
extra_args=compile_flags,
)
return vmfb_path
10 changes: 5 additions & 5 deletions shortfin/tests/invocation/mobilenet_program_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

@pytest.fixture
def lsys():
sc = sf.host.CPUSystemBuilder()
sc = sf.SystemBuilder()
lsys = sc.create_system()
yield lsys
lsys.shutdown()
Expand All @@ -34,19 +34,19 @@ def device(fiber0):

@pytest.fixture
def mobilenet_program_function(
lsys, mobilenet_compiled_cpu_path
lsys, mobilenet_compiled_path
) -> tuple[sf.ProgramFunction]:
program_module = lsys.load_module(mobilenet_compiled_cpu_path)
program_module = lsys.load_module(mobilenet_compiled_path)
program = sf.Program([program_module], devices=lsys.devices)
main_function = program["module.torch-jit-export"]
return main_function


@pytest.fixture
def mobilenet_program_function_per_call(
lsys, mobilenet_compiled_cpu_path
lsys, mobilenet_compiled_path
) -> tuple[sf.ProgramFunction]:
program_module = lsys.load_module(mobilenet_compiled_cpu_path)
program_module = lsys.load_module(mobilenet_compiled_path)
program = sf.Program(
[program_module], devices=lsys.devices, isolation=sf.ProgramIsolation.PER_CALL
)
Expand Down
8 changes: 4 additions & 4 deletions shortfin/tests/invocation/vmfb_buffer_access_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import random


@pytest.fixture(scope="session")
def kvcache_compiled_cpu_path(tmp_path_factory):
@pytest.fixture
def kvcache_compiled_cpu_path(tmp_path_factory, compile_flags):
try:
import iree.compiler.tools as tools
except ModuleNotFoundError:
Expand Down Expand Up @@ -100,16 +100,16 @@ def kvcache_compiled_cpu_path(tmp_path_factory):
tools.compile_file(
str(mlir_path),
output_file=str(vmfb_path),
target_backends=["llvm-cpu"],
input_type="AUTO",
extra_args=compile_flags,
)

return vmfb_path


@pytest.fixture
def lsys():
sc = sf.host.CPUSystemBuilder()
sc = sf.SystemBuilder()
lsys = sc.create_system()
yield lsys
lsys.shutdown()
Expand Down

0 comments on commit 120851c

Please sign in to comment.