diff --git a/Jenkinsfile b/Jenkinsfile
index bc7c64f02..0638dbf7c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,9 @@ pipeline {
                     echo $HOME
                 '''
                 sh '''#!/bin/bash -ex
-                    cuda_arch=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader|head -n 1| sed "s/\\.//")
+                    # Oldest card in the Jenkins pool is a K40
+                    cuda_arch="35"
+
                     cmake -B build . -DFINUFFT_USE_CUDA=ON \
                                      -DFINUFFT_USE_CPU=OFF \
                                      -DFINUFFT_BUILD_TESTS=ON \
@@ -44,9 +46,14 @@ pipeline {
                 sh '''#!/bin/bash -ex
                     source $HOME/bin/activate
                     python3 -m pip install --upgrade pip
+                    python3 -m pip install --upgrade pycuda cupy-cuda110 numba
+                    python3 -m pip install torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html
                     python3 -m pip install -e python/cufinufft
                     python3 -m pip install pytest
-                    python3 -m pytest python/cufinufft
+                    python3 -m pytest --framework=pycuda python/cufinufft
+                    python3 -m pytest --framework=numba python/cufinufft
+                    python3 -m pytest --framework=cupy python/cufinufft
+                    python3 -m pytest --framework=torch python/cufinufft
                 '''
             }
         }
diff --git a/python/cufinufft/cufinufft/_compat.py b/python/cufinufft/cufinufft/_compat.py
new file mode 100644
index 000000000..04e066a1a
--- /dev/null
+++ b/python/cufinufft/cufinufft/_compat.py
@@ -0,0 +1,106 @@
+import inspect
+
+import numpy as np
+
+
+def get_array_ptr(data):
+    try:
+        return data.__cuda_array_interface__['data'][0]
+    except RuntimeError:
+        # Handle torch with gradient enabled
+        # https://github.com/flatironinstitute/finufft/pull/326#issuecomment-1652212770
+        return data.data_ptr()
+    except AttributeError:
+        raise TypeError("Invalid GPU array implementation. Implementation must implement the standard cuda array interface.")
+
+
+def get_array_module(obj):
+    module_name = inspect.getmodule(type(obj)).__name__
+
+    if module_name.startswith("numba.cuda"):
+        return "numba"
+    elif module_name.startswith("torch"):
+        return "torch"
+    elif module_name.startswith("pycuda"):
+        return "pycuda"
+    else:
+        return "generic"
+
+
+def get_array_size(obj):
+    array_module = get_array_module(obj)
+
+    if array_module == "torch":
+        return len(obj)
+    else:
+        return obj.size
+
+
+def get_array_dtype(obj):
+    array_module = get_array_module(obj)
+
+    if array_module == "torch":
+        dtype_str = str(obj.dtype)
+        dtype_str = dtype_str[len("torch."):]
+        return np.dtype(dtype_str)
+    else:
+        return obj.dtype
+
+
+def is_array_contiguous(obj):
+    array_module = get_array_module(obj)
+
+    if array_module == "numba":
+        return obj.is_c_contiguous()
+    elif array_module == "torch":
+        return obj.is_contiguous()
+    else:
+        return obj.flags.c_contiguous
+
+
+def array_can_contiguous(obj):
+    array_module = get_array_module(obj)
+
+    if array_module == "pycuda":
+        return False
+    else:
+        return True
+
+
+def array_contiguous(obj):
+    array_module = get_array_module(obj)
+
+    if array_module == "numba":
+        import numba
+        ret = numba.cuda.device_array(obj.shape, obj.dtype, stream=obj.stream)
+        ret[:] = obj[:]
+        return ret
+    if array_module == "torch":
+        return obj.contiguous()
+    else:
+        return obj.copy(order="C")
+
+
+def array_empty_like(obj, *args, **kwargs):
+    module_name = get_array_module(obj)
+
+    if module_name == "numba":
+        import numba.cuda
+        return numba.cuda.device_array(*args, **kwargs)
+    elif module_name == "torch":
+        import torch
+        if "shape" in kwargs:
+            kwargs["size"] = kwargs.pop("shape")
+        if "dtype" in kwargs:
+            dtype = kwargs.pop("dtype")
+            if dtype == np.complex64:
+                dtype = torch.complex64
+            elif dtype == np.complex128:
+                dtype = torch.complex128
+            kwargs["dtype"] = dtype
+        if "device" not in kwargs:
+            kwargs["device"] = obj.device
+
+        return torch.empty(*args, **kwargs)
+    else:
+        return type(obj)(*args, **kwargs)
diff --git a/python/cufinufft/cufinufft/_cufinufft.py b/python/cufinufft/cufinufft/_cufinufft.py
index b36b60b74..beaeb8b8c 100644
--- a/python/cufinufft/cufinufft/_cufinufft.py
+++ b/python/cufinufft/cufinufft/_cufinufft.py
@@ -17,8 +17,6 @@ warnings.filterwarnings("ignore", category=DeprecationWarning)
 
 import imp
 
-import numpy as np
-
 from ctypes import c_double
 from ctypes import c_int
 from ctypes import c_int64
diff --git a/python/cufinufft/cufinufft/_plan.py b/python/cufinufft/cufinufft/_plan.py
index 084619990..4231a145a 100644
--- a/python/cufinufft/cufinufft/_plan.py
+++ b/python/cufinufft/cufinufft/_plan.py
@@ -25,7 +25,7 @@
 from cufinufft._cufinufft import _destroy_plan
 from cufinufft._cufinufft import _destroy_planf
 
-from pycuda.gpuarray import GPUArray
+from cufinufft import _compat
 
 
 # If we are shutting down python, we don't need to run __del__
@@ -206,7 +206,7 @@ def setpts(self, x, y=None, z=None, s=None, t=None, u=None):
 
         _x, _y, _z = _ensure_valid_pts(_x, _y, _z, self.dim)
 
-        M = _x.size
+        M = _compat.get_array_size(_x)
 
         # Because FINUFFT/cufinufft are internally column major,
         # we will reorder the pts axes. Reordering references
@@ -217,17 +217,17 @@
         # (x, y, None) ~> (y, x, None)
         # (x, y, z) ~> (z, y, x)
         # Via code, we push each dimension onto a stack of axis
-        fpts_axes = [_x.ptr, None, None]
+        fpts_axes = [_compat.get_array_ptr(_x), None, None]
 
         # We will also store references to these arrays.
         # This keeps python from prematurely cleaning them up.
         self._references.append(_x)
         if self.dim >= 2:
-            fpts_axes.insert(0, _y.ptr)
+            fpts_axes.insert(0, _compat.get_array_ptr(_y))
             self._references.append(_y)
 
         if self.dim >= 3:
-            fpts_axes.insert(0, _z.ptr)
+            fpts_axes.insert(0, _compat.get_array_ptr(_z))
             self._references.append(_z)
 
         # Then take three items off the stack as our reordered axis.
@@ -278,14 +278,16 @@ def execute(self, data, out=None):
             req_out_shape = batch_shape + req_out_shape
 
         if out is None:
-            _out = GPUArray(req_out_shape, dtype=self.dtype)
+            _out = _compat.array_empty_like(_data, req_out_shape, dtype=self.dtype)
         else:
             _out = _ensure_array_shape(_out, "out", req_out_shape)
 
         if self.type == 1:
-            ier = self._exec_plan(self._plan, data.ptr, _out.ptr)
+            ier = self._exec_plan(self._plan, _compat.get_array_ptr(_data),
+                                  _compat.get_array_ptr(_out))
         elif self.type == 2:
-            ier = self._exec_plan(self._plan, _out.ptr, data.ptr)
+            ier = self._exec_plan(self._plan, _compat.get_array_ptr(_out),
+                                  _compat.get_array_ptr(_data))
 
         if ier != 0:
             raise RuntimeError('Error executing plan.')
@@ -315,27 +317,21 @@ def __del__(self):
 
 
 def _ensure_array_type(x, name, dtype, output=False):
     if x is None:
-        return GPUArray(0, dtype=dtype, order="C")
+        return None
 
-    if x.dtype != dtype:
+    if _compat.get_array_dtype(x) != dtype:
         raise TypeError(f"Argument `{name}` does not have the correct dtype: "
                         f"{x.dtype} was given, but {dtype} was expected.")
 
-    if not x.flags.c_contiguous:
-        if output:
+    if not _compat.is_array_contiguous(x):
+        if output or not _compat.array_can_contiguous(x):
             raise TypeError(f"Argument `{name}` does not satisfy the "
                             f"following requirement: C")
         else:
-            raise TypeError(f"Argument `{name}` does not satisfy the "
-                            f"following requirement: C")
-
-            # Ideally we'd copy the array into the correct ordering here, but
-            # this does not seem possible as of pycuda 2022.2.2.
-
-            # warnings.warn(f"Argument `{name}` does not satisfy the "
-            #               f"following requirement: C. Copying array (this may
-            #               reduce performance)")
-            # x = gpuarray.GPUArray(x, dtype=dtype, order="C")
+            warnings.warn(f"Argument `{name}` does not satisfy the "
+                          f"following requirement: C. Copying array "
+                          f"(this may reduce performance)")
+            x = _compat.array_contiguous(x)
 
     return x
@@ -354,22 +350,21 @@ def _ensure_array_shape(x, name, shape, allow_reshape=False):
     else:
         return x
 
+
 def _ensure_valid_pts(x, y, z, dim):
     if x.ndim != 1:
         raise TypeError(f"Argument `x` must be a vector")
 
-    M = x.size
-
     if dim >= 2:
         y = _ensure_array_shape(y, "y", x.shape)
 
     if dim >= 3:
         z = _ensure_array_shape(z, "z", x.shape)
 
-    if dim < 3 and z.size > 0:
+    if dim < 3 and z is not None and _compat.get_array_size(z) > 0:
         raise TypeError(f"Plan dimension is {dim}, but `z` was specified")
 
-    if dim < 2 and y.size > 0:
+    if dim < 2 and y is not None and _compat.get_array_size(y) > 0:
         raise TypeError(f"Plan dimension is {dim}, but `y` was specified")
 
     return x, y, z
diff --git a/python/cufinufft/cufinufft/_simple.py b/python/cufinufft/cufinufft/_simple.py
index 2b42c9d25..ac36e90ab 100644
--- a/python/cufinufft/cufinufft/_simple.py
+++ b/python/cufinufft/cufinufft/_simple.py
@@ -1,4 +1,4 @@
-from cufinufft import Plan
+from cufinufft import Plan, _compat
 
 def nufft1d1(x, data, n_modes=None, out=None, eps=1e-6, isign=1, **kwargs):
     return _invoke_plan(1, 1, x, None, None, data, out, isign, eps, n_modes,
@@ -24,7 +24,7 @@ def nufft3d2(x, y, z, data, out=None, eps=1e-6, isign=-1, **kwargs):
 
 def _invoke_plan(dim, nufft_type, x, y, z, data, out, isign, eps,
                  n_modes=None, kwargs=None):
-    dtype = data.dtype
+    dtype = _compat.get_array_dtype(data)
 
     n_trans = _get_ntrans(dim, nufft_type, data)
 
diff --git a/python/cufinufft/examples/example2d1many.py b/python/cufinufft/examples/example2d1_pycuda.py
similarity index 100%
rename from python/cufinufft/examples/example2d1many.py
rename to python/cufinufft/examples/example2d1_pycuda.py
diff --git a/python/cufinufft/examples/example2d2many.py b/python/cufinufft/examples/example2d2_pycuda.py
similarity index 100%
rename from python/cufinufft/examples/example2d2many.py
rename to python/cufinufft/examples/example2d2_pycuda.py
diff --git a/python/cufinufft/examples/getting_started.py b/python/cufinufft/examples/getting_started_pycuda.py
similarity index 100%
rename from python/cufinufft/examples/getting_started.py
rename to python/cufinufft/examples/getting_started_pycuda.py
diff --git a/python/cufinufft/requirements.txt b/python/cufinufft/requirements.txt
index fcbec6659..bc2cbbd1c 100644
--- a/python/cufinufft/requirements.txt
+++ b/python/cufinufft/requirements.txt
@@ -1,3 +1,2 @@
 numpy
-pycuda
 six
diff --git a/python/cufinufft/tests/conftest.py b/python/cufinufft/tests/conftest.py
new file mode 100644
index 000000000..56528681f
--- /dev/null
+++ b/python/cufinufft/tests/conftest.py
@@ -0,0 +1,24 @@
+import pytest
+
+import utils
+
+
+def pytest_addoption(parser):
+    parser.addoption("--framework", action="append", default=[], help="List of frameworks")
+
+def pytest_generate_tests(metafunc):
+    if "framework" in metafunc.fixturenames:
+        metafunc.parametrize("framework", metafunc.config.getoption("framework"))
+
+@pytest.fixture
+def to_gpu(framework):
+    to_gpu, _ = utils.transfer_funcs(framework)
+
+    return to_gpu
+
+
+@pytest.fixture
+def to_cpu(framework):
+    _, to_cpu = utils.transfer_funcs(framework)
+
+    return to_cpu
diff --git a/python/cufinufft/tests/test_array_ordering.py b/python/cufinufft/tests/test_array_ordering.py
index d42fd8fa7..0fba8f8f5 100644
--- a/python/cufinufft/tests/test_array_ordering.py
+++ b/python/cufinufft/tests/test_array_ordering.py
@@ -2,60 +2,26 @@
 
 import numpy as np
 
-import pycuda.autoinit # NOQA:401
-import pycuda.gpuarray as gpuarray
-
-from cufinufft import Plan
+from cufinufft import Plan, _compat
 
 import utils
 
 
-def test_type2_ordering(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
-    complex_dtype = utils._complex_dtype(dtype)
-
-    k = utils.gen_nu_pts(M).astype(dtype)
-    fk = utils.gen_uniform_data(shape).astype(complex_dtype)
-
-    fkTT = fk.T.copy().T
-
-    k_gpu = gpuarray.to_gpu(k)
-    fk_gpu = gpuarray.to_gpu(fk)
-    fkTT_gpu = gpuarray.to_gpu(fkTT)
-
-    plan = Plan(2, shape, eps=tol, dtype=complex_dtype)
-
-    plan.setpts(k_gpu[0], k_gpu[1], k_gpu[2])
-
-    c_gpu = plan.execute(fk_gpu)
-    with pytest.raises(TypeError, match="following requirement: C") as err:
-        cTT_gpu = plan.execute(fkTT_gpu)
-
-    # Ideally, it should be possible to get this to align with true output,
-    # but corrently does not look like it.
-
-    # c = c_gpu.get()
-    # cTT = cTT_gpu.get()
-
-    # assert np.allclose(c, cTT, rtol=1e-2)
-
-
-def test_type1_ordering(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
+def test_type1_ordering(to_gpu, to_cpu, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
     complex_dtype = utils._complex_dtype(dtype)
 
     k, c = utils.type1_problem(dtype, shape, M)
 
-    k_gpu = gpuarray.to_gpu(k)
-    c_gpu = gpuarray.to_gpu(c)
+    k_gpu = to_gpu(k)
+    c_gpu = to_gpu(c)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
 
     plan.setpts(*k_gpu)
 
-    out_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype)
-
-    plan.execute(c_gpu, out=out_gpu)
+    out = np.empty(shape, dtype=complex_dtype, order="F")
 
-    out_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype, order="F")
+    out_gpu = to_gpu(out)
 
     with pytest.raises(TypeError, match="following requirement: C") as err:
         plan.execute(c_gpu, out=out_gpu)
diff --git a/python/cufinufft/tests/test_basic.py b/python/cufinufft/tests/test_basic.py
index 7d0bccc90..d63c23a7a 100644
--- a/python/cufinufft/tests/test_basic.py
+++ b/python/cufinufft/tests/test_basic.py
@@ -2,10 +2,7 @@
 
 import numpy as np
 
-import pycuda.autoinit # NOQA:401
-import pycuda.gpuarray as gpuarray
-
-from cufinufft import Plan
+from cufinufft import Plan, _compat
 
 import utils
 
@@ -16,19 +13,21 @@
 MS = [256, 1024, 4096]
 TOLS = [1e-2, 1e-3]
 OUTPUT_ARGS = [False, True]
+CONTIGUOUS = [False, True]
+
 
 @pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("shape", SHAPES)
 @pytest.mark.parametrize("M", MS)
 @pytest.mark.parametrize("tol", TOLS)
 @pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
-def test_type1(dtype, shape, M, tol, output_arg):
+def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
     complex_dtype = utils._complex_dtype(dtype)
 
     k, c = utils.type1_problem(dtype, shape, M)
 
-    k_gpu = gpuarray.to_gpu(k)
-    c_gpu = gpuarray.to_gpu(c)
+    k_gpu = to_gpu(k)
+    c_gpu = to_gpu(c)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
 
@@ -38,12 +37,12 @@ def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
     plan.setpts(*k_gpu)
 
     if output_arg:
-        fk_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype)
+        fk_gpu = _compat.array_empty_like(c_gpu, shape, dtype=complex_dtype)
         plan.execute(c_gpu, out=fk_gpu)
     else:
         fk_gpu = plan.execute(c_gpu)
 
-    fk = fk_gpu.get()
+    fk = to_cpu(fk_gpu)
 
     utils.verify_type1(k, c, fk, tol)
 
@@ -53,39 +52,61 @@
 @pytest.mark.parametrize("M", MS)
 @pytest.mark.parametrize("tol", TOLS)
 @pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
-def test_type2(dtype, shape, M, tol, output_arg):
+@pytest.mark.parametrize("contiguous", CONTIGUOUS)
+def test_type2(to_gpu, to_cpu, dtype, shape, M, tol, output_arg, contiguous):
     complex_dtype = utils._complex_dtype(dtype)
 
     k, fk = utils.type2_problem(dtype, shape, M)
 
-    k_gpu = gpuarray.to_gpu(k)
-    fk_gpu = gpuarray.to_gpu(fk)
-
     plan = Plan(2, shape, eps=tol, dtype=complex_dtype)
 
+    check_result = True
+
+    if not contiguous and len(shape) > 1:
+        fk = fk.copy(order="F")
+
+        if _compat.array_can_contiguous(to_gpu(np.empty(1))):
+            def _execute(*args, **kwargs):
+                with pytest.warns(UserWarning, match="requirement: C. Copying"):
+                    return plan.execute(*args, **kwargs)
+        else:
+            check_result = False
+
+            def _execute(*args, **kwargs):
+                with pytest.raises(TypeError, match="requirement: C"):
+                    plan.execute(*args, **kwargs)
+
+    else:
+        def _execute(*args, **kwargs):
+            return plan.execute(*args, **kwargs)
+
+    k_gpu = to_gpu(k)
+    fk_gpu = to_gpu(fk)
+
     plan.setpts(*k_gpu)
 
     if output_arg:
-        c_gpu = gpuarray.GPUArray(shape=(M,), dtype=complex_dtype)
-        plan.execute(fk_gpu, out=c_gpu)
+        c_gpu = _compat.array_empty_like(fk_gpu, (M,), dtype=complex_dtype)
+        _execute(fk_gpu, out=c_gpu)
     else:
-        c_gpu = plan.execute(fk_gpu)
+        c_gpu = _execute(fk_gpu)
 
-    c = c_gpu.get()
+    if check_result:
+        c = to_cpu(c_gpu)
 
-    utils.verify_type2(k, fk, c, tol)
+        utils.verify_type2(k, fk, c, tol)
 
 
-def test_opts(shape=(8, 8, 8), M=32, tol=1e-3):
+def test_opts(to_gpu, to_cpu, shape=(8, 8, 8), M=32, tol=1e-3):
     dtype = np.float32
 
     complex_dtype = utils._complex_dtype(dtype)
 
     k, c = utils.type1_problem(dtype, shape, M)
 
-    k_gpu = gpuarray.to_gpu(k)
-    c_gpu = gpuarray.to_gpu(c)
-    fk_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype)
+    k_gpu = to_gpu(k)
+    c_gpu = to_gpu(c)
+    fk_gpu = _compat.array_empty_like(c_gpu, shape, dtype=complex_dtype)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype,
                 gpu_sort=False, gpu_maxsubprobsize=10)
@@ -94,7 +115,6 @@ def test_opts(shape=(8, 8, 8), M=32, tol=1e-3):
 
     plan.execute(c_gpu, fk_gpu)
 
-    fk = fk_gpu.get()
+    fk = to_cpu(fk_gpu)
 
     utils.verify_type1(k, c, fk, tol)
-
diff --git a/python/cufinufft/tests/test_error_checks.py b/python/cufinufft/tests/test_error_checks.py
index 59798e7ca..6a9a6b4aa 100644
--- a/python/cufinufft/tests/test_error_checks.py
+++ b/python/cufinufft/tests/test_error_checks.py
@@ -1,15 +1,11 @@
 import numpy as np
 import pytest
 
-import pycuda.autoinit # NOQA:401
-import pycuda.gpuarray as gpuarray
-
-from cufinufft import Plan
+from cufinufft import Plan, _compat
 
 import utils
 
-
-def test_set_nu_raises_on_dtype():
+def test_set_nu_raises_on_dtype(to_gpu):
     dtype = np.complex64
 
     M = 4096
@@ -19,10 +15,10 @@
 
     kxyz = utils.gen_nu_pts(M, dim=dim).astype(dtype)
 
-    kxyz_gpu = gpuarray.to_gpu(kxyz)
+    kxyz_gpu = to_gpu(kxyz)
 
     # Here we'll intentionally contruct an incorrect array dtype.
-    kxyz_gpu_wrong_type = gpuarray.to_gpu(kxyz.real.astype(np.float64))
+    kxyz_gpu_wrong_type = to_gpu(kxyz.real.astype(np.float64))
 
     plan = Plan(1, shape, eps=tol, dtype=dtype)
 
@@ -40,7 +36,7 @@
                     kxyz_gpu_wrong_type[1], kxyz_gpu_wrong_type[2])
 
 
-def test_set_pts_raises_on_size():
+def test_set_pts_raises_on_size(to_gpu):
     dtype = np.float32
     complex_dtype = np.complex64
 
@@ -51,7 +47,7 @@
 
     kxyz = utils.gen_nu_pts(M, dim=dim).astype(dtype)
 
-    kxyz_gpu = gpuarray.to_gpu(kxyz)
+    kxyz_gpu = to_gpu(kxyz)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
 
@@ -62,7 +58,7 @@
         plan.setpts(kxyz_gpu[0], kxyz_gpu[1], kxyz_gpu[2][:4])
 
 
-def test_set_pts_raises_on_nonvector():
+def test_set_pts_raises_on_nonvector(to_gpu):
     dtype = np.float32
     complex_dtype = np.complex64
 
@@ -73,7 +69,7 @@
 
     kxyz = utils.gen_nu_pts(M, dim=dim).astype(dtype)
 
-    kxyz_gpu = gpuarray.to_gpu(kxyz)
+    kxyz_gpu = to_gpu(kxyz)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
 
@@ -81,7 +77,7 @@
         plan.setpts(kxyz)
 
 
-def test_set_pts_raises_on_number_of_args():
+def test_set_pts_raises_on_number_of_args(to_gpu):
     dtype = np.float32
     complex_dtype = np.complex64
 
@@ -92,7 +88,7 @@
 
     kxyz = utils.gen_nu_pts(M, dim=3).astype(dtype)
 
-    kxyz_gpu = gpuarray.to_gpu(kxyz)
+    kxyz_gpu = to_gpu(kxyz)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
 
@@ -112,7 +108,7 @@ def test_wrong_field_names():
         plan = Plan(1, (8, 8), foo="bar")
 
 
-def test_exec_raises_on_dtype():
+def test_exec_raises_on_dtype(to_gpu):
    dtype = np.float32
    complex_dtype = np.complex64
 
@@ -123,14 +119,17 @@
 
     kxyz = utils.gen_nu_pts(M, dim=dim).astype(dtype)
     c = utils.gen_nonuniform_data(M).astype(complex_dtype)
-    c_gpu = gpuarray.to_gpu(c)
     # Using c.real gives us wrong dtype here...
-    c_gpu_wrong_dtype = gpuarray.to_gpu(c.real)
+    c_gpu = to_gpu(c)
+    # Using c.real gives us wrong dtype here...
+    # Need contiguous here since numba does not allow transfers of
+    # non-contiguous arrays.
+    c_gpu_wrong_dtype = to_gpu(np.ascontiguousarray(c.real))
 
-    kxyz_gpu = gpuarray.to_gpu(kxyz)
-    fk_gpu = gpuarray.GPUArray(shape, dtype=complex_dtype)
+    kxyz_gpu = to_gpu(kxyz)
+    fk_gpu = _compat.array_empty_like(kxyz_gpu, shape, dtype=complex_dtype)
 
     # Here we'll intentionally contruct an incorrect array dtype.
-    fk_gpu_wrong_dtype = gpuarray.GPUArray(shape, dtype=np.complex128)
+    fk_gpu_wrong_dtype = _compat.array_empty_like(fk_gpu, shape,
+                                                  dtype=np.complex128)
 
     plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
diff --git a/python/cufinufft/tests/test_examples.py b/python/cufinufft/tests/test_examples.py
index 34fe610a8..c6fb5dd45 100644
--- a/python/cufinufft/tests/test_examples.py
+++ b/python/cufinufft/tests/test_examples.py
@@ -17,5 +17,11 @@
         scripts.append(os.path.join(examples_dir, filename))
 
 @pytest.mark.parametrize("filename", scripts)
-def test_example(filename):
-    subprocess.check_call([sys.executable, filename])
+def test_example(filename, request):
+    # Extract framework from format `example_framework.py`.
+    framework = Path(filename).stem.split("_")[-1]
+
+    if framework in request.config.getoption("framework"):
+        subprocess.check_call([sys.executable, filename])
+    else:
+        pytest.skip("Example not in list of frameworks")
diff --git a/python/cufinufft/tests/test_multi.py b/python/cufinufft/tests/test_multi.py
index 9115ca484..a8e392fed 100644
--- a/python/cufinufft/tests/test_multi.py
+++ b/python/cufinufft/tests/test_multi.py
@@ -1,16 +1,18 @@
 import pytest
 
 import numpy as np
-
-import pycuda.driver as drv
-import pycuda.gpuarray as gpuarray
-
 from cufinufft import Plan
 
 import utils
 
 
-def test_multi_type1(dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
+def test_multi_type1(framework, dtype=np.float32, shape=(16, 16, 16), M=4096, tol=1e-3):
+    if framework == "pycuda":
+        import pycuda.driver as drv
+        import pycuda.gpuarray as gpuarray
+    else:
+        pytest.skip("Multi-GPU support only tested for pycuda")
+
     complex_dtype = utils._complex_dtype(dtype)
 
     drv.init()
diff --git a/python/cufinufft/tests/test_simple.py b/python/cufinufft/tests/test_simple.py
index b1a9d319a..f51a137f0 100644
--- a/python/cufinufft/tests/test_simple.py
+++ b/python/cufinufft/tests/test_simple.py
@@ -2,10 +2,8 @@
 
 import numpy as np
 
-import pycuda.autoinit
-import pycuda.gpuarray as gpuarray
-
 import cufinufft
+from cufinufft import _compat
 
 import utils
 
@@ -22,7 +20,7 @@
 @pytest.mark.parametrize("M", MS)
 @pytest.mark.parametrize("tol", TOLS)
 @pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
-def test_simple_type1(dtype, shape, n_trans, M, tol, output_arg):
+def test_simple_type1(to_gpu, to_cpu, dtype, shape, n_trans, M, tol, output_arg):
     real_dtype = dtype
     complex_dtype = utils._complex_dtype(dtype)
 
@@ -35,20 +33,21 @@ def test_simple_type1(to_gpu, to_cpu, dtype, shape, n_trans, M, tol, output_arg):
 
     k, c = utils.type1_problem(dtype, shape, M, n_trans=n_trans)
 
-    k_gpu = gpuarray.to_gpu(k)
-    c_gpu = gpuarray.to_gpu(c)
+    k_gpu = to_gpu(k)
+    c_gpu = to_gpu(c)
 
     if output_arg:
         # Ensure that output array has proper shape i.e., (N1, ...) for no
         # batch, (1, N1, ...) for batch of size one, and (n, N1, ...) for
         # batch of size n.
-        fk_gpu = gpuarray.GPUArray(n_trans + shape, dtype=complex_dtype)
+        fk_gpu = _compat.array_empty_like(c_gpu, n_trans + shape,
+                                          dtype=complex_dtype)
 
         fun(*k_gpu, c_gpu, out=fk_gpu, eps=tol)
     else:
         fk_gpu = fun(*k_gpu, c_gpu, shape, eps=tol)
 
-    fk = fk_gpu.get()
+    fk = to_cpu(fk_gpu)
 
     utils.verify_type1(k, c, fk, tol)
 
@@ -59,7 +58,7 @@
 @pytest.mark.parametrize("M", MS)
 @pytest.mark.parametrize("tol", TOLS)
 @pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
-def test_simple_type2(dtype, shape, n_trans, M, tol, output_arg):
+def test_simple_type2(to_gpu, to_cpu, dtype, shape, n_trans, M, tol, output_arg):
     real_dtype = dtype
     complex_dtype = utils._complex_dtype(dtype)
 
@@ -71,16 +70,17 @@ def test_simple_type2(to_gpu, to_cpu, dtype, shape, n_trans, M, tol, output_arg):
 
     k, fk = utils.type2_problem(dtype, shape, M, n_trans=n_trans)
 
-    k_gpu = gpuarray.to_gpu(k)
-    fk_gpu = gpuarray.to_gpu(fk)
+    k_gpu = to_gpu(k)
+    fk_gpu = to_gpu(fk)
 
     if output_arg:
-        c_gpu = gpuarray.GPUArray(n_trans + (M,), dtype=complex_dtype)
+        c_gpu = _compat.array_empty_like(fk_gpu, n_trans + (M,),
+                                         dtype=complex_dtype)
 
         fun(*k_gpu, fk_gpu, eps=tol, out=c_gpu)
     else:
         c_gpu = fun(*k_gpu, fk_gpu, eps=tol)
 
-    c = c_gpu.get()
+    c = to_cpu(c_gpu)
 
     utils.verify_type2(k, fk, c, tol)
diff --git a/python/cufinufft/tests/utils.py b/python/cufinufft/tests/utils.py
index 5bace0d71..9ea3281f3 100644
--- a/python/cufinufft/tests/utils.py
+++ b/python/cufinufft/tests/utils.py
@@ -126,3 +126,32 @@ def verify_type2(k, fk, c, tol):
     type2_rel_err = np.linalg.norm(c_target - c_est) / np.linalg.norm(c_target)
 
     assert type2_rel_err < 25 * tol
+
+
+def transfer_funcs(module_name):
+    if module_name == "pycuda":
+        import pycuda.autoinit # NOQA:401
+        from pycuda.gpuarray import to_gpu
+        def to_cpu(obj):
+            return obj.get()
+    elif module_name == "cupy":
+        import cupy
+        def to_gpu(obj):
+            return cupy.array(obj)
+        def to_cpu(obj):
+            return obj.get()
+    elif module_name == "numba":
+        import numba.cuda
+        to_gpu = numba.cuda.to_device
+        def to_cpu(obj):
+            return obj.copy_to_host()
+    elif module_name == "torch":
+        import torch
+        def to_gpu(obj):
+            return torch.as_tensor(obj, device=torch.device("cuda"))
+        def to_cpu(obj):
+            return obj.cpu().numpy()
+    else:
+        raise TypeError(f"Unsupported framework: {module_name}")
+
+    return to_gpu, to_cpu
diff --git a/tools/cufinufft/docker/cuda11.0/Dockerfile-x86_64 b/tools/cufinufft/docker/cuda11.0/Dockerfile-x86_64
index 4b09fc392..29954df1d 100644
--- a/tools/cufinufft/docker/cuda11.0/Dockerfile-x86_64
+++ b/tools/cufinufft/docker/cuda11.0/Dockerfile-x86_64
@@ -5,8 +5,7 @@
 ENV CUDA_MAJOR 11
 ENV CUDA_MINOR 0
 ENV CUDART_VERSION 11.0.171
-ENV CUFFT_VERSION 10.2.1.245
-ENV CURAND_VERSION 10.2.1.245
+ENV CUDA_LIBRARIES_VERSION 11.0.3
 ENV NVPROF_VERSION 11.0.221
 ENV NVTX_VERSION 11.0.167
 ENV NVCC_VERSION 11.0.221
@@ -43,16 +42,14 @@ ENV NVIDIA_REQUIRE_CUDA "cuda>=${CUDA_DOT_VERSION} brand=tesla,driver>=418,drive
 
 # runtime
 RUN yum install -y \
-        libcufft-${CUDA_DASH_VERSION}-${CUFFT_VERSION}-1 \
-        libcurand-${CUDA_DASH_VERSION}-${CURAND_VERSION}-1 \
+        cuda-libraries-${CUDA_DASH_VERSION}-${CUDA_LIBRARIES_VERSION}-1 \
         cuda-nvtx-${CUDA_DASH_VERSION}-${NVTX_VERSION}-1 && \
     rm -rf /var/cache/yum/*
 
 # devel
 RUN yum install -y \
         cuda-cudart-devel-${CUDA_DASH_VERSION}-${CUDART_VERSION}-1 \
-        libcufft-devel-${CUDA_DASH_VERSION}-${CUFFT_VERSION}-1 \
-        libcurand-devel-${CUDA_DASH_VERSION}-${CURAND_VERSION}-1 \
+        cuda-libraries-devel-${CUDA_DASH_VERSION}-${CUDA_LIBRARIES_VERSION}-1 \
        cuda-nvprof-${CUDA_DASH_VERSION}-${NVPROF_VERSION}-1 \
         cuda-nvcc-${CUDA_DASH_VERSION}-${NVCC_VERSION}-1 && \
     rm -rf /var/cache/yum/*
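
Usage sketch (illustrative, not part of the patch). With pycuda no longer a hard
dependency, any GPU array exposing `__cuda_array_interface__` can be passed
straight to the plan API. A minimal CuPy example, assuming a CUDA device and a
cupy build matching the local toolkit; sizes and tolerance are arbitrary:

    import numpy as np
    import cupy as cp

    import cufinufft

    M, shape = 4096, (16, 16, 16)

    # Random nonuniform points in [-pi, pi)^3 and complex strengths on the GPU.
    k = cp.asarray(np.random.uniform(-np.pi, np.pi, (3, M)).astype(np.float32))
    c = cp.asarray((np.random.standard_normal(M)
                    + 1j * np.random.standard_normal(M)).astype(np.complex64))

    plan = cufinufft.Plan(1, shape, eps=1e-3, dtype=np.complex64)
    plan.setpts(k[0], k[1], k[2])

    # The result is allocated via _compat.array_empty_like, so it comes back
    # as a CuPy array as well.
    fk = plan.execute(c)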
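On the `get_array_ptr` fallback (illustrative, not part of the patch): reading
`__cuda_array_interface__` on a CUDA torch tensor with `requires_grad=True`
raises a RuntimeError (see the issue comment linked in _compat.py), which is
why the helper retries with `data_ptr()`. Assuming a CUDA build of PyTorch:

    import torch

    t = torch.zeros(8, device="cuda", requires_grad=True)

    try:
        ptr = t.__cuda_array_interface__["data"][0]
    except RuntimeError:
        # The same fallback _compat.get_array_ptr takes for such tensors.
        ptr = t.data_ptr()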
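Since conftest.py registers `--framework` with `action="append"`, the flag can
also be repeated to cover several backends in a single pytest invocation,
instead of the four separate runs in the Jenkinsfile, e.g.:

    python3 -m pytest --framework=pycuda --framework=cupy python/cufinufft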