Skip to content

Commit

Permalink
merge master
Browse files Browse the repository at this point in the history
  • Loading branch information
mreineck committed Oct 22, 2024
2 parents 2b2dbc2 + 5340e0b commit a4e0dba
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 39 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/cmake_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,33 @@ jobs:
working-directory: ./build
run: |
ctest -C ${{matrix.build_type}} --output-on-failure
- name: Set up Python
if: matrix.finufft_static_linking
uses: actions/setup-python@v5
with:
python-version: '3.10'

- name: Build Python wheels
if: matrix.finufft_static_linking
env:
MACOSX_DEPLOYMENT_TARGET: 13
shell: bash
run: |
python3 -m pip install \
--verbose \
-C cmake.define.CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-C cmake.define.FINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }} \
-C cmake.define.FINUFFT_USE_DUCC0=${{ matrix.ducc_fft }} \
python/finufft
- name: Install pytest
if: matrix.finufft_static_linking
run: |
python3 -m pip install --upgrade pip
python3 -m pip install pytest
- name: Test Python package
if: matrix.finufft_static_linking
run: |
python3 -m pytest python/finufft/test
76 changes: 40 additions & 36 deletions include/cufinufft/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
using namespace cufinufft::common;
int ier;
if (type < 1 || type > 3) {
fprintf(stderr, "[%s] Invalid type (%d): should be 1 or 2.\n", __func__, type);
fprintf(stderr, "[%s] Invalid type (%d): should be 1, 2, or 3.\n", __func__, type);
return FINUFFT_ERR_TYPE_NOTVALID;
}
if (ntransf < 1) {
Expand Down Expand Up @@ -178,7 +178,8 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
}

cufinufft_setup_binsize<T>(type, d_plan->spopts.nspread, dim, &d_plan->opts);
if (ier = cudaGetLastError(), ier != cudaSuccess) {
if (cudaGetLastError() != cudaSuccess) {
ier = FINUFFT_ERR_CUDA_FAILURE;
goto finalize;
}
if (d_plan->opts.debug) {
Expand All @@ -196,6 +197,42 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
printf("[cufinufft] shared memory required for the spreader: %ld\n", mem_required);
}


// pick the spread method by comparing the shared memory the spreader needs
// with the per-block maximum the device reports

/* Automatically set GPU method. */
if (d_plan->opts.gpu_method == 0) {
/* For type 1, we default to method 2 (SM) since this is generally faster
* if there is enough shared memory available. Otherwise, we default to GM.
* Type 3 inherits this behavior since the outer plan here is also a type 1.
*
* For type 2, we always default to method 1 (GM).
*/
if (type == 2) {
d_plan->opts.gpu_method = 1;
} else {
// query the device for the amount of shared memory available
int shared_mem_per_block{};
cudaDeviceGetAttribute(&shared_mem_per_block,
cudaDevAttrMaxSharedMemoryPerBlockOptin, device_id);
// compute the amount of shared memory required for the method
const auto shared_mem_required = shared_memory_required<T>(
dim, d_plan->spopts.nspread, d_plan->opts.gpu_binsizex,
d_plan->opts.gpu_binsizey, d_plan->opts.gpu_binsizez);
if ((shared_mem_required > shared_mem_per_block)) {
d_plan->opts.gpu_method = 1;
} else {
d_plan->opts.gpu_method = 2;
}
}
}

if (cudaGetLastError() != cudaSuccess) {
ier = FINUFFT_ERR_CUDA_FAILURE;
goto finalize;
}

if (type == 1 || type == 2) {
CUFINUFFT_BIGINT nf1 = 1, nf2 = 1, nf3 = 1;
set_nf_type12(d_plan->ms, d_plan->opts, d_plan->spopts, &nf1,
Expand All @@ -207,39 +244,6 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
set_nf_type12(d_plan->mu, d_plan->opts, d_plan->spopts, &nf3,
d_plan->opts.gpu_obinsizez);

// dynamically request the maximum amount of shared memory available
// for the spreader

/* Automatically set GPU method. */
if (d_plan->opts.gpu_method == 0) {
/* For type 1, we default to method 2 (SM) since this is generally faster
* if there is enough shared memory available. Otherwise, we default to GM.
*
* For type 2, we always default to method 1 (GM).
*/
if (type == 2) {
d_plan->opts.gpu_method = 1;
} else {
// query the device for the amount of shared memory available
int shared_mem_per_block{};
cudaDeviceGetAttribute(&shared_mem_per_block,
cudaDevAttrMaxSharedMemoryPerBlockOptin, device_id);
// compute the amount of shared memory required for the method
const auto shared_mem_required = shared_memory_required<T>(
dim, d_plan->spopts.nspread, d_plan->opts.gpu_binsizex,
d_plan->opts.gpu_binsizey, d_plan->opts.gpu_binsizez);
if ((shared_mem_required > shared_mem_per_block)) {
d_plan->opts.gpu_method = 1;
} else {
d_plan->opts.gpu_method = 2;
}
}
}

if ((ier = cudaGetLastError())) {
goto finalize;
}

d_plan->nf1 = nf1;
d_plan->nf2 = nf2;
d_plan->nf3 = nf3;
Expand Down Expand Up @@ -795,7 +799,7 @@ int cufinufft_setpts_impl(int M, T *d_kx, T *d_ky, T *d_kz, int N, T *d_s, T *d_
int t2modes[] = {d_plan->nf1, d_plan->nf2, d_plan->nf3};
cufinufft_opts t2opts = d_plan->opts;
t2opts.gpu_spreadinterponly = 0;
t2opts.gpu_method = 1;
t2opts.gpu_method = 0;
// Safe to ignore the return value here?
if (d_plan->t2_plan) cufinufft_destroy_impl(d_plan->t2_plan);
// check that maxbatchsize is correct
Expand Down
16 changes: 13 additions & 3 deletions src/cuda/cufinufft.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
#include <cufinufft.h>
#include <cufinufft/impl.h>

inline bool is_invalid_mode_array(int dim, const int64_t *modes64, int32_t modes32[3]) {
inline bool is_invalid_mode_array(int type, int dim, const int64_t *modes64,
int32_t modes32[3]) {
if (type == 3) {
modes32[0] = modes32[1] = modes32[2] = 1;
return false;
}

int64_t tot_size = 1;
for (int i = 0; i < dim; ++i) {
if (modes64[i] > std::numeric_limits<int32_t>::max()) return true;
Expand All @@ -28,7 +34,9 @@ int cufinufftf_makeplan(int type, int dim, const int64_t *nmodes, int iflag, int
}

int nmodes32[3];
if (is_invalid_mode_array(dim, nmodes, nmodes32)) return FINUFFT_ERR_NDATA_NOTVALID;
if (is_invalid_mode_array(type, dim, nmodes, nmodes32)) {
return FINUFFT_ERR_NDATA_NOTVALID;
}

return cufinufft_makeplan_impl(type, dim, nmodes32, iflag, ntransf, tol,
(cufinufft_plan_t<float> **)d_plan_ptr, opts);
Expand All @@ -42,7 +50,9 @@ int cufinufft_makeplan(int type, int dim, const int64_t *nmodes, int iflag, int
}

int nmodes32[3];
if (is_invalid_mode_array(dim, nmodes, nmodes32)) return FINUFFT_ERR_NDATA_NOTVALID;
if (is_invalid_mode_array(type, dim, nmodes, nmodes32)) {
return FINUFFT_ERR_NDATA_NOTVALID;
}

return cufinufft_makeplan_impl(type, dim, nmodes32, iflag, ntransf, tol,
(cufinufft_plan_t<double> **)d_plan_ptr, opts);
Expand Down
37 changes: 37 additions & 0 deletions test/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ foreach(srcfile ${test_src})
endforeach()

function(add_tests PREC REQ_TOL CHECK_TOL UPSAMP)
add_test(NAME cufinufft1d1_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 0 1 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d1_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 1 1 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})
Expand All @@ -29,13 +33,26 @@ function(add_tests PREC REQ_TOL CHECK_TOL UPSAMP)
COMMAND cufinufft1d_test 2 1 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d2_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 0 2 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d2_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 1 2 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d3_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 0 3 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d3_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 1 3 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft2d1_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft2d_test 0 1 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft2d1_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft2d_test 1 1 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})
Expand All @@ -44,10 +61,18 @@ function(add_tests PREC REQ_TOL CHECK_TOL UPSAMP)
COMMAND cufinufft2d_test 2 1 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft2d2_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft2d_test 0 2 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft2d2_test_SM_${PREC}_${UPSAMP}
COMMAND cufinufft2d_test 2 2 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft2d3_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft2d_test 0 3 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft2d3_test_SM_${PREC}_${UPSAMP}
COMMAND cufinufft2d_test 2 3 1e2 2e2 2e4 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})
Expand Down Expand Up @@ -76,6 +101,10 @@ function(add_tests PREC REQ_TOL CHECK_TOL UPSAMP)
COMMAND cufinufft2dmany_test 2 3 1e2 2e2 5 0 2e4 ${REQ_TOL}
${CHECK_TOL} ${PREC} ${UPSAMP})

add_test(NAME cufinufft3d1_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft3d_test 0 1 2 5 10 20 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft3d1_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft3d_test 1 1 2 5 10 20 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})
Expand All @@ -98,10 +127,18 @@ function(add_tests PREC REQ_TOL CHECK_TOL UPSAMP)
${PREC} ${UPSAMP})
endif()

add_test(NAME cufinufft3d2_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft3d_test 0 2 2 5 10 20 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft3d2_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft3d_test 1 2 2 5 10 20 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft3d3_test_auto_${PREC}_${UPSAMP}
COMMAND cufinufft3d_test 0 3 2 5 10 30 ${REQ_TOL} ${CHECK_TOL}
${PREC} ${UPSAMP})

add_test(NAME cufinufft3d3_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft3d_test 1 3 2 3 7 20 ${REQ_TOL} ${CHECK_TOL}*100
${PREC} ${UPSAMP})
Expand Down

0 comments on commit a4e0dba

Please sign in to comment.