From 1d9fb8efa029bf6c7207b2562266403c61e37f46 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Wed, 4 Sep 2024 10:41:55 -0400 Subject: [PATCH 1/9] Provide a user level locking mechanism for FFTW This commit provides a basic interface for the user to provide a lock around `fftw` calls. This is because any plan manipulation functions in `fftw` (make, destroy) are not thread-safe. See example code here: ```c++ #include #include #include #include #include using namespace std; #define N 65384 void locker(void *lck) { reinterpret_cast(lck)->lock(); } void unlocker(void *lck) { reinterpret_cast(lck)->unlock(); } int main() { int64_t Ns[3]; // guru describes mode array by vector [N1,N2..] Ns[0] = N; recursive_mutex lck; finufft_opts opts; finufft_default_opts(&opts); opts.nthreads = 1; opts.debug = 0; opts.fftw_lock_fun = locker; opts.fftw_unlock_fun = unlocker; opts.fftw_lock_data = reinterpret_cast(&lck); // random nonuniform points (x) and complex strengths (c)... vector> c(N); // init FFTW threads fftw_init_threads(); // FFTW and FINUFFT execution using OpenMP parallelization #pragma omp parallel for for (int j = 0; j < 100; ++j) { // allocate output array for FFTW... vector> F1(N); // FFTW plan lck.lock(); fftw_plan_with_nthreads(1); fftw_plan plan = fftw_plan_dft_1d(N, reinterpret_cast(c.data()), reinterpret_cast(F1.data()), FFTW_FORWARD, FFTW_ESTIMATE); fftw_destroy_plan(plan); lck.unlock(); // FINUFFT plan finufft_plan nufftplan; finufft_makeplan(1, 1, Ns, 1, 1, 1e-6, &nufftplan, &opts); finufft_destroy(nufftplan); } return 0; } ``` --- include/finufft_errors.h | 1 + include/finufft_opts.h | 6 ++++++ src/finufft.cpp | 40 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/include/finufft_errors.h b/include/finufft_errors.h index 2feaa131a..7a4157ebe 100644 --- a/include/finufft_errors.h +++ b/include/finufft_errors.h @@ -23,4 +23,5 @@ #define FINUFFT_ERR_BINSIZE_NOTVALID 18 #define FINUFFT_ERR_INSUFFICIENT_SHMEM 19 #define FINUFFT_ERR_NUM_NU_PTS_INVALID 20 +#define FINUFFT_ERR_LOCK_FUNS_INVALID 21 #endif diff --git a/include/finufft_opts.h b/include/finufft_opts.h index 0435b8c41..0d482f732 100644 --- a/include/finufft_opts.h +++ b/include/finufft_opts.h @@ -32,6 +32,12 @@ typedef struct finufft_opts { // defaults see finufft.cpp:finufft_default_opts() // atomic int spread_max_sp_size; // if >0, overrides spreader (dir=1) max subproblem size // sphinx tag (don't remove): @opts_end + + // User can provide their own FFTW planner lock functions for thread safety + // Null values ignored and use a default lock function (both or neither must be set) + void (*fftw_lock_fun)(void *); // Function ptr that locks the FFTW planner + void (*fftw_unlock_fun)(void *); // Function ptr that unlocks the FFTW planner + void *fftw_lock_data; // Data to pass to the lock functions (e.g. a mutex) } finufft_opts; // Those of the above of the form spread_* indicate pass through to finufft_spread_opts diff --git a/src/finufft.cpp b/src/finufft.cpp index b71acce87..e9ea637c1 100644 --- a/src/finufft.cpp +++ b/src/finufft.cpp @@ -89,6 +89,28 @@ namespace common { // Technically global state... // Needs to be static to avoid name collision with SINGLE/DOUBLE static std::mutex fftw_lock; + +class FFTWLockGuard { +public: + FFTWLockGuard(void (*lock_fun)(void *), void (*unlock_fun)(void *), void *lock_data) + : unlock_fun_(unlock_fun), lock_data_(lock_data) { + if (lock_fun) + lock_fun(lock_data_); + else + fftw_lock.lock(); + } + ~FFTWLockGuard() { + if (unlock_fun_) + unlock_fun_(lock_data_); + else + fftw_lock.unlock(); + } + +private: + void (*unlock_fun_)(void *); + void *lock_data_; +}; + #endif // We macro because it has no FLT args but gets compiled for both prec's... @@ -531,6 +553,9 @@ void FINUFFT_DEFAULT_OPTS(finufft_opts *o) o->maxbatchsize = 0; o->spread_nthr_atomic = -1; o->spread_max_sp_size = 0; + o->fftw_lock_fun = nullptr; + o->fftw_unlock_fun = nullptr; + o->fftw_lock_data = nullptr; // sphinx tag (don't remove): @defopts_end } @@ -568,6 +593,12 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, fprintf(stderr, "[%s] ntrans (%d) should be at least 1.\n", __func__, ntrans); return FINUFFT_ERR_NTRANS_NOTVALID; } + if (!opts->fftw_lock_fun != !opts->fftw_unlock_fun) { + fprintf(stderr, "[%s] fftw_(un)lock functions should be both null or both set\n", + __func__); + return FINUFFT_ERR_LOCK_FUNS_INVALID; + ; + } // get stuff from args... p->type = type; @@ -661,7 +692,8 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, // thread-safe (can be called inside OMP) { static bool did_fftw_init = false; // the only global state of FINUFFT - std::lock_guard lock(fftw_lock); + FFTWLockGuard lock(opts->fftw_lock_fun, opts->fftw_unlock_fun, + opts->fftw_lock_data); if (!did_fftw_init) { FFTW_INIT(); // setup FFTW global state; should only do once did_fftw_init = true; // ensure other FINUFFT threads don't clash @@ -753,7 +785,8 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, // fftw_plan_many_dft args: rank, gridsize/dim, howmany, in, inembed, istride, // idist, ot, onembed, ostride, odist, sign, flags { - std::lock_guard lock(fftw_lock); + FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, + p->opts.fftw_lock_data); // FFTW_PLAN_TH sets all future fftw_plan calls to use nthr_fft threads. // FIXME: Since this might override what the user wants for fftw, we'd like to @@ -1227,7 +1260,8 @@ int FINUFFT_DESTROY(FINUFFT_PLAN p) if (p->type == 1 || p->type == 2) { #ifndef FINUFFT_USE_DUCC0 { - std::lock_guard lock(fftw_lock); + FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, + p->opts.fftw_lock_data); FFTW_DE(p->fftwPlan); } #endif From 904baf3da690e8ab48f3880c26e1ebaa40c5db68 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Wed, 4 Sep 2024 15:19:19 -0400 Subject: [PATCH 2/9] fftw_lock: fix issue when null opts passed --- src/finufft.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/finufft.cpp b/src/finufft.cpp index e9ea637c1..5ea9723b3 100644 --- a/src/finufft.cpp +++ b/src/finufft.cpp @@ -593,7 +593,7 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, fprintf(stderr, "[%s] ntrans (%d) should be at least 1.\n", __func__, ntrans); return FINUFFT_ERR_NTRANS_NOTVALID; } - if (!opts->fftw_lock_fun != !opts->fftw_unlock_fun) { + if (!p->opts.fftw_lock_fun != !p->opts.fftw_unlock_fun) { fprintf(stderr, "[%s] fftw_(un)lock functions should be both null or both set\n", __func__); return FINUFFT_ERR_LOCK_FUNS_INVALID; @@ -692,8 +692,8 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, // thread-safe (can be called inside OMP) { static bool did_fftw_init = false; // the only global state of FINUFFT - FFTWLockGuard lock(opts->fftw_lock_fun, opts->fftw_unlock_fun, - opts->fftw_lock_data); + FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, + p->opts.fftw_lock_data); if (!did_fftw_init) { FFTW_INIT(); // setup FFTW global state; should only do once did_fftw_init = true; // ensure other FINUFFT threads don't clash From f12988059ba6cd7ff03b3f8d7d7d62d3673536ca Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Wed, 4 Sep 2024 15:26:16 -0400 Subject: [PATCH 3/9] fftw_lock: add new opts to python bindings --- python/finufft/finufft/_finufft.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/finufft/finufft/_finufft.py b/python/finufft/finufft/_finufft.py index 99969a38c..10af6829c 100644 --- a/python/finufft/finufft/_finufft.py +++ b/python/finufft/finufft/_finufft.py @@ -85,7 +85,10 @@ class FinufftOpts(ctypes.Structure): ('spread_thread', c_int), ('maxbatchsize', c_int), ('spread_nthr_atomic', c_int), - ('spread_max_sp_size', c_int)] + ('spread_max_sp_size', c_int), + ('fftw_lock_fun', c_void_p), + ('fftw_unlock_fun', c_void_p), + ('fftw_lock_data', c_void_p)] FinufftPlan = c_void_p From 49675c063993345360e0e60a2055b7ca85ae4082 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Mon, 9 Sep 2024 11:05:02 -0400 Subject: [PATCH 4/9] threads: fix lock guard issue in osx arm64 clang --- src/finufft.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/finufft.cpp b/src/finufft.cpp index 5ea9723b3..c9552e7ad 100644 --- a/src/finufft.cpp +++ b/src/finufft.cpp @@ -93,22 +93,23 @@ static std::mutex fftw_lock; class FFTWLockGuard { public: FFTWLockGuard(void (*lock_fun)(void *), void (*unlock_fun)(void *), void *lock_data) - : unlock_fun_(unlock_fun), lock_data_(lock_data) { + : unlock_fun_(unlock_fun), lock_data_(lock_data), fftw_lock_(fftw_lock) { if (lock_fun) lock_fun(lock_data_); else - fftw_lock.lock(); + fftw_lock_.lock(); } ~FFTWLockGuard() { if (unlock_fun_) unlock_fun_(lock_data_); else - fftw_lock.unlock(); + fftw_lock_.unlock(); } private: void (*unlock_fun_)(void *); void *lock_data_; + std::mutex &fftw_lock_; }; #endif From 024bfa8434b6198b7f4ea4ac026915d050b3cd8e Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Tue, 10 Sep 2024 10:33:57 -0400 Subject: [PATCH 5/9] fftw_lock: add info to changelog and user documentation --- CHANGELOG | 6 +++ docs/opts.rst | 100 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 93 insertions(+), 13 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 41c310842..45e9d16fc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,12 @@ List of features / changes made / release notes, in reverse chronological order. If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately). +V 2.3.1 + +* Add options for user-provided FFTW locker (PR548, Blackwell). These options can be be +used to prevent crashes when a user is creating/destroying FFTW plans and +FINUFFT plans in threads simultaneously. + V 2.3.0 (9/5/24) * Switched C++ standards from C++14 to C++17, allowing various templating diff --git a/docs/opts.rst b/docs/opts.rst index 8e32829d8..c510706df 100644 --- a/docs/opts.rst +++ b/docs/opts.rst @@ -20,7 +20,7 @@ to the simple, vectorized, or guru makeplan routines. Recall how to do this from C++: .. code-block:: C++ - + // (... set up M,x,c,tol,N, and allocate F here...) finufft_opts* opts; finufft_default_opts(opts); @@ -30,7 +30,7 @@ Recall how to do this from C++: This setting produces more timing output to ``stdout``. .. warning:: - + In C/C++ and Fortran, don't forget to call the command which sets default options (``finufft_default_opts`` or ``finufftf_default_opts``) before you start changing them and passing them to FINUFFT. @@ -51,9 +51,9 @@ Here are their default settings (from ``src/finufft.cpp:finufft_default_opts``): .. literalinclude:: ../src/finufft.cpp :start-after: @defopts_start :end-before: @defopts_end - + As for quick advice, the main options you'll want to play with are: - + - ``modeord`` to flip ("fftshift") the Fourier mode ordering - ``debug`` to look at timing output (to determine if your problem is spread/interpolation dominated, vs FFT dominated) - ``nthreads`` to run with a different number of threads than the current maximum available through OpenMP (a large number can sometimes be detrimental, and very small problems can sometimes run faster on 1 thread) @@ -92,7 +92,7 @@ Data handling options .. note:: The index *sets* are the same in the two ``modeord`` choices; their ordering differs only by a cyclic shift. The FFT ordering cyclically shifts the CMCL indices $\mbox{floor}(N/2)$ to the left (often called an "fftshift"). **chkbnds**: [DEPRECATED] has no effect. - + Diagnostic options ~~~~~~~~~~~~~~~~~~~~~~~ @@ -100,7 +100,7 @@ Diagnostic options **debug**: Controls the amount of overall debug/timing output to stdout. * ``debug=0`` : silent - + * ``debug=1`` : print some information * ``debug=2`` : prints more information @@ -113,11 +113,11 @@ Diagnostic options * ``spread_debug=2`` : prints lots. This can print thousands of lines since it includes one line per *subproblem*. - + **showwarn**: Whether to print warnings (these go to stderr). - + * ``showwarn=0`` : suppresses such warnings - + * ``showwarn=1`` : prints warnings @@ -173,19 +173,93 @@ for only two settings, as follows. Otherwise, setting it to zero chooses a good **spread_thread**: in the case of multiple transforms per call (``ntr>1``, or the "many" interfaces), controls how multithreading is used to spread/interpolate each batch of data. * ``spread_thread=0`` : makes an automatic choice between the below. Recommended. - + * ``spread_thread=1`` : acts on each vector in the batch in sequence, using multithreaded spread/interpolate on that vector. It can be slightly better than ``2`` for large problems. - + * ``spread_thread=2`` : acts on all vectors in a batch (of size chosen typically to be the number of threads) simultaneously, assigning each a thread which performs a single-threaded spread/interpolate. It is much better than ``1`` for all but large problems. (Historical note: this was used by Melody Shih for the original "2dmany" interface in 2018.) .. note:: - + Historical note: A former option ``3`` has been removed. This was like ``2`` except allowing nested OMP parallelism, so multi-threaded spread-interpolate was used for each of the vectors in a batch in parallel. This was used by Andrea Malleo in 2019. We have not yet found a case where this beats both ``1`` and ``2``, hence removed it due to complications with changing the OMP nesting state in both old and new OMP versions. - + **maxbatchsize**: in the case of multiple transforms per call (``ntr>1``, or the "many" interfaces), set the largest batch size of data vectors. Here ``0`` makes an automatic choice. If you are unhappy with this, then for small problems it should equal the number of threads, while for large problems it appears that ``1`` often better (since otherwise too much simultaneous RAM movement occurs). Some further work is needed to optimize this parameter. **spread_nthr_atomic**: if non-negative: for numbers of threads up to this value, an OMP critical block for ``add_wrapped_subgrid`` is used in spreading (type 1 transforms). Above this value, instead OMP atomic writes are used, which scale better for large thread numbers. If negative, the heuristic default in the spreader is used, set in ``src/spreadinterp.cpp:setup_spreader()``. **spread_max_sp_size**: if positive, overrides the maximum subproblem (chunking) size for multithreaded spreading (type 1 transforms). Otherwise the default in the spreader is used, set in ``src/spreadinterp.cpp:setup_spreader()``, which we believe is a decent heuristic for Intel i7 and xeon machines. + + +Thread safety options (advanced) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, with FFTW as the FFT library, FINUFFT is thread safe so long as no other threads are calling FFTW plan creation/destruction routines independently of FINUFFT. If these FFTW routines are called outside of FINUFFT, then the program is liable to crash. In most cases, the calling program can simply call the FFTW routine ``fftw_make_planner_thread_safe()`` before threading out and thread safety will be maintained. However, in instances where this is less desirable, we provide a means to provide your own FFTW locking mechanism. The following example code should exercise FFTW thread safety, and can be built with ``c++ thread_test.cpp -o thread_test -lfinufft -lfftw3_threads -lfftw3 -fopenmp -std=c++11``, assuming the finufft include and library paths are set. + +.. code-block:: C++ + + + // thread_test.cpp + #include + #include + #include + + #include + #include + #include + + using namespace std; + + constexpr int N = 65384; + + void locker(void *lck) { reinterpret_cast(lck)->lock(); } + void unlocker(void *lck) { reinterpret_cast(lck)->unlock(); } + + int main() { + int64_t Ns[3]; // guru describes mode array by vector [N1,N2..] + Ns[0] = N; + recursive_mutex lck; + + finufft_opts opts; + finufft_default_opts(&opts); + opts.nthreads = 1; + opts.debug = 0; + opts.fftw_lock_fun = locker; + opts.fftw_unlock_fun = unlocker; + opts.fftw_lock_data = reinterpret_cast(&lck); + + // random nonuniform points (x) and complex strengths (c) + vector> c(N); + + // init FFTW threads + fftw_init_threads(); + + // FFTW and FINUFFT execution using OpenMP parallelization + #pragma omp parallel for + for (int j = 0; j < 100; ++j) { + // allocate output array for FFTW... + vector> F1(N); + + // FFTW plan + lck.lock(); + fftw_plan_with_nthreads(1); + fftw_plan plan = fftw_plan_dft_1d(N, reinterpret_cast(c.data()), + reinterpret_cast(F1.data()), + FFTW_FORWARD, FFTW_ESTIMATE); + fftw_destroy_plan(plan); + lck.unlock(); + + // FINUFFT plan + finufft_plan nufftplan; + finufft_makeplan(1, 1, Ns, 1, 1, 1e-6, &nufftplan, &opts); + finufft_destroy(nufftplan); + } + + return 0; + } + +**fftw_lock_fun**: ``void (fun*)(void *)`` C-style callback function to lock calls to FFTW plan manipulation routines. A ``nullptr`` or ``0`` value will be ignored. If non-null, ``fftw_unlock_fun`` must also be set. + +**fftw_unlock_fun**: ``void (fun*)(void *)`` C-style callback function to unlock calls to FFTW plan manipulation routines. A ``nullptr`` or ``0`` value will be ignored. If non-null, ``fftw_lock_fun`` must also be set. + +**fftw_lock_data**: ``void *data`` pointer, typically to the lock object itself. Pointer will be passed to ``fftw_lock_fun`` and ``fftw_unlock_fun`` if they are set. From 1e2c047fdc9ac14e03327c969052eb3cc61c7384 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Tue, 10 Sep 2024 11:28:21 -0400 Subject: [PATCH 6/9] fftw_lock: add test for new fftw_lock --- test/CMakeLists.txt | 11 ++++++++ test/fftw_lock_test.cpp | 56 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 test/fftw_lock_test.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 37050876b..b9e9fbec7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -36,6 +36,17 @@ add_test( COMMAND testutils WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +if(NOT FINUFFT_USE_DUCC0) + add_executable(fftw_lock_test fftw_lock_test.cpp) + target_compile_features(fftw_lock_test PRIVATE cxx_std_17) + finufft_link_test(fftw_lock_test) + + add_test( + NAME run_fftw_lock_test + COMMAND fftw_lock_test + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +endif() + # Add ctest definitions that run at both precisions... function(add_tests_with_prec PREC REQ_TOL CHECK_TOL SUFFIX) # All of the following should be run at OMP_NUM_THREADS=4 or something small, diff --git a/test/fftw_lock_test.cpp b/test/fftw_lock_test.cpp new file mode 100644 index 000000000..c90e184e4 --- /dev/null +++ b/test/fftw_lock_test.cpp @@ -0,0 +1,56 @@ +#include +#include +#include + +#include +#include +#include + +constexpr int N = 65384; + +void locker(void *lck) { reinterpret_cast(lck)->lock(); } +void unlocker(void *lck) { reinterpret_cast(lck)->unlock(); } + +int main() { + int64_t Ns[3]; // guru describes mode array by vector [N1,N2..] + Ns[0] = N; + std::mutex lck; + + finufft_opts opts; + finufft_default_opts(&opts); + opts.nthreads = 1; + opts.debug = 0; + opts.fftw_lock_fun = locker; + opts.fftw_unlock_fun = unlocker; + opts.fftw_lock_data = reinterpret_cast(&lck); + + // random nonuniform points (x) and complex strengths (c)... + std::vector> c(N); + + omp_set_num_threads(8); + + // init FFTW threads + fftw_init_threads(); + +// FFTW and FINUFFT execution using OpenMP parallelization +#pragma omp parallel for + for (int j = 0; j < 100; ++j) { + // allocate output array for FFTW... + std::vector> F1(N); + + // FFTW plan + lck.lock(); + fftw_plan_with_nthreads(1); + fftw_plan plan = fftw_plan_dft_1d(N, reinterpret_cast(c.data()), + reinterpret_cast(F1.data()), + FFTW_FORWARD, FFTW_ESTIMATE); + fftw_destroy_plan(plan); + lck.unlock(); + + // FINUFFT plan + finufft_plan nufftplan; + finufft_makeplan(1, 1, Ns, 1, 1, 1e-6, &nufftplan, &opts); + finufft_destroy(nufftplan); + } + return 0; +} From 8594b65bf640c54efb08038642a7ae490b05bb14 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Tue, 10 Sep 2024 14:20:39 -0400 Subject: [PATCH 7/9] fftw_lock: share lock and init between float/double --- src/finufft.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/finufft.cpp b/src/finufft.cpp index c9552e7ad..2528dbc60 100644 --- a/src/finufft.cpp +++ b/src/finufft.cpp @@ -86,9 +86,9 @@ namespace finufft { namespace common { #ifndef FINUFFT_USE_DUCC0 -// Technically global state... -// Needs to be static to avoid name collision with SINGLE/DOUBLE -static std::mutex fftw_lock; +// The only global state in finufft. Inline ensures variables shared between float/double +inline std::mutex fftw_lock; +inline bool did_fftw_init = false; class FFTWLockGuard { public: @@ -692,7 +692,6 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, // Now place FFTW initialization in a lock, courtesy of OMP. Makes FINUFFT // thread-safe (can be called inside OMP) { - static bool did_fftw_init = false; // the only global state of FINUFFT FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, p->opts.fftw_lock_data); if (!did_fftw_init) { From 4dd9ad7b537fade76191a00d0836509340943782 Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Tue, 17 Sep 2024 09:42:16 -0400 Subject: [PATCH 8/9] Revert "fftw_lock: share lock and init between float/double" This reverts commit 8594b65bf640c54efb08038642a7ae490b05bb14. --- src/finufft.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/finufft.cpp b/src/finufft.cpp index bc50106f7..9ed3f7d27 100644 --- a/src/finufft.cpp +++ b/src/finufft.cpp @@ -86,9 +86,9 @@ namespace finufft { namespace common { #ifndef FINUFFT_USE_DUCC0 -// The only global state in finufft. Inline ensures variables shared between float/double -inline std::mutex fftw_lock; -inline bool did_fftw_init = false; +// Technically global state... +// Needs to be static to avoid name collision with SINGLE/DOUBLE +static std::mutex fftw_lock; class FFTWLockGuard { public: @@ -688,6 +688,7 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, // Now place FFTW initialization in a lock, courtesy of OMP. Makes FINUFFT // thread-safe (can be called inside OMP) { + static bool did_fftw_init = false; // the only global state of FINUFFT FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, p->opts.fftw_lock_data); if (!did_fftw_init) { From 0dd134cc4185e58cf254fffe9f3b635b327eb97a Mon Sep 17 00:00:00 2001 From: Robert Blackwell Date: Tue, 17 Sep 2024 16:55:07 -0400 Subject: [PATCH 9/9] fftw_lock_test: add comments to source describing why --- test/fftw_lock_test.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/fftw_lock_test.cpp b/test/fftw_lock_test.cpp index c90e184e4..45e148860 100644 --- a/test/fftw_lock_test.cpp +++ b/test/fftw_lock_test.cpp @@ -6,8 +6,15 @@ #include #include +// This file tests the user locking mechanism for multi-threaded FFTW. This +// demonstrates a user lock to prevent FFTW plan calls from interfering with +// finufft plan calls (make/destroy). +// Robert Blackwell. Based on bug identified by Jonas Krimmer (9/17/24) +// See discussion at https://github.com/ludvigak/FINUFFT.jl/issues/62 + constexpr int N = 65384; +// Example user lock functions void locker(void *lck) { reinterpret_cast(lck)->lock(); } void unlocker(void *lck) { reinterpret_cast(lck)->unlock(); }