Skip to content

Commit

Permalink
Fix CUDA fft template instantiation
Browse files Browse the repository at this point in the history
  • Loading branch information
xqft committed Jul 12, 2023
1 parent 1f0eff1 commit 96a4254
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
10 changes: 5 additions & 5 deletions math/src/gpu/cuda/shaders/fft/fft.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ inline __device__ void _radix2_dit_butterfly(Fp *input,
{
if (blockIdx.x >= butterfly_count) return;

long group_count = 1 << stage;
long half_group_size = butterfly_count / group_count;
long group = threadIdx.x / half_group_size;
int group_count = 1 << stage;
int half_group_size = butterfly_count / group_count;
int group = threadIdx.x / half_group_size;

long pos_in_group = threadIdx.x % half_group_size;
long i = threadIdx.x * 2 - pos_in_group; // multiply quotient by 2
int pos_in_group = threadIdx.x % half_group_size;
int i = threadIdx.x * 2 - pos_in_group; // multiply quotient by 2

Fp w = twiddles[group];
Fp a = input[i];
Expand Down
7 changes: 5 additions & 2 deletions math/src/gpu/cuda/shaders/field/stark256.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ namespace p256

extern "C"
{
__global__ void radix2_dit_butterfly(p256::Fp *input, const p256::Fp *twiddles)
__global__ void radix2_dit_butterfly( p256::Fp *input,
const p256::Fp *twiddles,
const int &stage,
const int &butterfly_count)
{
_radix2_dit_butterfly<p256::Fp>(input, twiddles);
_radix2_dit_butterfly<p256::Fp>(input, twiddles, stage, butterfly_count);
}
// NOTE: To calculate the inverse twiddles, pass the inverse of omega as `_omega` (i.e. call with omega.inverse()).
__global__ void calc_twiddles(p256::Fp *result, const p256::Fp &_omega)
Expand Down

0 comments on commit 96a4254

Please sign in to comment.