Skip to content

Commit

Permalink
...EJB
Browse files (browse the repository at this point in the history)
  • Loading branch information
ebylaska committed Dec 7, 2023
1 parent 78e92c1 commit 3db0caf
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 34 deletions.
6 changes: 3 additions & 3 deletions Nwpw/band/cpsd/band_cpsd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,9 @@ int band_cpsd(MPI_Comm comm_world0, std::string &rtdbstring)
while (!done)
{
++icount;
// band_inner_loop(control, &mygrid, &myion, &mykin, &mycoulomb, &myxc, &mypsp,
// &mystrfac, &myewald, psi1, psi2, Hpsi, psi_r, dn, hml, lmbda, E,
// &deltae, &deltac, &deltar);
band_inner_loop(control, &mygrid, &myion, &mykin, &mycoulomb, &myxc, &mypsp,
&mystrfac, &myewald, psi1, psi2, Hpsi, psi_r, dn, hml, lmbda, E,
&deltae, &deltac, &deltar);

// mydfpt.start(psi1,psi_r

Expand Down
6 changes: 6 additions & 0 deletions Nwpw/band/cpsd/band_inner_loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,16 @@ void band_inner_loop(Control2 &control, Cneb *mygrid, Ion *myion,
vc = mygrid->c_pack_allocate(0);
vcall = mygrid->c_pack_allocate(0);

std::cout << "HERA" << std::endl;
vpsi = x;

//fion = new double[3 * (myion->nion)]();
fion = myion->fion1;

/* generate local psp*/
mypsp->v_local(vl,false,dng,fion);

std::cout << "HERb" << std::endl;

// myewald->phafac();

Expand All @@ -84,6 +87,7 @@ void band_inner_loop(Control2 &control, Cneb *mygrid, Ion *myion,
{
mygrid->g_zero(Hpsi);
mygrid->gg_copy(psi2, psi1);
std::cout << "HERc" << std::endl;

if (move)
{
Expand All @@ -94,7 +98,9 @@ void band_inner_loop(Control2 &control, Cneb *mygrid, Ion *myion,
}

/* convert psi(G) to psi(r) - Expensive */
std::cout << "HERd" << std::endl;
mygrid->gh_fftb(psi1,psi_r);
std::cout << "HERe" << std::endl;

/* generate dn */
mygrid->hr_aSumSqr(scal2,psi_r,dn);
Expand Down
59 changes: 33 additions & 26 deletions Nwpw/nwpwlib/C3dB/CGrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,9 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,

aqsize = 0;
alast_index = aqmax - 1;
aqindx = new (std::nothrow) int[aqmax]();
aqindx = new (std::nothrow) int[aqmax]();
aqstatus = new (std::nothrow) int[aqmax]();
aqnbb = new (std::nothrow) int[aqmax]();
atmp = new (std::nothrow) double[2*aqmax*2*nfft3d]();

bqmax = pfft3_qsize0;
Expand All @@ -529,8 +530,9 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
//bqmax = aqmax;
bqsize = 0;
blast_index = bqmax - 1;
bqindx = new (std::nothrow) int[bqmax]();
bqindx = new (std::nothrow) int[bqmax]();
bqstatus = new (std::nothrow) int[bqmax]();
bqnbb = new (std::nothrow) int[bqmax]();
btmp = new (std::nothrow) double[2*bqmax*2*nfft3d]();

/* initialize async buffer data for pfft */
Expand Down Expand Up @@ -1041,7 +1043,6 @@ void CGrid::cr_pfft3b(const int nb, double *a)
*** A(nx,ny,nz) <- fft1d^(-1)[A(kx,ny,nz)] ***
************************************************/
c3db::mygdevice.batch_cfftx_tmpx(c3db::fft_tag,false, nx, ny * nq, 2*nfft3d, a, c3db::tmpx);
c3db::zeroend_fftb(nx, ny, nq, 1, a);
}

/*************************
Expand Down Expand Up @@ -1508,6 +1509,7 @@ void CGrid::pfftby(const int nb, double *tmp1, double *tmp2, int request_indx)
void CGrid::pfftbx(const int nb, double *tmp1, double *tmp2, int request_indx)
{

std::cout << "pfftbx maptype=" << maptype << std::endl;
/**********************
**** slab mapping ****
**********************/
Expand All @@ -1518,22 +1520,23 @@ void CGrid::pfftbx(const int nb, double *tmp1, double *tmp2, int request_indx)
*** A(nx,ny,nz) <- fft1d^(-1)[A(kx,ny,nz)] ***
************************************************/
c3db::mygdevice.batch_cfftx_tmpx(c3db::fft_tag,false, nx, ny * nq, 2*nfft3d, tmp2, c3db::tmpx);
c3db::zeroend_fftb(nx, ny, nq, 1, tmp2);
std::memcpy(tmp1, tmp2, 2*nfft3d * sizeof(double));
}
/*************************
**** hilbert mapping ****
*************************/
else
{
std::cout << "startptranspose 3" << std::endl;
c3db::c_ptranspose_ijk_end(nb, 3, tmp1, tmp2, request_indx);

std::cout << "intto fftx" << std::endl;
/************************************************
*** do fft along kx dimension ***
*** A(nx,ny,nz) <- fft1d^(-1)[A(kx,ny,nz)] ***
************************************************/
c3db::mygdevice.batch_cfftx_tmpx(c3db::fft_tag,false, nx, nq1, 2*nfft3d, tmp1, c3db::tmpx);
c3db::zeroend_fftb(nx, nq1, 1, 1, tmp1);
std::cout << "out fftx" << std::endl;
if (2*nfft3d_map < 2*nfft3d)
std::memset(tmp1 + 2*nfft3d_map, 0, (2*nfft3d - 2*nfft3d_map) * sizeof(double));
}
Expand All @@ -1547,6 +1550,7 @@ void CGrid::pfftbx(const int nb, double *tmp1, double *tmp2, int request_indx)
void CGrid::pfftb_step(const int step, const int nb, double *a, double *tmp1,
double *tmp2, const int request_indx)
{
std::cout << "into pfftb_step step=" << step << " nb=" << nb << std::endl;
if (step == 0) {
// c3db::parall->astart(request_indx,parall->np_i());

Expand All @@ -1568,6 +1572,7 @@ void CGrid::pfftb_step(const int step, const int nb, double *a, double *tmp1,
pfftby(nb, tmp1, tmp2, request_indx);
} else if (step == 5) {
// pfftbx mem->dev->dev->mem
std::cout << "into pfftbx nb=" << nb << std::endl;
pfftbx(nb, tmp1, tmp2, request_indx);
// c3db::parall->aend(request_indx);
}
Expand Down Expand Up @@ -2068,7 +2073,6 @@ void CGrid::pfftbx_end(const int nb, double *tmp1, double *tmp2, int request_ind
*** A(nx,ny,nz) <- fft1d^(-1)[A(kx,ny,nz)] ***
************************************************/
c3db::mygdevice.batch_cfftx_stages_tmpx(2,c3db::fft_tag,false, nx, ny * nq, 2*nfft3d, tmp2, c3db::tmpx,da_indx);
c3db::zeroend_fftb(nx, ny, nq, 1, tmp2);
std::memcpy(tmp1, tmp2, 2*nfft3d * sizeof(double));
}
/*************************
Expand All @@ -2081,7 +2085,6 @@ void CGrid::pfftbx_end(const int nb, double *tmp1, double *tmp2, int request_ind
*** A(nx,ny,nz) <- fft1d^(-1)[A(kx,ny,nz)] ***
************************************************/
c3db::mygdevice.batch_cfftx_stages_tmpx(2,c3db::fft_tag,false, nx, nq1, 2*nfft3d, tmp1, c3db::tmpx,da_indx);
c3db::zeroend_fftb(nx, nq1, 1, 1, tmp1);
if (2*nfft3d_map < 2*nfft3d)
std::memset(tmp1 + 2*nfft3d_map, 0, (2*nfft3d - 2*nfft3d_map) * sizeof(double));
}
Expand Down Expand Up @@ -2154,9 +2157,9 @@ void CGrid::cr_pfft3b_queuein(const int nb, double *a) {
shift1 = 2*nfft3d*(2*indx);
shift2 = 2*nfft3d*(2*indx + 1);
if (staged_gpu_fft_pipeline)
pfftb_step12(status, nb, a, atmp+shift1, atmp+shift2, indx+4,indx);
pfftb_step12(status, aqnbb[indx], a, atmp+shift1, atmp+shift2, indx+4,indx);
else
pfftb_step(status, nb, a, atmp+shift1, atmp+shift2, indx+4);
pfftb_step(status, aqnbb[indx], a, atmp+shift1, atmp+shift2, indx+4);
++aqstatus[indx];
}

Expand All @@ -2166,6 +2169,7 @@ void CGrid::cr_pfft3b_queuein(const int nb, double *a) {
++aqsize;
aqindx[aqsize - 1] = alast_index;
aqstatus[alast_index] = 0;
aqnbb[alast_index] = nb;

// status = 0;
shift1 = 2*nfft3d*(2*alast_index);
Expand All @@ -2187,19 +2191,21 @@ void CGrid::cr_pfft3b_queueout(const int nb, double *a)
int shift1, shift2;
int indx1 = aqindx[0];

while (aqstatus[indx1] < aqmax) {

for (auto q = 0; q < aqsize; ++q) {
int indx = aqindx[q];
int status = aqstatus[indx] + 1;
shift1 = 2*nfft3d * (2*indx);
shift2 = 2*nfft3d * (2*indx+1);
if (staged_gpu_fft_pipeline)
pfftb_step12(status,nb,a,atmp+shift1,atmp+shift2,indx+4,indx);
else
pfftb_step(status,nb,a,atmp+shift1,atmp+shift2,indx+4);
++aqstatus[indx];
}
while (aqstatus[indx1] < aqmax)
{
for (auto q=0; q<aqsize; ++q)
{
int indx = aqindx[q];
int status = aqstatus[indx] + 1;
shift1 = 2*nfft3d * (2*indx);
shift2 = 2*nfft3d * (2*indx+1);
std::cout << "queueout q=" << q << " status=" << status << " gpu_staged=" << staged_gpu_fft_pipeline << std::endl;
if (staged_gpu_fft_pipeline)
pfftb_step12(status,aqnbb[indx],a,atmp+shift1,atmp+shift2,indx+4,indx);
else
pfftb_step(status,aqnbb[indx],a,atmp+shift1,atmp+shift2,indx+4);
++aqstatus[indx];
}
}
double scal1 = 1.0 / ((double)((nx) * (ny) * (nz)));
double enrr0 = scal1 * c3db::rr_dot(atmp, atmp);
Expand Down Expand Up @@ -2970,9 +2976,9 @@ void CGrid::rc_pfft3f_queuein(const int nb, double *b)
shift1 = 2*nfft3d * (2*indx);
shift2 = 2*nfft3d * (2*indx + 1);
if (staged_gpu_fft_pipeline)
pfftf_step10(status, nb, b, btmp + shift1, btmp + shift2, indx+4,indx);
pfftf_step10(status, bqnbb[indx], b, btmp + shift1, btmp + shift2, indx+4,indx);
else
pfftf_step(status, nb, b, btmp + shift1, btmp + shift2, indx+4);
pfftf_step(status, bqnbb[indx], b, btmp + shift1, btmp + shift2, indx+4);
++bqstatus[indx];
}

Expand All @@ -2982,6 +2988,7 @@ void CGrid::rc_pfft3f_queuein(const int nb, double *b)
++bqsize;
bqindx[bqsize - 1] = blast_index;
bqstatus[blast_index] = 0;
bqnbb[blast_index] = nb;

// status = 0;
shift1 = 2*nfft3d * (2*blast_index);
Expand Down Expand Up @@ -3011,9 +3018,9 @@ void CGrid::rc_pfft3f_queueout(const int nb, double *b)
shift1 = 2*nfft3d * (2*indx);
shift2 = 2*nfft3d * (2*indx + 1);
if (staged_gpu_fft_pipeline)
pfftf_step10(status, nb, b, btmp + shift1, btmp + shift2, indx+4,indx);
pfftf_step10(status, bqnbb[indx], b, btmp + shift1, btmp + shift2, indx+4,indx);
else
pfftf_step(status, nb, b, btmp + shift1, btmp + shift2, indx+4);
pfftf_step(status, bqnbb[indx], b, btmp + shift1, btmp + shift2, indx+4);
++bqstatus[indx];
}
}
Expand Down
6 changes: 4 additions & 2 deletions Nwpw/nwpwlib/C3dB/CGrid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ class CGrid : public k1db, public c3db {

/* pfft_queue data */
int aqmax, aqsize, alast_index;
int *aqindx, *aqstatus;
int *aqindx, *aqstatus, *aqnbb;
double *atmp;

int bqmax, bqsize, blast_index;
int *bqindx, *bqstatus;
int *bqindx, *bqstatus, *bqnbb;
double *btmp;

/* zplane data */
Expand Down Expand Up @@ -104,9 +104,11 @@ class CGrid : public k1db, public c3db {
delete [] atmp;
delete [] aqindx;
delete [] aqstatus;
delete [] aqnbb;
delete [] btmp;
delete [] bqindx;
delete [] bqstatus;
delete [] bqnbb;

// deallocate async buffer data
for (auto q=0; q<aqmax; ++q)
Expand Down
13 changes: 10 additions & 3 deletions Nwpw/nwpwlib/C3dB/Cneb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,23 +774,30 @@ void Cneb::gh_fftb(double *psi, double *psi_r)
int indx1, indx1n, shift1;
int indx2, indx2n, shift2;

n = neq[0] + neq[1];
n = (neq[0] + neq[1])*nbrillq;
shift1 = 2 * CGrid::npack1_max();
shift2 = n2ft3d;
indx1 = indx1n = 0;
indx2 = indx2n = 0;
done = 0;
while (!done)
{
std::cout << "gh indx1=" << indx1 << " indx2=" << indx2 << " neq=" << neq[0]+neq[1]<< std::endl;
if (indx1 < n)
{
cr_pfft3b_queuein(1, psi + indx1n);
std::cout << "queuein" << std::endl;
int nbq1 = (indx1/(neq[0]+neq[1])) + 1;
std::cout << "queuein In nbq1="<< nbq1 << std::endl;
cr_pfft3b_queuein(nbq1, psi + indx1n);
indx1n += shift1;
++indx1;
}
if (cr_pfft3b_queuefilled() || (indx1 >= n))
{
cr_pfft3b_queueout(1, psi_r + indx2n);
int nbq2 = (indx2/(neq[0]+neq[1])) + 1;
std::cout << "queueout In nbq2="<< nbq2 << std::endl;
cr_pfft3b_queueout(nbq2, psi_r + indx2n);
std::cout << "queueout Out" << std::endl;
indx2n += shift2;
++indx2;
}
Expand Down

0 comments on commit 3db0caf

Please sign in to comment.