Skip to content

Commit

Permalink
Optimize SR-ERI integral screening
Browse files Browse the repository at this point in the history
  • Loading branch information
sunqm committed Feb 1, 2023
1 parent c83088e commit ce65731
Show file tree
Hide file tree
Showing 18 changed files with 182 additions and 115 deletions.
14 changes: 0 additions & 14 deletions src/cint1e.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,6 @@
} \
POP_PRIM2CTR

// little endian on x86
//typedef union {
// double d;
// unsigned short s[4];
//} type_IEEE754;
static double approx_log(double x)
{
//type_IEEE754 y;
//y.d = x;
//return ((double)(y.s[3] >> 4) - 1023) * 0.7;
//return log(x);
return 2.5;
}

int CINT1e_loop_nopt(double *out, CINTEnvVars *envs, double *cache)
{
int *shls = envs->shls;
Expand Down
7 changes: 4 additions & 3 deletions src/cint1e_grids.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ int CINT1e_grids_loop(double *gctr, CINTEnvVars *envs, double *cache)
CINTOpt_log_max_pgto_coeff(log_maxcj, cj, j_prim, j_ctr);
if (CINTset_pairdata(pdata_base, ai, aj, envs->ri, envs->rj,
log_maxci, log_maxcj, envs->li_ceil, envs->lj_ceil,
i_prim, j_prim, SQUARE(envs->rirj), expcutoff)) {
i_prim, j_prim, SQUARE(envs->rirj), expcutoff, env)) {
return 0;
}

double fac1i, fac1j, expij;
double fac1i, fac1j, expij, cutoff;
double *rij;
int ip, jp, i, grids_offset, bgrids;
int empty[4] = {1, 1, 1, 1};
Expand Down Expand Up @@ -169,6 +169,7 @@ int CINT1e_grids_loop(double *gctr, CINTEnvVars *envs, double *cache)
if (pdata_ij->cceij > expcutoff) {
continue;
}
cutoff = expcutoff - pdata_ij->cceij;
envs->ai[0] = ai[ip];
expij = pdata_ij->eij;
rij = pdata_ij->rij;
Expand All @@ -182,7 +183,7 @@ int CINT1e_grids_loop(double *gctr, CINTEnvVars *envs, double *cache)
}

envs->fac[0] = fac1i;
CINTg0_1e_grids(g, envs, cache, gridsT);
CINTg0_1e_grids(g, cutoff, envs, cache, gridsT);
(*envs->f_gout)(gout, g, idx, envs, *gempty);
PRIM2CTR(i, gout, bgrids * nf * n_comp);
}
Expand Down
11 changes: 8 additions & 3 deletions src/cint2c2e.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,14 @@

#define PUSH \
if (cum == SIMDD) { \
(*envs->f_g0_2e)(g, &bc, envs, cum); \
(*envs->f_g0_2e)(g, cutoff, &bc, envs, cum); \
(*envs->f_gout)(gout, g, idx, envs); \
POP_PRIM2CTR; \
} \
envs->ai[cum] = ai[ip]; \
envs->ak[cum] = ak[kp]; \
envs->fac[cum] = fac1k; \
envs->fac[cum] = expcutoff; \
if (*iempty) { \
fp2c[np2c] = CINTiprim_to_ctr_0; \
*iempty = 0; \
Expand Down Expand Up @@ -110,15 +111,15 @@

#define RUN_REST \
if (cum == 1) { \
(*envs->f_g0_2e_simd1)(g, &bc, envs, 0); \
(*envs->f_g0_2e_simd1)(g, cutoff, &bc, envs, 0); \
(*envs->f_gout_simd1)(gout, g, idx, envs); \
} else if (cum > 1) { \
r1 = MM_SET1(1.); \
for (i = 0; i < envs->nrys_roots; i++) { \
MM_STORE(bc.u+i*SIMDD, r1); \
MM_STORE(bc.w+i*SIMDD, r1); \
} \
(*envs->f_g0_2e)(g, &bc, envs, cum); \
(*envs->f_g0_2e)(g, cutoff, &bc, envs, cum); \
(*envs->f_gout)(gout, g, idx, envs); \
} \
POP_PRIM2CTR;
Expand Down Expand Up @@ -149,6 +150,7 @@ int CINT2c2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty
double *ci = env + bas(PTR_COEFF, i_sh);
double *ck = env + bas(PTR_COEFF, k_sh);
double *coeff[2] = {ci, ck};
double expcutoff = envs->expcutoff;
int n_comp = envs->ncomp_e1 * envs->ncomp_e2 * envs->ncomp_tensor;
int nf = envs->nf;
double fac1k;
Expand Down Expand Up @@ -179,6 +181,7 @@ int CINT2c2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty
g = gout + len0; // for gx, gy, gz

ALIGNMM Rys2eT bc;
ALIGNMM double cutoff[SIMDD];
int *idx;
MALLOC_INSTACK(idx, envs->nf * 3);
int *non0ctri, *non0ctrk;
Expand Down Expand Up @@ -238,6 +241,7 @@ int CINT2c2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
double *ci = env + bas(PTR_COEFF, i_sh);
double *ck = env + bas(PTR_COEFF, k_sh);
double *coeff[2] = {ci, ck};
double expcutoff = envs->expcutoff;
int n_comp = envs->ncomp_e1 * envs->ncomp_e2 * envs->ncomp_tensor;
int nf = envs->nf;
double fac1k;
Expand Down Expand Up @@ -268,6 +272,7 @@ int CINT2c2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
g = gout + len0; // for gx, gy, gz

ALIGNMM Rys2eT bc;
ALIGNMM double cutoff[SIMDD];
double common_factor = envs->common_factor * (M_PI*M_PI*M_PI)*2/SQRTPI
* CINTcommon_fac_sp(envs->i_l) * CINTcommon_fac_sp(envs->k_l);

Expand Down
73 changes: 50 additions & 23 deletions src/cint2e.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@

#define PUSH(RIJ, RKL) \
if (cum == SIMDD) { \
(*envs->f_g0_2e)(g, &bc, envs, cum); \
(*envs->f_g0_2e)(g, cutoff, &bc, envs, cum); \
(*envs->f_gout)(gout, g, idx, envs); \
POP_PRIM2CTR; \
} \
Expand All @@ -115,6 +115,7 @@
envs->rkl[2*SIMDD+cum] = *(RKL+2); \
fac1i = fac1j * expijkl; \
envs->fac[cum] = fac1i; \
cutoff[cum] = eijcutoff - pdata_ij->cceij; \
if (*iempty) { \
fp2c[np2c] = CINTiprim_to_ctr_0; \
*iempty = 0; \
Expand Down Expand Up @@ -151,33 +152,19 @@

#define RUN_REST \
if (cum == 1) { \
(*envs->f_g0_2e_simd1)(g, &bc, envs, 0); \
(*envs->f_g0_2e_simd1)(g, cutoff, &bc, envs, 0); \
(*envs->f_gout_simd1)(gout, g, idx, envs); \
} else if (cum > 1) { \
r1 = MM_SET1(1.); \
for (i = 0; i < envs->nrys_roots; i++) { \
MM_STORE(bc.u+i*SIMDD, r1); \
MM_STORE(bc.w+i*SIMDD, r1); \
} \
(*envs->f_g0_2e)(g, &bc, envs, cum); \
(*envs->f_g0_2e)(g, cutoff, &bc, envs, cum); \
(*envs->f_gout)(gout, g, idx, envs); \
} \
POP_PRIM2CTR

// little endian on x86
//typedef union {
// double d;
// unsigned short s[4];
//} type_IEEE754;
static inline double approx_log(double x)
{
//type_IEEE754 y;
//y.d = x;
//return ((double)(y.s[3] >> 4) - 1023) * 0.7;
//return log(x);
return 3;
}

int CINT2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty)
{
int *shls = envs->shls;
Expand Down Expand Up @@ -209,6 +196,7 @@ int CINT2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty)
double *cl = env + bas(PTR_COEFF, l_sh);
double *coeff[4] = {ci, cj, ck, cl};
double expcutoff = envs->expcutoff;
double rr_ij = SQUARE(envs->rirj);
double *log_maxci, *log_maxcj, *log_maxck, *log_maxcl;
PairData *pdata_base, *pdata_ij;
MALLOC_DATA_INSTACK(log_maxci, i_prim+j_prim+k_prim+l_prim);
Expand All @@ -220,7 +208,7 @@ int CINT2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty)
CINTOpt_log_max_pgto_coeff(log_maxcj, cj, j_prim, j_ctr);
if (CINTset_pairdata(pdata_base, ai, aj, envs->ri, envs->rj,
log_maxci, log_maxcj, envs->li_ceil, envs->lj_ceil,
i_prim, j_prim, SQUARE(envs->rirj), expcutoff)) {
i_prim, j_prim, rr_ij, expcutoff, env)) {
return 0;
}
CINTOpt_log_max_pgto_coeff(log_maxck, ck, k_prim, k_ctr);
Expand Down Expand Up @@ -295,10 +283,48 @@ int CINT2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty)
g = gout + len0; // for gx, gy, gz

ALIGNMM Rys2eT bc;
double rr_kl = SQUARE(envs->rkrl);
double log_rr_kl = (envs->lk_ceil+envs->ll_ceil+1)*approx_log(rr_kl+1)/2;
double akl, ekl, expijkl, ccekl, eijcutoff;
ALIGNMM double cutoff[SIMDD];
ALIGNMM double rkl[4];
double rr_kl = SQUARE(envs->rkrl);
double log_rr_kl, akl, ekl, expijkl, ccekl, eijcutoff;
akl = ak[k_prim-1] + al[l_prim-1];
log_rr_kl = 1.7 - 1.5 * approx_log(akl);
int lkl = envs->lk_ceil + envs->ll_ceil;
#ifdef WITH_RANGE_COULOMB
double omega = env[PTR_RANGE_OMEGA];
if (omega < 0) {
// Normally the factor
// (aj*d/aij+theta*R)^li * (ai*d/aij+theta*R)^lj * pi^1.5/aij^{(li+lj+3)/2}
// is a good approximation for polynomial parts in SR-ERIs.
// <~ (aj*d/aij+theta*R)^li * (ai*d/aij+theta*R)^lj * (pi/aij)^1.5
// <~ (d+theta*R)^li * (d+theta*R)^lj * (pi/aij)^1.5
if (envs->nrys_roots > 1) {
double r_guess = 8.;
double omega2 = omega * omega;
int lij = envs->li_ceil + envs->lj_ceil;
if (lij > 0) {
double aij = ai[i_prim-1] + aj[j_prim-1];
double dist_ij = sqrt(rr_ij);
double theta = omega2 / (omega2 + aij);
expcutoff += lij * approx_log(
(dist_ij+theta*r_guess+1.)/(dist_ij+1.));
}
if (lkl > 0) {
double theta = omega2 / (omega2 + akl);
log_rr_kl += lkl * approx_log(
sqrt(rr_kl) + theta*r_guess + 1.);
}
}
} else {
if (lkl > 0) {
log_rr_kl += lkl * approx_log(sqrt(rr_kl) + 1.);
}
}
#else
if (lkl > 0) {
log_rr_kl += lkl * approx_log(sqrt(rr_kl) + 1.);
}
#endif

INITSIMD;

Expand Down Expand Up @@ -397,7 +423,7 @@ int CINT2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
MALLOC_DATA_INSTACK(_pdata_ij, i_prim*j_prim + k_prim*l_prim);
if (CINTset_pairdata(_pdata_ij, ai, aj, envs->ri, envs->rj,
log_maxci, log_maxcj, envs->li_ceil, envs->lj_ceil,
i_prim, j_prim, SQUARE(envs->rirj), expcutoff)) {
i_prim, j_prim, SQUARE(envs->rirj), expcutoff, env)) {
return 0;
}

Expand All @@ -406,7 +432,7 @@ int CINT2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
_pdata_kl = _pdata_ij + i_prim*j_prim;
if (CINTset_pairdata(_pdata_kl, ak, al, envs->rk, envs->rl,
log_maxck, log_maxcl, envs->lk_ceil, envs->ll_ceil,
k_prim, l_prim, SQUARE(envs->rkrl), expcutoff)) {
k_prim, l_prim, SQUARE(envs->rkrl), expcutoff, env)) {
return 0;
}
}
Expand Down Expand Up @@ -479,6 +505,7 @@ int CINT2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
int *non0idxl = opt->sortedidx[l_sh];
int *non0ctr[4] = {non0ctri, non0ctrj, non0ctrk, non0ctrl};
int *non0idx[4] = {non0idxi, non0idxj, non0idxk, non0idxl};
ALIGNMM double cutoff[SIMDD];

INITSIMD;

Expand Down
14 changes: 8 additions & 6 deletions src/cint3c2e.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@

#define PUSH(RIJ, EXPIJ) \
if (cum == SIMDD) { \
(*envs->f_g0_2e)(g, &bc, envs, cum); \
(*envs->f_g0_2e)(g, cutoff, &bc, envs, cum); \
(*envs->f_gout)(gout, g, idx, envs); \
POP_PRIM2CTR; \
} \
Expand All @@ -81,6 +81,7 @@
envs->rij[2*SIMDD+cum] = *(RIJ+2); \
fac1i = fac1j * EXPIJ; \
envs->fac[cum] = fac1i; \
cutoff[cum] = expcutoff - pdata_ij->cceij; \
if (*iempty) { \
fp2c[np2c] = CINTiprim_to_ctr_0; \
*iempty = 0; \
Expand Down Expand Up @@ -114,15 +115,15 @@

#define RUN_REST \
if (cum == 1) { \
(*envs->f_g0_2e_simd1)(g, &bc, envs, 0); \
(*envs->f_g0_2e_simd1)(g, cutoff, &bc, envs, 0); \
(*envs->f_gout_simd1)(gout, g, idx, envs); \
} else if (cum > 1) { \
r1 = MM_SET1(1.); \
for (i = 0; i < envs->nrys_roots; i++) { \
MM_STORE(bc.u+i*SIMDD, r1); \
MM_STORE(bc.w+i*SIMDD, r1); \
} \
(*envs->f_g0_2e)(g, &bc, envs, cum); \
(*envs->f_g0_2e)(g, cutoff, &bc, envs, cum); \
(*envs->f_gout)(gout, g, idx, envs); \
} \
POP_PRIM2CTR
Expand Down Expand Up @@ -169,7 +170,7 @@ int CINT3c2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty
CINTOpt_log_max_pgto_coeff(log_maxcj, cj, j_prim, j_ctr);
if (CINTset_pairdata(pdata_base, ai, aj, envs->ri, envs->rj,
log_maxci, log_maxcj, envs->li_ceil, envs->lj_ceil,
i_prim, j_prim, SQUARE(envs->rirj), expcutoff)) {
i_prim, j_prim, SQUARE(envs->rirj), expcutoff, env)) {
return 0;
}

Expand Down Expand Up @@ -233,6 +234,7 @@ int CINT3c2e_loop_nopt(double *out, CINTEnvVars *envs, double *cache, int *empty
g = gout + len0; // for gx, gy, gz

ALIGNMM Rys2eT bc;
ALIGNMM double cutoff[SIMDD];
INITSIMD;

PairData *pdata_ij;
Expand Down Expand Up @@ -313,7 +315,7 @@ int CINT3c2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
MALLOC_DATA_INSTACK(pdata_base, i_prim*j_prim);
if (CINTset_pairdata(pdata_base, ai, aj, envs->ri, envs->rj,
log_maxci, log_maxcj, envs->li_ceil, envs->lj_ceil,
i_prim, j_prim, SQUARE(envs->rirj), expcutoff)) {
i_prim, j_prim, SQUARE(envs->rirj), expcutoff, env)) {
return 0;
}
}
Expand Down Expand Up @@ -376,7 +378,7 @@ int CINT3c2e_loop(double *out, CINTEnvVars *envs, double *cache, int *empty)
g = gout + len0; // for gx, gy, gz

ALIGNMM Rys2eT bc;

ALIGNMM double cutoff[SIMDD];
INITSIMD;

for (kp = 0; kp < k_prim; kp++) {
Expand Down
2 changes: 1 addition & 1 deletion src/fmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#define SML_FLOAT80 2.0e-20
#define SQRTPIE4 .8862269254527580136490837416705725913987747280611935641069038949264
#define SQRTPIE4l .8862269254527580136490837416705725913987747280611935641069038949264l
#define ERFC_bound 350
#define ERFC_bound 200

#ifdef HAVE_QUADMATH_H
#include <quadmath.h>
Expand Down
2 changes: 1 addition & 1 deletion src/g1e_grids.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void CINTinit_int1e_grids_EnvVars(CINTEnvVars *envs, int *ng, int *shls,
r[ig+GRID_BLKSIZE*1]*r[ig+GRID_BLKSIZE*1] + \
r[ig+GRID_BLKSIZE*2]*r[ig+GRID_BLKSIZE*2])

int CINTg0_1e_grids(double *RESTRICT g, CINTEnvVars *envs,
int CINTg0_1e_grids(double *RESTRICT g, double cutoff, CINTEnvVars *envs,
double *cache, double *RESTRICT gridsT)
{
int ngrids = envs->ngrids;
Expand Down
3 changes: 2 additions & 1 deletion src/g1e_grids.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
void CINTinit_int1e_grids_EnvVars(CINTEnvVars *envs, int *ng, int *shls,
int *atm, int natm, int *bas, int nbas, double *env);

int CINTg0_1e_grids(double *g, CINTEnvVars *envs, double *cache, double *gridsT);
int CINTg0_1e_grids(double *g, double cutoff, CINTEnvVars *envs,
double *cache, double *gridsT);
void CINTg0_1e_grids_igtj(double *g, CINTEnvVars *envs);
void CINTg0_1e_grids_iltj(double *g, CINTEnvVars *envs);

Expand Down
5 changes: 3 additions & 2 deletions src/g2e.c
Original file line number Diff line number Diff line change
Expand Up @@ -1868,7 +1868,7 @@ void CINTg0_2e_il2d4d(double *g, Rys2eT *bc, CINTEnvVars *envs)
/*
* g[i,k,l,j] = < ik | lj > = ( i j | k l )
*/
int CINTg0_2e(double *g, Rys2eT *bc, CINTEnvVars *envs, int count)
int CINTg0_2e(double *g, double *cutoff, Rys2eT *bc, CINTEnvVars *envs, int count)
{
ALIGNMM double aij[SIMDD];
ALIGNMM double akl[SIMDD];
Expand Down Expand Up @@ -1978,7 +1978,8 @@ int CINTg0_2e(double *g, Rys2eT *bc, CINTEnvVars *envs, int count)
//ABORT }
#ifdef WITH_RANGE_COULOMB
if (omega < 0) {
int all_negligible = _CINTsr_rys_roots_batch(envs, x, theta, u, w, count);
int all_negligible = _CINTsr_rys_roots_batch(
envs, x, theta, u, w, cutoff, count);
if (all_negligible) {
// g still has to be evaluated since iempty (which
// indicates whether g is zero) in cint2e is determined
Expand Down
6 changes: 4 additions & 2 deletions src/g2e.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ void CINTinit_int2e_stg_EnvVars(CINTEnvVars *envs, int *ng, int *shls,
void CINTinit_int2e_yp_EnvVars(CINTEnvVars *envs, int *ng, int *shls,
int *atm, int natm, int *bas, int nbas, double *env);

int CINTg0_2e(double *g, Rys2eT *bc, CINTEnvVars *envs, int count);
int CINTg0_2e_simd1(double *g, Rys2eT *bc, CINTEnvVars *envs, int idsimd);
int CINTg0_2e(double *g, double *cutoff,
Rys2eT *bc, CINTEnvVars *envs, int count);
int CINTg0_2e_simd1(double *g, double *cutoff,
Rys2eT *bc, CINTEnvVars *envs, int idsimd);
void CINTg0_2e_2d(double *g, Rys2eT *bc, CINTEnvVars *envs);
void CINTg0_2e_2d_simd1(double *g, Rys2eT *bc, CINTEnvVars *envs);
void CINTg0_2e_lj2d4d(double *g, Rys2eT *bc, CINTEnvVars *envs);
Expand Down
Loading

0 comments on commit ce65731

Please sign in to comment.