Skip to content

Commit

Permalink
Feature: add stress for DFT+U and DeltaSpin in PW code
Browse files Browse the repository at this point in the history
  • Loading branch information
dyzheng committed Sep 2, 2024
1 parent 43141e5 commit c324dfb
Show file tree
Hide file tree
Showing 9 changed files with 452 additions and 221 deletions.
1 change: 1 addition & 0 deletions source/module_hamilt_pw/hamilt_pwdft/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ list(APPEND objects
stress_func_kin.cpp
stress_func_loc.cpp
stress_func_nl.cpp
stress_func_onsite.cpp
stress_func_us.cpp
stress_pw.cpp
VL_in_pw.cpp
Expand Down
5 changes: 0 additions & 5 deletions source/module_hamilt_pw/hamilt_pwdft/forces_onsite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@
#include "module_hamilt_lcao/module_dftu/dftu.h"
#include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"


#ifdef _OPENMP
#include <omp.h>
#endif

template <typename FPTYPE, typename Device>
void Forces<FPTYPE, Device>::cal_force_onsite(ModuleBase::matrix& force_onsite,
const ModuleBase::matrix& wg,
Expand Down
457 changes: 276 additions & 181 deletions source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class FS_Nonlocal_tools
* @brief calculate the dbecp_{ij} = <psi|\partial beta/\partial varepsilon_{ij}> for all beta functions
* stress_{ij} = -1/omega \sum_{n,k}f_{nk} \sum_I \sum_{lm,l'm'}D_{l,l'}^{I} becp * dbecp_{ij} also calculated
*/
void cal_dbecp_s(int ik, int npm, int ipol, int jpol, FPTYPE* stress);
void cal_dbecp_s(int ik, int npm, int ipol, int jpol, FPTYPE* stress = nullptr);
/**
* @brief calculate the dbecp_i = <psi|\partial beta/\partial \tau^I_i> for all beta functions
*/
Expand All @@ -75,7 +75,10 @@ class FS_Nonlocal_tools

void cal_force_dftu(int ik, int npm, FPTYPE* force, const int* orbital_corr, const std::complex<FPTYPE>* vu);
void cal_force_dspin(int ik, int npm, FPTYPE* force, const ModuleBase::Vector3<double>* lambda);

void cal_stress_dftu(int ik, int npm, FPTYPE* stress, const int* orbital_corr, const std::complex<FPTYPE>* vu);
void cal_stress_dspin(int ik, int npm, FPTYPE* stress, const ModuleBase::Vector3<double>* lambda);


std::complex<FPTYPE>* get_becp() { return becp; }
std::complex<FPTYPE>* get_dbecp() { return dbecp; }

Expand Down
44 changes: 20 additions & 24 deletions source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,6 @@ struct cal_stress_nl_op<FPTYPE, base_device::DEVICE_CPU>
};
// kernel for DFT+U
void operator()(const base_device::DEVICE_CPU* ctx,
const int& ipol,
const int& jpol,
const int& nkb,
const int& nbands_occ,
const int& ntype,
Expand Down Expand Up @@ -242,12 +240,12 @@ struct cal_stress_nl_op<FPTYPE, base_device::DEVICE_CPU>
const int ip_end = (orbital_l + 1) * (orbital_l + 1);
const int tlp1 = 2 * orbital_l + 1;
const int tlp1_2 = tlp1 * tlp1;
for (int ib = 0; ib < nbands_occ; ib++)
for (int ia = 0; ia < atom_na[it]; ia++)
{
const int ib2 = ib*2;
FPTYPE fac = d_wg[ik * wg_nc + ib];
for (int ia = 0; ia < atom_na[it]; ia++)
for (int ib = 0; ib < nbands_occ; ib++)
{
const int ib2 = ib*2;
FPTYPE fac = d_wg[ik * wg_nc + ib];
for (int ip1 = ip_begin; ip1 < ip_end; ip1++)
{
const int m1 = ip1 - ip_begin;
Expand All @@ -270,18 +268,16 @@ struct cal_stress_nl_op<FPTYPE, base_device::DEVICE_CPU>
local_stress -= fac * (ps[0] * dbb0 + ps[1] * dbb1 + ps[2] * dbb2 + ps[3] * dbb3).real();
}
} // end ip
vu += 4 * tlp1_2;// step for vu
} // ia
}
}// ib
vu += 4 * tlp1_2;// step for vu
}// ia
sum += atom_na[it] * nproj;
iat += atom_na[it];
} // end it
stress[ipol * 3 + jpol] += local_stress;
*stress += local_stress;
};
// kernel for DeltaSpin
void operator()(const base_device::DEVICE_CPU* ctx,
const int& ipol,
const int& jpol,
const int& nkb,
const int& nbands_occ,
const int& ntype,
Expand All @@ -300,17 +296,17 @@ struct cal_stress_nl_op<FPTYPE, base_device::DEVICE_CPU>
for (int it = 0; it < ntype; it++)
{
const int nproj = atom_nh[it];
for (int ib = 0; ib < nbands_occ; ib++)
for (int ia = 0; ia < atom_na[it]; ia++)
{
const int ib2 = ib*2;
FPTYPE fac = d_wg[ik * wg_nc + ib];
for (int ia = 0; ia < atom_na[it]; ia++)
int iat = iat0 + ia;
const std::complex<FPTYPE> coefficients0(lambda[iat*3+2], 0.0);
const std::complex<FPTYPE> coefficients1(lambda[iat*3] , lambda[iat*3+1]);
const std::complex<FPTYPE> coefficients2(lambda[iat*3] , -1 * lambda[iat*3+1]);
const std::complex<FPTYPE> coefficients3(-1 * lambda[iat*3+2], 0.0);
for (int ib = 0; ib < nbands_occ; ib++)
{
int iat = iat0 + ia;
const std::complex<FPTYPE> coefficients0(lambda[iat*3+2], 0.0);
const std::complex<FPTYPE> coefficients1(lambda[iat*3] , lambda[iat*3+1]);
const std::complex<FPTYPE> coefficients2(lambda[iat*3] , -1 * lambda[iat*3+1]);
const std::complex<FPTYPE> coefficients3(-1 * lambda[iat*3+2], 0.0);
const int ib2 = ib*2;
FPTYPE fac = d_wg[ik * wg_nc + ib];
for (int ip = 0; ip < nproj; ip++)
{
const int inkb1 = ib2 * nkb + sum + ia * nproj + ip;
Expand All @@ -321,12 +317,12 @@ struct cal_stress_nl_op<FPTYPE, base_device::DEVICE_CPU>
const std::complex<FPTYPE> dbb3 = conj(dbecp[nkb + inkb1]) * becp[nkb + inkb1];
local_stress -= fac * (coefficients0 * dbb0 + coefficients1 * dbb1 + coefficients2 * dbb2 + coefficients3 * dbb3).real();
} // end ip
} // ia
}
}// ib
}// ia
sum += atom_na[it] * nproj;
iat0 += atom_na[it];
} // end it
stress[ipol * 3 + jpol] += local_stress;
*stress += local_stress;
}
};

Expand Down
4 changes: 0 additions & 4 deletions source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,6 @@ struct cal_stress_nl_op
FPTYPE* stress);
// kernel for DFT+U
void operator()(const base_device::DEVICE_CPU* ctx,
const int& ipol,
const int& jpol,
const int& nkb,
const int& nbands_occ,
const int& ntype,
Expand All @@ -143,8 +141,6 @@ struct cal_stress_nl_op
FPTYPE* stress);
// kernel for DeltaSpin
void operator()(const base_device::DEVICE_CPU* ctx,
const int& ipol,
const int& jpol,
const int& nkb,
const int& nbands_occ,
const int& ntype,
Expand Down
26 changes: 22 additions & 4 deletions source/module_hamilt_pw/hamilt_pwdft/stress_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ template <typename FPTYPE, typename Device = base_device::DEVICE_CPU>
class Stress_Func
{
public:
Stress_Func(){};
~Stress_Func(){};
Stress_Func() {};
~Stress_Func() {};

// stress functions
// 1) the stress from the electron kinetic energy
Expand Down Expand Up @@ -119,7 +119,7 @@ class Stress_Func
FPTYPE* drhocg,
ModulePW::PW_Basis* rho_basis,
int type); // used in nonlinear core correction stress

// 6) the stress from the exchange-correlation functional term
void stress_gga(ModuleBase::matrix& sigma,
ModulePW::PW_Basis* rho_basis,
Expand All @@ -134,7 +134,7 @@ class Stress_Func

// 7) the stress from the non-local pseudopotentials
/**
* @brief This routine computes the atomic force of non-local pseudopotential
* @brief This routine computes the stress contribution of non-local pseudopotential
* Stress^{NL}_{ij} = -1/\Omega \sum_{n,k}f_{nk}\sum_I \sum_{lm,l'm'}D_{l,l'}^{I} [
* \sum_G \langle c_{nk}(\mathbf{G+K})|\beta_{lm}^I(\mathbf{G+K})\rangle *
* \sum_{G'}\langle \partial \beta_{lm}^I(\mathbf{G+K})/\partial \varepsilon_{ij}
Expand All @@ -153,6 +153,24 @@ class Stress_Func
pseudopot_cell_vnl* nlpp_in,
const UnitCell& ucell_in); // nonlocal part in PW basis

// 7) the stress from the DFT+U and DeltaSpin calculations
/**
* @brief This routine computes the stress contribution from the DFT+U and DeltaSpin calculations
* Stress^{NL}_{ij} = -1/\Omega \sum_{n,k}f_{nk}\sum_I \sum_{lm,l'm'}(V^U_{lmm'\sigma\sigma'} +
* f(\lambda,\sigma\sigma')) [ \sum_G \langle c_{nk}(\mathbf{G+K})|\alpha_{lm}^I(\mathbf{G+K})\rangle *
* \sum_{G'}\langle \partial \alpha_{lm}^I(\mathbf{G+K})/\partial \varepsilon_{ij}
* |c_{nk}(\mathbf{G+K})\rangle ] there would be three parts in the above equation: (1) sum over becp and dbecp with
* f(U+\lambda, \sigma\sigma', lmm')^{I} ----- first line in the above equation (2) calculate becp = <psi | alpha>
* ----- second line in the above equation (3) calculate dbecp = <psi | dalpha> ----- third line in the above
* equation
*/
void stress_onsite(ModuleBase::matrix& sigma,
const ModuleBase::matrix& wg,
const ModulePW::PW_Basis_K* wfc_basis,
const UnitCell& ucell_in,
const psi::Psi<complex<FPTYPE>, Device>* psi_in,
ModuleSymmetry::Symmetry* p_symm); // nonlocal part in PW basis

void get_dvnl1(ModuleBase::ComplexMatrix& vkb,
const int ik,
const int ipol,
Expand Down
113 changes: 113 additions & 0 deletions source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#include "module_base/module_device/device.h"
#include "module_base/timer.h"
#include "module_hamilt_pw/hamilt_pwdft/onsite_projector.h"
#include "module_parameter/parameter.h"
#include "module_hamilt_lcao/module_dftu/dftu.h"
#include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"
#include "stress_func.h"
// calculate the nonlocal pseudopotential stress in PW
template <typename FPTYPE, typename Device>
void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
const ModuleBase::matrix& wg,
const ModulePW::PW_Basis_K* wfc_basis,
const UnitCell& ucell_in,
const psi::Psi<complex<FPTYPE>, Device>* psi_in,
ModuleSymmetry::Symmetry* p_symm)
{
ModuleBase::TITLE("Stress_Func", "stress_onsite");
if(psi_in == nullptr || wfc_basis == nullptr)
{
return;
}
ModuleBase::timer::tick("Stress_Func", "stress_onsite");

FPTYPE* stress_device = nullptr;
resmem_var_op()(this->ctx, stress_device, 9);
setmem_var_op()(this->ctx, stress_device, 0, 9);
std::vector<FPTYPE> sigma_onsite(9, 0.0);

auto* onsite_p = projectors::OnsiteProjector<FPTYPE, Device>::get_instance();

const int nks = wfc_basis->nks;
for (int ik = 0; ik < nks; ik++) // loop k points
{
// skip zero weights to speed up
int nbands_occ = wg.nc;
while (wg(ik, nbands_occ - 1) == 0.0)
{
nbands_occ--;
if (nbands_occ == 0)
{
break;
}
}
const int npm = nbands_occ;

// calculate becp = <psi|beta> for all beta functions
onsite_p->get_fs_tools()->cal_becp(ik, npm);
// calculate dbecp = <psi|d(beta)/dR> for all beta functions
// calculate stress = \sum <psi|d(beta_j)/dR> * <psi|beta_i> * D_{ij}
for (int ipol = 0; ipol < 3; ipol++)
{
for (int jpol = 0; jpol <= ipol; jpol++)
{
FPTYPE* stress_device_tmp = stress_device + (ipol * 3 + jpol);
onsite_p->get_fs_tools()->cal_dbecp_s(ik, npm, ipol, jpol, nullptr);
if(PARAM.inp.dft_plus_u)
{
auto* dftu = ModuleDFTU::DFTU::get_instance();
onsite_p->get_fs_tools()->cal_stress_dftu(ik, npm, stress_device_tmp, dftu->orbital_corr, dftu->get_eff_pot_pw(0));
}
if(PARAM.inp.sc_mag_switch)
{
SpinConstrain<std::complex<double>, Device>& sc = SpinConstrain<std::complex<double>, Device>::getScInstance();
const std::vector<ModuleBase::Vector3<double>>& lambda = sc.get_sc_lambda();
onsite_p->get_fs_tools()->cal_stress_dspin(ik, npm, stress_device_tmp, lambda.data());
}
}
}
}
// transfer stress from device to host
syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, sigma_onsite.data(), stress_device, 9);
delmem_var_op()(this->ctx, stress_device);
// sum up forcenl from all processors
for (int l = 0; l < 3; l++)
{
for (int m = 0; m < 3; m++)
{
if (m > l)
{
sigma_onsite[l * 3 + m] = sigma_onsite[m * 3 + l];
}
Parallel_Reduce::reduce_all(sigma_onsite[l * 3 + m]); // qianrui fix a bug for kpar > 1
}
}
// rescale the stress with 1/omega
for (int ipol = 0; ipol < 3; ipol++)
{
for (int jpol = 0; jpol < 3; jpol++)
{
sigma_onsite[ipol * 3 + jpol] *= 1.0 / ucell_in.omega;
}
}

for (int ipol = 0; ipol < 3; ipol++)
{
for (int jpol = 0; jpol < 3; jpol++)
{
sigma(ipol, jpol) = sigma_onsite[ipol * 3 + jpol];
}
}
// do symmetry
if (ModuleSymmetry::Symmetry::symm_flag == 1)
{
p_symm->symmetrize_mat3(sigma, ucell_in.lat);
} // end symmetry

ModuleBase::timer::tick("Stress_Func", "stress_onsite");
}

template class Stress_Func<double, base_device::DEVICE_CPU>;
#if ((defined __CUDA) || (defined __ROCM))
template class Stress_Func<double, base_device::DEVICE_GPU>;
#endif
16 changes: 15 additions & 1 deletion source/module_hamilt_pw/hamilt_pwdft/stress_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ void Stress_PW<FPTYPE, Device>::cal_stress(ModuleBase::matrix& sigmatot,
// vdw stress
ModuleBase::matrix sigmavdw;
sigmavdw.create(3, 3);
// DFT+U and DeltaSpin stress
ModuleBase::matrix sigmaonsite;
sigmaonsite.create(3, 3);

for (int i = 0; i < 3; i++)
{
Expand All @@ -60,6 +63,7 @@ void Stress_PW<FPTYPE, Device>::cal_stress(ModuleBase::matrix& sigmatot,
sigmaewa(i, j) = 0.0;
sigmaxcc(i, j) = 0.0;
sigmavdw(i, j) = 0.0;
sigmaonsite(i, j) = 0.0;
}
}

Expand Down Expand Up @@ -107,13 +111,19 @@ void Stress_PW<FPTYPE, Device>::cal_stress(ModuleBase::matrix& sigmatot,
// vdw term
stress_vdw(sigmavdw, ucell);

// DFT+U and DeltaSpin stress
if(PARAM.inp.dft_plus_u || PARAM.inp.sc_mag_switch)
{
this->stress_onsite(sigmaonsite, this->pelec->wg, wfc_basis, ucell, psi_in, p_symm);
}

for (int ipol = 0; ipol < 3; ipol++)
{
for (int jpol = 0; jpol < 3; jpol++)
{
sigmatot(ipol, jpol) = sigmakin(ipol, jpol) + sigmahar(ipol, jpol) + sigmanl(ipol, jpol)
+ sigmaxc(ipol, jpol) + sigmaxcc(ipol, jpol) + sigmaewa(ipol, jpol)
+ sigmaloc(ipol, jpol) + sigmavdw(ipol, jpol);
+ sigmaloc(ipol, jpol) + sigmavdw(ipol, jpol) + sigmaonsite(ipol, jpol);
}
}

Expand All @@ -138,6 +148,10 @@ void Stress_PW<FPTYPE, Device>::cal_stress(ModuleBase::matrix& sigmatot,
ModuleIO::print_stress("XC STRESS", sigmaxc, GlobalV::TEST_STRESS, ry);
ModuleIO::print_stress("EWALD STRESS", sigmaewa, GlobalV::TEST_STRESS, ry);
ModuleIO::print_stress("NLCC STRESS", sigmaxcc, GlobalV::TEST_STRESS, ry);
if(PARAM.inp.dft_plus_u || PARAM.inp.sc_mag_switch)
{
ModuleIO::print_stress("ONSITE STRESS", sigmaonsite, GlobalV::TEST_STRESS, ry);
}
ModuleIO::print_stress("TOTAL STRESS", sigmatot, GlobalV::TEST_STRESS, ry);
}
ModuleBase::timer::tick("Stress_PW", "cal_stress");
Expand Down

0 comments on commit c324dfb

Please sign in to comment.