Skip to content

Commit

Permalink
1.1 Release (#228)
Browse files Browse the repository at this point in the history
* Remove unreachable code

* Bump version

* Missing functions

* updated readme

---------

Co-authored-by: Qiming Sun <[email protected]>
Co-authored-by: xiaojie.wu <[email protected]>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent e83eab4 commit 2cdca60
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 16 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,15 @@ Features
- Unrestricted Hartree-Fock and Unrestricted DFT, gradient, and Hessian;
- MP2/DF-MP2 and CCSD (experimental);
- Polarizability, IR, and NMR shielding;
- QM/MM with PBC;
- CHELPG, ESP, and RESP atomic charges;

Limitations
--------
- Rys roots up to 9 for density fitting scheme and direct scf scheme;
- Atomic basis up to g orbitals;
- Auxiliary basis up to i orbitals;
- Density fitting scheme up to ~168 atoms with def2-tzvpd basis, bounded by CPU memory;
- Hessian is not yet available for Direct SCF;
- meta-GGA without density laplacian;
- Double hybrid functionals are not supported;

Expand Down
2 changes: 1 addition & 1 deletion gpu4pyscf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '1.0.2'
__version__ = '1.1.0'

# monkey patch libxc reference due to a bug in nvcc
from pyscf.dft import libxc
Expand Down
2 changes: 2 additions & 0 deletions gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip1.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "vhf.cuh"
#include "rys_roots_unrolled.cu"
#include "create_tasks_ip1.cu"
int rys_ejk_ip1_unrolled_lmax = 2;
int rys_ejk_ip1_unrolled_max_order = 4;


__device__ static
Expand Down
2 changes: 2 additions & 0 deletions gpu4pyscf/lib/gvhf-rys/unrolled_os.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "vhf.cuh"
#include "gamma_inc_unrolled.cu"
#include "create_tasks.cu"
int os_jk_unrolled_lmax = 1;
int os_jk_unrolled_max_order = 0;


__device__ static
Expand Down
3 changes: 3 additions & 0 deletions gpu4pyscf/lib/gvhf-rys/unrolled_rys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#include "vhf.cuh"
#include "rys_roots_unrolled.cu"
#include "create_tasks.cu"
int rys_jk_unrolled_lmax = 3;
int rys_jk_unrolled_max_order = 4;
int rys_jk_unrolled_max_nf = 60;


__device__ static
Expand Down
3 changes: 3 additions & 0 deletions gpu4pyscf/lib/gvhf-rys/unrolled_rys_ip1.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#include "vhf.cuh"
#include "rys_roots_unrolled.cu"
#include "create_tasks_ip1.cu"
int rys_vjk_ip1_unrolled_lmax = 2;
int rys_vjk_ip1_unrolled_max_order = 4;
int rys_vjk_ip1_unrolled_max_nf = 60;


__device__ static
Expand Down
3 changes: 3 additions & 0 deletions gpu4pyscf/lib/gvhf-rys/unrolled_rys_j.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#include "vhf.cuh"
#include "rys_roots_unrolled.cu"
#include "create_tasks.cu"
int rys_j_unrolled_lmax = 4;
int rys_j_unrolled_max_order = 6;
int rys_j_unrolled_max_gout_size = 90;


__device__ static
Expand Down
6 changes: 3 additions & 3 deletions gpu4pyscf/lib/gvhf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

add_library(gvhf SHARED
constant.cu
nr_jk_driver.cu
#nr_jk_driver.cu
nr_jk_driver_int3c2e_ip1.cu
nr_jk_driver_int3c2e_ip2.cu
nr_jk_driver_int3c2e_pass1.cu
nr_jk_driver_int3c2e_pass2.cu
nr_jk_driver_ip1.cu
get_veff_driver_ip1.cu
#nr_jk_driver_ip1.cu
#get_veff_driver_ip1.cu
)

#option(BUILD_SHARED_LIBS "build shared libraries" 1)
Expand Down
23 changes: 12 additions & 11 deletions gpu4pyscf/scf/jk.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,20 @@
'get_jk', 'get_j',
]

libvhf_rys = load_library('libgvhf_rys')
libvhf_rys.RYS_build_jk.restype = ctypes.c_int
libvhf_rys.cuda_version.restype = ctypes.c_int
CUDA_VERSION = libvhf_rys.cuda_version()

PTR_BAS_COORD = 7
LMAX = 4
TILE = 2
QUEUE_DEPTH = 262144
UNROLL_ORDER = 4
UNROLL_LMAX = 3
UNROLL_NFMAX = 60
UNROLL_ORDER = ctypes.c_int.in_dll(libvhf_rys, 'rys_jk_unrolled_max_order').value
UNROLL_LMAX = ctypes.c_int.in_dll(libvhf_rys, 'rys_jk_unrolled_lmax').value
UNROLL_NFMAX = ctypes.c_int.in_dll(libvhf_rys, 'rys_jk_unrolled_max_nf').value
UNROLL_J_LMAX = ctypes.c_int.in_dll(libvhf_rys, 'rys_j_unrolled_lmax').value
UNROLL_J_MAX_ORDER = ctypes.c_int.in_dll(libvhf_rys, 'rys_j_unrolled_max_order').value
GOUT_WIDTH = 42
SHM_SIZE = getattr(__config__, 'GPU_SHM_SIZE',
int(gpu_specs['sharedMemPerBlockOptin']//9)*8)
Expand All @@ -31,11 +38,6 @@
# TODO: test different size for L2 cache efficiency
NAO_IN_GROUP = 1500

libvhf_rys = load_library('libgvhf_rys')
libvhf_rys.RYS_build_jk.restype = ctypes.c_int
libvhf_rys.cuda_version.restype = ctypes.c_int
CUDA_VERSION = libvhf_rys.cuda_version()

def get_jk(mol, dm, hermi=1, vhfopt=None, with_j=True, with_k=True, verbose=None):
'''Compute J, K matrices
'''
Expand Down Expand Up @@ -705,9 +707,8 @@ def _j_engine_quartets_scheme(mol, l_ctr_pattern, shm_size=SHM_SIZE):
nf3_ij = (lij+1)*(lij+2)*(lij+3)//6
nf3_kl = (lkl+1)*(lkl+2)*(lkl+3)//6
nroots = order // 2 + 1
lmax = 4 # not angular momentum of orbital basis. see rys_contract_j kernel
max_order = 6
if order <= max_order and lij <= lmax and lkl <= lmax:
# UNROLL_J_LMAX is different from UNROLL_LMAX of the orbital basis. See the rys_contract_j kernel.
if order <= UNROLL_J_MAX_ORDER and lij <= UNROLL_J_LMAX and lkl <= UNROLL_J_LMAX:
if CUDA_VERSION >= 12040 and order <= 2:
return 512, 1, False
return 256, 1, False
Expand Down

0 comments on commit 2cdca60

Please sign in to comment.