From 616615f334c56be1195c1fb480d90f8fc37bf1bf Mon Sep 17 00:00:00 2001 From: Ryan Stocks Date: Thu, 18 Jul 2024 15:56:46 +0800 Subject: [PATCH] HIP compilation --- src/hip/builtin.hip | 315 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 315 insertions(+) diff --git a/src/hip/builtin.hip b/src/hip/builtin.hip index 0b71482..8071265 100644 --- a/src/hip/builtin.hip +++ b/src/hip/builtin.hip @@ -399,6 +399,209 @@ __global__ GGA_EXC_VXC_INC_GENERATOR( device_eval_exc_vxc_inc_helper_polar_kerne } +template +__global__ MGGA_EXC_GENERATOR( device_eval_exc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if( tid < N ) { + + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + traits::eval_exc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], eps[tid] ); + + } + +} + + +template +__global__ MGGA_EXC_GENERATOR( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if( tid < N ) { + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : nullptr; + auto* tau_i = tau + 2*tid; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], eps[tid] ); + + } + +} + +template +__global__ MGGA_EXC_VXC_GENERATOR( device_eval_exc_vxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if( tid < N ) { + + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + + double dummy; + auto& vlapl_return = traits::needs_laplacian ? vlapl[tid] : dummy; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + eps[tid], vrho[tid], vsigma[tid], vlapl_return, vtau[tid] ); + + } + +} + +template +__global__ MGGA_EXC_VXC_GENERATOR( device_eval_exc_vxc_helper_polar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + double dummy_vlapl[2]; + + if( tid < N ) { + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* vlapl_i = traits::needs_laplacian ? vlapl + 2*tid : dummy_vlapl; + auto* vtau_i = vtau + 2*tid; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], eps[tid], vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], + vsigma_i[2], vlapl_i[0], vlapl_i[1], vtau_i[0], vtau_i[1] ); + + } + +} + + +template +__global__ MGGA_EXC_INC_GENERATOR( device_eval_exc_inc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + double e; + if( tid < N ) { + + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + traits::eval_exc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], e ); + eps[tid] += scal_fact * e; + + + } + +} + +template +__global__ MGGA_EXC_INC_GENERATOR( device_eval_exc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if( tid < N ) { + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], e ); + eps[tid] += scal_fact * e; + + + } + +} + +template +__global__ MGGA_EXC_VXC_INC_GENERATOR( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + double e, vr, vs, vl, vt; + if( tid < N ) { + + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + e, vr, vs, vl, vt ); + eps[tid] += scal_fact * e; + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; + vtau[tid] += scal_fact * vt; + if(traits::needs_laplacian) vlapl[tid] += scal_fact * vl; + + } + +} + +template +__global__ MGGA_EXC_VXC_INC_GENERATOR( device_eval_exc_vxc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + double dummy_vlapl[2]; + if( tid < N ) { + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* vlapl_i = traits::needs_laplacian ? vlapl + 2*tid : dummy_vlapl; + auto* vtau_i = vtau + 2*tid; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + + double e, vra, vrb, vsaa,vsab,vsbb, vla, vlb, vta, vtb; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], e, vra, vrb, vsaa, vsab, vsbb, vla, vlb, vta, vtb ); + + eps[tid] += scal_fact * e; + vrho_i[0] += scal_fact * vra; + vrho_i[1] += scal_fact * vrb; + vsigma_i[0] += scal_fact * vsaa; + vsigma_i[1] += scal_fact * vsab; + vsigma_i[2] += scal_fact * vsbb; + vtau_i[0] += scal_fact * vta; + vtau_i[1] += scal_fact * vtb; + if(traits::needs_laplacian) { + vlapl_i[0] += scal_fact * vla; + vlapl_i[1] += scal_fact * vlb; + } + + } + +} template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { @@ -582,6 +785,99 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { } +template +MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + device_eval_exc_helper_unpolar_kernel<<>>( + N, rho, sigma, lapl, tau, eps + ); + +} + +template +MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + device_eval_exc_helper_polar_kernel<<>>( + N, rho, sigma, lapl, tau, eps + ); + +} + +template +MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + + device_eval_exc_vxc_helper_unpolar_kernel<<>>( + N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau + ); + +} + +template +MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + + device_eval_exc_vxc_helper_polar_kernel<<>>( + N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau + ); + +} + + +template +MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + device_eval_exc_inc_helper_unpolar_kernel<<>>( + scal_fact, N, rho, sigma, lapl, tau, eps + ); + +} + +template +MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + device_eval_exc_inc_helper_polar_kernel<<>>( + scal_fact, N, rho, sigma, lapl, tau, eps + ); + +} + +template +MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + + device_eval_exc_vxc_inc_helper_unpolar_kernel<<>>( + scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau + ); + +} + +template +MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { + + dim3 threads(32); + dim3 blocks( util::div_ceil( N, threads.x) ); + + device_eval_exc_vxc_inc_helper_polar_kernel<<>>( + scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau + ); + +} + #define LDA_GENERATE_DEVICE_HELPERS(KERN) \ template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ); \ @@ -602,6 +898,16 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); +#define MGGA_GENERATE_DEVICE_HELPERS(KERN) \ + template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ + template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ); \ + template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ); \ + template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar );\ + template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ + template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ + template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ + template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); + LDA_GENERATE_DEVICE_HELPERS( BuiltinSlaterExchange ); LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN3 ); LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN_RPA ); @@ -624,6 +930,15 @@ MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCAN_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCAN_C ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCAN_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCAN_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinFT98_X ); + +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPC07_K ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPC07OPT_K ); + +MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCANL_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCANL_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCANL_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCANL_X ); LDA_GENERATE_DEVICE_HELPERS( BuiltinEPC17_1 ) LDA_GENERATE_DEVICE_HELPERS( BuiltinEPC17_2 )