diff --git a/gpu4pyscf/df/hessian/rhf.py b/gpu4pyscf/df/hessian/rhf.py index 8ba53ca5..e1a25ec3 100644 --- a/gpu4pyscf/df/hessian/rhf.py +++ b/gpu4pyscf/df/hessian/rhf.py @@ -42,6 +42,7 @@ from gpu4pyscf.lib import logger from gpu4pyscf import __config__ from gpu4pyscf.df.grad.rhf import _gen_metric_solver +from gpu4pyscf.gto.mole import sort_atoms LINEAR_DEP_THR = df.LINEAR_DEP_THR BLKSIZE = 128 @@ -430,7 +431,10 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): mol = hessobj.mol - h1ao = [None] * mol.natm + natm = mol.natm + nocc = int(cupy.count_nonzero(mo_occ > 0)) + nmo = len(mo_occ) + h1ao = cupy.empty((natm, 3, nmo, nocc)) for ia, h1, vj1, vk1 in _gen_jk(hessobj, mo_coeff, mo_occ, chkfile, atmlst, verbose, True): h1 += vj1 - vk1 * .5 diff --git a/gpu4pyscf/df/hessian/uhf.py b/gpu4pyscf/df/hessian/uhf.py index 999f5b8e..5d93c708 100644 --- a/gpu4pyscf/df/hessian/uhf.py +++ b/gpu4pyscf/df/hessian/uhf.py @@ -46,6 +46,7 @@ from gpu4pyscf.lib import logger from gpu4pyscf import __config__ from gpu4pyscf.df.grad.rhf import _gen_metric_solver +from gpu4pyscf.gto.mole import sort_atoms LINEAR_DEP_THR = df.LINEAR_DEP_THR BLKSIZE = 256 @@ -452,11 +453,14 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): mol = hessobj.mol + natm = mol.natm if atmlst is None: - atmlst = range(mol.natm) + atmlst = range(natm) - h1aoa = [None] * mol.natm - h1aob = [None] * mol.natm + nocca, noccb = hessobj.base.nelec + nmo = len(mo_occ[0]) + h1aoa = cupy.empty((natm, 3, nmo, nocca)) + h1aob = cupy.empty((natm, 3, nmo, noccb)) for ia, h1, vj1, vk1 in _gen_jk(hessobj, mo_coeff, mo_occ, chkfile, atmlst, verbose, True): h1a, h1b = h1 diff --git a/gpu4pyscf/df/hessian/uks.py b/gpu4pyscf/df/hessian/uks.py index 3a4dbd52..5133fc18 100644 --- a/gpu4pyscf/df/hessian/uks.py +++ 
b/gpu4pyscf/df/hessian/uks.py @@ -133,8 +133,3 @@ class Hessian(uks_hess.Hessian): hess_elec = uhf_hess.hess_elec kernel = rhf_hess.kernel hess = kernel - - def solve_mo1(self, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, - fx=None, atmlst=None, max_memory=4000, verbose=None): - return uhf_hess.solve_mo1(self.base, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, - fx, atmlst, max_memory, verbose) diff --git a/gpu4pyscf/dft/numint.py b/gpu4pyscf/dft/numint.py index 1b8e3216..63c3fdc4 100644 --- a/gpu4pyscf/dft/numint.py +++ b/gpu4pyscf/dft/numint.py @@ -1528,7 +1528,6 @@ def _sparse_index(mol, coords, l_ctr_offsets): ctr_offsets_slice = cumsum[glob_ctr_offsets-1] ctr_offsets_slice[0] = 0 - from pyscf import gto gto_type = 'cart' if mol.cart else 'sph' non0shl_idx = non0shl_idx == 1 ao_loc_slice = gto.moleintor.make_loc(mol._bas[non0shl_idx,:], gto_type) diff --git a/gpu4pyscf/grad/rhf.py b/gpu4pyscf/grad/rhf.py index bcf2e594..bd1a4361 100644 --- a/gpu4pyscf/grad/rhf.py +++ b/gpu4pyscf/grad/rhf.py @@ -40,10 +40,11 @@ 'Grad' ] -def _jk_energy_per_atom(mol, dm, vhfopt=None, with_j=True, with_k=True, verbose=None): - ''' Computes the first-order derivatives of the energy contributions from J and K per atom. 
+def _jk_energy_per_atom(mol, dm, vhfopt=None, + j_factor=1., k_factor=1., verbose=None): + ''' Computes the first-order derivatives of the energy per atom for + j_factor * J_derivatives - k_factor * K_derivatives ''' - assert mol.omega >= 0 log = logger.new_logger(mol, verbose) cput0 = t1 = log.init_timer() if vhfopt is None: @@ -60,17 +61,7 @@ def _jk_energy_per_atom(mol, dm, vhfopt=None, with_j=True, with_k=True, verbose= dms = cp.asarray(dms, order='C') assert n_dm <= 2 - vj = vk = None - vj_ptr = vk_ptr = lib.c_null_ptr() - - assert with_j or with_k - if with_k: - vk = cp.zeros((mol.natm, 3)) - vk_ptr = ctypes.cast(vk.data.ptr, ctypes.c_void_p) - if with_j: - vj = cp.zeros((mol.natm, 3)) - vj_ptr = ctypes.cast(vj.data.ptr, ctypes.c_void_p) - + ejk = cp.zeros((mol.natm, 3)) init_constant(mol) ao_loc = mol.ao_loc dm_cond = cp.log(condense('absmax', dms, ao_loc) + 1e-300).astype(np.float32) @@ -107,7 +98,9 @@ def _jk_energy_per_atom(mol, dm, vhfopt=None, with_j=True, with_k=True, verbose= tile_kl_mapping = tile_mappings[k,l] scheme = _ejk_quartets_scheme(mol, uniq_l_ctr[[i, j, k, l]]) err = kern( - vj_ptr, vk_ptr, ctypes.cast(dms.data.ptr, ctypes.c_void_p), + ctypes.cast(ejk.data.ptr, ctypes.c_void_p), + ctypes.c_double(j_factor), ctypes.c_double(k_factor), + ctypes.cast(dms.data.ptr, ctypes.c_void_p), ctypes.c_int(n_dm), ctypes.c_int(nao), vhfopt.rys_envs, (ctypes.c_int*2)(*scheme), (ctypes.c_int*8)(*ij_shls, *kl_shls), @@ -137,11 +130,8 @@ def _jk_energy_per_atom(mol, dm, vhfopt=None, with_j=True, with_k=True, verbose= log.debug1('kernel launches %d', kern_counts) for llll, t in timing_collection.items(): log.debug1('%s wall time %.2f', llll, t) - - if with_j: - vj *= 2. 
log.timer_debug1('grad jk energy', *cput0) - return vj, vk + return ejk def _ejk_quartets_scheme(mol, l_ctr_pattern, shm_size=SHM_SIZE): ls = l_ctr_pattern[:,0] @@ -151,8 +141,9 @@ def _ejk_quartets_scheme(mol, l_ctr_pattern, shm_size=SHM_SIZE): nps = l_ctr_pattern[:,1] ij_prims = nps[0] * nps[1] nroots = (order + 1) // 2 + 1 - unit = nroots*2 + g_size*3 + ij_prims*4 + if mol.omega < 0: # SR + unit += nroots * 2 counts = shm_size // (unit*8) n = min(THREADS, _nearest_power2(counts)) gout_stride = THREADS // n @@ -367,8 +358,7 @@ def get_veff(self, mol=None, dm=None, verbose=None): if mol is None: mol = self.mol if dm is None: dm = self.base.make_rdm1() vhfopt = self.base._opt_gpu.get(None, None) - ej, ek = _jk_energy_per_atom(mol, dm, vhfopt, verbose=verbose) - return ej - ek * .5 + return _jk_energy_per_atom(mol, dm, vhfopt, verbose=verbose) Grad = Gradients diff --git a/gpu4pyscf/grad/rks.py b/gpu4pyscf/grad/rks.py index ce377873..e92e7352 100644 --- a/gpu4pyscf/grad/rks.py +++ b/gpu4pyscf/grad/rks.py @@ -103,22 +103,37 @@ def get_veff(ks_grad, mol=None, dm=None, verbose=None): exc1_per_atom = [exc1_per_atom[:,p0:p1].sum(axis=1) for p0, p1 in aoslices[:,2:]] exc1_per_atom = cupy.asarray(exc1_per_atom) + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin) + with_k = ni.libxc.is_hybrid_xc(mf.xc) vhfopt = mf._opt_gpu.get(None, None) - if not ni.libxc.is_hybrid_xc(mf.xc): - ej = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, with_k=False, - verbose=verbose)[0] - exc1_per_atom += ej - else: - omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin) - ej, ek = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, verbose=verbose) - ek *= hyb - if abs(omega) > 1e-10: # For range separated Coulomb operator - vhfopt = mf._opt_gpu.get(omega, None) - with mol.with_range_coulomb(omega): - ek_lr = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, with_j=False, - verbose=verbose)[1] - ek += ek_lr * (alpha - hyb) - exc1_per_atom += ej - ek * .5 + j_factor = 1. 
+ k_factor = 0. + if with_k: + if omega == 0: + k_factor = hyb + elif alpha == 0: # LR=0, only SR exchange + pass + elif hyb == 0: # SR=0, only LR exchange + k_factor = alpha + else: # SR and LR exchange with different ratios + k_factor = alpha + ejk = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, j_factor, k_factor, + verbose=verbose) + exc1_per_atom += ejk + if with_k and omega != 0: + j_factor = 0. + omega = -omega # Prefer computing the SR part + if alpha == 0: # LR=0, only SR exchange + k_factor = hyb + elif hyb == 0: # SR=0, only LR exchange + # full range exchange was computed in the previous step + k_factor = -alpha + else: # SR and LR exchange with different ratios + k_factor = hyb - alpha # =beta + vhfopt = mf._opt_gpu.get(omega, None) + with mol.with_range_coulomb(omega): + exc1_per_atom += rhf_grad._jk_energy_per_atom( + mol, dm, vhfopt, j_factor, k_factor, verbose=verbose) return tag_array(exc1_per_atom, exc1_grid=exc) def _get_vxc_task(ni, mol, grids, xc_code, dms, mo_coeff, mo_occ, diff --git a/gpu4pyscf/grad/uhf.py b/gpu4pyscf/grad/uhf.py index 5010b2fa..3c1a493d 100644 --- a/gpu4pyscf/grad/uhf.py +++ b/gpu4pyscf/grad/uhf.py @@ -104,7 +104,7 @@ def grad_elec(mf_grad, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None): class Gradients(rhf_grad.GradientsBase): - + to_cpu = utils.to_cpu to_gpu = utils.to_gpu device = utils.device @@ -120,8 +120,8 @@ def get_veff(self, mol, dm, verbose=None): In the CPU version, get_veff returns the first order derivatives of Veff matrix. 
''' vhfopt = self.base._opt_gpu.get(None, None) - ej, ek = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, verbose=verbose) - return ej - ek + ejk = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, verbose=verbose) + return ejk def make_rdm1e(self, mo_energy=None, mo_coeff=None, mo_occ=None): if mo_energy is None: mo_energy = self.base.mo_energy diff --git a/gpu4pyscf/grad/uks.py b/gpu4pyscf/grad/uks.py index f8d28492..bd3311b8 100644 --- a/gpu4pyscf/grad/uks.py +++ b/gpu4pyscf/grad/uks.py @@ -104,23 +104,37 @@ def get_veff(ks_grad, mol=None, dm=None, verbose=None): exc1_per_atom = [exc1_per_atom[:,p0:p1].sum(axis=1) for p0, p1 in aoslices[:,2:]] exc1_per_atom = cupy.asarray(exc1_per_atom) + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin) + with_k = ni.libxc.is_hybrid_xc(mf.xc) vhfopt = mf._opt_gpu.get(None, None) - if not ni.libxc.is_hybrid_xc(mf.xc): - ej = rhf_grad._jk_energy_per_atom( - mol, dm[0]+dm[1], vhfopt, with_k=False, verbose=verbose)[0] - exc1_per_atom += ej - else: - omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin) - ej, ek = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, verbose=verbose) - ek *= hyb - if omega != 0: - vhfopt = mf._opt_gpu.get(omega, None) - with mol.with_range_coulomb(omega): - ek_lr = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, with_j=False, - verbose=verbose)[1] - ek += ek_lr * (alpha - hyb) - - exc1_per_atom += ej - ek + j_factor = 1. + k_factor = 0. + if with_k: + if omega == 0: + k_factor = hyb + elif alpha == 0: # LR=0, only SR exchange + pass + elif hyb == 0: # SR=0, only LR exchange + k_factor = alpha + else: # SR and LR exchange with different ratios + k_factor = alpha + ejk = rhf_grad._jk_energy_per_atom(mol, dm, vhfopt, j_factor, k_factor, + verbose=verbose) + exc1_per_atom += ejk + if with_k and omega != 0: + j_factor = 0. 
+ omega = -omega # Prefer computing the SR part + if alpha == 0: # LR=0, only SR exchange + k_factor = hyb + elif hyb == 0: # SR=0, only LR exchange + # full range exchange was computed in the previous step + k_factor = -alpha + else: # SR and LR exchange with different ratios + k_factor = hyb - alpha # =beta + vhfopt = mf._opt_gpu.get(omega, None) + with mol.with_range_coulomb(omega): + exc1_per_atom += rhf_grad._jk_energy_per_atom( + mol, dm, vhfopt, j_factor, k_factor, verbose=verbose) return tag_array(exc1_per_atom, exc1_grid=exc) diff --git a/gpu4pyscf/hessian/rhf.py b/gpu4pyscf/hessian/rhf.py index 3a65d839..d4fffb04 100644 --- a/gpu4pyscf/hessian/rhf.py +++ b/gpu4pyscf/hessian/rhf.py @@ -29,10 +29,10 @@ from pyscf.gto import ATOM_OF # import _response_functions to load gen_response methods in SCF class from gpu4pyscf.scf import _response_functions # noqa -from gpu4pyscf.gto.mole import sort_atoms from gpu4pyscf.scf import cphf from gpu4pyscf.lib.cupy_helper import ( - contract, tag_array, sandwich_dot, transpose_sum, get_avail_mem, condense) + contract, tag_array, sandwich_dot, transpose_sum, get_avail_mem, condense, + krylov) from gpu4pyscf.__config__ import props as gpu_specs from gpu4pyscf.lib import logger from gpu4pyscf.scf.jk import ( @@ -40,7 +40,8 @@ _make_tril_tile_mappings, _nearest_power2) from gpu4pyscf.grad import rhf as rhf_grad -libvhf_rys.RYS_per_atom_jk_ip2.restype = ctypes.c_int +libvhf_rys.RYS_per_atom_jk_ip2_type12.restype = ctypes.c_int +libvhf_rys.RYS_per_atom_jk_ip2_type3.restype = ctypes.c_int libvhf_rys.RYS_build_jk_ip1.restype = ctypes.c_int GB = 1024*1024*1024 @@ -52,14 +53,16 @@ def hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, ''' Different from PySF, using h1mo instead of h1ao for saving memory ''' log = logger.new_logger(hessobj, verbose) - time0 = t1 = (logger.process_clock(), logger.perf_counter()) - + time0 = t1 = log.init_timer() mol = hessobj.mol mf = hessobj.base if mo_energy is None: mo_energy = 
mf.mo_energy if mo_occ is None: mo_occ = mf.mo_occ if mo_coeff is None: mo_coeff = mf.mo_coeff + if atmlst is not None: + assert len(atmlst) == mol.natm + assert mo_coeff.dtype == cp.float64 mo_energy = cupy.asarray(mo_energy) mo_occ = cupy.asarray(mo_occ) mo_coeff = cupy.asarray(mo_coeff) @@ -68,54 +71,54 @@ def hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, t1 = log.timer_debug1('hess elec', *t1) if h1mo is None: h1mo = hessobj.make_h1(mo_coeff, mo_occ, None, atmlst, log) + if h1mo.size * 8 * 5 > get_avail_mem(): + # Reduce GPU memory footprint + h1mo = h1mo.get() t1 = log.timer_debug1('making H1', *t1) if mo1 is None or mo_e1 is None: mo1, mo_e1 = hessobj.solve_mo1(mo_energy, mo_coeff, mo_occ, h1mo, None, atmlst, max_memory, log) t1 = log.timer_debug1('solving MO1', *t1) - + + mo1 = cupy.asarray(mo1) + # *2 for double occupancy, *2 for +c.c. + de2 += contract('kxpi,lypi->klxy', cupy.asarray(h1mo), mo1) * 4 + mo1 = contract('kxai,pa->kxpi', mo1, mo_coeff) + mo_e1 = cupy.asarray(mo_e1) + nao = mo_coeff.shape[0] - mocc = cupy.array(mo_coeff[:,mo_occ>0]) - mo_energy = cupy.array(mo_energy) + mocc = mo_coeff[:,mo_occ>0] + mocc_e = mocc * mo_energy[mo_occ>0] s1a = -mol.intor('int1e_ipovlp', comp=3) s1a = cupy.asarray(s1a) + aoslices = mol.aoslice_by_atom() - if atmlst is None: - atmlst = range(mol.natm) - for i0, ia in enumerate(atmlst): - shl0, shl1, p0, p1 = aoslices[ia] + for i0, (p0, p1) in enumerate(aoslices[:,2:]): s1ao = cupy.zeros((3,nao,nao)) s1ao[:,p0:p1] += s1a[:,p0:p1] s1ao[:,:,p0:p1] += s1a[:,p0:p1].transpose(0,2,1) tmp = contract('xpq,pi->xiq', s1ao, mocc) s1oo = contract('xiq,qj->xij', tmp, mocc) + de2[i0] -= contract('xij,kyij->kxy', s1oo, mo_e1) * 2 - s1mo = contract('xij,ip->xpj', s1ao, mo_coeff) - - for j0 in range(i0+1): - ja = atmlst[j0] - q0, q1 = aoslices[ja][2:] -# *2 for double occupancy, *2 for +c.c. 
- de2[i0,j0] += contract('xpi,ypi->xy', h1mo[ia], mo1[ja]) * 4 - dm1 = contract('ypi,qi->ypq', mo1[ja], mocc*mo_energy[mo_occ>0]) - de2[i0,j0] -= contract('xpq,ypq->xy', s1mo, dm1) * 4 - de2[i0,j0] -= contract('xpq,ypq->xy', s1oo, mo_e1[ja]) * 2 - for j0 in range(i0): - de2[j0,i0] = de2[i0,j0].T + s1mo = contract('xpq,qi->xpi', s1ao, mocc_e) + de2[i0] -= contract('xpi,kypi->kxy', s1mo, mo1) * 4 + de2 = de2 + de2.transpose(1,0,3,2) + de2 *= .5 log.timer('RHF hessian', *time0) - return de2 def partial_hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None, max_memory=4000, verbose=None): - e1, ej, ek = _partial_hess_ejk(hessobj, mo_energy, mo_coeff, mo_occ, - atmlst, max_memory, verbose, True) - return e1 + ej - ek + e1, ejk = _partial_hess_ejk(hessobj, mo_energy, mo_coeff, mo_occ, + atmlst, max_memory, verbose) + return e1 + ejk def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, - atmlst=None, max_memory=4000, verbose=None, with_k=True): + atmlst=None, max_memory=4000, verbose=None, + j_factor=1., k_factor=1.): log = logger.new_logger(hessobj, verbose) time0 = t1 = (logger.process_clock(), logger.perf_counter()) mol = hessobj.mol @@ -129,7 +132,7 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, mocc = mo_coeff[:,mo_occ>0] dm0 = mocc.dot(mocc.T) * 2 vhfopt = mf._opt_gpu.get(None, None) - ej, ek = _partial_ejk_ip2(mol, dm0, vhfopt, with_k, verbose=log) + ejk = _partial_ejk_ip2(mol, dm0, vhfopt, j_factor, k_factor, verbose=log) t1 = log.timer_debug1('hessian of 2e part', *t1) # Energy weighted density matrix @@ -153,10 +156,12 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, e1[j0,i0] = e1[i0,j0].T log.timer('RHF partial hessian', *time0) - return e1, ej, ek + return e1, ejk -def _partial_ejk_ip2(mol, dm, vhfopt=None, with_k=True, verbose=None): - assert mol.omega >= 0 +def _partial_ejk_ip2(mol, dm, vhfopt=None, j_factor=1, k_factor=1., verbose=None): + '''Compute the 
energy per atom for + j_factor * J_derivatives - k_factor * K_derivatives + ''' log = logger.new_logger(mol, verbose) cput0 = t1 = log.init_timer() if vhfopt is None: @@ -174,13 +179,7 @@ def _partial_ejk_ip2(mol, dm, vhfopt=None, with_k=True, verbose=None): assert n_dm <= 2 natm = mol.natm - ej = cp.zeros((natm, natm, 3, 3)) - ek = cp.zeros((natm, natm, 3, 3)) - vj_ptr = ctypes.cast(ej.data.ptr, ctypes.c_void_p) - if with_k: - vk_ptr = ctypes.cast(ek.data.ptr, ctypes.c_void_p) - else: - vk_ptr = lib.c_null_ptr() + ejk = cp.zeros((natm, natm, 3, 3)) init_constant(mol) ao_loc = mol.ao_loc @@ -203,7 +202,8 @@ def _partial_ejk_ip2(mol, dm, vhfopt=None, with_k=True, verbose=None): timing_collection = {} kern_counts = 0 - kern = libvhf_rys.RYS_per_atom_jk_ip2 + kern1 = libvhf_rys.RYS_per_atom_jk_ip2_type12 + kern2 = libvhf_rys.RYS_per_atom_jk_ip2_type3 for i in range(n_groups): for j in range(i+1): @@ -217,8 +217,10 @@ def _partial_ejk_ip2(mol, dm, vhfopt=None, with_k=True, verbose=None): l_ctr_bas_loc[l], l_ctr_bas_loc[l+1]) tile_kl_mapping = tile_mappings[k,l] scheme = _ip2_quartets_scheme(mol, uniq_l_ctr[[i, j, k, l]]) - err = kern( - vj_ptr, vk_ptr, ctypes.cast(dms.data.ptr, ctypes.c_void_p), + err1 = kern1( + ctypes.cast(ejk.data.ptr, ctypes.c_void_p), + ctypes.c_double(j_factor), ctypes.c_double(k_factor), + ctypes.cast(dms.data.ptr, ctypes.c_void_p), ctypes.c_int(n_dm), ctypes.c_int(nao), vhfopt.rys_envs, (ctypes.c_int*2)(*scheme), (ctypes.c_int*8)(*ij_shls, *kl_shls), @@ -235,7 +237,27 @@ def _partial_ejk_ip2(mol, dm, vhfopt=None, with_k=True, verbose=None): ctypes.c_int(workers), mol._atm.ctypes, ctypes.c_int(mol.natm), mol._bas.ctypes, ctypes.c_int(mol.nbas), mol._env.ctypes) - if err != 0: + err2 = kern2( + ctypes.cast(ejk.data.ptr, ctypes.c_void_p), + ctypes.c_double(j_factor), ctypes.c_double(k_factor), + ctypes.cast(dms.data.ptr, ctypes.c_void_p), + ctypes.c_int(n_dm), ctypes.c_int(nao), + vhfopt.rys_envs, (ctypes.c_int*2)(*scheme), + 
(ctypes.c_int*8)(*ij_shls, *kl_shls), + ctypes.c_int(tile_ij_mapping.size), + ctypes.c_int(tile_kl_mapping.size), + ctypes.cast(tile_ij_mapping.data.ptr, ctypes.c_void_p), + ctypes.cast(tile_kl_mapping.data.ptr, ctypes.c_void_p), + ctypes.cast(vhfopt.tile_q_cond.data.ptr, ctypes.c_void_p), + ctypes.cast(vhfopt.q_cond.data.ptr, ctypes.c_void_p), + ctypes.cast(dm_cond.data.ptr, ctypes.c_void_p), + ctypes.c_float(log_cutoff), + ctypes.cast(pool.data.ptr, ctypes.c_void_p), + ctypes.cast(info.data.ptr, ctypes.c_void_p), + ctypes.c_int(workers), + mol._atm.ctypes, ctypes.c_int(mol.natm), + mol._bas.ctypes, ctypes.c_int(mol.nbas), mol._env.ctypes) + if err1 != 0 or err2 != 0: raise RuntimeError(f'RYS_per_atom_jk_ip2 kernel for {llll} failed') if log.verbose >= logger.DEBUG1: t1, t1p = log.timer_debug1(f'processing {llll}, tasks = {info[1]}', *t1), t1 @@ -248,20 +270,9 @@ def _partial_ejk_ip2(mol, dm, vhfopt=None, with_k=True, verbose=None): log.debug1('kernel launches %d', kern_counts) for llll, t in timing_collection.items(): log.debug1('%s wall time %.2f', llll, t) - - # *8 for the symmetry (i,j) = (j,i), (k,l) = (l,k) and (ij,kl) = (kl,ij) - # The additional factor 1/2 is from the two-electron Coulomb operator - ej *= 4 - if n_dm == 2: - # corresponding to the symmetry (i,j) = (j,i) and (k,l) = (l,k) for UHF - # density matrices. Including the additional factor 1/2 from operator, - # ek * 2 is required. For RHF, dm=2*dm_a, a factor of 4 has been - # included, which is cancelled by the contribution from dm_b (a - # factor of 2), the symmetry between i,j and k,l (a factor of 4), and - # the Coulomb operator (1/2). ek does not need to be scaled in RHF. 
- ek *= 2 + ejk = ejk + ejk.transpose(1,0,3,2) log.timer_debug1('ejk_ip2', *cput0) - return ej, ek + return ejk def _ip2_quartets_scheme(mol, l_ctr_pattern, shm_size=SHM_SIZE): ls = l_ctr_pattern[:,0] @@ -271,16 +282,20 @@ def _ip2_quartets_scheme(mol, l_ctr_pattern, shm_size=SHM_SIZE): nps = l_ctr_pattern[:,1] ij_prims = nps[0] * nps[1] nroots = (order + 2) // 2 + 1 - unit = nroots*2 + g_size*3 + ij_prims*4 + if mol.omega < 0: # SR + unit += nroots * 2 counts = shm_size // (unit*8) - n = THREADS // 16 - while n >= counts: - n >>= 1 + n = min(THREADS, _nearest_power2(counts)) gout_stride = THREADS // n return n, gout_stride def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): + '''Compute the first order Fock matrix (the force term of the CPHF + equation). Returns of this function are different to the PySCF CPU version. + This function returns matrices in the MO-occupied_orb basis, while the CPU + version returns matrices in MO basis. + ''' assert atmlst is None mol = hessobj.mol natm = mol.natm @@ -488,7 +503,13 @@ def get_ovlp(mol): def solve_mo1(mf, mo_energy, mo_coeff, mo_occ, h1mo, fx=None, atmlst=None, max_memory=4000, verbose=None, max_cycle=50, level_shift=0): - '''Solve the first order equation + '''Solve the CPHF equation for the first orbitals. + Note: These orbitals are represented in MO basis. This is different to the + solve_mo1 function in the PySCF CPU version, which transforms the mo1 to AO + basis. Additionally, the return format is also different to the CPU version. + This function returns orbitals in a single tensor while the CPU version + returns a list of mo1 orbitals. + Kwargs: fx : function(dm_mo) => v1_mo A function to generate the induced potential. 
@@ -496,62 +517,84 @@ def solve_mo1(mf, mo_energy, mo_coeff, mo_occ, h1mo, ''' mol = mf.mol log = logger.new_logger(mf, verbose) - nao = mo_coeff.shape[0] - mocc = mo_coeff[:,mo_occ>0] - nocc = mocc.shape[1] + t0 = log.init_timer() + + occidx = mo_occ > 0 + viridx = mo_occ == 0 + e_a = mo_energy[viridx] + e_i = mo_energy[occidx] + e_ai = 1 / (e_a[:,None] + level_shift - e_i) + nvir, nocc = e_ai.shape + + mocc = mo_coeff[:,occidx] + nao, nmo = mo_coeff.shape + natm = mol.natm if fx is None: fx = gen_vind(mf, mo_coeff, mo_occ) - s1a = -mol.intor('int1e_ipovlp', comp=3) - s1a = cupy.asarray(s1a) - def _ao2mo(mat): - tmp = contract('xij,jo->xio', mat, mocc) - return contract('xik,ip->xpk', tmp, mo_coeff) - cupy.get_default_memory_pool().free_all_blocks() + def fvind_vo(mo1): + mo1 = mo1.reshape(-1,nmo, nocc) + v = fx(mo1).reshape(-1,nmo, nocc) + if level_shift != 0: + v -= mo1 * level_shift + v[:,viridx,:] *= e_ai + v[:,occidx,:] = 0 + return v.reshape(-1,nmo*nocc) + + ipovlp = -mol.intor('int1e_ipovlp', comp=3) + ipovlp = cp.asarray(ipovlp) + cp.get_default_memory_pool().free_all_blocks() avail_mem = get_avail_mem() - blksize = int(avail_mem*0.4) // (8*3*nao*nao*4) // ALIGNED * ALIGNED - blksize = min(32, blksize) + # *4 for input dm, vj, vk, and vxc + blksize = int(min(avail_mem*.3 / (8*3*nao*nao*4), + avail_mem*.6 / (8*nmo*nocc*natm*3*5))) + if blksize < ALIGNED**2: + raise RuntimeError('GPU memory insufficient') + + blksize = (blksize // ALIGNED**2) * ALIGNED**2 log.debug(f'GPU memory {avail_mem/GB:.1f} GB available') log.debug(f'{blksize} atoms in each block CPHF equation') - # sort atoms to improve the convergence - sorted_idx = sort_atoms(mol) - atom_groups = [] - for p0,p1 in lib.prange(0,mol.natm,blksize): - blk = sorted_idx[p0:p1] - atom_groups.append(blk) - - mo1s = [None] * mol.natm - e1s = [None] * mol.natm + mo1s = np.zeros(h1mo.shape) + e1s = np.zeros((natm, 3, nocc, nocc)) aoslices = mol.aoslice_by_atom() - - for group in atom_groups: - s1vo = [] - h1vo 
= [] - for ia in group: - shl0, shl1, p0, p1 = aoslices[ia] - s1ao = cupy.zeros((3,nao,nao)) - s1ao[:,p0:p1] += s1a[:,p0:p1] - s1ao[:,:,p0:p1] += s1a[:,p0:p1].transpose(0,2,1) - s1vo.append(_ao2mo(s1ao)) - h1vo.append(h1mo[ia]) - - log.info(f'Solving CPHF equation for atoms {len(group)}/{mol.natm}') - h1vo = cupy.vstack(h1vo) - s1vo = cupy.vstack(s1vo) - tol = mf.conv_tol_cpscf - mo1, e1 = cphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, - level_shift=level_shift, tol=tol, verbose=verbose) - - mo1 = mo1.reshape(-1,3,nao,nocc) - e1 = e1.reshape(-1,3,nocc,nocc) - - for k, ia in enumerate(group): - mo1s[ia] = mo1[k] - e1s[ia] = e1[k].reshape(3,nocc,nocc) - mo1 = e1 = None + for i0, i1 in lib.prange(0, natm, blksize): + log.info('Solving CPHF equation for atoms [%d:%d]', i0, i1) + + h1mo_blk = h1mo[i0:i1] + if not isinstance(h1mo, cp.ndarray): + h1mo_blk = cp.asarray(h1mo_blk) + s1mo_blk = cp.empty_like(h1mo_blk) + for k, (p0, p1) in enumerate(aoslices[i0:i1,2:]): + s1ao = cp.zeros((3,nao,nao)) + s1ao[:,p0:p1] += ipovlp[:,p0:p1] + s1ao[:,:,p0:p1] += ipovlp[:,p0:p1].transpose(0,2,1) + tmp = contract('xij,jo->xio', s1ao, mocc) + s1mo_blk[k] = contract('xio,ip->xpo', tmp, mo_coeff) + + mo1 = hs = h1mo_blk - s1mo_blk * e_i + mo_e1 = hs[:,:,occidx] + mo1[:,:,viridx] *= -e_ai + mo1[:,:,occidx] = -s1mo_blk[:,:,occidx] * .5 + hs = s1mo_blk = h1mo_blk = None + + tol = mf.conv_tol_cpscf * (i1 - i0) + raw_mo1 = krylov(fvind_vo, mo1.reshape(-1,nmo*nocc), + tol=tol, max_cycle=max_cycle, verbose=log) + raw_mo1 = raw_mo1.reshape(i1-i0,3,nmo,nocc) + raw_mo1[:,:,occidx] = mo1[:,:,occidx] + + v1 = fx(raw_mo1).reshape(i1-i0,3,nmo,nocc) + mo1[:,:,viridx] -= v1[:,:,viridx] * e_ai + mo_e1 += v1[:,:,occidx] + mo_e1 += mo1[:,:,occidx] * (e_i[:,None] - e_i) + + mo1s[i0:i1] = mo1.get() + e1s[i0:i1] = mo_e1.get() + mo1 = raw_mo1 = mo_e1 = v1 = None + log.timer('CPHF solver', *t0) return mo1s, e1s def gen_vind(mf, mo_coeff, mo_occ): @@ -563,6 +606,8 @@ def gen_vind(mf, mo_coeff, mo_occ): nocc = 
mocc.shape[1] mocc_2 = mocc * 2 grids = getattr(mf, 'cphf_grids', None) + if grids is not None: + logger.info(mf, 'Secondary grids defined for CPHF in Hessian') vresp = mf.gen_response(mo_coeff, mo_occ, hermi=1, grids=grids) def fx(mo1): diff --git a/gpu4pyscf/hessian/rks.py b/gpu4pyscf/hessian/rks.py index 42ccae8b..f34924d4 100644 --- a/gpu4pyscf/hessian/rks.py +++ b/gpu4pyscf/hessian/rks.py @@ -50,19 +50,38 @@ def partial_hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, if mf.do_nlc(): raise NotImplementedError + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin) with_k = ni.libxc.is_hybrid_xc(mf.xc) - de2, ej, ek = rhf_hess._partial_hess_ejk(hessobj, mo_energy, mo_coeff, mo_occ, - atmlst, max_memory, verbose, - with_k=with_k) - de2 += ej # (A,B,dR_A,dR_B) + j_factor = 1. + k_factor = 0. if with_k: - de2 -= hyb * ek - if abs(omega) > 1e-10 and abs(alpha-hyb) > 1e-10: + if omega == 0: + k_factor = hyb + elif alpha == 0: # LR=0, only SR exchange + pass + elif hyb == 0: # SR=0, only LR exchange + k_factor = alpha + else: # SR and LR exchange with different ratios + k_factor = alpha + de2, ejk = rhf_hess._partial_hess_ejk(hessobj, mo_energy, mo_coeff, mo_occ, + atmlst, max_memory, verbose, + j_factor, k_factor) + de2 += ejk # (A,B,dR_A,dR_B) + if with_k and omega != 0: + j_factor = 0. 
+ omega = -omega # Prefer computing the SR part + if alpha == 0: # LR=0, only SR exchange + k_factor = hyb + elif hyb == 0: # SR=0, only LR exchange + # full range exchange was computed in the previous step + k_factor = -alpha + else: # SR and LR exchange with different ratios + k_factor = hyb - alpha # =beta vhfopt = mf._opt_gpu.get(omega, None) with mol.with_range_coulomb(omega): - ek_lr = rhf_hess._partial_ejk_ip2(mol, dm0, vhfopt, verbose=verbose)[1] - de2 -= (alpha-hyb) * ek_lr + de2 += rhf_hess._partial_ejk_ip2( + mol, dm0, vhfopt, j_factor, k_factor, verbose=verbose) mem_now = lib.current_memory()[0] max_memory = max(2000, mf.max_memory*.9-mem_now) diff --git a/gpu4pyscf/hessian/tests/test_rhf_hessian.py b/gpu4pyscf/hessian/tests/test_rhf_hessian.py index 66292ba4..82c6606c 100644 --- a/gpu4pyscf/hessian/tests/test_rhf_hessian.py +++ b/gpu4pyscf/hessian/tests/test_rhf_hessian.py @@ -23,7 +23,7 @@ def setUpModule(): global mol mol = gto.Mole() - mol.verbose = 1 + mol.verbose = 5 mol.output = '/dev/null' mol.atom.extend([ ["O" , (0. , 0. 
, 0.)], @@ -44,7 +44,7 @@ def test_hessian_rhf(self): hobj = mf.Hessian() ref = hobj.kernel() e2_gpu = hobj.to_gpu().kernel() - assert abs(ref - e2_gpu).max() < 1e-8 + assert abs(ref - e2_gpu).max() < 1e-6 def test_partial_hess_elec(self): mf = scf.RHF(mol) @@ -52,15 +52,15 @@ def test_partial_hess_elec(self): mf.kernel() hobj = mf.Hessian() e1_cpu, ej_cpu, ek_cpu = rhf_cpu._partial_hess_ejk(hobj) + e2_cpu = ej_cpu - ek_cpu mf = mf.to_gpu() mf.kernel() hobj = mf.Hessian() - e1_gpu, ej_gpu, ek_gpu = rhf_gpu._partial_hess_ejk(hobj) + e1_gpu, e2_gpu = rhf_gpu._partial_hess_ejk(hobj) assert abs(e1_cpu - e1_gpu.get()).max() < 1e-5 - assert abs(ej_cpu - ej_gpu.get()).max() < 1e-5 - assert abs(ek_cpu - ek_gpu.get()).max() < 1e-5 + assert abs(e2_cpu - e2_gpu.get()).max() < 1e-5 def test_ejk_ip2(self): mol = gto.M( @@ -78,17 +78,15 @@ def test_ejk_ip2(self): mo_occ = np.ones(nao) * 2 mo_energy = np.random.rand(nao) - ej, ek = rhf_gpu._partial_ejk_ip2(mol, dm) - assert abs(lib.fp(ej.get()) - -792951.4785844693) < 1e-7 - assert abs(lib.fp(ek.get()) - -352265.0466989743) < 1e-7 + ejk = rhf_gpu._partial_ejk_ip2(mol, dm) mf = mol.RHF() mf.mo_coeff = mo_coeff mf.mo_occ = mo_occ mf.mo_energy = mo_energy h = rhf_cpu.Hessian(mf) e1, refj, refk = rhf_cpu._partial_hess_ejk(h, mo_energy, mo_coeff, mo_occ) - assert abs(ej.get() - refj).max() < 1e-6 - assert abs(ek.get() - refk).max() < 1e-6 + e2_ref = refj - refk + assert abs(ejk.get() - e2_ref).max() < 1e-6 def test_get_jk(self): mol = gto.M( @@ -140,7 +138,7 @@ def test_hessian_rhf_D3(self): mf.conv_tol_cpscf = 1e-8 ref = mf.Hessian().kernel() e2_gpu = mf.Hessian().to_gpu().kernel() - assert abs(ref - e2_gpu).max() < 1e-8 + assert abs(ref - e2_gpu).max() < 1e-6 if __name__ == "__main__": print("Full Tests for RHF Hessian") diff --git a/gpu4pyscf/hessian/tests/test_rks_hessian.py b/gpu4pyscf/hessian/tests/test_rks_hessian.py index bbe272d3..a760feb2 100644 --- a/gpu4pyscf/hessian/tests/test_rks_hessian.py +++ 
b/gpu4pyscf/hessian/tests/test_rks_hessian.py @@ -29,7 +29,7 @@ def setUpModule(): ["O" , (0. , 0. , 0.)], [1 , (0. , -0.757 , 0.587)], [1 , (0. , 0.757 , 0.587)] ]) - mol.basis = 'sto3g' + mol.basis = ('sto3g', [[2, [1., 1.]]]) mol.build() def tearDownModule(): @@ -94,17 +94,17 @@ def test_vxc_deriv2(self): def test_hessian_lda(self, disp=None): print('-----testing LDA Hessian----') mf = mol.RKS(xc='LDA').run() - _vs_cpu(mf) + _vs_cpu(mf, tol=5e-6) def test_hessian_gga(self): print('-----testing PBE Hessian----') mf = mol.RKS(xc='PBE').run() - _vs_cpu(mf) + _vs_cpu(mf, tol=1e-6) def test_hessian_hybrid(self): print('-----testing B3LYP Hessian----') mf = mol.RKS(xc='b3lyp').run() - _vs_cpu(mf) + _vs_cpu(mf, tol=5e-6) def test_hessian_mgga(self): print('-----testing M06 Hessian----') diff --git a/gpu4pyscf/hessian/tests/test_uhf_hessian.py b/gpu4pyscf/hessian/tests/test_uhf_hessian.py index 32106d8f..eb046b1e 100644 --- a/gpu4pyscf/hessian/tests/test_uhf_hessian.py +++ b/gpu4pyscf/hessian/tests/test_uhf_hessian.py @@ -46,7 +46,7 @@ def test_hessian_uhf(self): hobj = mf.Hessian() ref = hobj.kernel() e2_gpu = hobj.to_gpu().kernel() - assert abs(ref - e2_gpu).max() < 1e-8 + assert abs(ref - e2_gpu).max() < 1e-6 def test_partial_hess_elec(self): mf = scf.UHF(mol) @@ -54,15 +54,15 @@ def test_partial_hess_elec(self): mf.kernel() hobj = mf.Hessian() e1_cpu, ej_cpu, ek_cpu = uhf_cpu._partial_hess_ejk(hobj) + e2_cpu = ej_cpu - ek_cpu mf = mf.to_gpu() mf.kernel() hobj = mf.Hessian() - e1_gpu, ej_gpu, ek_gpu = uhf_gpu._partial_hess_ejk(hobj) + e1_gpu, e2_gpu = uhf_gpu._partial_hess_ejk(hobj) assert numpy.linalg.norm(e1_cpu - e1_gpu.get()) < 1e-5 - assert numpy.linalg.norm(ej_cpu - ej_gpu.get()) < 1e-5 - assert numpy.linalg.norm(ek_cpu - ek_gpu.get()) < 1e-5 + assert numpy.linalg.norm(e2_cpu - e2_gpu.get()) < 1e-5 def test_hessian_uhf_D3(self): print('----- testing UHF with D3BJ ------') @@ -72,7 +72,7 @@ def test_hessian_uhf_D3(self): mf.conv_tol_cpscf = 1e-8 ref = 
mf.Hessian().kernel() e2_gpu = mf.Hessian().to_gpu().kernel() - assert abs(ref - e2_gpu).max() < 1e-8 + assert abs(ref - e2_gpu).max() < 1e-6 if __name__ == "__main__": print("Full Tests for UHF Hessian") diff --git a/gpu4pyscf/hessian/tests/test_uks_hessian.py b/gpu4pyscf/hessian/tests/test_uks_hessian.py index 76beb1e8..d0f93714 100644 --- a/gpu4pyscf/hessian/tests/test_uks_hessian.py +++ b/gpu4pyscf/hessian/tests/test_uks_hessian.py @@ -110,12 +110,12 @@ def test_hessian_lda(self, disp=None): def test_hessian_gga(self): print('-----testing PBE Hessian----') mf = mol.UKS(xc='PBE').run() - _vs_cpu(mf) + _vs_cpu(mf, tol=1e-6) def test_hessian_hybrid(self): print('-----testing B3LYP Hessian----') mf = mol.UKS(xc='b3lyp').run() - _vs_cpu(mf) + _vs_cpu(mf, tol=1e-6) def test_hessian_mgga(self): print('-----testing M06 Hessian----') diff --git a/gpu4pyscf/hessian/uhf.py b/gpu4pyscf/hessian/uhf.py index 56e0acfc..81f26c17 100644 --- a/gpu4pyscf/hessian/uhf.py +++ b/gpu4pyscf/hessian/uhf.py @@ -23,14 +23,15 @@ ''' from functools import reduce +import numpy as np import cupy import cupy as cp from pyscf import lib +from pyscf.scf import ucphf # import _response_functions to load gen_response methods in SCF class from gpu4pyscf.scf import _response_functions # noqa -from gpu4pyscf.scf import ucphf from gpu4pyscf.gto.mole import sort_atoms -from gpu4pyscf.lib.cupy_helper import contract, tag_array, get_avail_mem +from gpu4pyscf.lib.cupy_helper import contract, tag_array, get_avail_mem, krylov from gpu4pyscf.lib import logger from gpu4pyscf.grad import rhf as rhf_grad from gpu4pyscf.hessian import rhf as rhf_hess_gpu @@ -51,6 +52,8 @@ def hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, if mo_energy is None: mo_energy = mf.mo_energy if mo_occ is None: mo_occ = mf.mo_occ if mo_coeff is None: mo_coeff = mf.mo_coeff + if atmlst is not None: + assert len(atmlst) == mol.natm mo_energy = cupy.asarray(mo_energy) mo_occ = cupy.asarray(mo_occ) @@ -60,73 +63,71 @@ def 
hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, t1 = log.timer_debug1('hess elec', *t1) if h1mo is None: h1mo = hessobj.make_h1(mo_coeff, mo_occ, None, atmlst, log) + if h1mo[0].size * 8 * 10 > get_avail_mem(): + # Reduce GPU memory footprint + h1mo = (h1mo[0].get(), h1mo[1].get()) t1 = log.timer_debug1('making H1', *t1) if mo1 is None or mo_e1 is None: mo1, mo_e1 = hessobj.solve_mo1(mo_energy, mo_coeff, mo_occ, h1mo, None, atmlst, max_memory, log) t1 = log.timer_debug1('solving MO1', *t1) - mo1a, mo1b = mo1 - mo_e1a, mo_e1b = mo_e1 - h1aoa, h1aob = h1mo + + mo1a = cupy.asarray(mo1[0]) + mo1b = cupy.asarray(mo1[1]) + de2 += contract('kxpi,lypi->klxy', cupy.asarray(h1mo[0]), mo1a) * 2 + de2 += contract('kxpi,lypi->klxy', cupy.asarray(h1mo[1]), mo1b) * 2 + mo1a = contract('kxai,pa->kxpi', mo1a, mo_coeff[0]) + mo1b = contract('kxai,pa->kxpi', mo1b, mo_coeff[1]) + + mo_e1a = cupy.asarray(mo_e1[0]) + mo_e1b = cupy.asarray(mo_e1[1]) nao, _ = mo_coeff[0].shape - mocca = cupy.array(mo_coeff[0][:,mo_occ[0]>0]) - moccb = cupy.array(mo_coeff[1][:,mo_occ[1]>0]) - mo_energy = cupy.array(mo_energy) + mocca = mo_coeff[0][:,mo_occ[0]>0] + moccb = mo_coeff[1][:,mo_occ[1]>0] mo_ea = mo_energy[0][mo_occ[0]>0] mo_eb = mo_energy[1][mo_occ[1]>0] - + mocca_e = mocca * mo_ea + moccb_e = moccb * mo_eb s1a = -mol.intor('int1e_ipovlp', comp=3) s1a = cupy.asarray(s1a) + aoslices = mol.aoslice_by_atom() - if atmlst is None: - atmlst = range(mol.natm) - for i0, ia in enumerate(atmlst): - shl0, shl1, p0, p1 = aoslices[ia] + for i0, (p0, p1) in enumerate(aoslices[:,2:]): s1ao = cupy.zeros((3,nao,nao)) s1ao[:,p0:p1] += s1a[:,p0:p1] s1ao[:,:,p0:p1] += s1a[:,p0:p1].transpose(0,2,1) tmp = contract('xpq,pi->xiq', s1ao, mocca) - s1ooa = contract('xiq,qj->xij', tmp, mocca) + s1oo = contract('xiq,qj->xij', tmp, mocca) + de2[i0] -= contract('xij,kyij->kxy', s1oo, mo_e1a) tmp = contract('xpq,pi->xiq', s1ao, moccb) - s1oob = contract('xiq,qj->xij', tmp, moccb) + s1oo = contract('xiq,qj->xij', 
tmp, moccb) + de2[i0] -= contract('xij,kyij->kxy', s1oo, mo_e1b) - #s1oo = cupy.einsum('xpq,pi,qj->xij', s1ao, mocc, mocc) - s1moa = contract('xij,ip->xpj', s1ao, mo_coeff[0]) - s1mob = contract('xij,ip->xpj', s1ao, mo_coeff[1]) - for j0 in range(i0+1): - ja = atmlst[j0] - q0, q1 = aoslices[ja][2:] -# *2 for double occupancy, *2 for +c.c. - #dm1 = cupy.einsum('ypi,qi->ypq', mo1[ja], mocc) - #de2_gpu[i0,j0] += cupy.einsum('xpq,ypq->xy', h1ao[ia], dm1) * 4 - de2[i0,j0] += contract('xpi,ypi->xy', h1aoa[ia], mo1a[ja]) * 2 - de2[i0,j0] += contract('xpi,ypi->xy', h1aob[ia], mo1b[ja]) * 2 - dm1a = contract('ypi,qi->ypq', mo1a[ja], mocca*mo_ea) - dm1b = contract('ypi,qi->ypq', mo1b[ja], moccb*mo_eb) - de2[i0,j0] -= contract('xpq,ypq->xy', s1moa, dm1a) * 2 - de2[i0,j0] -= contract('xpq,ypq->xy', s1mob, dm1b) * 2 - de2[i0,j0] -= contract('xpq,ypq->xy', s1ooa, mo_e1a[ja]) - de2[i0,j0] -= contract('xpq,ypq->xy', s1oob, mo_e1b[ja]) - for j0 in range(i0): - de2[j0,i0] = de2[i0,j0].T + s1mo = contract('xpq,qi->xpi', s1ao, mocca_e) + de2[i0] -= contract('xpi,kypi->kxy', s1mo, mo1a) * 2 - log.timer('UHF hessian', *time0) + s1mo = contract('xpq,qi->xpi', s1ao, moccb_e) + de2[i0] -= contract('xpi,kypi->kxy', s1mo, mo1b) * 2 + de2 = de2 + de2.transpose(1,0,3,2) + de2 *= .5 + log.timer('UHF hessian', *time0) return de2 def partial_hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None, max_memory=4000, verbose=None): '''Partial derivative ''' - e1, ej, ek = _partial_hess_ejk( + e1, ejk = _partial_hess_ejk( hessobj, mo_energy, mo_coeff, mo_occ, atmlst, max_memory, verbose, True) - return e1 + ej - ek # (A,B,dR_A,dR_B) + return e1 + ejk # (A,B,dR_A,dR_B) def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, - atmlst=None, max_memory=4000, verbose=None, with_k=True): + atmlst=None, max_memory=4000, verbose=None, + j_factor=1., k_factor=1.): log = logger.new_logger(hessobj, verbose) time0 = t1 = (logger.process_clock(), logger.perf_counter()) @@ 
-144,7 +145,8 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, dm0b = moccb.dot(moccb.T) dm0 = cp.asarray((dm0a, dm0b)) vhfopt = mf._opt_gpu.get(None, None) - ej, ek = rhf_hess_gpu._partial_ejk_ip2(mol, dm0, vhfopt, with_k, verbose=log) + ejk = rhf_hess_gpu._partial_ejk_ip2(mol, dm0, vhfopt, j_factor, k_factor, + verbose=log) t1 = log.timer_debug1('hessian of 2e part', *t1) # Energy weighted density matrix @@ -170,7 +172,7 @@ def _partial_hess_ejk(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, e1[j0,i0] = e1[i0,j0].T log.timer('UHF partial hessian', *time0) - return e1, ej, ek + return e1, ejk def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): assert atmlst is None @@ -234,7 +236,11 @@ def get_ovlp(mol): def solve_mo1(mf, mo_energy, mo_coeff, mo_occ, h1mo, fx=None, atmlst=None, max_memory=4000, verbose=None, max_cycle=50, level_shift=0): - '''Solve the first order equation + '''Solve the CPHF equation for the first orbitals. + Note: These orbitals are represented in MO basis. This is different to the + solve_mo1 function in the PySCF CPU version, which transforms the mo1 to AO + basis. + Kwargs: fx : function(dm_mo) => v1_mo A function to generate the induced potential. 
@@ -242,72 +248,125 @@ def solve_mo1(mf, mo_energy, mo_coeff, mo_occ, h1mo, ''' mol = mf.mol log = logger.new_logger(mf, verbose) + t0 = log.init_timer() + + occidxa = mo_occ[0] > 0 + occidxb = mo_occ[1] > 0 + viridxa = mo_occ[0] == 0 + viridxb = mo_occ[1] == 0 + mo_ea, mo_eb = mo_energy + ei_a = mo_ea[occidxa] + ei_b = mo_eb[occidxb] + ea_a = mo_ea[viridxa] + ea_b = mo_eb[viridxb] + eai_a = 1 / (ea_a[:,None] + level_shift - ei_a) + eai_b = 1 / (ea_b[:,None] + level_shift - ei_b) + nvira, nocca = eai_a.shape + nvirb, noccb = eai_b.shape + nocc = nocca + noccb nao, nmo = mo_coeff[0].shape - mocca = mo_coeff[0][:,mo_occ[0]>0] - moccb = mo_coeff[1][:,mo_occ[1]>0] - nocca = mocca.shape[1] - noccb = moccb.shape[1] + mocca = mo_coeff[0][:,occidxa] + moccb = mo_coeff[1][:,occidxb] + natm = mol.natm + if fx is None: fx = gen_vind(mf, mo_coeff, mo_occ) - s1a = -mol.intor('int1e_ipovlp', comp=3) - s1a = cupy.asarray(s1a) - def _ao2mo(mat, mo, mocc): - tmp = contract('xij,jo->xio', mat, mocc) - return contract('xik,ip->xpk', tmp, mo) - cupy.get_default_memory_pool().free_all_blocks() + def fvind_vo(mo1): + mo1 = mo1.reshape(-1,nmo*nocc) + v = fx(mo1).reshape(-1,nmo*nocc) + if level_shift != 0: + v -= mo1 * level_shift + v1a = v[:,:nmo*nocca].reshape(-1,nmo,nocca) + v1b = v[:,nmo*nocca:].reshape(-1,nmo,noccb) + v1a[:,viridxa] *= eai_a + v1b[:,viridxb] *= eai_b + v1a[:,occidxa] = 0 + v1b[:,occidxb] = 0 + return v.reshape(-1,nmo*nocc) + + ipovlp = -mol.intor('int1e_ipovlp', comp=3) + ipovlp = cp.asarray(ipovlp) + cp.get_default_memory_pool().free_all_blocks() avail_mem = get_avail_mem() - blksize = int(avail_mem*0.4) // (8*3*nao*nao*4) // ALIGNED * ALIGNED - blksize = min(8, blksize) + # *8 for spin-up/down input dm, vj, vk, and vxc + blksize = int(min(avail_mem*.3 / (8*3*nao*nao*8), + avail_mem*.6 / (8*nmo*nocc*natm*3*5))) + if blksize < ALIGNED**2: + raise RuntimeError('GPU memory insufficient') + + blksize = (blksize // ALIGNED**2) * ALIGNED**2 log.debug(f'GPU memory 
{avail_mem/GB:.1f} GB available') log.debug(f'{blksize} atoms in each block CPHF equation') - # sort atoms to improve the convergence - sorted_idx = sort_atoms(mol) - atom_groups = [] - for p0,p1 in lib.prange(0,mol.natm,blksize): - blk = sorted_idx[p0:p1] - atom_groups.append(blk) - - mo1sa = [None] * mol.natm - mo1sb = [None] * mol.natm - e1sa = [None] * mol.natm - e1sb = [None] * mol.natm + natm = mol.natm + h1moa, h1mob = h1mo + mo1sa = np.zeros(h1moa.shape) + mo1sb = np.zeros(h1mob.shape) + e1sa = np.zeros((natm, 3, nocca, nocca)) + e1sb = np.zeros((natm, 3, noccb, noccb)) aoslices = mol.aoslice_by_atom() - for group in atom_groups: - s1voa = [] - s1vob = [] - h1voa = [] - h1vob = [] - for ia in group: - shl0, shl1, p0, p1 = aoslices[ia] - s1ao = cupy.zeros((3,nao,nao)) - s1ao[:,p0:p1] += s1a[:,p0:p1] - s1ao[:,:,p0:p1] += s1a[:,p0:p1].transpose(0,2,1) - s1voa.append(_ao2mo(s1ao, mo_coeff[0], mocca)) - s1vob.append(_ao2mo(s1ao, mo_coeff[1], moccb)) - h1voa.append(h1mo[0][ia]) - h1vob.append(h1mo[1][ia]) - - log.info(f'Solving CPHF equation for atoms {len(group)}/{mol.natm}') - h1vo = (cupy.vstack(h1voa), cupy.vstack(h1vob)) - s1vo = (cupy.vstack(s1voa), cupy.vstack(s1vob)) - tol = mf.conv_tol_cpscf - mo1, e1 = ucphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, - max_cycle=max_cycle, level_shift=level_shift, tol=tol, verbose=verbose) - - mo1a = mo1[0].reshape(-1,3,nao,nocca) - mo1b = mo1[1].reshape(-1,3,nao,noccb) - e1a = e1[0].reshape(-1,3,nocca,nocca) - e1b = e1[1].reshape(-1,3,noccb,noccb) - for k, ia in enumerate(group): - mo1sa[ia] = mo1a[k] - mo1sb[ia] = mo1b[k] - e1sa[ia] = e1a[k].reshape(3,nocca,nocca) - e1sb[ia] = e1b[k].reshape(3,noccb,noccb) - mo1 = e1 = None + for i0, i1 in lib.prange(0, natm, blksize): + log.info('Solving CPHF equation for atoms [%d:%d]', i0, i1) + + h1a_blk = h1moa[i0:i1] + h1b_blk = h1mob[i0:i1] + if not isinstance(h1moa, cp.ndarray): + h1a_blk = cp.asarray(h1a_blk) + h1b_blk = cp.asarray(h1b_blk) + s1a_blk = cp.empty_like(h1a_blk) + 
s1b_blk = cp.empty_like(h1b_blk) + for k, (p0, p1) in enumerate(aoslices[i0:i1,2:]): + s1ao = cp.zeros((3,nao,nao)) + s1ao[:,p0:p1] += ipovlp[:,p0:p1] + s1ao[:,:,p0:p1] += ipovlp[:,p0:p1].transpose(0,2,1) + tmp = contract('xij,jo->xio', s1ao, mocca) + s1a_blk[k] = contract('xio,ip->xpo', tmp, mo_coeff[0]) + tmp = contract('xij,jo->xio', s1ao, moccb) + s1b_blk[k] = contract('xio,ip->xpo', tmp, mo_coeff[1]) + + mo1a = hs_a = h1a_blk - s1a_blk * ei_a + mo1b = hs_b = h1b_blk - s1b_blk * ei_b + mo_e1a = hs_a[:,:,occidxa] + mo_e1b = hs_b[:,:,occidxb] + mo1a[:,:,viridxa] *= -eai_a + mo1b[:,:,viridxb] *= -eai_b + mo1a[:,:,occidxa] = -s1a_blk[:,:,occidxa] * .5 + mo1b[:,:,occidxb] = -s1b_blk[:,:,occidxb] * .5 + nset = (i1 - i0) * 3 + mo1 = cp.hstack((mo1a.reshape(nset,-1), mo1b.reshape(nset,-1))) + hs_a = hs_b = h1a_blk = h1b_blk = s1a_blk = s1b_blk = None + + tol = mf.conv_tol_cpscf * (i1 - i0) + raw_mo1 = krylov(fvind_vo, mo1.reshape(-1,nmo*nocc), + tol=tol, max_cycle=max_cycle, verbose=log) + raw_mo1a = raw_mo1[:,:nmo*nocca].reshape(i1-i0,3,nmo,nocca) + raw_mo1b = raw_mo1[:,nmo*nocca:].reshape(i1-i0,3,nmo,noccb) + + # The occ-occ block of mo1 is non-canonical; reset it on the solver output + raw_mo1a[:,:,occidxa] = mo1a[:,:,occidxa] + raw_mo1b[:,:,occidxb] = mo1b[:,:,occidxb] + + v1 = fx(raw_mo1) + v1a = v1[:,:nmo*nocca].reshape(i1-i0,3,nmo,nocca) + v1b = v1[:,nmo*nocca:].reshape(i1-i0,3,nmo,noccb) + mo1a[:,:,viridxa] -= v1a[:,:,viridxa] * eai_a + mo1b[:,:,viridxb] -= v1b[:,:,viridxb] * eai_b + mo_e1a += v1a[:,:,occidxa] + mo_e1b += v1b[:,:,occidxb] + mo_e1a += mo1a[:,:,occidxa] * (ei_a[:,None] - ei_a) + mo_e1b += mo1b[:,:,occidxb] * (ei_b[:,None] - ei_b) + + mo1sa[i0:i1] = mo1a.get() + mo1sb[i0:i1] = mo1b.get() + e1sa[i0:i1] = mo_e1a.get() + e1sb[i0:i1] = mo_e1b.get() + mo1a = mo1b = mo1 = mo_e1a = mo_e1b = None + raw_mo1a = raw_mo1b = raw_mo1 = None + v1a = v1b = v1 = None + log.timer('CPHF solver', *t0) return (mo1sa, mo1sb), (e1sa, e1sb) def gen_vind(mf, mo_coeff, mo_occ): @@ -321,6 +380,8 @@ def
gen_vind(mf, mo_coeff, mo_occ): nocca = mocca.shape[1] noccb = moccb.shape[1] grids = getattr(mf, 'cphf_grids', None) + if grids is not None: + logger.info(mf, 'Secondary grids defined for CPHF in Hessian') vresp = mf.gen_response(mo_coeff, mo_occ, hermi=1, grids=grids) def fx(mo1): diff --git a/gpu4pyscf/hessian/uks.py b/gpu4pyscf/hessian/uks.py index d42c8f76..a363706a 100644 --- a/gpu4pyscf/hessian/uks.py +++ b/gpu4pyscf/hessian/uks.py @@ -50,20 +50,38 @@ def partial_hess_elec(hessobj, mo_energy=None, mo_coeff=None, mo_occ=None, if mf.nlc != '': raise NotImplementedError + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(mf.xc, spin=mol.spin) with_k = ni.libxc.is_hybrid_xc(mf.xc) - de2, ej, ek = uhf_hess._partial_hess_ejk(hessobj, mo_energy, mo_coeff, mo_occ, - atmlst, max_memory, verbose, - with_k=with_k) - de2 += ej # (A,B,dR_A,dR_B) + j_factor = 1. + k_factor = 0. if with_k: - de2 -= hyb * ek - - if abs(omega) > 1e-10 and abs(alpha-hyb) > 1e-10: + if omega == 0: + k_factor = hyb + elif alpha == 0: # LR=0, only SR exchange + pass + elif hyb == 0: # SR=0, only LR exchange + k_factor = alpha + else: # SR and LR exchange with different ratios + k_factor = alpha + de2, ejk = uhf_hess._partial_hess_ejk(hessobj, mo_energy, mo_coeff, mo_occ, + atmlst, max_memory, verbose, + j_factor, k_factor) + de2 += ejk # (A,B,dR_A,dR_B) + if with_k and omega != 0: + j_factor = 0. 
+ omega = -omega # Prefer computing the SR part + if alpha == 0: # LR=0, only SR exchange + k_factor = hyb + elif hyb == 0: # SR=0, only LR exchange + # full range exchange was computed in the previous step + k_factor = -alpha + else: # SR and LR exchange with different ratios + k_factor = hyb - alpha # =beta vhfopt = mf._opt_gpu.get(omega, None) with mol.with_range_coulomb(omega): - ek_lr = rhf_hess._partial_ejk_ip2(mol, dm0, vhfopt, verbose=verbose)[1] - de2 -= (alpha-hyb) * ek_lr + de2 += rhf_hess._partial_ejk_ip2( + mol, dm0, vhfopt, j_factor, k_factor, verbose=verbose) mem_now = lib.current_memory()[0] max_memory = max(2000, mf.max_memory*.9-mem_now) diff --git a/gpu4pyscf/lib/cupy_helper.py b/gpu4pyscf/lib/cupy_helper.py index 7def14d6..c4523558 100644 --- a/gpu4pyscf/lib/cupy_helper.py +++ b/gpu4pyscf/lib/cupy_helper.py @@ -583,7 +583,7 @@ def krylov(aop, b, x0=None, tol=1e-10, max_cycle=30, dot=cupy.dot, callback function takes one dict as the argument which is generated by the builtin function :func:`locals`, so that the callback function can access all local variables in the current - envrionment. + environment. 
Returns: x : ndarray like b ''' @@ -653,6 +653,7 @@ def krylov(aop, b, x0=None, tol=1e-10, max_cycle=30, dot=cupy.dot, else: raise RuntimeError('Krylov solver failed to converge') + log.info(f'krylov space size {len(xs)}') xs = cupy.asarray(xs) ax = cupy.asarray(ax) nd = xs.shape[0] diff --git a/gpu4pyscf/lib/gvhf-rys/CMakeLists.txt b/gpu4pyscf/lib/gvhf-rys/CMakeLists.txt index f40ad488..791f9b4e 100644 --- a/gpu4pyscf/lib/gvhf-rys/CMakeLists.txt +++ b/gpu4pyscf/lib/gvhf-rys/CMakeLists.txt @@ -6,7 +6,7 @@ add_library(gvhf_rys SHARED rys_contract_j.cu cart2xyz.c unrolled_rys_j.cu count_tasks.cu rys_contract_jk_ip1.cu unrolled_rys_ip1.cu unrolled_ejk_ip1.cu - rys_contract_jk_ip2.cu unrolled_ejk_ip2.cu + rys_contract_jk_ip2.cu unrolled_ejk_ip2_type12.cu unrolled_ejk_ip2_type3.cu ) #option(BUILD_SHARED_LIBS "build shared libraries" 1) diff --git a/gpu4pyscf/lib/gvhf-rys/create_tasks.cu b/gpu4pyscf/lib/gvhf-rys/create_tasks.cu index aec96997..2323f867 100644 --- a/gpu4pyscf/lib/gvhf-rys/create_tasks.cu +++ b/gpu4pyscf/lib/gvhf-rys/create_tasks.cu @@ -6,6 +6,7 @@ #include "vhf.cuh" +// 8-fold symmetry __device__ static int _fill_jk_tasks(ShellQuartet *shl_quartet_idx, RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, @@ -168,6 +169,7 @@ static int _fill_jk_tasks(ShellQuartet *shl_quartet_idx, return ntasks; } +// 8-fold symmetry __device__ static int _fill_sr_jk_tasks(ShellQuartet *shl_quartet_idx, RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, diff --git a/gpu4pyscf/lib/gvhf-rys/create_tasks_ip1.cu b/gpu4pyscf/lib/gvhf-rys/create_tasks_ip1.cu index 02ed4c32..0ea679d2 100644 --- a/gpu4pyscf/lib/gvhf-rys/create_tasks_ip1.cu +++ b/gpu4pyscf/lib/gvhf-rys/create_tasks_ip1.cu @@ -6,9 +6,10 @@ #include "vhf.cuh" +// 8-fold symmetry __device__ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, - RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, + RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, int batch_ij, int batch_kl) { int nbas = envs.nbas; @@ -35,8 +36,8
@@ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, int jsh0 = tile_j * TILE; int ish1 = ish0 + TILE; int jsh1 = jsh0 + TILE; - int do_j = jk.vj != NULL; - int do_k = jk.vk != NULL; + int do_j = jk.j_factor != NULL; + int do_k = jk.k_factor != NULL; int count = 0; float tile_q_ij = tile_q_cond[tile_ij]; @@ -53,9 +54,9 @@ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, int lsh1 = lsh0 + TILE; for (int ish = ish0; ish < ish1; ++ish) { for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { - float q_ij = q_cond [ish*nbas+jsh]; - float d_ij = dm_cond[ish*nbas+jsh]; int bas_ij = ish * nbas + jsh; + float q_ij = q_cond [bas_ij]; + float d_ij = dm_cond[bas_ij]; for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { float d_ik = dm_cond[ish*nbas+ksh]; float d_jk = dm_cond[jsh*nbas+ksh]; @@ -64,14 +65,14 @@ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, if (bas_ij < bas_kl) { continue; } - float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + float q_ijkl = q_ij + q_cond[bas_kl]; if (q_ijkl < cutoff) { continue; } float d_cutoff = cutoff - q_ijkl; if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || - (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + (do_j && d_ij+dm_cond[bas_kl] > d_cutoff)) { ++count; } } @@ -128,9 +129,9 @@ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, ShellQuartet sq; for (int ish = ish0; ish < ish1; ++ish) { for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { - float q_ij = q_cond [ish*nbas+jsh]; - float d_ij = dm_cond[ish*nbas+jsh]; int bas_ij = ish * nbas + jsh; + float q_ij = q_cond [bas_ij]; + float d_ij = dm_cond[bas_ij]; sq.i = ish; sq.j = jsh; for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { @@ -141,14 +142,14 @@ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, if (bas_ij < bas_kl) { continue; } - float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + float q_ijkl = q_ij + q_cond[bas_kl]; if (q_ijkl < cutoff) { continue; } float d_cutoff = cutoff - q_ijkl; if 
((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || - (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + (do_j && d_ij+dm_cond[bas_kl] > d_cutoff)) { sq.k = ksh; sq.l = lsh; shl_quartet_idx[offset] = sq; @@ -162,6 +163,295 @@ static int _fill_ejk_tasks(ShellQuartet *shl_quartet_idx, return ntasks; } +__device__ +static int _fill_sr_ejk_tasks(ShellQuartet *shl_quartet_idx, + RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + int batch_ij, int batch_kl) +{ + int nbas = envs.nbas; + int *tile_ij_mapping = bounds.tile_ij_mapping; + int *tile_kl_mapping = bounds.tile_kl_mapping; + float *q_cond = bounds.q_cond; +#if TILE == 1 + float *tile_q_cond = q_cond; +#else + float *tile_q_cond = q_cond + nbas*nbas; +#endif + int nbas_tiles = nbas / TILE; + // TODO: implement q_ijij_cond + float *s_estimator = tile_q_cond + nbas_tiles*nbas_tiles; + float *dm_cond = bounds.dm_cond; + float cutoff = bounds.cutoff; + int t_id = threadIdx.y * blockDim.x + threadIdx.x; + int t_kl0 = batch_kl * TILES_IN_BATCH; + int t_kl1 = MIN(t_kl0 + TILES_IN_BATCH, bounds.ntile_kl_pairs); + int threads = blockDim.x * blockDim.y; + + int tile_ij = tile_ij_mapping[batch_ij]; + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; + int jsh0 = tile_j * TILE; + int ish1 = ish0 + TILE; + int jsh1 = jsh0 + TILE; + int do_j = jk.j_factor != NULL; + int do_k = jk.k_factor != NULL; + + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *bas = envs.bas; + double *env = envs.env; + float omega = env[PTR_RANGE_OMEGA]; + float omega2 = omega * omega; + + int count = 0; + float tile_q_ij = tile_q_cond[tile_ij]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; 
+ int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + float ai = expi[iprim-1]; + float aj = expj[jprim-1]; + float aij = ai + aj; + float aj_aij = aj / aij; + float xi = ri[0]; + float yi = ri[1]; + float zi = ri[2]; + float xj = rj[0]; + float yj = rj[1]; + float zj = rj[2]; + float xjxi = xj - xi; + float yjyi = yj - yi; + float zjzi = zj - zi; + float xpa = xjxi * aj_aij; + float ypa = yjyi * aj_aij; + float zpa = zjzi * aj_aij; + float xij = xi + xpa; + float yij = yi + ypa; + float zij = zi + zpa; + int bas_ij = ish * nbas + jsh; + float q_ij = q_cond [bas_ij]; + float d_ij = dm_cond[bas_ij]; + float skl_cutoff = cutoff - s_estimator[bas_ij]; + for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < MIN(ksh+1, lsh1); ++lsh) { + int bas_kl = ksh * nbas + lsh; + if (bas_ij < bas_kl) { + continue; + } + float q_ijkl = q_ij + q_cond[bas_kl]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[bas_kl] > d_cutoff)) { + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + float ak = expk[kprim-1]; + float al = expl[lprim-1]; + float akl = ak + al; + float al_akl = al / akl; + float xk = rk[0]; + float yk = rk[1]; + float zk = rk[2]; + float xl = rl[0]; + float yl = rl[1]; + float zl = rl[2]; + float 
xlxk = xl - xk; + float ylyk = yl - yk; + float zlzk = zl - zk; + float xqc = xlxk * al_akl; + float yqc = ylyk * al_akl; + float zqc = zlzk * al_akl; + float xkl = xk + xqc; + float ykl = yk + yqc; + float zkl = zk + zqc; + float theta = 1./(1./aij+1./akl+1./omega2); + float xpq = xij - xkl; + float ypq = yij - ykl; + float zpq = zij - zkl; + float rr = xpq*xpq + ypq*ypq + zpq*zpq; + float theta_rr = logf(rr + 1e-30f) + theta * rr; + d_cutoff = skl_cutoff - s_estimator[bas_kl] + theta_rr; + if (d_cutoff > 0) { + continue; + } + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[bas_kl] > d_cutoff)) { + ++count; + } + } + } + } + } + } + } + + // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + extern __shared__ int thread_offsets[]; + thread_offsets[t_id] = count; + // Up-sweep phase + for (int stride = 1; stride < threads; stride *= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + thread_offsets[index] += thread_offsets[index-stride]; + } + } + __syncthreads(); + if (t_id == threads-1) { thread_offsets[threads-1] = 0; } + // Down-sweep phase + for (int stride = threads/2; stride > 0; stride /= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + int temp = thread_offsets[index - stride]; + thread_offsets[index - stride] = thread_offsets[index]; + thread_offsets[index] += temp; + } + } + __syncthreads(); + __shared__ int ntasks; + if (t_id == threads-1) { + ntasks = thread_offsets[threads-1] + count; + } + __syncthreads(); + if (ntasks == 0) { + return ntasks; + } + + int offset = thread_offsets[t_id]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % 
nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + ShellQuartet sq; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + float ai = expi[iprim-1]; + float aj = expj[jprim-1]; + float aij = ai + aj; + float aj_aij = aj / aij; + float xi = ri[0]; + float yi = ri[1]; + float zi = ri[2]; + float xj = rj[0]; + float yj = rj[1]; + float zj = rj[2]; + float xjxi = xj - xi; + float yjyi = yj - yi; + float zjzi = zj - zi; + float xpa = xjxi * aj_aij; + float ypa = yjyi * aj_aij; + float zpa = zjzi * aj_aij; + float xij = xi + xpa; + float yij = yi + ypa; + float zij = zi + zpa; + int bas_ij = ish * nbas + jsh; + float q_ij = q_cond [bas_ij]; + float d_ij = dm_cond[bas_ij]; + float skl_cutoff = cutoff - s_estimator[bas_ij]; + sq.i = ish; + sq.j = jsh; + for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < MIN(ksh+1, lsh1); ++lsh) { + int bas_kl = ksh * nbas + lsh; + if (bas_ij < bas_kl) { + continue; + } + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[bas_kl] > d_cutoff)) { + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + float ak = expk[kprim-1]; + float al = expl[lprim-1]; + float akl = ak + al; + float al_akl = al / akl; + float xk = rk[0]; + float yk = rk[1]; + float zk = rk[2]; + 
float xl = rl[0]; + float yl = rl[1]; + float zl = rl[2]; + float xlxk = xl - xk; + float ylyk = yl - yk; + float zlzk = zl - zk; + float xqc = xlxk * al_akl; + float yqc = ylyk * al_akl; + float zqc = zlzk * al_akl; + float xkl = xk + xqc; + float ykl = yk + yqc; + float zkl = zk + zqc; + float theta = 1./(1./aij+1./akl+1./omega2); + float xpq = xij - xkl; + float ypq = yij - ykl; + float zpq = zij - zkl; + float rr = xpq*xpq + ypq*ypq + zpq*zpq; + float theta_rr = logf(rr + 1e-30f) + theta * rr; + d_cutoff = skl_cutoff - s_estimator[bas_kl] + theta_rr; + if (d_cutoff > 0) { + continue; + } + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[bas_kl] > d_cutoff)) { + sq.k = ksh; + sq.l = lsh; + shl_quartet_idx[offset] = sq; + ++offset; + } + } + } + } + } + } + } + return ntasks; +} + __device__ static int _fill_jk_tasks_s2kl(ShellQuartet *shl_quartet_idx, RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, @@ -264,3 +554,160 @@ static int _fill_jk_tasks_s2kl(ShellQuartet *shl_quartet_idx, } return ntasks; } + +__device__ +static int _fill_ejk_tasks_tmp(ShellQuartet *shl_quartet_idx, + RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, + int batch_ij, int batch_kl) +{ + int nbas = envs.nbas; + int *tile_ij_mapping = bounds.tile_ij_mapping; + int *tile_kl_mapping = bounds.tile_kl_mapping; + float *q_cond = bounds.q_cond; +#if TILE == 1 + float *tile_q_cond = q_cond; +#else + float *tile_q_cond = q_cond + nbas*nbas; +#endif + float *dm_cond = bounds.dm_cond; + float cutoff = bounds.cutoff; + int t_id = threadIdx.y * blockDim.x + threadIdx.x; + int t_kl0 = batch_kl * TILES_IN_BATCH; + int t_kl1 = MIN(t_kl0 + TILES_IN_BATCH, bounds.ntile_kl_pairs); + int threads = blockDim.x * blockDim.y; + + int tile_ij = tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; + int jsh0 = tile_j * TILE; + 
int ish1 = ish0 + TILE; + int jsh1 = jsh0 + TILE; + int do_j = jk.vj != NULL; + int do_k = jk.vk != NULL; + + int count = 0; + float tile_q_ij = tile_q_cond[tile_ij]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { + float q_ij = q_cond [ish*nbas+jsh]; + float d_ij = dm_cond[ish*nbas+jsh]; + int bas_ij = ish * nbas + jsh; + for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < MIN(ksh+1, lsh1); ++lsh) { + int bas_kl = ksh * nbas + lsh; + if (bas_ij < bas_kl) { + continue; + } + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + ++count; + } + } + } + } + } + } + + // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + extern __shared__ int thread_offsets[]; + thread_offsets[t_id] = count; + // Up-sweep phase + for (int stride = 1; stride < threads; stride *= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + thread_offsets[index] += thread_offsets[index-stride]; + } + } + __syncthreads(); + if (t_id == threads-1) { thread_offsets[threads-1] = 0; } + // Down-sweep phase + for (int stride = threads/2; stride > 0; stride /= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + int temp = 
thread_offsets[index - stride]; + thread_offsets[index - stride] = thread_offsets[index]; + thread_offsets[index] += temp; + } + } + __syncthreads(); + __shared__ int ntasks; + if (t_id == threads-1) { + ntasks = thread_offsets[threads-1] + count; + } + __syncthreads(); + if (ntasks == 0) { + return ntasks; + } + + int offset = thread_offsets[t_id]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + ShellQuartet sq; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { + float q_ij = q_cond [ish*nbas+jsh]; + float d_ij = dm_cond[ish*nbas+jsh]; + int bas_ij = ish * nbas + jsh; + sq.i = ish; + sq.j = jsh; + for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < MIN(ksh+1, lsh1); ++lsh) { + int bas_kl = ksh * nbas + lsh; + if (bas_ij < bas_kl) { + continue; + } + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + sq.k = ksh; + sq.l = lsh; + shl_quartet_idx[offset] = sq; + ++offset; + } + } + } + } + } + } + return ntasks; +} + diff --git a/gpu4pyscf/lib/gvhf-rys/create_tasks_ip2.cu b/gpu4pyscf/lib/gvhf-rys/create_tasks_ip2.cu new file mode 100644 index 00000000..a63e512d --- /dev/null +++ b/gpu4pyscf/lib/gvhf-rys/create_tasks_ip2.cu @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include + +#include "vhf.cuh" + +__device__ +static int _fill_ejk_ip2_type2_tasks(ShellQuartet 
*shl_quartet_idx, + RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, + int batch_ij, int batch_kl) +{ + int nbas = envs.nbas; + int *tile_ij_mapping = bounds.tile_ij_mapping; + int *tile_kl_mapping = bounds.tile_kl_mapping; + float *q_cond = bounds.q_cond; +#if TILE == 1 + float *tile_q_cond = q_cond; +#else + float *tile_q_cond = q_cond + nbas*nbas; +#endif + float *dm_cond = bounds.dm_cond; + float cutoff = bounds.cutoff; + int t_id = threadIdx.y * blockDim.x + threadIdx.x; + int t_kl0 = batch_kl * TILES_IN_BATCH; + int t_kl1 = MIN(t_kl0 + TILES_IN_BATCH, bounds.ntile_kl_pairs); + int threads = blockDim.x * blockDim.y; + + int tile_ij = tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; + int jsh0 = tile_j * TILE; + int ish1 = ish0 + TILE; + int jsh1 = jsh0 + TILE; + int do_j = jk.vj != NULL; + int do_k = jk.vk != NULL; + + int count = 0; + float tile_q_ij = tile_q_cond[tile_ij]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { + float q_ij = q_cond [ish*nbas+jsh]; + float d_ij = dm_cond[ish*nbas+jsh]; + for (int ksh = ksh0; ksh < ksh1; ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < MIN(ksh+1, lsh1); ++lsh) { + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + 
++count; + } + } + } + } + } + } + + // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + extern __shared__ int thread_offsets[]; + thread_offsets[t_id] = count; + // Up-sweep phase + for (int stride = 1; stride < threads; stride *= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + thread_offsets[index] += thread_offsets[index-stride]; + } + } + __syncthreads(); + if (t_id == threads-1) { thread_offsets[threads-1] = 0; } + // Down-sweep phase + for (int stride = threads/2; stride > 0; stride /= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + int temp = thread_offsets[index - stride]; + thread_offsets[index - stride] = thread_offsets[index]; + thread_offsets[index] += temp; + } + } + __syncthreads(); + __shared__ int ntasks; + if (t_id == threads-1) { + ntasks = thread_offsets[threads-1] + count; + } + __syncthreads(); + if (ntasks == 0) { + return ntasks; + } + + int offset = thread_offsets[t_id]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + ShellQuartet sq; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < MIN(ish+1, jsh1); ++jsh) { + float q_ij = q_cond [ish*nbas+jsh]; + float d_ij = dm_cond[ish*nbas+jsh]; + sq.i = ish; + sq.j = jsh; + for (int ksh = ksh0; ksh < ksh1; ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < MIN(ksh+1, lsh1); ++lsh) { + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff 
|| + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + sq.k = ksh; + sq.l = lsh; + shl_quartet_idx[offset] = sq; + ++offset; + } + } + } + } + } + } + return ntasks; +} + +__device__ +static int _fill_ejk_ip2_type3_tasks(ShellQuartet *shl_quartet_idx, + RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, + int batch_ij, int batch_kl) +{ + int nbas = envs.nbas; + int *tile_ij_mapping = bounds.tile_ij_mapping; + int *tile_kl_mapping = bounds.tile_kl_mapping; + float *q_cond = bounds.q_cond; +#if TILE == 1 + float *tile_q_cond = q_cond; +#else + float *tile_q_cond = q_cond + nbas*nbas; +#endif + float *dm_cond = bounds.dm_cond; + float cutoff = bounds.cutoff; + int t_id = threadIdx.y * blockDim.x + threadIdx.x; + int t_kl0 = batch_kl * TILES_IN_BATCH; + int t_kl1 = MIN(t_kl0 + TILES_IN_BATCH, bounds.ntile_kl_pairs); + int threads = blockDim.x * blockDim.y; + + int tile_ij = tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; + int jsh0 = tile_j * TILE; + int ish1 = ish0 + TILE; + int jsh1 = jsh0 + TILE; + int do_j = jk.vj != NULL; + int do_k = jk.vk != NULL; + + int count = 0; + float tile_q_ij = tile_q_cond[tile_ij]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < jsh1; ++jsh) { + float q_ij = q_cond [ish*nbas+jsh]; + float d_ij = dm_cond[ish*nbas+jsh]; + int bas_ij = ish * nbas + jsh; + for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = 
lsh0; lsh < lsh1; ++lsh) { + int bas_kl = ksh * nbas + lsh; + if (bas_ij < bas_kl) { + continue; + } + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + ++count; + } + } + } + } + } + } + + // https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + extern __shared__ int thread_offsets[]; + thread_offsets[t_id] = count; + // Up-sweep phase + for (int stride = 1; stride < threads; stride *= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + thread_offsets[index] += thread_offsets[index-stride]; + } + } + __syncthreads(); + if (t_id == threads-1) { thread_offsets[threads-1] = 0; } + // Down-sweep phase + for (int stride = threads/2; stride > 0; stride /= 2) { + __syncthreads(); + int index = (t_id + 1) * stride * 2 - 1; + if (index < threads) { + int temp = thread_offsets[index - stride]; + thread_offsets[index - stride] = thread_offsets[index]; + thread_offsets[index] += temp; + } + } + __syncthreads(); + __shared__ int ntasks; + if (t_id == threads-1) { + ntasks = thread_offsets[threads-1] + count; + } + __syncthreads(); + if (ntasks == 0) { + return ntasks; + } + + int offset = thread_offsets[t_id]; + for (int t_kl_id = t_kl0+t_id; t_kl_id < t_kl1; t_kl_id += threads) { + int tile_kl = tile_kl_mapping[t_kl_id]; + if (tile_q_ij + tile_q_cond[tile_kl] < cutoff) { + break; + } + int tile_k = tile_kl / nbas_tiles; + int tile_l = tile_kl % nbas_tiles; + int ksh0 = tile_k * TILE; + int lsh0 = tile_l * TILE; + int ksh1 = ksh0 + TILE; + int lsh1 = lsh0 + TILE; + ShellQuartet sq; + for (int ish = ish0; ish < ish1; ++ish) { + for (int jsh = jsh0; jsh < jsh1; ++jsh) { + float q_ij = q_cond [ish*nbas+jsh]; + float d_ij = dm_cond[ish*nbas+jsh]; + int bas_ij 
= ish * nbas + jsh; + sq.i = ish; + sq.j = jsh; + for (int ksh = ksh0; ksh < MIN(ish+1, ksh1); ++ksh) { + float d_ik = dm_cond[ish*nbas+ksh]; + float d_jk = dm_cond[jsh*nbas+ksh]; + for (int lsh = lsh0; lsh < lsh1; ++lsh) { + int bas_kl = ksh * nbas + lsh; + if (bas_ij < bas_kl) { + continue; + } + float q_ijkl = q_ij + q_cond[ksh*nbas+lsh]; + if (q_ijkl < cutoff) { + continue; + } + float d_cutoff = cutoff - q_ijkl; + if ((do_k && (d_ik+dm_cond[jsh*nbas+lsh] > d_cutoff || + d_jk+dm_cond[ish*nbas+lsh] > d_cutoff)) || + (do_j && d_ij+dm_cond[ksh*nbas+lsh] > d_cutoff)) { + sq.k = ksh; + sq.l = lsh; + shl_quartet_idx[offset] = sq; + ++offset; + } + } + } + } + } + } + return ntasks; +} diff --git a/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip1.cu b/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip1.cu index 78f09701..f153c1b4 100644 --- a/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip1.cu +++ b/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip1.cu @@ -7,7 +7,6 @@ #include "vhf.cuh" #include "rys_roots.cu" #include "create_tasks_ip1.cu" -#include "create_tasks.cu" #define GWIDTH_IP1 18 @@ -56,7 +55,7 @@ static void rys_jk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bound extern __shared__ double rw[]; double *g = rw + nsq_per_block * nroots*2; double *Rpa_cicj = g + nsq_per_block * g_size*3; - double Rqc[3]; + double Rqc[3], Rpq[3]; double goutx[GWIDTH_IP1]; double gouty[GWIDTH_IP1]; double goutz[GWIDTH_IP1]; @@ -88,7 +87,7 @@ static void rys_jk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bound Rpa[0] = xpa; Rpa[1] = ypa; Rpa[2] = zpa; - double theta_ij = ai * aj / aij; + double theta_ij = ai * aj_aij; double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); Rpa[3] = ci[ip] * cj[jp] * Kab; Rpa[4] = ri[0] + xpa; @@ -167,6 +166,9 @@ static void rys_jk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bound double xpq = xij - xkl; double ypq = yij - ykl; double zpq = zij - zkl; + Rpq[0] = xpq; + Rpq[1] = ypq; + Rpq[2] = zpq; __syncthreads(); double aij = 
Rpa[10]; if (gout_id == 0) { @@ -178,12 +180,23 @@ static void rys_jk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bound double theta_rr = theta * rr; if (omega == 0) { rys_roots(nroots, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(nroots, theta_fac*theta_rr, rw); __syncthreads(); double sqrt_theta_fac = sqrt(theta_fac); - for (int irys = 0; irys < nroots; ++irys) { + for (int irys = gout_id; irys < nroots; irys+=gout_stride) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } else { + int _nroots = nroots/2; + rys_roots(_nroots, theta_rr, rw+nroots*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(_nroots, theta_fac*theta_rr, rw); + __syncthreads(); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = gout_id; irys < _nroots; irys+=gout_stride) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; } @@ -207,10 +220,7 @@ static void rys_jk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bound // gx(0,n+1) = c0*gx(0,n) + n*b10*gx(0,n-1) for (int n = gout_id; n < 3; n += gout_stride) { double *_gx = g + n * g_size * nsq_per_block; - double xij = rij[n]; - double xkl = rk[n] + Rqc[n]; - double xpq = xij - xkl; - double c0x = Rpa[n] - rt_aij * xpq; + double c0x = Rpa[n] - rt_aij * Rpq[n]; s0x = _gx[sq_id]; s1x = c0x * s0x; _gx[sq_id + nsq_per_block] = s1x; @@ -229,10 +239,7 @@ static void rys_jk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bound int i = n / 3; //for i in range(lij+1): int _ix = n % 3; double *_gx = g + (i + _ix * g_size) * nsq_per_block; - double xij = rij[_ix]; - double xkl = rk[_ix] + Rqc[_ix]; - double xpq = xij - xkl; - double cpx = Rqc[_ix] + rt_akl * xpq; + double cpx = Rqc[_ix] + rt_akl * Rpq[_ix]; //for i in range(lij+1): // trr(i,1) = c0p * trr(i,0) + i*b00 * trr(i-1,0) if (n < 
lij3) { @@ -448,7 +455,7 @@ void rys_jk_ip1_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ -static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +static void rys_ejk_ip1_general(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks) { // sq is short for shl_quartet @@ -498,11 +505,10 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int nbas = envs.nbas; int nao = ao_loc[nbas]; double *env = envs.env; - double *vj = jk.vj; - double *vk = jk.vk; - double *dm = jk.dm; - double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != NULL; + int do_k = jk.k_factor != NULL; + double *dm = jk.dm; extern __shared__ double rw[]; double *g = rw + nsq_per_block * nroots*2; double *Rpa_cicj = g + nsq_per_block * g_size*3; @@ -545,30 +551,18 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_ix = 0; - double vj_iy = 0; - double vj_iz = 0; - double vj_jx = 0; - double vj_jy = 0; - double vj_jz = 0; - double vj_kx = 0; - double vj_ky = 0; - double vj_kz = 0; - double vj_lx = 0; - double vj_ly = 0; - double vj_lz = 0; - double vk_ix = 0; - double vk_iy = 0; - double vk_iz = 0; - double vk_jx = 0; - double vk_jy = 0; - double vk_jz = 0; - double vk_kx = 0; - double vk_ky = 0; - double vk_kz = 0; - double vk_lx = 0; - double vk_ly = 0; - double vk_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; for (int ij = gout_id; ij < iprim*jprim; ij += gout_stride) { int ip = ij / jprim; int jp = ij % jprim; @@ -583,7 +577,7 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, 
JKMatrix jk, BoundsInfo boun Rpa[sq_id+0*nsq_per_block] = xjxi * aj_aij; Rpa[sq_id+1*nsq_per_block] = yjyi * aj_aij; Rpa[sq_id+2*nsq_per_block] = zjzi * aj_aij; - double theta_ij = ai * aj / aij; + double theta_ij = ai * aj_aij; double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); Rpa[sq_id+3*nsq_per_block] = fac_sym * ci[ip] * cj[jp] * Kab; } @@ -641,7 +635,7 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun double theta_rr = theta * rr; if (omega == 0) { rys_roots(nroots, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(nroots, theta_fac*theta_rr, rw); __syncthreads(); @@ -650,6 +644,17 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; } + } else { + int _nroots = nroots/2; + rys_roots(_nroots, theta_rr, rw+nroots*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(_nroots, theta_fac*theta_rr, rw); + __syncthreads(); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = gout_id; irys < _nroots; irys+=gout_stride) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } double s0x, s1x, s2x; for (int irys = 0; irys < nroots; ++irys) { @@ -789,41 +794,49 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int _j = j + j0; int _k = k + k0; int _l = l + l0; - int _jl = _j*nao+_l; - int _jk = _j*nao+_k; - int _il = _i*nao+_l; - int _ik = _i*nao+_k; - int _ji = _j*nao+_i; - int _lk = _l*nao+_k; - double dd_jk = dm[_jk] * dm[_il]; - double dd_jl = dm[_jl] * dm[_ik]; - double dd_k = dd_jk + dd_jl; - double dd_j = dm[_ji] * dm[_lk]; - if (jk.n_dm > 1) { - int nao2 = nao * nao; - double dd_jk = dm[nao2+_jk] * dm[nao2+_il]; - double dd_jl = dm[nao2+_jl] * dm[nao2+_ik]; - dd_k += dd_jk + dd_jl; - 
dd_j = (dm[_ji] + dm[nao2+_ji]) * (dm[_lk] + dm[nao2+_lk]); + double dd = 0.; + if (do_k) { + int _jl = _j*nao+_l; + int _jk = _j*nao+_k; + int _il = _i*nao+_l; + int _ik = _i*nao+_k; + dd = dm[_jk] * dm[_il]; + dd += dm[_jl] * dm[_ik]; + if (jk.n_dm > 1) { + int nao2 = nao * nao; + dd += dm[nao2+_jk] * dm[nao2+_il]; + dd += dm[nao2+_jl] * dm[nao2+_ik]; + } + dd *= jk.k_factor; + } + if (do_j) { + int _ji = _j*nao+_i; + int _lk = _l*nao+_k; + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[_ji] * dm[_lk]; + } else { + int nao2 = nao * nao; + dd += jk.j_factor * (dm[_ji] + dm[nao2+_ji]) * (dm[_lk] + dm[nao2+_lk]); + } } int addrx = sq_id + (ix + jx*stride_j + kx*stride_k + lx*stride_l) * nsq_per_block; int addry = sq_id + (iy + jy*stride_j + ky*stride_k + ly*stride_l) * nsq_per_block; int addrz = sq_id + (iz + jz*stride_j + kz*stride_k + lz*stride_l) * nsq_per_block; - double prod_xy = gx[addrx] * gy[addry]; - double prod_xz = gx[addrx] * gz[addrz]; - double prod_yz = gy[addry] * gz[addrz]; - double fix = ai2 * gx[addrx+i_1]; if (ix > 0) { fix -= ix * gx[addrx-i_1]; } fix *= prod_yz; vk_ix += fix * dd_k; vj_ix += fix * dd_j; - double fiy = ai2 * gy[addry+i_1]; if (iy > 0) { fiy -= iy * gy[addry-i_1]; } fiy *= prod_xz; vk_iy += fiy * dd_k; vj_iy += fiy * dd_j; - double fiz = ai2 * gz[addrz+i_1]; if (iz > 0) { fiz -= iz * gz[addrz-i_1]; } fiz *= prod_xy; vk_iz += fiz * dd_k; vj_iz += fiz * dd_j; - double fjx = aj2 * gx[addrx+j_1]; if (jx > 0) { fjx -= jx * gx[addrx-j_1]; } fjx *= prod_yz; vk_jx += fjx * dd_k; vj_jx += fjx * dd_j; - double fjy = aj2 * gy[addry+j_1]; if (jy > 0) { fjy -= jy * gy[addry-j_1]; } fjy *= prod_xz; vk_jy += fjy * dd_k; vj_jy += fjy * dd_j; - double fjz = aj2 * gz[addrz+j_1]; if (jz > 0) { fjz -= jz * gz[addrz-j_1]; } fjz *= prod_xy; vk_jz += fjz * dd_k; vj_jz += fjz * dd_j; - double fkx = ak2 * gx[addrx+k_1]; if (kx > 0) { fkx -= kx * gx[addrx-k_1]; } fkx *= prod_yz; vk_kx += fkx * dd_k; vj_kx += fkx * dd_j; - double fky = ak2 * gy[addry+k_1]; if 
(ky > 0) { fky -= ky * gy[addry-k_1]; } fky *= prod_xz; vk_ky += fky * dd_k; vj_ky += fky * dd_j; - double fkz = ak2 * gz[addrz+k_1]; if (kz > 0) { fkz -= kz * gz[addrz-k_1]; } fkz *= prod_xy; vk_kz += fkz * dd_k; vj_kz += fkz * dd_j; - double flx = al2 * gx[addrx+l_1]; if (lx > 0) { flx -= lx * gx[addrx-l_1]; } flx *= prod_yz; vk_lx += flx * dd_k; vj_lx += flx * dd_j; - double fly = al2 * gy[addry+l_1]; if (ly > 0) { fly -= ly * gy[addry-l_1]; } fly *= prod_xz; vk_ly += fly * dd_k; vj_ly += fly * dd_j; - double flz = al2 * gz[addrz+l_1]; if (lz > 0) { flz -= lz * gz[addrz-l_1]; } flz *= prod_xy; vk_lz += flz * dd_k; vj_lz += flz * dd_j; + double prod_xy = gx[addrx] * gy[addry] * dd; + double prod_xz = gx[addrx] * gz[addrz] * dd; + double prod_yz = gy[addry] * gz[addrz] * dd; + double fix = ai2 * gx[addrx+i_1]; if (ix > 0) { fix -= ix * gx[addrx-i_1]; } v_ix += fix * prod_yz; + double fiy = ai2 * gy[addry+i_1]; if (iy > 0) { fiy -= iy * gy[addry-i_1]; } v_iy += fiy * prod_xz; + double fiz = ai2 * gz[addrz+i_1]; if (iz > 0) { fiz -= iz * gz[addrz-i_1]; } v_iz += fiz * prod_xy; + double fjx = aj2 * gx[addrx+j_1]; if (jx > 0) { fjx -= jx * gx[addrx-j_1]; } v_jx += fjx * prod_yz; + double fjy = aj2 * gy[addry+j_1]; if (jy > 0) { fjy -= jy * gy[addry-j_1]; } v_jy += fjy * prod_xz; + double fjz = aj2 * gz[addrz+j_1]; if (jz > 0) { fjz -= jz * gz[addrz-j_1]; } v_jz += fjz * prod_xy; + double fkx = ak2 * gx[addrx+k_1]; if (kx > 0) { fkx -= kx * gx[addrx-k_1]; } v_kx += fkx * prod_yz; + double fky = ak2 * gy[addry+k_1]; if (ky > 0) { fky -= ky * gy[addry-k_1]; } v_ky += fky * prod_xz; + double fkz = ak2 * gz[addrz+k_1]; if (kz > 0) { fkz -= kz * gz[addrz-k_1]; } v_kz += fkz * prod_xy; + double flx = al2 * gx[addrx+l_1]; if (lx > 0) { flx -= lx * gx[addrx-l_1]; } v_lx += flx * prod_yz; + double fly = al2 * gy[addry+l_1]; if (ly > 0) { fly -= ly * gy[addry-l_1]; } v_ly += fly * prod_xz; + double flz = al2 * gz[addrz+l_1]; if (lz > 0) { flz -= lz * gz[addrz-l_1]; } v_lz += flz 
* prod_xy; } } } @@ -834,89 +847,49 @@ static void rys_ejk_ip1_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int la = bas[lsh*BAS_SLOTS+ATOM_OF]; int t_id = sq_id + gout_id * nsq_per_block; int threads = nsq_per_block * gout_stride; - if (vj != NULL) { + double *reduce = rw; + __syncthreads(); + reduce[t_id+0 *threads] = v_ix; + reduce[t_id+1 *threads] = v_iy; + reduce[t_id+2 *threads] = v_iz; + reduce[t_id+3 *threads] = v_jx; + reduce[t_id+4 *threads] = v_jy; + reduce[t_id+5 *threads] = v_jz; + reduce[t_id+6 *threads] = v_kx; + reduce[t_id+7 *threads] = v_ky; + reduce[t_id+8 *threads] = v_kz; + reduce[t_id+9 *threads] = v_lx; + reduce[t_id+10*threads] = v_ly; + reduce[t_id+11*threads] = v_lz; + for (int i = gout_stride/2; i > 0; i >>= 1) { __syncthreads(); - double *reduce = rw; - reduce[t_id+0 *threads] = vj_ix; - reduce[t_id+1 *threads] = vj_iy; - reduce[t_id+2 *threads] = vj_iz; - reduce[t_id+3 *threads] = vj_jx; - reduce[t_id+4 *threads] = vj_jy; - reduce[t_id+5 *threads] = vj_jz; - reduce[t_id+6 *threads] = vj_kx; - reduce[t_id+7 *threads] = vj_ky; - reduce[t_id+8 *threads] = vj_kz; - reduce[t_id+9 *threads] = vj_lx; - reduce[t_id+10*threads] = vj_ly; - reduce[t_id+11*threads] = vj_lz; - for (int i = gout_stride/2; i > 0; i >>= 1) { - __syncthreads(); - if (gout_id < i) { + if (gout_id < i) { #pragma unroll - for (int n = 0; n < 12; ++n) { - reduce[n*threads + t_id] += reduce[n*threads + t_id +i*nsq_per_block]; - } + for (int n = 0; n < 12; ++n) { + reduce[n*threads + t_id] += reduce[n*threads + t_id +i*nsq_per_block]; } } - if (gout_id == 0) { - atomicAdd(vj+ia*3+0, reduce[sq_id+0 *threads]); - atomicAdd(vj+ia*3+1, reduce[sq_id+1 *threads]); - atomicAdd(vj+ia*3+2, reduce[sq_id+2 *threads]); - atomicAdd(vj+ja*3+0, reduce[sq_id+3 *threads]); - atomicAdd(vj+ja*3+1, reduce[sq_id+4 *threads]); - atomicAdd(vj+ja*3+2, reduce[sq_id+5 *threads]); - atomicAdd(vj+ka*3+0, reduce[sq_id+6 *threads]); - atomicAdd(vj+ka*3+1, reduce[sq_id+7 *threads]); - 
atomicAdd(vj+ka*3+2, reduce[sq_id+8 *threads]); - atomicAdd(vj+la*3+0, reduce[sq_id+9 *threads]); - atomicAdd(vj+la*3+1, reduce[sq_id+10*threads]); - atomicAdd(vj+la*3+2, reduce[sq_id+11*threads]); - } } - if (vk != NULL) { - __syncthreads(); - double *reduce = rw; - reduce[t_id+0 *threads] = vk_ix; - reduce[t_id+1 *threads] = vk_iy; - reduce[t_id+2 *threads] = vk_iz; - reduce[t_id+3 *threads] = vk_jx; - reduce[t_id+4 *threads] = vk_jy; - reduce[t_id+5 *threads] = vk_jz; - reduce[t_id+6 *threads] = vk_kx; - reduce[t_id+7 *threads] = vk_ky; - reduce[t_id+8 *threads] = vk_kz; - reduce[t_id+9 *threads] = vk_lx; - reduce[t_id+10*threads] = vk_ly; - reduce[t_id+11*threads] = vk_lz; - for (int i = gout_stride/2; i > 0; i >>= 1) { - __syncthreads(); - if (gout_id < i) { -#pragma unroll - for (int n = 0; n < 12; ++n) { - reduce[n*threads + t_id] += reduce[n*threads + t_id +i*nsq_per_block]; - } - } - } - if (gout_id == 0) { - atomicAdd(vk+ia*3+0, reduce[sq_id+0 *threads]); - atomicAdd(vk+ia*3+1, reduce[sq_id+1 *threads]); - atomicAdd(vk+ia*3+2, reduce[sq_id+2 *threads]); - atomicAdd(vk+ja*3+0, reduce[sq_id+3 *threads]); - atomicAdd(vk+ja*3+1, reduce[sq_id+4 *threads]); - atomicAdd(vk+ja*3+2, reduce[sq_id+5 *threads]); - atomicAdd(vk+ka*3+0, reduce[sq_id+6 *threads]); - atomicAdd(vk+ka*3+1, reduce[sq_id+7 *threads]); - atomicAdd(vk+ka*3+2, reduce[sq_id+8 *threads]); - atomicAdd(vk+la*3+0, reduce[sq_id+9 *threads]); - atomicAdd(vk+la*3+1, reduce[sq_id+10*threads]); - atomicAdd(vk+la*3+2, reduce[sq_id+11*threads]); - } + if (gout_id == 0 && task_id < ntasks) { + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, reduce[sq_id+0 *threads]); + atomicAdd(ejk+ia*3+1, reduce[sq_id+1 *threads]); + atomicAdd(ejk+ia*3+2, reduce[sq_id+2 *threads]); + atomicAdd(ejk+ja*3+0, reduce[sq_id+3 *threads]); + atomicAdd(ejk+ja*3+1, reduce[sq_id+4 *threads]); + atomicAdd(ejk+ja*3+2, reduce[sq_id+5 *threads]); + atomicAdd(ejk+ka*3+0, reduce[sq_id+6 *threads]); + atomicAdd(ejk+ka*3+1, reduce[sq_id+7 
*threads]); + atomicAdd(ejk+ka*3+2, reduce[sq_id+8 *threads]); + atomicAdd(ejk+la*3+0, reduce[sq_id+9 *threads]); + atomicAdd(ejk+la*3+1, reduce[sq_id+10*threads]); + atomicAdd(ejk+la*3+2, reduce[sq_id+11*threads]); } } } __global__ -void rys_ejk_ip1_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_kernel(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -932,8 +905,16 @@ void rys_ejk_ip1_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, while (batch_id < nbatches) { int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { rys_ejk_ip1_general(envs, jk, bounds, shl_quartet_idx, ntasks); } diff --git a/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip2.cu b/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip2.cu index 3fee48ac..b648e7e7 100644 --- a/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip2.cu +++ b/gpu4pyscf/lib/gvhf-rys/rys_contract_jk_ip2.cu @@ -7,10 +7,15 @@ #include "vhf.cuh" #include "rys_roots.cu" #include "create_tasks_ip1.cu" +#include "create_tasks_ip2.cu" + +// type 1: (d^2i j | k l) +// type 2: (di dj | k l) +// type 3: (di j | dk l) __device__ -static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *shl_quartet_idx, int ntasks) +static void rys_ejk_ip2_type12_general(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks) { // sq is short for shl_quartet int sq_id = threadIdx.x; @@ -36,6 +41,10 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int stride_k = 
bounds.stride_k; int stride_l = bounds.stride_l; int g_size = stride_l * (ll + 2); + int g_stride_i = nsq_per_block; + int g_stride_j = stride_j*nsq_per_block; + int g_stride_k = stride_k*nsq_per_block; + int g_stride_l = stride_l*nsq_per_block; int *idx_ij = c_g_pair_idx + c_g_pair_offsets[li*LMAX1+lj]; int *idy_ij = idx_ij + nfij; int *idz_ij = idy_ij + nfij; @@ -47,31 +56,14 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int nbas = envs.nbas; int nao = ao_loc[nbas]; double *env = envs.env; - double *vj = jk.vj; - double *vk = jk.vk; - double *dm = jk.dm; double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != NULL; + int do_k = jk.k_factor != NULL; + double *dm = jk.dm; extern __shared__ double rw[]; double *g = rw + nsq_per_block * nroots*2; double *Rpa_cicj = g + nsq_per_block * g_size*3; double Rqc[3], Rpq[3]; - int at1_at2 = gout_id % 16; - int at1 = at1_at2 % 4; - int at2 = at1_at2 / 4; - int nx_at1, ny_at1, nz_at1, stride_at1; - int nx_at2, ny_at2, nz_at2, stride_at2; - switch (at1) { - case 0: stride_at1 = nsq_per_block; break; - case 1: stride_at1 = stride_j*nsq_per_block; break; - case 2: stride_at1 = stride_k*nsq_per_block; break; - case 3: stride_at1 = stride_l*nsq_per_block; break; - } - switch (at2) { - case 0: stride_at2 = nsq_per_block; break; - case 1: stride_at2 = stride_j*nsq_per_block; break; - case 2: stride_at2 = stride_k*nsq_per_block; break; - case 3: stride_at2 = stride_l*nsq_per_block; break; - } for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { __syncthreads(); @@ -110,54 +102,48 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - - int stride_assoc; - double x1x2_assoc, y1y2_assoc, z1z2_assoc; - if (at1 == at2) { - switch (at1) { - case 0: stride_assoc = stride_j*nsq_per_block; - 
x1x2_assoc = ri[0] - rj[0]; - y1y2_assoc = ri[1] - rj[1]; - z1z2_assoc = ri[2] - rj[2]; - break; - - case 1: stride_assoc = nsq_per_block; - x1x2_assoc = rj[0] - ri[0]; - y1y2_assoc = rj[1] - ri[1]; - z1z2_assoc = rj[2] - ri[2]; - break; - - case 2: stride_assoc = stride_l*nsq_per_block; - x1x2_assoc = rk[0] - rl[0]; - y1y2_assoc = rk[1] - rl[1]; - z1z2_assoc = rk[2] - rl[2]; - break; - - case 3: stride_assoc = stride_k*nsq_per_block; - x1x2_assoc = rl[0] - rk[0]; - y1y2_assoc = rl[1] - rk[1]; - z1z2_assoc = rl[2] - rk[2]; - break; - } - } - double vj_xx = 0; - double vj_xy = 0; - double vj_xz = 0; - double vj_yx = 0; - double vj_yy = 0; - double vj_yz = 0; - double vj_zx = 0; - double vj_zy = 0; - double vj_zz = 0; - double vk_xx = 0; - double vk_xy = 0; - double vk_xz = 0; - double vk_yx = 0; - double vk_yy = 0; - double vk_yz = 0; - double vk_zx = 0; - double vk_zy = 0; - double vk_zz = 0; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; + double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx = 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; for (int ij = gout_id; ij < iprim*jprim; ij += gout_stride) { int ip = ij / jprim; int jp = ij % jprim; @@ -165,15 +151,15 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun double aj = expj[jp]; double aij = ai + aj; double aj_aij = aj / 
aij; - double xjxi = rj[0] - ri[0]; - double yjyi = rj[1] - ri[1]; - double zjzi = rj[2] - ri[2]; + double xixj = ri[0] - rj[0]; + double yiyj = ri[1] - rj[1]; + double zizj = ri[2] - rj[2]; double *Rpa = Rpa_cicj + ij*4*nsq_per_block; - Rpa[sq_id+0*nsq_per_block] = xjxi * aj_aij; - Rpa[sq_id+1*nsq_per_block] = yjyi * aj_aij; - Rpa[sq_id+2*nsq_per_block] = zjzi * aj_aij; - double theta_ij = ai * aj / aij; - double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sq_id+0*nsq_per_block] = xixj * -aj_aij; + Rpa[sq_id+1*nsq_per_block] = yiyj * -aj_aij; + Rpa[sq_id+2*nsq_per_block] = zizj * -aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xixj*xixj+yiyj*yiyj+zizj*zizj)); Rpa[sq_id+3*nsq_per_block] = fac_sym * ci[ip] * cj[jp] * Kab; } for (int klp = 0; klp < kprim*lprim; ++klp) { @@ -181,20 +167,20 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int lp = klp % lprim; double ak = expk[kp]; double al = expl[lp]; - double akl = ak + al; double ak2 = ak * 2; double al2 = al * 2; + double akl = ak + al; double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - Rqc[0] = xlxk * al_akl; // (ak*xk+al*xl)/akl - Rqc[1] = ylyk * al_akl; - Rqc[2] = zlzk * al_akl; + double xkxl = rk[0] - rl[0]; + double ykyl = rk[1] - rl[1]; + double zkzl = rk[2] - rl[2]; + Rqc[0] = xkxl * -al_akl; // (ak*xk+al*xl)/akl + Rqc[1] = ykyl * -al_akl; + Rqc[2] = zkzl * -al_akl; __syncthreads(); if (gout_id == 0) { - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xkxl*xkxl+ykyl*ykyl+zkzl*zkzl)); double ckcl = ck[kp] * cl[lp] * Kcd; g[sq_id] = ckcl; } @@ -230,7 +216,7 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun double theta_rr = theta * rr; if (omega == 0) { rys_roots(nroots, theta_rr, rw); - } else { + } else if (omega > 
0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(nroots, theta_fac*theta_rr, rw); __syncthreads(); @@ -239,19 +225,17 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; } - } - double a2_at1, a2_at2; - switch (at1) { - case 0: a2_at1 = ai2; break; - case 1: a2_at1 = aj2; break; - case 2: a2_at1 = ak2; break; - case 3: a2_at1 = al2; break; - } - switch (at2) { - case 0: a2_at2 = ai2; break; - case 1: a2_at2 = aj2; break; - case 2: a2_at2 = ak2; break; - case 3: a2_at2 = al2; break; + } else { + int _nroots = nroots/2; + rys_roots(_nroots, theta_rr, rw+nroots*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(_nroots, theta_fac*theta_rr, rw); + __syncthreads(); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = gout_id; irys < _nroots; irys+=gout_stride) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } double s0x, s1x, s2x; for (int irys = 0; irys < nroots; ++irys) { @@ -367,7 +351,7 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun double *gx = g; double *gy = gx + nsq_per_block * g_size; double *gz = gy + nsq_per_block * g_size; - for (int n = gout_id/16; n < nfij*nfkl; n+=gout_stride/16) { + for (int n = gout_id; n < nfij*nfkl; n+=gout_stride) { int kl = n / nfij; int ij = n % nfij; if (kl >= nfkl) break; @@ -389,82 +373,630 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int ly = kly / (lk + 1); int kz = klz % (lk + 1); int lz = klz / (lk + 1); - switch (at1) { - case 0: nx_at1 = ix; ny_at1 = iy; nz_at1 = iz; break; - case 1: nx_at1 = jx; ny_at1 = jy; nz_at1 = jz; break; - case 2: nx_at1 = kx; ny_at1 = ky; nz_at1 = kz; break; - case 3: nx_at1 = lx; ny_at1 = ly; nz_at1 = lz; break; + int i = ij % nfi; + int j = ij / nfi; + int 
k = kl % nfk; + int l = kl / nfk; + int _i = i + i0; + int _j = j + j0; + int _k = k + k0; + int _l = l + l0; + double dd = 0.; + if (do_k) { + int _jl = _j*nao+_l; + int _jk = _j*nao+_k; + int _il = _i*nao+_l; + int _ik = _i*nao+_k; + dd = dm[_jk] * dm[_il]; + dd += dm[_jl] * dm[_ik]; + if (jk.n_dm > 1) { + int nao2 = nao * nao; + dd += dm[nao2+_jk] * dm[nao2+_il]; + dd += dm[nao2+_jl] * dm[nao2+_ik]; + } + dd *= jk.k_factor; } - switch (at2) { - case 0: nx_at2 = ix; ny_at2 = iy; nz_at2 = iz; break; - case 1: nx_at2 = jx; ny_at2 = jy; nz_at2 = jz; break; - case 2: nx_at2 = kx; ny_at2 = ky; nz_at2 = kz; break; - case 3: nx_at2 = lx; ny_at2 = ly; nz_at2 = lz; break; + if (do_j) { + int _ji = _j*nao+_i; + int _lk = _l*nao+_k; + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[_ji] * dm[_lk]; + } else { + int nao2 = nao * nao; + dd += jk.j_factor * (dm[_ji] + dm[nao2+_ji]) * (dm[_lk] + dm[nao2+_lk]); + } } int addrx = sq_id + (ix + jx*stride_j + kx*stride_k + lx*stride_l) * nsq_per_block; int addry = sq_id + (iy + jy*stride_j + ky*stride_k + ly*stride_l) * nsq_per_block; int addrz = sq_id + (iz + jz*stride_j + kz*stride_k + lz*stride_l) * nsq_per_block; - double g1x = a2_at1 * gx[addrx+stride_at1]; - double g1y = a2_at1 * gy[addry+stride_at1]; - double g1z = a2_at1 * gz[addrz+stride_at1]; - if (nx_at1 > 0) { g1x -= nx_at1 * gx[addrx-stride_at1]; } - if (ny_at1 > 0) { g1y -= ny_at1 * gy[addry-stride_at1]; } - if (nz_at1 > 0) { g1z -= nz_at1 * gz[addrz-stride_at1]; } - - double g2x = a2_at2 * gx[addrx+stride_at2]; - double g2y = a2_at2 * gy[addry+stride_at2]; - double g2z = a2_at2 * gz[addrz+stride_at2]; - if (nx_at2 > 0) { g2x -= nx_at2 * gx[addrx-stride_at2]; } - if (ny_at2 > 0) { g2y -= ny_at2 * gy[addry-stride_at2]; } - if (nz_at2 > 0) { g2z -= nz_at2 * gz[addrz-stride_at2]; } + double Ix = gx[addrx] * dd; + double Iy = gy[addry] * dd; + double Iz = gz[addrz] * dd; + double prod_yz = gy[addry] * Iz; + double prod_xz = gx[addrx] * Iz; + double prod_xy = gx[addrx] * Iy; + 
double g1x, g1y, g1z; + double g2x, g2y, g2z; double g3x, g3y, g3z; - if (at1 == at2) { - double _gx_inc2 = gx[addrx+stride_at1+stride_assoc] - gx[addrx+stride_at1] * x1x2_assoc; - double _gy_inc2 = gy[addry+stride_at1+stride_assoc] - gy[addry+stride_at1] * y1y2_assoc; - double _gz_inc2 = gz[addrz+stride_at1+stride_assoc] - gz[addrz+stride_at1] * z1z2_assoc; - g3x = a2_at1 * (a2_at1 * _gx_inc2 - (2*nx_at1+1) * gx[addrx]); - g3y = a2_at1 * (a2_at1 * _gy_inc2 - (2*ny_at1+1) * gy[addry]); - g3z = a2_at1 * (a2_at1 * _gz_inc2 - (2*nz_at1+1) * gz[addrz]); - if (nx_at1 > 1) { g3x += nx_at1*(nx_at1-1) * gx[addrx-stride_at1*2]; } - if (ny_at1 > 1) { g3y += ny_at1*(ny_at1-1) * gy[addry-stride_at1*2]; } - if (nz_at1 > 1) { g3z += nz_at1*(nz_at1-1) * gz[addrz-stride_at1*2]; } - } else { - g3x = a2_at1 * gx[addrx+stride_at1+stride_at2]; - g3y = a2_at1 * gy[addry+stride_at1+stride_at2]; - g3z = a2_at1 * gz[addrz+stride_at1+stride_at2]; - if (nx_at1 > 0) { g3x -= nx_at1 * gx[addrx-stride_at1+stride_at2]; } - if (ny_at1 > 0) { g3y -= ny_at1 * gy[addry-stride_at1+stride_at2]; } - if (nz_at1 > 0) { g3z -= nz_at1 * gz[addrz-stride_at1+stride_at2]; } - g3x *= a2_at2; - g3y *= a2_at2; - g3z *= a2_at2; - - if (nx_at2 > 0) { - double fx = a2_at1 * gx[addrx+stride_at1-stride_at2]; - if (nx_at1 > 0) { fx -= nx_at1 * gx[addrx-stride_at1-stride_at2]; } - g3x -= nx_at2 * fx; + double _gx_inc2, _gy_inc2, _gz_inc2; + g1x = aj2 * gx[addrx+g_stride_j]; + g1y = aj2 * gy[addry+g_stride_j]; + g1z = aj2 * gz[addrz+g_stride_j]; + if (jx > 0) { g1x -= jx * gx[addrx-g_stride_j]; } + if (jy > 0) { g1y -= jy * gy[addry-g_stride_j]; } + if (jz > 0) { g1z -= jz * gz[addrz-g_stride_j]; } + + g2x = ai2 * gx[addrx+g_stride_i]; + g2y = ai2 * gy[addry+g_stride_i]; + g2z = ai2 * gz[addrz+g_stride_i]; + if (ix > 0) { g2x -= ix * gx[addrx-g_stride_i]; } + if (iy > 0) { g2y -= iy * gy[addry-g_stride_i]; } + if (iz > 0) { g2z -= iz * gz[addrz-g_stride_i]; } + + g3x = ai2 * gx[addrx+g_stride_i+g_stride_j]; + g3y = ai2 
* gy[addry+g_stride_i+g_stride_j]; + g3z = ai2 * gz[addrz+g_stride_i+g_stride_j]; + if (ix > 0) { g3x -= ix * gx[addrx-g_stride_i+g_stride_j]; } + if (iy > 0) { g3y -= iy * gy[addry-g_stride_i+g_stride_j]; } + if (iz > 0) { g3z -= iz * gz[addrz-g_stride_i+g_stride_j]; } + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + if (jx > 0) { + double fx = ai2 * gx[addrx+g_stride_i-g_stride_j]; + if (ix > 0) { fx -= ix * gx[addrx-g_stride_i-g_stride_j]; } + g3x -= jx * fx; + } + if (jy > 0) { + double fy = ai2 * gy[addry+g_stride_i-g_stride_j]; + if (iy > 0) { fy -= iy * gy[addry-g_stride_i-g_stride_j]; } + g3y -= jy * fy; + } + if (jz > 0) { + double fz = ai2 * gz[addrz+g_stride_i-g_stride_j]; + if (iz > 0) { fz -= iz * gz[addrz-g_stride_i-g_stride_j]; } + g3z -= jz * fz; + } + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + + double xixj = ri[0] - rj[0]; + double yiyj = ri[1] - rj[1]; + double zizj = ri[2] - rj[2]; + _gx_inc2 = gx[addrx+g_stride_i+g_stride_j] + gx[addrx+g_stride_j] * xixj; + _gy_inc2 = gy[addry+g_stride_i+g_stride_j] + gy[addry+g_stride_j] * yiyj; + _gz_inc2 = gz[addrz+g_stride_i+g_stride_j] + gz[addrz+g_stride_j] * zizj; + g3x = aj2 * (aj2 * _gx_inc2 - (2*jx+1) * gx[addrx]); + g3y = aj2 * (aj2 * _gy_inc2 - (2*jy+1) * gy[addry]); + g3z = aj2 * (aj2 * _gz_inc2 - (2*jz+1) * gz[addrz]); + if (jx > 1) { g3x += jx*(jx-1) * gx[addrx-g_stride_j*2]; } + if (jy > 1) { g3y += jy*(jy-1) * gy[addry-g_stride_j*2]; } + if (jz > 1) { g3z += jz*(jz-1) * gz[addrz-g_stride_j*2]; } + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + + _gx_inc2 = gx[addrx+g_stride_i+g_stride_j] - gx[addrx+g_stride_i] * xixj; + _gy_inc2 = gy[addry+g_stride_i+g_stride_j] - gy[addry+g_stride_i] * yiyj; + _gz_inc2 = 
gz[addrz+g_stride_i+g_stride_j] - gz[addrz+g_stride_i] * zizj; + g3x = ai2 * (ai2 * _gx_inc2 - (2*ix+1) * gx[addrx]); + g3y = ai2 * (ai2 * _gy_inc2 - (2*iy+1) * gy[addry]); + g3z = ai2 * (ai2 * _gz_inc2 - (2*iz+1) * gz[addrz]); + if (ix > 1) { g3x += ix*(ix-1) * gx[addrx-g_stride_i*2]; } + if (iy > 1) { g3y += iy*(iy-1) * gy[addry-g_stride_i*2]; } + if (iz > 1) { g3z += iz*(iz-1) * gz[addrz-g_stride_i*2]; } + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + + g1x = al2 * gx[addrx+g_stride_l]; + g1y = al2 * gy[addry+g_stride_l]; + g1z = al2 * gz[addrz+g_stride_l]; + if (lx > 0) { g1x -= lx * gx[addrx-g_stride_l]; } + if (ly > 0) { g1y -= ly * gy[addry-g_stride_l]; } + if (lz > 0) { g1z -= lz * gz[addrz-g_stride_l]; } + + g2x = ak2 * gx[addrx+g_stride_k]; + g2y = ak2 * gy[addry+g_stride_k]; + g2z = ak2 * gz[addrz+g_stride_k]; + if (kx > 0) { g2x -= kx * gx[addrx-g_stride_k]; } + if (ky > 0) { g2y -= ky * gy[addry-g_stride_k]; } + if (kz > 0) { g2z -= kz * gz[addrz-g_stride_k]; } + + g3x = ak2 * gx[addrx+g_stride_k+g_stride_l]; + g3y = ak2 * gy[addry+g_stride_k+g_stride_l]; + g3z = ak2 * gz[addrz+g_stride_k+g_stride_l]; + if (kx > 0) { g3x -= kx * gx[addrx-g_stride_k+g_stride_l]; } + if (ky > 0) { g3y -= ky * gy[addry-g_stride_k+g_stride_l]; } + if (kz > 0) { g3z -= kz * gz[addrz-g_stride_k+g_stride_l]; } + g3x *= al2; + g3y *= al2; + g3z *= al2; + if (lx > 0) { + double fx = ak2 * gx[addrx+g_stride_k-g_stride_l]; + if (kx > 0) { fx -= kx * gx[addrx-g_stride_k-g_stride_l]; } + g3x -= lx * fx; + } + if (ly > 0) { + double fy = ak2 * gy[addry+g_stride_k-g_stride_l]; + if (ky > 0) { fy -= ky * gy[addry-g_stride_k-g_stride_l]; } + g3y -= ly * fy; + } + if (lz > 0) { + double fz = ak2 * gz[addrz+g_stride_k-g_stride_l]; + if (kz > 0) { fz -= kz * gz[addrz-g_stride_k-g_stride_l]; } + g3z -= lz * fz; + } + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z 
* prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + + double xkxl = rk[0] - rl[0]; + double ykyl = rk[1] - rl[1]; + double zkzl = rk[2] - rl[2]; + _gx_inc2 = gx[addrx+g_stride_k+g_stride_l] + gx[addrx+g_stride_l] * xkxl; + _gy_inc2 = gy[addry+g_stride_k+g_stride_l] + gy[addry+g_stride_l] * ykyl; + _gz_inc2 = gz[addrz+g_stride_k+g_stride_l] + gz[addrz+g_stride_l] * zkzl; + g3x = al2 * (al2 * _gx_inc2 - (2*lx+1) * gx[addrx]); + g3y = al2 * (al2 * _gy_inc2 - (2*ly+1) * gy[addry]); + g3z = al2 * (al2 * _gz_inc2 - (2*lz+1) * gz[addrz]); + if (lx > 1) { g3x += lx*(lx-1) * gx[addrx-g_stride_l*2]; } + if (ly > 1) { g3y += ly*(ly-1) * gy[addry-g_stride_l*2]; } + if (lz > 1) { g3z += lz*(lz-1) * gz[addrz-g_stride_l*2]; } + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + + _gx_inc2 = gx[addrx+g_stride_k+g_stride_l] - gx[addrx+g_stride_k] * xkxl; + _gy_inc2 = gy[addry+g_stride_k+g_stride_l] - gy[addry+g_stride_k] * ykyl; + _gz_inc2 = gz[addrz+g_stride_k+g_stride_l] - gz[addrz+g_stride_k] * zkzl; + g3x = ak2 * (ak2 * _gx_inc2 - (2*kx+1) * gx[addrx]); + g3y = ak2 * (ak2 * _gy_inc2 - (2*ky+1) * gy[addry]); + g3z = ak2 * (ak2 * _gz_inc2 - (2*kz+1) * gz[addrz]); + if (kx > 1) { g3x += kx*(kx-1) * gx[addrx-g_stride_k*2]; } + if (ky > 1) { g3y += ky*(ky-1) * gy[addry-g_stride_k*2]; } + if (kz > 1) { g3z += kz*(kz-1) * gz[addrz-g_stride_k*2]; } + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + 
atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, v2xx); + atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy); + atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz); + atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx); + atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy); + atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz); + atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx); + atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy); + atomicAdd(ejk + (ka*natm+la)*9 + 8, v2zz); + atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy); + atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz); + atomicAdd(ejk + (ia*natm+ia)*9 + 7, v_iyz); + atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy); + atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 6, v_jxz); + atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz); + atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy); + atomicAdd(ejk + (ka*natm+ka)*9 + 4, v_kyy*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz); + atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz); + atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5); + atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5); + atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy); + atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5); + atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz); + atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz); + atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5); + } +} + +__device__ +static void 
rys_ejk_ip2_type3_general(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks) +{ + // sq is short for shl_quartet + int sq_id = threadIdx.x; + int nsq_per_block = blockDim.x; + int gout_id = threadIdx.y; + int gout_stride = blockDim.y; + int li = bounds.li; + int lj = bounds.lj; + int lk = bounds.lk; + int ll = bounds.ll; + int nfi = bounds.nfi; + int nfk = bounds.nfk; + int nfij = bounds.nfij; + int nfkl = bounds.nfkl; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int lij = li + lj + 2; + int lkl = lk + ll + 2; + int nroots = bounds.nroots; + int stride_j = bounds.stride_j; + int stride_k = bounds.stride_k; + int stride_l = bounds.stride_l; + int g_stride_i = nsq_per_block; + int g_stride_j = stride_j*nsq_per_block; + int g_stride_k = stride_k*nsq_per_block; + int g_stride_l = stride_l*nsq_per_block; + int g_size = stride_l * (ll + 2); + int *idx_ij = c_g_pair_idx + c_g_pair_offsets[li*LMAX1+lj]; + int *idy_ij = idx_ij + nfij; + int *idz_ij = idy_ij + nfij; + int *idx_kl = c_g_pair_idx + c_g_pair_offsets[lk*LMAX1+ll]; + int *idy_kl = idx_kl + nfkl; + int *idz_kl = idy_kl + nfkl; + int *bas = envs.bas; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != NULL; + int do_k = jk.k_factor != NULL; + double *dm = jk.dm; + extern __shared__ double rw[]; + double *g = rw + nsq_per_block * nroots*2; + double *Rpa_cicj = g + nsq_per_block * g_size*3; + double Rqc[3], Rpq[3]; + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; 
+ } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + //int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx = 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + for (int ij = gout_id; ij < iprim*jprim; ij += gout_stride) { + int ip = ij / jprim; + int jp = ij % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xixj = 
ri[0] - rj[0]; + double yiyj = ri[1] - rj[1]; + double zizj = ri[2] - rj[2]; + double *Rpa = Rpa_cicj + ij*4*nsq_per_block; + Rpa[sq_id+0*nsq_per_block] = xixj * -aj_aij; + Rpa[sq_id+1*nsq_per_block] = yiyj * -aj_aij; + Rpa[sq_id+2*nsq_per_block] = zizj * -aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xixj*xixj+yiyj*yiyj+zizj*zizj)); + Rpa[sq_id+3*nsq_per_block] = fac_sym * ci[ip] * cj[jp] * Kab; + } + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double akl = ak + al; + double ak2 = ak * 2; + double al2 = al * 2; + double al_akl = al / akl; + double xkxl = rk[0] - rl[0]; + double ykyl = rk[1] - rl[1]; + double zkzl = rk[2] - rl[2]; + Rqc[0] = xkxl * -al_akl; // (ak*xk+al*xl)/akl + Rqc[1] = ykyl * -al_akl; + Rqc[2] = zkzl * -al_akl; + __syncthreads(); + if (gout_id == 0) { + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xkxl*xkxl+ykyl*ykyl+zkzl*zkzl)); + double ckcl = ck[kp] * cl[lp] * Kcd; + g[sq_id] = ckcl; + } + int ijprim = iprim * jprim; + for (int ijp = 0; ijp < ijprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp*4*nsq_per_block; + double xij = ri[0] + Rpa[sq_id+0*nsq_per_block]; + double yij = ri[1] + Rpa[sq_id+1*nsq_per_block]; + double zij = ri[2] + Rpa[sq_id+2*nsq_per_block]; + double xkl = rk[0] + Rqc[0]; + double ykl = rk[1] + Rqc[1]; + double zkl = rk[2] + Rqc[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + Rpq[0] = xpq; + Rpq[1] = ypq; + Rpq[2] = zpq; + __syncthreads(); + if (gout_id == 0) { + double cicj = Rpa[sq_id+3*nsq_per_block]; + g[sq_id + g_size * nsq_per_block] = cicj / (aij*akl*sqrt(aij+akl)); + } + double rr = xpq*xpq + ypq*ypq + zpq*zpq; + double theta = aij * akl / (aij + akl); + double theta_rr = theta * 
rr; + if (omega == 0) { + rys_roots(nroots, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(nroots, theta_fac*theta_rr, rw); + __syncthreads(); + double sqrt_theta_fac = sqrt(theta_fac); + for (int irys = gout_id; irys < nroots; irys+=gout_stride) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } else { + int _nroots = nroots/2; + rys_roots(_nroots, theta_rr, rw+nroots*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(_nroots, theta_fac*theta_rr, rw); + __syncthreads(); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = gout_id; irys < _nroots; irys+=gout_stride) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + double s0x, s1x, s2x; + for (int irys = 0; irys < nroots; ++irys) { + __syncthreads(); + if (gout_id == 0) { + g[sq_id + 2*g_size*nsq_per_block] = rw[sq_id+(irys*2+1)*nsq_per_block]; + } + double rt = rw[sq_id + irys*2*nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = rt_aa * akl; + double rt_akl = rt_aa * aij; + double b00 = .5 * rt_aa; + double b10 = .5/aij * (1 - rt_aij); + double b01 = .5/akl * (1 - rt_akl); + + __syncthreads(); + // gx(0,n+1) = c0*gx(0,n) + n*b10*gx(0,n-1) + for (int n = gout_id; n < 3; n += gout_stride) { + double *_gx = g + n * g_size * nsq_per_block; + int ir = sq_id + n * nsq_per_block; + double c0x = Rpa[ir] - rt_aij * Rpq[n]; + s0x = _gx[sq_id]; + s1x = c0x * s0x; + _gx[sq_id + nsq_per_block] = s1x; + for (int i = 1; i < lij; ++i) { + s2x = c0x * s1x + i * b10 * s0x; + _gx[sq_id + (i+1)*nsq_per_block] = s2x; + s0x = s1x; + s1x = s2x; + } + } + + int lij3 = (lij+1)*3; + for (int n = gout_id; n < lij3+gout_id; n += gout_stride) { + __syncthreads(); + int i = n / 3; //for i in range(lij+1): + int _ix = n % 3; + double *_gx = g + (i + _ix * g_size) * nsq_per_block; + 
double cpx = Rqc[_ix] + rt_akl * Rpq[_ix]; + //for i in range(lij+1): + // trr(i,1) = c0p * trr(i,0) + i*b00 * trr(i-1,0) + if (n < lij3) { + s0x = _gx[sq_id]; + s1x = cpx * s0x; + if (i > 0) { + s1x += i * b00 * _gx[sq_id-nsq_per_block]; } - if (ny_at2 > 0) { - double fy = a2_at1 * gy[addry+stride_at1-stride_at2]; - if (ny_at1 > 0) { fy -= ny_at1 * gy[addry-stride_at1-stride_at2]; } - g3y -= ny_at2 * fy; + _gx[sq_id + stride_k*nsq_per_block] = s1x; + } + + //for k in range(1, lkl): + // for i in range(lij+1): + // trr(i,k+1) = cp * trr(i,k) + k*b01 * trr(i,k-1) + i*b00 * trr(i-1,k) + for (int k = 1; k < lkl; ++k) { + __syncthreads(); + if (n < lij3) { + s2x = cpx*s1x + k*b01*s0x; + if (i > 0) { + s2x += i * b00 * _gx[sq_id + (k*stride_k-1)*nsq_per_block]; + } + _gx[sq_id + (k*stride_k+stride_k)*nsq_per_block] = s2x; + s0x = s1x; + s1x = s2x; } - if (nz_at2 > 0) { - double fz = a2_at1 * gz[addrz+stride_at1-stride_at2]; - if (nz_at1 > 0) { fz -= nz_at1 * gz[addrz-stride_at1-stride_at2]; } - g3z -= nz_at2 * fz; + } + } + + // hrr + // g(i,j+1) = rirj * g(i,j) + g(i+1,j) + // g(...,k,l+1) = rkrl * g(...,k,l) + g(...,k+1,l) + __syncthreads(); + if (task_id < ntasks) { + int lkl3 = (lkl+1)*3; + for (int m = gout_id; m < lkl3; m += gout_stride) { + int k = m / 3; + int _ix = m % 3; + double xixj = ri[_ix] - rj[_ix]; + double *_gx = g + (_ix*g_size + k*stride_k) * nsq_per_block; + for (int j = 0; j <= lj; ++j) { + int ij = (lij-j) + j*stride_j; + s1x = _gx[sq_id + ij*nsq_per_block]; + for (--ij; ij >= j*stride_j; --ij) { + s0x = _gx[sq_id + ij*nsq_per_block]; + _gx[sq_id + (ij+stride_j)*nsq_per_block] = xixj * s0x + s1x; + s1x = s0x; + } } } - double gout_xx = g3x * gy[addry] * gz[addrz]; - double gout_yy = g3y * gx[addrx] * gz[addrz]; - double gout_zz = g3z * gx[addrx] * gy[addry]; - double gout_xy = g2x * g1y * gz[addrz]; - double gout_xz = g2x * g1z * gy[addry]; - double gout_yx = g2y * g1x * gz[addrz]; - double gout_yz = g2y * g1z * gx[addrx]; - double gout_zx = g2z * 
g1x * gy[addry]; - double gout_zy = g2z * g1y * gx[addrx]; + } + __syncthreads(); + if (task_id < ntasks) { + for (int n = gout_id; n < stride_k*3; n += gout_stride) { + int i = n / 3; + int _ix = n % 3; + double xkxl = rk[_ix] - rl[_ix]; + double *_gx = g + (_ix*g_size + i) * nsq_per_block; + for (int l = 0; l <= ll; ++l) { + int kl = (lkl-l)*stride_k + l*stride_l; + s1x = _gx[sq_id + kl*nsq_per_block]; + for (kl-=stride_k; kl >= l*stride_l; kl-=stride_k) { + s0x = _gx[sq_id + kl*nsq_per_block]; + _gx[sq_id + (kl+stride_l)*nsq_per_block] = xkxl * s0x + s1x; + s1x = s0x; + } + } + } + } + + __syncthreads(); + if (task_id >= ntasks) { + continue; + } + double *gx = g; + double *gy = gx + nsq_per_block * g_size; + double *gz = gy + nsq_per_block * g_size; + for (int n = gout_id; n < nfij*nfkl; n+=gout_stride) { + int kl = n / nfij; + int ij = n % nfij; + if (kl >= nfkl) break; + int ijx = idx_ij[ij]; + int ijy = idy_ij[ij]; + int ijz = idz_ij[ij]; + int klx = idx_kl[kl]; + int kly = idy_kl[kl]; + int klz = idz_kl[kl]; + int ix = ijx % (li + 1); + int jx = ijx / (li + 1); + int iy = ijy % (li + 1); + int jy = ijy / (li + 1); + int iz = ijz % (li + 1); + int jz = ijz / (li + 1); + int kx = klx % (lk + 1); + int lx = klx / (lk + 1); + int ky = kly % (lk + 1); + int ly = kly / (lk + 1); + int kz = klz % (lk + 1); + int lz = klz / (lk + 1); int i = ij % nfi; int j = ij / nfi; @@ -474,99 +1006,299 @@ static void rys_ejk_ip2_general(RysIntEnvVars envs, JKMatrix jk, BoundsInfo boun int _j = j + j0; int _k = k + k0; int _l = l + l0; - if (vk != NULL) { + double dd = 0.; + if (do_k) { int _jl = _j*nao+_l; int _jk = _j*nao+_k; int _il = _i*nao+_l; int _ik = _i*nao+_k; - double dd_jk = dm[_jk] * dm[_il]; - double dd_jl = dm[_jl] * dm[_ik]; - double dd = dd_jk + dd_jl; + dd = dm[_jk] * dm[_il]; + dd += dm[_jl] * dm[_ik]; if (jk.n_dm > 1) { int nao2 = nao * nao; - double dd_jk = dm[nao2+_jk] * dm[nao2+_il]; - double dd_jl = dm[nao2+_jl] * dm[nao2+_ik]; - dd += dd_jk + dd_jl; + dd 
+= dm[nao2+_jk] * dm[nao2+_il]; + dd += dm[nao2+_jl] * dm[nao2+_ik]; } - vk_xx += gout_xx * dd; - vk_yy += gout_yy * dd; - vk_zz += gout_zz * dd; - vk_xy += gout_xy * dd; - vk_xz += gout_xz * dd; - vk_yx += gout_yx * dd; - vk_yz += gout_yz * dd; - vk_zx += gout_zx * dd; - vk_zy += gout_zy * dd; + dd *= jk.k_factor; } - if (vj != NULL) { + if (do_j) { int _ji = _j*nao+_i; int _lk = _l*nao+_k; - double dd; if (jk.n_dm == 1) { - dd = dm[_ji] * dm[_lk]; + dd += jk.j_factor * dm[_ji] * dm[_lk]; } else { int nao2 = nao * nao; - dd = (dm[_ji] + dm[nao2+_ji]) * (dm[_lk] + dm[nao2+_lk]); + dd += jk.j_factor * (dm[_ji] + dm[nao2+_ji]) * (dm[_lk] + dm[nao2+_lk]); } - vj_xx += gout_xx * dd; - vj_yy += gout_yy * dd; - vj_zz += gout_zz * dd; - vj_xy += gout_xy * dd; - vj_xz += gout_xz * dd; - vj_yx += gout_yx * dd; - vj_yz += gout_yz * dd; - vj_zx += gout_zx * dd; - vj_zy += gout_zy * dd; } + + int addrx = sq_id + (ix + jx*stride_j + kx*stride_k + lx*stride_l) * nsq_per_block; + int addry = sq_id + (iy + jy*stride_j + ky*stride_k + ly*stride_l) * nsq_per_block; + int addrz = sq_id + (iz + jz*stride_j + kz*stride_k + lz*stride_l) * nsq_per_block; + double Ix = gx[addrx] * dd; + double Iy = gy[addry] * dd; + double Iz = gz[addrz] * dd; + double prod_yz = gy[addry] * Iz; + double prod_xz = gx[addrx] * Iz; + double prod_xy = gx[addrx] * Iy; + + double gix, giy, giz; + double gjx, gjy, gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + gikx = ai2 * gx[addrx+g_stride_i+g_stride_k]; + giky = ai2 * gy[addry+g_stride_i+g_stride_k]; + gikz = ai2 * gz[addrz+g_stride_i+g_stride_k]; + if (ix > 0) { gikx -= ix * gx[addrx-g_stride_i+g_stride_k]; } + if (iy > 0) { giky -= iy * gy[addry-g_stride_i+g_stride_k]; } + if (iz > 0) { gikz -= iz * gz[addrz-g_stride_i+g_stride_k]; } + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + + gjkx = aj2 * gx[addrx+g_stride_j+g_stride_k]; + gjky = aj2 * 
gy[addry+g_stride_j+g_stride_k]; + gjkz = aj2 * gz[addrz+g_stride_j+g_stride_k]; + if (jx > 0) { gjkx -= jx * gx[addrx-g_stride_j+g_stride_k]; } + if (jy > 0) { gjky -= jy * gy[addry-g_stride_j+g_stride_k]; } + if (jz > 0) { gjkz -= jz * gz[addrz-g_stride_j+g_stride_k]; } + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + + if (kx > 0) { + double fx = ai2 * gx[addrx+g_stride_i-g_stride_k]; + if (ix > 0) { fx -= ix * gx[addrx-g_stride_i-g_stride_k]; } + gikx -= kx * fx; + fx = aj2 * gx[addrx+g_stride_j-g_stride_k]; + if (jx > 0) { fx -= jx * gx[addrx-g_stride_j-g_stride_k]; } + gjkx -= kx * fx; + } + if (ky > 0) { + double fy = ai2 * gy[addry+g_stride_i-g_stride_k]; + if (iy > 0) { fy -= iy * gy[addry-g_stride_i-g_stride_k]; } + giky -= ky * fy; + fy = aj2 * gy[addry+g_stride_j-g_stride_k]; + if (jy > 0) { fy -= jy * gy[addry-g_stride_j-g_stride_k]; } + gjky -= ky * fy; + } + if (kz > 0) { + double fz = ai2 * gz[addrz+g_stride_i-g_stride_k]; + if (iz > 0) { fz -= iz * gz[addrz-g_stride_i-g_stride_k]; } + gikz -= kz * fz; + fz = aj2 * gz[addrz+g_stride_j-g_stride_k]; + if (jz > 0) { fz -= jz * gz[addrz-g_stride_j-g_stride_k]; } + gjkz -= kz * fz; + } + + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + + gilx = ai2 * gx[addrx+g_stride_i+g_stride_l]; + gily = ai2 * gy[addry+g_stride_i+g_stride_l]; + gilz = ai2 * gz[addrz+g_stride_i+g_stride_l]; + if (ix > 0) { gilx -= ix * gx[addrx-g_stride_i+g_stride_l]; } + if (iy > 0) { gily -= iy * gy[addry-g_stride_i+g_stride_l]; } + if (iz > 0) { gilz -= iz * gz[addrz-g_stride_i+g_stride_l]; } + gilx *= al2; + gily *= al2; + gilz *= al2; + + gjlx = aj2 * gx[addrx+g_stride_j+g_stride_l]; + gjly = aj2 * gy[addry+g_stride_j+g_stride_l]; + gjlz = aj2 * gz[addrz+g_stride_j+g_stride_l]; + if (jx > 0) { gjlx -= jx * gx[addrx-g_stride_j+g_stride_l]; } + if (jy > 0) { gjly -= jy * gy[addry-g_stride_j+g_stride_l]; } + if (jz 
> 0) { gjlz -= jz * gz[addrz-g_stride_j+g_stride_l]; } + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + + if (lx > 0) { + double fx = ai2 * gx[addrx+g_stride_i-g_stride_l]; + if (ix > 0) { fx -= ix * gx[addrx-g_stride_i-g_stride_l]; } + gilx -= lx * fx; + fx = aj2 * gx[addrx+g_stride_j-g_stride_l]; + if (jx > 0) { fx -= jx * gx[addrx-g_stride_j-g_stride_l]; } + gjlx -= lx * fx; + } + if (ly > 0) { + double fy = ai2 * gy[addry+g_stride_i-g_stride_l]; + if (iy > 0) { fy -= iy * gy[addry-g_stride_i-g_stride_l]; } + gily -= ly * fy; + fy = aj2 * gy[addry+g_stride_j-g_stride_l]; + if (jy > 0) { fy -= jy * gy[addry-g_stride_j-g_stride_l]; } + gjly -= ly * fy; + } + if (lz > 0) { + double fz = ai2 * gz[addrz+g_stride_i-g_stride_l]; + if (iz > 0) { fz -= iz * gz[addrz-g_stride_i-g_stride_l]; } + gilz -= lz * fz; + fz = aj2 * gz[addrz+g_stride_j-g_stride_l]; + if (jz > 0) { fz -= jz * gz[addrz-g_stride_j-g_stride_l]; } + gjlz -= lz * fz; + } + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + + gix = ai2 * gx[addrx+g_stride_i]; + giy = ai2 * gy[addry+g_stride_i]; + giz = ai2 * gz[addrz+g_stride_i]; + if (ix > 0) { gix -= ix * gx[addrx-g_stride_i]; } + if (iy > 0) { giy -= iy * gy[addry-g_stride_i]; } + if (iz > 0) { giz -= iz * gz[addrz-g_stride_i]; } + + gjx = aj2 * gx[addrx+g_stride_j]; + gjy = aj2 * gy[addry+g_stride_j]; + gjz = aj2 * gz[addrz+g_stride_j]; + if (jx > 0) { gjx -= jx * gx[addrx-g_stride_j]; } + if (jy > 0) { gjy -= jy * gy[addry-g_stride_j]; } + if (jz > 0) { gjz -= jz * gz[addrz-g_stride_j]; } + + gkx = ak2 * gx[addrx+g_stride_k]; + gky = ak2 * gy[addry+g_stride_k]; + gkz = ak2 * gz[addrz+g_stride_k]; + if (kx > 0) { gkx -= kx * gx[addrx-g_stride_k]; } + if (ky > 0) { gky -= ky * gy[addry-g_stride_k]; } + if (kz > 0) { gkz -= kz * gz[addrz-g_stride_k]; } + + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + 
v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + + glx = al2 * gx[addrx+g_stride_l]; + gly = al2 * gy[addry+g_stride_l]; + glz = al2 * gz[addrz+g_stride_l]; + if (lx > 0) { glx -= lx * gx[addrx-g_stride_l]; } + if (ly > 0) { gly -= ly * gy[addry-g_stride_l]; } + if (lz > 0) { glz -= lz * gz[addrz-g_stride_l]; } + + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - switch (at1) { - case 0: nx_at1 = ia; break; - case 1: nx_at1 = ja; break; - case 2: nx_at1 = ka; break; - case 3: nx_at1 = la; break; - } - switch (at2) { - case 0: nx_at2 = ia; break; - case 1: nx_at2 = ja; break; - case 2: nx_at2 = ka; break; - case 3: nx_at2 = la; break; - } int natm = envs.natm; - if (vk != NULL) { - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 0, vk_xx); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 1, vk_xy); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 2, vk_xz); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 3, vk_yx); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 4, vk_yy); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 5, vk_yz); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 6, vk_zx); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 7, vk_zy); - atomicAdd(vk + (nx_at2*natm+nx_at1)*9 + 8, vk_zz); + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, v_ixkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky); + atomicAdd(ejk + 
(ia*natm+ka)*9 + 2, v_ixkz); + atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx); + atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky); + atomicAdd(ejk + (ia*natm+ka)*9 + 5, v_iykz); + atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky); + atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky); + atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx); + atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky); + atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz); + atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky); + atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz); + atomicAdd(ejk + (ia*natm+la)*9 + 0, v_ixlx); + atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly); + atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz); + atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx); + atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly); + atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz); + atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx); + atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly); + atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz); + atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx); + atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly); + atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz); + atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx); + atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly); + atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz); + atomicAdd(ejk + (ja*natm+la)*9 + 6, v_jzlx); + atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly); + atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz); + } +} + +__global__ +void rys_ejk_ip2_type12_kernel(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ + int b_id = blockIdx.x; + int t_id = threadIdx.y * blockDim.x + threadIdx.x; + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; + __shared__ int batch_id; + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + } + __syncthreads(); + int nbatches_kl = 
(bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; + while (batch_id < nbatches) { + int batch_ij = batch_id / nbatches_kl; + int batch_kl = batch_id % nbatches_kl; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { + rys_ejk_ip2_type12_general(envs, jk, bounds, shl_quartet_idx, ntasks); } - if (vj != NULL) { - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 0, vj_xx); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 1, vj_xy); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 2, vj_xz); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 3, vj_yx); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 4, vj_yy); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 5, vj_yz); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 6, vj_zx); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 7, vj_zy); - atomicAdd(vj + (nx_at2*natm+nx_at1)*9 + 8, vj_zz); + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + atomicAdd(batch_head+1, ntasks); } + __syncthreads(); } } __global__ -void rys_ejk_ip2_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *pool, uint32_t *batch_head) +void rys_ejk_ip2_type3_kernel(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; int t_id = threadIdx.y * blockDim.x + threadIdx.x; @@ -581,10 +1313,18 @@ void rys_ejk_ip2_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, while (batch_id < nbatches) { int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, 
batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { - rys_ejk_ip2_general(envs, jk, bounds, shl_quartet_idx, ntasks); + rys_ejk_ip2_type3_general(envs, jk, bounds, shl_quartet_idx, ntasks); } if (t_id == 0) { batch_id = atomicAdd(batch_head, 1); diff --git a/gpu4pyscf/lib/gvhf-rys/rys_jk_driver.cu b/gpu4pyscf/lib/gvhf-rys/rys_jk_driver.cu index e70e9b7a..57293be0 100644 --- a/gpu4pyscf/lib/gvhf-rys/rys_jk_driver.cu +++ b/gpu4pyscf/lib/gvhf-rys/rys_jk_driver.cu @@ -25,9 +25,11 @@ extern __global__ void rys_sr_jk_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsI ShellQuartet *pool, uint32_t *batch_head); extern __global__ void rys_jk_ip1_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head); -extern __global__ void rys_ejk_ip1_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +extern __global__ void rys_ejk_ip1_kernel(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head); -extern __global__ void rys_ejk_ip2_kernel(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +extern __global__ void rys_ejk_ip2_type12_kernel(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head); +extern __global__ void rys_ejk_ip2_type3_kernel(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head); extern int rys_j_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, ShellQuartet *pool, uint32_t *batch_head, @@ -41,11 +43,13 @@ extern int rys_sr_jk_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bou extern int os_jk_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers, double omega); -extern int rys_ejk_ip1_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, - ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers); 
extern int rys_vjk_ip1_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers); -extern int rys_ejk_ip2_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, +extern int rys_ejk_ip1_unrolled(RysIntEnvVars *envs, JKEnergy *jk, BoundsInfo *bounds, + ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers); +extern int rys_ejk_ip2_type12_unrolled(RysIntEnvVars *envs, JKEnergy *jk, BoundsInfo *bounds, + ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers); +extern int rys_ejk_ip2_type3_unrolled(RysIntEnvVars *envs, JKEnergy *jk, BoundsInfo *bounds, ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers); extern "C" { @@ -247,13 +251,14 @@ int RYS_build_jk_ip1(double *vj, double *vk, double *dm, int n_dm, int nao, int } cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { - fprintf(stderr, "CUDA Error in RYS_build_jk: %s\n", cudaGetErrorString(err)); + fprintf(stderr, "CUDA Error in RYS_build_jk_ip1: %s\n", cudaGetErrorString(err)); return 1; } return 0; } -int RYS_per_atom_jk_ip1(double *vj, double *vk, double *dm, int n_dm, int nao, +int RYS_per_atom_jk_ip1(double *ejk, double j_factor, double k_factor, + double *dm, int n_dm, int nao, RysIntEnvVars envs, int *scheme, int *shls_slice, int ntile_ij_pairs, int ntile_kl_pairs, int *tile_ij_mapping, int *tile_kl_mapping, float *tile_q_cond, @@ -294,7 +299,12 @@ int RYS_per_atom_jk_ip1(double *vj, double *vk, double *dm, int n_dm, int nao, ntile_ij_pairs, ntile_kl_pairs, tile_ij_mapping, tile_kl_mapping, q_cond, dm_cond, cutoff}; - JKMatrix jk = {vj, vk, dm, (uint16_t)n_dm}; + if (n_dm == 1) { // RHF + k_factor *= .5; + } + // *4 for the symmetry (i,j) = (j,i), (k,l) = (l,k) in J contraction + // Additional factor 1/2 from the two-electron Coulomb operator + JKEnergy jk = {ejk, dm, 2.*j_factor, -k_factor, (uint16_t)n_dm}; cudaMemset(batch_head, 0, 2*sizeof(int)); if 
(!rys_ejk_ip1_unrolled(&envs, &jk, &bounds, pool, batch_head, scheme, workers)) { @@ -303,17 +313,19 @@ int RYS_per_atom_jk_ip1(double *vj, double *vk, double *dm, int n_dm, int nao, int ij_prims = iprim * jprim; dim3 threads(quartets_per_block, gout_stride); int buflen = (nroots*2 + g_size*3 + ij_prims*4) * quartets_per_block; + buflen = MAX(buflen, 9*gout_stride*quartets_per_block); rys_ejk_ip1_kernel<<>>(envs, jk, bounds, pool, batch_head); } cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { - fprintf(stderr, "CUDA Error in RYS_build_jk: %s\n", cudaGetErrorString(err)); + fprintf(stderr, "CUDA Error in RYS_ejk_ip1: %s\n", cudaGetErrorString(err)); return 1; } return 0; } -int RYS_per_atom_jk_ip2(double *vj, double *vk, double *dm, int n_dm, int nao, +int RYS_per_atom_jk_ip2_type12(double *ejk, double j_factor, double k_factor, + double *dm, int n_dm, int nao, RysIntEnvVars envs, int *scheme, int *shls_slice, int ntile_ij_pairs, int ntile_kl_pairs, int *tile_ij_mapping, int *tile_kl_mapping, float *tile_q_cond, @@ -354,20 +366,92 @@ int RYS_per_atom_jk_ip2(double *vj, double *vk, double *dm, int n_dm, int nao, ntile_ij_pairs, ntile_kl_pairs, tile_ij_mapping, tile_kl_mapping, q_cond, dm_cond, cutoff}; - JKMatrix jk = {vj, vk, dm, (uint16_t)n_dm}; + if (n_dm > 1) { // UHF + k_factor *= 2.; + } + // *4 for the symmetry (i,j) = (j,i), (k,l) = (l,k) in J contraction + // Additional factor 1/2 from the two-electron Coulomb operator + JKEnergy jk = {ejk, dm, 4.*j_factor, -k_factor, (uint16_t)n_dm}; cudaMemset(batch_head, 0, 2*sizeof(int)); - if (!rys_ejk_ip2_unrolled(&envs, &jk, &bounds, pool, batch_head, scheme, workers)) { + if (!rys_ejk_ip2_type12_unrolled(&envs, &jk, &bounds, pool, batch_head, scheme, workers)) { int quartets_per_block = scheme[0]; int gout_stride = scheme[1]; int ij_prims = iprim * jprim; dim3 threads(quartets_per_block, gout_stride); int buflen = (nroots*2 + g_size*3 + ij_prims*4) * quartets_per_block; - rys_ejk_ip2_kernel<<>>(envs, 
jk, bounds, pool, batch_head); + rys_ejk_ip2_type12_kernel<<>>(envs, jk, bounds, pool, batch_head); } cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { - fprintf(stderr, "CUDA Error in RYS_build_jk: %s\n", cudaGetErrorString(err)); + fprintf(stderr, "CUDA Error in RYS_ejk_ip2_type12: %s\n", cudaGetErrorString(err)); + return 1; + } + return 0; +} + +int RYS_per_atom_jk_ip2_type3(double *ejk, double j_factor, double k_factor, + double *dm, int n_dm, int nao, + RysIntEnvVars envs, int *scheme, int *shls_slice, + int ntile_ij_pairs, int ntile_kl_pairs, + int *tile_ij_mapping, int *tile_kl_mapping, float *tile_q_cond, + float *q_cond, float *dm_cond, float cutoff, + ShellQuartet *pool, uint32_t *batch_head, int workers, + int *atm, int natm, int *bas, int nbas, double *env) +{ + uint16_t ish0 = shls_slice[0]; + uint16_t jsh0 = shls_slice[2]; + uint16_t ksh0 = shls_slice[4]; + uint16_t lsh0 = shls_slice[6]; + uint8_t li = bas[ANG_OF + ish0*BAS_SLOTS]; + uint8_t lj = bas[ANG_OF + jsh0*BAS_SLOTS]; + uint8_t lk = bas[ANG_OF + ksh0*BAS_SLOTS]; + uint8_t ll = bas[ANG_OF + lsh0*BAS_SLOTS]; + uint8_t iprim = bas[NPRIM_OF + ish0*BAS_SLOTS]; + uint8_t jprim = bas[NPRIM_OF + jsh0*BAS_SLOTS]; + uint8_t kprim = bas[NPRIM_OF + ksh0*BAS_SLOTS]; + uint8_t lprim = bas[NPRIM_OF + lsh0*BAS_SLOTS]; + uint8_t nfi = (li+1)*(li+2)/2; + uint8_t nfj = (lj+1)*(lj+2)/2; + uint8_t nfk = (lk+1)*(lk+2)/2; + uint8_t nfl = (ll+1)*(ll+2)/2; + uint8_t nfij = nfi * nfj; + uint8_t nfkl = nfk * nfl; + uint8_t order = li + lj + lk + ll; + uint8_t nroots = (order + 2) / 2 + 1; + double omega = env[PTR_RANGE_OMEGA]; + if (omega < 0) { // SR ERIs + nroots *= 2; + } + uint8_t stride_j = li + 2; + uint8_t stride_k = stride_j * (lj + 2); + uint8_t stride_l = stride_k * (lk + 2); + int g_size = stride_l * (uint16_t)(ll + 2); + BoundsInfo bounds = {li, lj, lk, ll, nfi, nfk, nfij, nfkl, + nroots, stride_j, stride_k, stride_l, iprim, jprim, kprim, lprim, + ntile_ij_pairs, ntile_kl_pairs, 
tile_ij_mapping, tile_kl_mapping, + q_cond, dm_cond, cutoff}; + + if (n_dm > 1) { // UHF + k_factor *= 2.; + } + // *4 for the symmetry (i,j) = (j,i), (k,l) = (l,k) in J contraction + // Additional factor 1/2 from the two-electron Coulomb operator + JKEnergy jk = {ejk, dm, 4.*j_factor, -k_factor, (uint16_t)n_dm}; + cudaMemset(batch_head, 0, 2*sizeof(int)); + + if (!rys_ejk_ip2_type3_unrolled(&envs, &jk, &bounds, pool, batch_head, scheme, workers)) { + int quartets_per_block = scheme[0]; + int gout_stride = scheme[1]; + int ij_prims = iprim * jprim; + dim3 threads(quartets_per_block, gout_stride); + int buflen = (nroots*2 + g_size*3 + ij_prims*4) * quartets_per_block; + buflen = MAX(buflen, 9*gout_stride*quartets_per_block); + rys_ejk_ip2_type3_kernel<<>>(envs, jk, bounds, pool, batch_head); + } + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + fprintf(stderr, "CUDA Error in RYS_ejk_ip2_type3: %s\n", cudaGetErrorString(err)); return 1; } return 0; @@ -384,7 +468,8 @@ void RYS_init_constant(int *g_pair_idx, int *offsets, cudaFuncSetAttribute(rys_sr_jk_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shm_size); cudaFuncSetAttribute(rys_jk_ip1_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shm_size); cudaFuncSetAttribute(rys_ejk_ip1_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shm_size); - cudaFuncSetAttribute(rys_ejk_ip2_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shm_size); + cudaFuncSetAttribute(rys_ejk_ip2_type12_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shm_size); + cudaFuncSetAttribute(rys_ejk_ip2_type3_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shm_size); } void RYS_init_rysj_constant(int shm_size) diff --git a/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip1.cu b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip1.cu index 7b74212d..1f60c839 100644 --- a/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip1.cu +++ b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip1.cu @@ -6,7 +6,7 @@ int rys_ejk_ip1_unrolled_max_order = 4; __device__ 
static -void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -21,8 +21,6 @@ void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 1 * TILE2; @@ -56,11 +54,10 @@ void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 1) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 1; ij += nsq_per_block / TILE2) { int i = ij % 1; int j = ij / 1; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -107,30 +104,18 @@ void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double 
v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; if (jk.n_dm > 1) { int nao2 = nao * nao; @@ -140,7 +125,7 @@ void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_jl_0_0 = dm[(j0+0)*nao+(l0+0)]; double dm_ik_0_0 = dm[(i0+0)*nao+(k0+0)]; double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -205,132 +190,116 @@ void _rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(1, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(1, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 1; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(1, theta_rr, rw+2*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(1, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 1; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 1; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); - prod_xy = fac * 1; - prod_xz = fac * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * 1 * dd; + 
prod_xz = fac * wt * dd; + prod_yz = 1 * wt * dd; double rt_aij = rt_aa * akl; double c0x = xpa - xpq*rt_aij; double trr_10x = c0x * fac; - fxi = ai2 * prod_yz * trr_10x; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double rt_akl = rt_aa * aij; double cpx = xqc + xpq*rt_akl; double trr_01x = cpx * fac; - fxk = ak2 * prod_yz * trr_01x; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; - fxl = al2 * prod_yz * hrr_0001x; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += 
fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, 
v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_0000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -347,8 +316,16 @@ void rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -367,7 +344,7 @@ void rys_ejk_ip1_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -382,8 +359,6 @@ void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 3 * TILE2; @@ -417,11 +392,10 @@ void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; 
- if (ij < 3) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 3; ij += nsq_per_block / TILE2) { int i = ij % 3; int j = ij / 3; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -468,30 +442,18 @@ void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; if (jk.n_dm > 1) { int nao2 = nao * nao; @@ -505,7 +467,7 @@ void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; double dm_il_1_0 = dm[(i0+1)*nao+(l0+0)]; double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -570,253 +532,213 @@ void _rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(2, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega 
* omega + theta); rys_roots(2, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 2; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); double rt_aij = rt_aa * akl; double c0x = xpa - xpq*rt_aij; double trr_10x = c0x * fac; - prod_xy = trr_10x * 1; - prod_xz = trr_10x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = 1 * wt * dd; double b10 = .5/aij * (1 - rt_aij); double trr_20x = c0x * trr_10x + 1*b10 * fac; - fxi = ai2 * prod_yz * trr_20x; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_0100y = 
trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double rt_akl = rt_aa * aij; double cpx = xqc + xpq*rt_akl; double b00 = .5 * rt_aa; double trr_11x = cpx * trr_10x + 1*b00 * fac; - fxk = ak2 * prod_yz * trr_11x; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; - fxl = al2 * prod_yz * hrr_1001x; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += 
fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * trr_10x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_01x = cpx * fac; - fxk = ak2 * prod_yz * trr_01x; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; - fxl = al2 * prod_yz * hrr_0001x; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = 
dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * 
hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = 
bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_1000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -833,8 +755,16 @@ void rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) 
{ + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -853,7 +783,7 @@ void rys_ejk_ip1_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -868,8 +798,6 @@ void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 3 * TILE2; @@ -903,11 +831,10 @@ void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 3) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 3; ij += nsq_per_block / TILE2) { int i = ij % 3; int j = ij / 3; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -954,30 +881,18 @@ void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - 
double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -1003,7 +918,7 @@ void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; double dm_il_1_0 = dm[(i0+1)*nao+(l0+0)]; double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -1068,17 +983,25 @@ void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(2, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(2, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 2; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ 
-1089,581 +1012,461 @@ void _rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_10x = c0x * fac; double b00 = .5 * rt_aa; double trr_11x = cpx * trr_10x + 1*b00 * fac; - prod_xy = trr_11x * 1; - prod_xz = trr_11x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * wt * dd; + prod_yz = 1 * wt * dd; double b10 = .5/aij * (1 - rt_aij); double trr_20x = c0x * trr_10x + 1*b10 * fac; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - fxi = ai2 * prod_yz * trr_21x; + fxi = ai2 * trr_21x; + double trr_01x = cpx * fac; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxj = aj2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - fxk = ak2 * prod_yz * trr_12x; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * 
prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1011x = trr_12x - xlxk * trr_11x; - fxl = al2 * prod_yz * hrr_1011x; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * trr_11x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; 
+ } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxj = aj2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_02x = cpx * trr_01x + 1*b01 * fac; - fxk = ak2 * prod_yz * trr_02x; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0011x = trr_02x - xlxk * trr_01x; - fxl = al2 * prod_yz * hrr_0011x; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - 
vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; 
- fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * wt; - prod_yz = trr_01y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * 
dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = trr_01y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; - fxl = al2 * prod_yz * hrr_1001x; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi 
* vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; - fxl = al2 * prod_yz * hrr_0001x; + fxl = al2 
* hrr_0001x; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * 
dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * 
prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] 
* dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * 
vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += 
jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - 
vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void 
rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_1010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -1680,8 +1483,16 @@ void rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -1700,7 +1511,7 @@ void rys_ejk_ip1_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -1715,8 +1526,6 @@ void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 3 * TILE2; @@ -1750,11 +1559,10 @@ void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 3) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 3; ij += nsq_per_block / TILE2) { int i = ij % 3; int j = ij / 3; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ 
-1801,30 +1609,18 @@ void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -1870,7 +1666,7 @@ void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; double dm_il_2_1 = dm[(i0+2)*nao+(l0+1)]; double dm_il_2_2 = dm[(i0+2)*nao+(l0+2)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -1935,17 +1731,25 @@ void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, 
rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -1960,1621 +1764,1285 @@ void _rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_01x = cpx * fac; double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; double hrr_1011x = trr_12x - xlxk * trr_11x; - prod_xy = hrr_1011x * 1; - prod_xz = hrr_1011x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1011x * 1 * dd; + prod_xz = hrr_1011x * wt * dd; + prod_yz = 1 * wt * dd; double b10 = .5/aij * (1 - rt_aij); double trr_20x = c0x * trr_10x + 1*b10 * fac; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; double hrr_2011x = trr_22x - xlxk * trr_21x; - fxi = ai2 * prod_yz * hrr_2011x; + fxi = ai2 * hrr_2011x; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + fxi -= 1 * hrr_0011x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - 
fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; - fxj = aj2 * prod_yz * hrr_1111x; + fxj = aj2 * hrr_1111x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_02x = cpx * trr_01x + 1*b01 * fac; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; double hrr_1021x = trr_13x - xlxk * trr_12x; - fxk = ak2 * prod_yz * hrr_1021x; + fxk = ak2 * hrr_1021x; + double hrr_1001x = trr_11x - xlxk * trr_10x; + fxk -= 1 * hrr_1001x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; - fxl = al2 * prod_yz * hrr_1012x; + fxl = al2 * hrr_1012x; + fxl -= 1 * trr_11x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0011x = trr_02x - xlxk * trr_01x; - fxi -= 1 * prod_yz * hrr_0011x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - fxk -= 1 * prod_yz * hrr_1001x; - fxl -= 1 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * 
vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * trr_10y; - prod_xz = hrr_0011x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_1011x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * trr_10y * dd; + prod_xz = hrr_0011x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; - fxj = aj2 * prod_yz * hrr_0111x; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; double hrr_0021x = trr_03x - xlxk * 
trr_02x; - fxk = ak2 * prod_yz * hrr_0021x; + fxk = ak2 * hrr_0021x; + double hrr_0001x = trr_01x - xlxk * fac; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; - fxl = al2 * prod_yz * hrr_0012x; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * 1; - double hrr_0001x = trr_01x - xlxk * fac; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * 1; - prod_xz = hrr_0011x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_1011x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * 
hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * 1 * dd; + prod_xz = hrr_0011x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0111x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0021x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0012x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * trr_01y; - prod_xz = hrr_1001x * wt; - prod_yz = trr_01y * wt; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1001x * trr_01y * dd; + prod_xz = hrr_1001x * wt * dd; + prod_yz = trr_01y * wt * dd; double hrr_2001x = trr_21x - xlxk * trr_20x; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxj = aj2 * prod_yz * hrr_1101x; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * 
prod_yz * hrr_1011x; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; - fxl = al2 * prod_yz * hrr_1002x; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0001x; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_11y; - prod_xz = hrr_0001x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * hrr_1001x; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * 
hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_11y * dd; + prod_xz = hrr_0001x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxj = aj2 * prod_yz * hrr_0101x; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0011x; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; - fxl = al2 * prod_yz * hrr_0002x; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * 
dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_01y; - prod_xz = hrr_0001x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - 
vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * 1; - prod_xz = hrr_1001x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_01y * dd; + prod_xz = hrr_0001x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * 
prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1001x * 1 * dd; + prod_xz = hrr_1001x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * hrr_0001x; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - 
vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_10y; - prod_xz = hrr_0001x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * 
vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * 1; - prod_xz = hrr_0001x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * trr_10y * dd; + prod_xz = hrr_0001x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * 1 * dd; + prod_xz = hrr_0001x * trr_11z * dd; + prod_yz = 1 * 
trr_11z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - 
vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0001y; - prod_xz = trr_11x * wt; - prod_yz = hrr_0001y * wt; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_10z; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_11x * hrr_0001y * dd; + prod_xz = trr_11x * wt * dd; + prod_yz = hrr_0001y * wt * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxj = aj2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1011x; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * 
hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * trr_10x; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1001y; - prod_xz = trr_01x * wt; - prod_yz = hrr_1001y * wt; - fxi = ai2 * prod_yz * trr_11x; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_1001y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = hrr_1001y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy 
* trr_10z; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxj = aj2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0011x; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0001y; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk 
* vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0001y; - prod_xz = trr_01x * trr_10z; - prod_yz = hrr_0001y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0011y; - prod_xz = trr_10x * wt; - prod_yz = hrr_0011y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * 
prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_0001y * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = hrr_0001y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = trr_10x * hrr_0011y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_0011y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; - fyj = aj2 * prod_xz * 
hrr_0111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; double hrr_0021y = trr_03y - ylyk * trr_02y; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1011y; - prod_xz = fac * wt; - prod_yz = hrr_1011y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * 
hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_1011y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_1011y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; double hrr_2011y = trr_22y - ylyk * trr_21y; - fyi = ai2 * prod_xz * hrr_2011y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * hrr_2011y; + fyi -= 1 * hrr_0011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; - fyj = aj2 * prod_xz * hrr_1111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_1111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; double hrr_1021y = trr_13y - ylyk * trr_12y; - fyk = ak2 * prod_xz * hrr_1021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_1021y; + fyk -= 1 * hrr_1001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; - fyl = al2 * prod_xz * hrr_1012y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0011y; - fyk -= 1 * prod_xz * hrr_1001y; - fyl -= 1 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_1; - 
dd_jl = dm_jl_0_1 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0011y; - prod_xz = fac * trr_10z; - prod_yz = hrr_0011y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - 
vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0001y; - prod_xz = trr_10x * trr_01z; - prod_yz = hrr_0001y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * 
vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1001y; - prod_xz = fac * trr_01z; - prod_yz = hrr_1001y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * hrr_0001y; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0001y; - prod_xz = fac * 
trr_11z; - prod_yz = hrr_0001y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * hrr_0001z; - prod_yz = 1 * hrr_0001z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_1012y; + fyl -= 1 * trr_11y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if 
(jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_0011y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_0011y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = trr_10x * hrr_0001y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = hrr_0001y * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; 
+ fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_1001y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = hrr_1001y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_0001y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = hrr_0001y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; 
+ fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * hrr_0001z * dd; + prod_yz = 1 * hrr_0001z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * trr_10x; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_0; - vk_dd 
= dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * hrr_0001z; - prod_yz = trr_10y * hrr_0001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj 
* vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * hrr_1001z; - prod_yz = 1 * hrr_1001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * hrr_0001z * dd; + prod_yz = trr_10y * hrr_0001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * 
dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * hrr_1001z * dd; + prod_yz = 1 * hrr_1001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; - fzl = al2 * prod_xy * hrr_1002z; - fzi -= 1 * prod_xy * hrr_0001z; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - 
vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * hrr_0001z; - prod_yz = trr_01y * hrr_0001z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * 
vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * hrr_0001z; - prod_yz = trr_11y * hrr_0001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * hrr_1001z; - prod_yz = trr_01y * 
hrr_1001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1002z; - fzi -= 1 * prod_xy * hrr_0001z; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_0011z; - prod_yz = 1 * hrr_0011z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_1; + 
if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * hrr_0001z * dd; + prod_yz = trr_01y * hrr_0001z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * hrr_0001z * dd; + prod_yz = trr_11y * hrr_0001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * 
prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * hrr_1001z * dd; + prod_yz = trr_01y * hrr_1001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_0011z * dd; + prod_yz = 1 * hrr_0011z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0111z = hrr_1011z - zjzi * 
hrr_0011z; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; double hrr_0021z = trr_03z - zlzk * trr_02z; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; - fzl = al2 * prod_xy * hrr_0012z; - fxi -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_0011z; - prod_yz = trr_10y * hrr_0011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 
* prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0012z; - fyi -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_1011z; - prod_yz = 1 * hrr_1011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if 
(jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_0011z * dd; + prod_yz = trr_10y * hrr_0011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_1011z * dd; + prod_yz = 1 * hrr_1011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; double hrr_2011z = trr_22z - zlzk * trr_21z; - fzi = ai2 * prod_xy * hrr_2011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2011z; + fzi -= 1 * hrr_0011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; - fzj = aj2 * prod_xy * hrr_1111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; 
+ v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; double hrr_1021z = trr_13z - zlzk * trr_12z; - fzk = ak2 * prod_xy * hrr_1021z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1021z; + fzk -= 1 * hrr_1001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; - fzl = al2 * prod_xy * hrr_1012z; - fzi -= 1 * prod_xy * hrr_0011z; - fzk -= 1 * prod_xy * hrr_1001z; - fzl -= 1 * prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_1012z; + fzl -= 1 * trr_11z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - 
atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_1011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -3591,8 +3059,16 @@ void rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = 
_fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -3611,7 +3087,7 @@ void rys_ejk_ip1_1011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -3626,8 +3102,6 @@ void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 9 * TILE2; @@ -3661,11 +3135,10 @@ void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 9) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 9; ij += nsq_per_block / TILE2) { int i = ij % 3; int j = ij / 3; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -3712,30 +3185,18 @@ void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - 
double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; if (jk.n_dm > 1) { int nao2 = nao * nao; @@ -3753,7 +3214,7 @@ void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; double dm_il_1_0 = dm[(i0+1)*nao+(l0+0)]; double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -3818,17 +3279,25 @@ void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(2, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(2, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 2; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -3838,590 +3307,470 @@ void _rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double b10 = .5/aij * (1 - rt_aij); double 
trr_20x = c0x * trr_10x + 1*b10 * fac; double hrr_1100x = trr_20x - xjxi * trr_10x; - prod_xy = hrr_1100x * 1; - prod_xz = hrr_1100x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1100x * 1 * dd; + prod_xz = hrr_1100x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxi = ai2 * prod_yz * hrr_2100x; + fxi = ai2 * hrr_2100x; + double hrr_0100x = trr_10x - xjxi * fac; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; - fxj = aj2 * prod_yz * hrr_1200x; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double rt_akl = rt_aa * aij; double cpx = xqc + xpq*rt_akl; double b00 = .5 * rt_aa; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; double trr_11x = cpx * trr_10x + 1*b00 * fac; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxk = ak2 * prod_yz * hrr_1110x; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc 
+ zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2001x = trr_21x - xlxk * trr_20x; double hrr_1001x = trr_11x - xlxk * trr_10x; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxl = al2 * prod_yz * hrr_1101x; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0100x = trr_10x - xjxi * fac; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_10y; - prod_xz = hrr_0100x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_1100x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { 
+ dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * trr_10y * dd; + prod_xz = hrr_0100x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; - fxj = aj2 * prod_yz * hrr_0200x; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_01x = cpx * fac; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxk = ak2 * prod_yz * hrr_0110x; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxl = al2 * prod_yz * hrr_0101x; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - 
vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * 1; - prod_xz = hrr_0100x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * 1 * dd; + prod_xz = hrr_0100x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = 
ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0100y; - prod_xz = trr_10x * wt; - prod_yz = hrr_0100y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1100x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_0; 
+ dd += dm_jl_1_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_0100y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_0100y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - 
vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1100y; - prod_xz = fac * wt; - prod_yz = hrr_1100y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_1100y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_1100y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0100x; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * 
prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0100y; - prod_xz = fac * trr_10z; - prod_yz = hrr_0100y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 
* prod_xy * wt; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_0100z; - prod_yz = 1 * hrr_0100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_0100y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_0100y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi 
* prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_0100z * dd; + prod_yz = 1 * hrr_0100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * 
prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_0100z; - prod_yz = trr_10y * hrr_0100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - 
vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_1100z; - prod_yz = 1 * hrr_1100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_0100z * dd; + prod_yz = trr_10y * hrr_0100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + 
dd = dm_jk_2_0 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_1100z * dd; + prod_yz = 1 * hrr_1100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzl = al2 * prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - 
vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + 
atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_1100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -4438,8 +3787,16 @@ void rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -4458,7 +3815,7 @@ void rys_ejk_ip1_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_1110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -4473,8 +3830,6 @@ void _rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 9 * TILE2; @@ -4508,11 +3863,10 @@ void 
_rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 9) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 9; ij += nsq_per_block / TILE2) { int i = ij % 3; int j = ij / 3; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -4559,30 +3913,18 @@ void _rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -4616,7 +3958,7 @@ void _rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; double dm_il_1_0 = dm[(i0+1)*nao+(l0+0)]; double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -4681,17 +4023,25 @@ void _rys_ejk_ip1_1110(RysIntEnvVars 
envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -4706,1627 +4056,1291 @@ void _rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; double trr_11x = cpx * trr_10x + 1*b00 * fac; double hrr_1110x = trr_21x - xjxi * trr_11x; - prod_xy = hrr_1110x * 1; - prod_xz = hrr_1110x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1110x * 1 * dd; + prod_xz = hrr_1110x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; double hrr_2110x = trr_31x - xjxi * trr_21x; - fxi = ai2 * prod_yz * hrr_2110x; + fxi = ai2 * hrr_2110x; + double trr_01x = cpx * fac; + double 
hrr_0110x = trr_11x - xjxi * trr_01x; + fxi -= 1 * hrr_0110x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; - fxj = aj2 * prod_yz * hrr_1210x; + fxj = aj2 * hrr_1210x; + fxj -= 1 * trr_11x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double b01 = .5/akl * (1 - rt_akl); double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double trr_01x = cpx * fac; double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; double hrr_1120x = trr_22x - xjxi * trr_12x; - fxk = ak2 * prod_yz * hrr_1120x; + fxk = ak2 * hrr_1120x; + double hrr_1100x = trr_20x - xjxi * trr_10x; + fxk -= 1 * hrr_1100x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2011x = trr_22x - xlxk * trr_21x; double hrr_1011x = trr_12x - xlxk * trr_11x; double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; - fxl = al2 * prod_yz * hrr_1111x; + fxl = al2 * hrr_1111x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0110x = trr_11x - xjxi * trr_01x; - fxi -= 1 * prod_yz * hrr_0110x; - fxj -= 1 * prod_yz * trr_11x; - double 
hrr_1100x = trr_20x - xjxi * trr_10x; - fxk -= 1 * prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * trr_10y; - prod_xz = hrr_0110x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_1110x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0110x * trr_10y * dd; + prod_xz = hrr_0110x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * 
trr_10z; + v_iz += fzi * prod_xy; double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; - fxj = aj2 * prod_yz * hrr_0210x; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_02x = cpx * trr_01x + 1*b01 * fac; double hrr_0120x = trr_12x - xjxi * trr_02x; - fxk = ak2 * prod_yz * hrr_0120x; + fxk = ak2 * hrr_0120x; + double hrr_0100x = trr_10x - xjxi * fac; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0011x = trr_02x - xlxk * trr_01x; double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; - fxl = al2 * prod_yz * hrr_0111x; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * trr_01x; - double hrr_0100x = trr_10x - xjxi * fac; - fxk -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix 
+= fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * 1; - prod_xz = hrr_0110x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0110x * 1 * dd; + prod_xz = hrr_0110x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0111x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + 
fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0100y; - prod_xz = trr_11x * wt; - prod_yz = hrr_0100y * wt; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1110x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * hrr_0100y * dd; + 
prod_xz = trr_11x * wt * dd; + prod_yz = hrr_0100y * wt * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_12x; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1011x; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; 
- vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1100y; - prod_xz = trr_01x * wt; - prod_yz = hrr_1100y * wt; - fxi = ai2 * prod_yz * trr_11x; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * hrr_1100y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = hrr_1100y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0110x; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_02x; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0011x; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * 
trr_20y; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0100y; - prod_xz = trr_01x * trr_10z; - prod_yz = hrr_0100y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { 
- dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * hrr_0100z; - prod_yz = 1 * hrr_0100z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * hrr_0100y * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = hrr_0100y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; 
+ v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * hrr_0100z * dd; + prod_yz = 1 * hrr_0100z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * trr_01x; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_0; - dd_jl = dm_jl_2_0 * 
dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * hrr_0100z; - prod_yz = trr_10y * hrr_0100z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; 
- vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * hrr_1100z; - prod_yz = 1 * hrr_1100z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * hrr_0100z * dd; + prod_yz = trr_10y * hrr_0100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * 
dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * hrr_1100z * dd; + prod_yz = 1 * hrr_1100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzl = al2 * prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_01y; - prod_xz = hrr_1100x * wt; - prod_yz = trr_01y * wt; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1100x * trr_01y * dd; + prod_xz = hrr_1100x * wt * dd; + prod_yz = trr_01y * wt * dd; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + 
v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2001x = trr_21x - xlxk * trr_20x; double hrr_1001x = trr_11x - xlxk * trr_10x; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxl = al2 * prod_yz * hrr_1101x; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_11y; - prod_xz = hrr_0100x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * 
hrr_1100x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0100x * trr_11y * dd; + prod_xz = hrr_0100x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxl = al2 * prod_yz * hrr_0101x; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * trr_01y; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_01y; - prod_xz = hrr_0100x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - 
vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0110y; - prod_xz = trr_10x * wt; - prod_yz = hrr_0110y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1100x; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0100x * trr_01y * dd; + prod_xz = hrr_0100x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; 
+ dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * hrr_0110y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_0110y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double hrr_0120y = trr_12y - yjyi * trr_02y; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += 
fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1110y; - prod_xz = fac * wt; - prod_yz = hrr_1110y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * hrr_1110y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_1110y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyi = ai2 * prod_xz * hrr_2110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0100x; + fyi = ai2 * hrr_2110y; + fyi -= 1 * hrr_0110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; - fyj = aj2 * prod_xz * hrr_1210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_1210y; + fyj -= 1 * trr_11y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_22y = 
cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; double hrr_1120y = trr_22y - yjyi * trr_12y; - fyk = ak2 * prod_xz * hrr_1120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_1120y; + fyk -= 1 * hrr_1100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2011y = trr_22y - ylyk * trr_21y; double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; - fyl = al2 * prod_xz * hrr_1111y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0110y; - fyj -= 1 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0110y; - prod_xz = fac * trr_10z; - prod_yz = hrr_0110y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * 
prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * hrr_0100z; - prod_yz = trr_01y * hrr_0100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_0_0; - dd_jl = 
dm_jl_2_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * hrr_0100z; - prod_yz = trr_11y * hrr_0100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * trr_01y; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += 
fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * hrr_1100z; - prod_yz = trr_01y * hrr_1100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - 
vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * 1; - prod_xz = hrr_1100x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_01y; + fyl = al2 * hrr_1111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * hrr_0110y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_0110y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * hrr_0100z * dd; + prod_yz = trr_01y * hrr_0100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * hrr_0100z * dd; + prod_yz = trr_11y * hrr_0100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + 
fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * hrr_1100z * dd; + prod_yz = trr_01y * hrr_1100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1100x * 1 * dd; + prod_xz = hrr_1100x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * 
prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_10y; - prod_xz = hrr_0100x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * 
prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * 1; - prod_xz = hrr_0100x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_01y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd 
+= jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0100x * trr_10y * dd; + prod_xz = hrr_0100x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0100x * 1 * dd; + prod_xz = hrr_0100x * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl 
= al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0100y; - prod_xz = trr_10x * trr_01z; - prod_yz = hrr_0100y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - 
dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1100y; - prod_xz = fac * trr_01z; - prod_yz = hrr_1100y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * 
vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0100y; - prod_xz = fac * trr_11z; - prod_yz = hrr_0100y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy 
+= fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_0110z; - prod_yz = 1 * hrr_0110z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * hrr_0100y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = hrr_0100y * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; + } + 
prod_xy = fac * hrr_1100y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = hrr_1100y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * hrr_0100y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = hrr_0100y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_0110z * dd; + prod_yz = 1 * hrr_0110z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0120z = trr_12z - zjzi * trr_02z; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; - fzl = al2 * prod_xy * hrr_0111z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly 
+= fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_0110z; - prod_yz = trr_10y * hrr_0110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0111z; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - 
vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_1110z; - prod_yz = 1 * hrr_1110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_0110z * dd; + prod_yz = trr_10y * hrr_0110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_1110z * dd; + prod_yz = 1 * hrr_1110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_31z = cpz * trr_30z + 
3*b00 * trr_20z; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzi = ai2 * prod_xy * hrr_2110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2110z; + fzi -= 1 * hrr_0110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; - fzj = aj2 * prod_xy * hrr_1210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1210z; + fzj -= 1 * trr_11z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; double hrr_1120z = trr_22z - zjzi * trr_12z; - fzk = ak2 * prod_xy * hrr_1120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1120z; + fzk -= 1 * hrr_1100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2011z = trr_22z - zlzk * trr_21z; double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; - fzl = al2 * prod_xy * hrr_1111z; - fzi -= 1 * prod_xy * hrr_0110z; - fzj -= 1 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * hrr_1100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += 
fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_1111z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void 
rys_ejk_ip1_1110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -6343,8 +5357,16 @@ void rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -6363,7 +5385,7 @@ void rys_ejk_ip1_1110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -6378,8 +5400,6 @@ void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 9 * TILE2; @@ -6413,11 +5433,10 @@ void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 9) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 9; ij += nsq_per_block / TILE2) { int i = ij % 3; int j = ij / 3; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -6464,30 +5483,18 @@ void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, 
BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -6545,7 +5552,7 @@ void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; double dm_il_2_1 = dm[(i0+2)*nao+(l0+1)]; double dm_il_2_2 = dm[(i0+2)*nao+(l0+2)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -6610,17 +5617,25 @@ void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + 
rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -6641,4753 +5656,3769 @@ void _rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; double hrr_1011x = trr_12x - xlxk * trr_11x; double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; - prod_xy = hrr_1111x * 1; - prod_xz = hrr_1111x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1111x * 1 * dd; + prod_xz = hrr_1111x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; double hrr_3011x = trr_32x - xlxk * trr_31x; double hrr_2111x = hrr_3011x - xjxi * hrr_2011x; - fxi = ai2 * prod_yz * hrr_2111x; + fxi = ai2 * hrr_2111x; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + fxi -= 1 * hrr_0111x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; 
- fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1211x = hrr_2111x - xjxi * hrr_1111x; - fxj = aj2 * prod_yz * hrr_1211x; + fxj = aj2 * hrr_1211x; + fxj -= 1 * hrr_1011x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_23x = cpx * trr_22x + 2*b01 * trr_21x + 2*b00 * trr_12x; double hrr_2021x = trr_23x - xlxk * trr_22x; - double trr_02x = cpx * trr_01x + 1*b01 * fac; double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; double hrr_1021x = trr_13x - xlxk * trr_12x; double hrr_1121x = hrr_2021x - xjxi * hrr_1021x; - fxk = ak2 * prod_yz * hrr_1121x; + fxk = ak2 * hrr_1121x; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double hrr_1001x = trr_11x - xlxk * trr_10x; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + fxk -= 1 * hrr_1101x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2012x = hrr_2021x - xlxk * hrr_2011x; double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; double hrr_1112x = hrr_2012x - xjxi * hrr_1012x; - fxl = al2 * prod_yz * hrr_1112x; + fxl = al2 * hrr_1112x; + double hrr_1110x = trr_21x - xjxi * trr_11x; + fxl -= 1 * hrr_1110x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; - fxi -= 1 * prod_yz * hrr_0111x; - fxj -= 1 * prod_yz * 
hrr_1011x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxk -= 1 * prod_yz * hrr_1101x; - double hrr_1110x = trr_21x - xjxi * trr_11x; - fxl -= 1 * prod_yz * hrr_1110x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0111x * trr_10y; - prod_xz = hrr_0111x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_1111x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0111x * trr_10y * dd; + prod_xz = hrr_0111x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_1111x; + v_ix += fxi * 
prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0211x = hrr_1111x - xjxi * hrr_0111x; - fxj = aj2 * prod_yz * hrr_0211x; + fxj = aj2 * hrr_0211x; + fxj -= 1 * hrr_0011x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; double hrr_0021x = trr_03x - xlxk * trr_02x; double hrr_0121x = hrr_1021x - xjxi * hrr_0021x; - fxk = ak2 * prod_yz * hrr_0121x; + fxk = ak2 * hrr_0121x; + double hrr_0001x = trr_01x - xlxk * fac; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + fxk -= 1 * hrr_0101x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; double hrr_0112x = hrr_1012x - xjxi * hrr_0012x; - fxl = al2 * prod_yz * hrr_0112x; - double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * hrr_0011x; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxk -= 1 * prod_yz * hrr_0101x; + fxl = al2 * hrr_0112x; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxl -= 1 * prod_yz * hrr_0110x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += 
fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0111x * 1; - prod_xz = hrr_0111x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_1111x; - fyi = ai2 * prod_xz * trr_10y; + fxl -= 1 * hrr_0110x; + v_lx += fxl * prod_yz; + double hrr_1001y = trr_11y - ylyk * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0111x * 1 * dd; + prod_xz = hrr_0111x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_1111x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0211x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0211x; + fxj -= 1 * hrr_0011x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; 
double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0121x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0121x; + fxk -= 1 * hrr_0101x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0112x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0112x; + fxl -= 1 * hrr_0110x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * hrr_0011x; - fxk -= 1 * prod_yz * hrr_0101x; - fxl -= 1 * prod_yz * hrr_0110x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1011x * hrr_0100y; - prod_xz = hrr_1011x * wt; - prod_yz = hrr_0100y * wt; - 
fxi = ai2 * prod_yz * hrr_2011x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1111x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1011x * hrr_0100y * dd; + prod_xz = hrr_1011x * wt * dd; + prod_yz = hrr_0100y * wt * dd; + fxi = ai2 * hrr_2011x; + fxi -= 1 * hrr_0011x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1111x; + v_jx += fxj * prod_yz; double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1021x; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1021x; + fxk -= 1 * hrr_1001x; + v_kx += fxk * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1012x; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1012x; + fxl -= 1 * trr_11x; + v_lx += fxl * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0011x; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * hrr_1001x; - fxl -= 1 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * hrr_1100y; - prod_xz = hrr_0011x * wt; - prod_yz = hrr_1100y * wt; - fxi = ai2 * prod_yz * hrr_1011x; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * hrr_1100y * dd; + prod_xz = hrr_0011x * wt * dd; + prod_yz = hrr_1100y * wt * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0111x; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; double hrr_1200y = hrr_2100y - yjyi * 
hrr_1100y; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0021x; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0012x; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - 
vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * hrr_0100y; - prod_xz = hrr_0011x * trr_10z; - prod_yz = hrr_0100y * trr_10z; - fxi = ai2 * prod_yz * hrr_1011x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0111x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0021x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0012x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1011x * 1; - prod_xz = hrr_1011x * hrr_0100z; - prod_yz = 1 * hrr_0100z; - fxi = ai2 * prod_yz * hrr_2011x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1111x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * 
hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * hrr_0100y * dd; + prod_xz = hrr_0011x * trr_10z * dd; + prod_yz = hrr_0100y * trr_10z * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1011x * 1 * dd; + prod_xz = hrr_1011x * hrr_0100z * dd; + prod_yz = 1 * hrr_0100z * dd; + fxi = ai2 * hrr_2011x; + fxi -= 1 * hrr_0011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0200z = hrr_1100z - zjzi 
* hrr_0100z; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_1021x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1021x; + fxk -= 1 * hrr_1001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1012x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1012x; + fxl -= 1 * trr_11x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * hrr_0011x; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * hrr_1001x; - fxl -= 1 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * trr_10y; - prod_xz = hrr_0011x * hrr_0100z; - prod_yz = trr_10y * hrr_0100z; - fxi 
= ai2 * prod_yz * hrr_1011x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0111x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_0021x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0012x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * 1; - prod_xz = hrr_0011x * hrr_1100z; - prod_yz = 1 * hrr_1100z; - fxi = ai2 * prod_yz * hrr_1011x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += 
dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * trr_10y * dd; + prod_xz = hrr_0011x * hrr_0100z * dd; + prod_yz = trr_10y * hrr_0100z * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * 1 * dd; + prod_xz = hrr_0011x * hrr_1100z * dd; + prod_yz = 1 * hrr_1100z * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0111x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * 
hrr_0021x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0012x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzl = al2 * prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1101x * trr_01y; - prod_xz = hrr_1101x * wt; - prod_yz = 
trr_01y * wt; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1101x * trr_01y * dd; + prod_xz = hrr_1101x * wt * dd; + prod_yz = trr_01y * wt * dd; double hrr_3001x = trr_31x - xlxk * trr_30x; double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; - fxi = ai2 * prod_yz * hrr_2101x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fxi = ai2 * hrr_2101x; + fxi -= 1 * hrr_0101x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1201x = hrr_2101x - xjxi * hrr_1101x; - fxj = aj2 * prod_yz * hrr_1201x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1111x; + fxj = aj2 * hrr_1201x; + fxj -= 1 * hrr_1001x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1111x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2002x = hrr_2011x - xlxk * hrr_2001x; double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; double hrr_1102x = hrr_2002x - xjxi * hrr_1002x; - fxl = al2 * prod_yz * hrr_1102x; - double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0101x; - fxj -= 1 * prod_yz * hrr_1001x; - fyk -= 1 * prod_xz * 1; + fxl = al2 * hrr_1102x; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxl -= 1 * 
prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0101x * trr_11y; - prod_xz = hrr_0101x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * hrr_1101x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fxl -= 1 * hrr_1100x; + v_lx += fxl * prod_yz; + double hrr_0011y = trr_02y - ylyk * trr_01y; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0101x * trr_11y * dd; + prod_xz = hrr_0101x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * hrr_1101x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 
1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0201x = hrr_1101x - xjxi * hrr_0101x; - fxj = aj2 * prod_yz * hrr_0201x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0111x; + fxj = aj2 * hrr_0201x; + fxj -= 1 * hrr_0001x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0111x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; double hrr_0102x = hrr_1002x - xjxi * hrr_0002x; - fxl = al2 * prod_yz * hrr_0102x; - double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * trr_01y; - fxj -= 1 * prod_yz * hrr_0001x; - fyk -= 1 * prod_xz * trr_10y; + fxl = al2 * hrr_0102x; double hrr_0100x = trr_10x - xjxi * fac; - fxl -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - 
vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0101x * trr_01y; - prod_xz = hrr_0101x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * hrr_1101x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0201x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0111x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0102x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * hrr_0001x; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * hrr_0110y; - prod_xz = 
hrr_1001x * wt; - prod_yz = hrr_0110y * wt; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1101x; + fxl -= 1 * hrr_0100x; + v_lx += fxl * prod_yz; + double hrr_1011y = trr_12y - ylyk * trr_11y; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0101x * trr_01y * dd; + prod_xz = hrr_0101x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * hrr_1101x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0201x; + fxj -= 1 * hrr_0001x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0111x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0102x; + fxl -= 1 * hrr_0100x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1001x * hrr_0110y * dd; + prod_xz = hrr_1001x * wt * dd; + prod_yz = hrr_0110y * wt * dd; + fxi = ai2 * 
hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1011x; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; double hrr_0120y = trr_12y - yjyi * trr_02y; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1002x; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0001x; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * 
vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * hrr_1110y; - prod_xz = hrr_0001x * wt; - prod_yz = hrr_1110y * wt; - fxi = ai2 * prod_yz * hrr_1001x; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * hrr_1110y * dd; + prod_xz = hrr_0001x * wt * dd; + prod_yz = hrr_1110y * wt * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyi = ai2 * prod_xz * hrr_2110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0101x; + fyi = ai2 * hrr_2110y; + fyi -= 1 * hrr_0110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; - fyj = aj2 * prod_xz * hrr_1210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0011x; + fyj = aj2 * hrr_1210y; + fyj -= 1 * trr_11y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; double hrr_1120y = trr_22y - yjyi * trr_12y; - fyk = ak2 * prod_xz * hrr_1120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0002x; + fyk = ak2 * hrr_1120y; + fyk -= 1 * hrr_1100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; 
+ v_lx += fxl * prod_yz; double hrr_2011y = trr_22y - ylyk * trr_21y; double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; - fyl = al2 * prod_xz * hrr_1111y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0110y; - fyj -= 1 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * hrr_1100y; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * hrr_0110y; - prod_xz = hrr_0001x * trr_10z; - prod_yz = hrr_0110y * trr_10z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - fxl -= 1 * 
prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * trr_01y; - prod_xz = hrr_1001x * hrr_0100z; - prod_yz = trr_01y * hrr_0100z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * hrr_0001x; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - 
vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_11y; - prod_xz = hrr_0001x * hrr_0100z; - prod_yz = trr_11y * hrr_0100z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * trr_01y; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * trr_10y; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl 
* vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_01y; - prod_xz = hrr_0001x * hrr_1100z; - prod_yz = trr_01y * hrr_1100z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * 
vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1101x * 1; - prod_xz = hrr_1101x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * hrr_2101x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1201x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1111x; - fyk = ak2 * prod_xz * trr_01y; + fyl = al2 * hrr_1111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * hrr_0110y * dd; + prod_xz = hrr_0001x * trr_10z * dd; + prod_yz = hrr_0110y * trr_10z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; 
+ } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1001x * trr_01y * dd; + prod_xz = hrr_1001x * hrr_0100z * dd; + prod_yz = trr_01y * hrr_0100z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_11y * dd; + prod_xz = hrr_0001x * hrr_0100z * dd; + prod_yz = trr_11y * hrr_0100z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * 
prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_01y * dd; + prod_xz = hrr_0001x * hrr_1100z * dd; + prod_yz = trr_01y * hrr_1100z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1101x * 1 * dd; + prod_xz = hrr_1101x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * hrr_2101x; + fxi -= 1 * hrr_0101x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1201x; + fxj -= 1 * hrr_1001x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * 
prod_xy; + fxk = ak2 * hrr_1111x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1102x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1102x; + fxl -= 1 * hrr_1100x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * hrr_0101x; - fxj -= 1 * prod_yz * hrr_1001x; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0101x * trr_10y; - prod_xz = hrr_0101x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * hrr_1101x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0201x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * 
hrr_0110z; - fxk = ak2 * prod_yz * hrr_0111x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0102x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * hrr_0001x; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0101x * 1; - prod_xz = hrr_0101x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * hrr_1101x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0201x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0111x; - fyk = ak2 * prod_xz * trr_01y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0101x * trr_10y * dd; + prod_xz = hrr_0101x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * hrr_1101x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0201x; + fxj -= 1 * hrr_0001x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0111x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0102x; + fxl -= 1 * hrr_0100x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0101x * 1 * dd; + prod_xz = hrr_0101x * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * hrr_1101x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0201x; + fxj -= 1 * hrr_0001x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0111x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0102x; - fyl = al2 * 
prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0102x; + fxl -= 1 * hrr_0100x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fxj -= 1 * prod_yz * hrr_0001x; - fzk -= 1 * prod_xy * trr_10z; - fxl -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * hrr_0100y; - prod_xz = hrr_1001x * trr_01z; - prod_yz = hrr_0100y * trr_01z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0011z; 
- fxi -= 1 * prod_yz * hrr_0001x; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * hrr_1100y; - prod_xz = hrr_0001x * trr_01z; - prod_yz = hrr_1100y * trr_01z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * 
dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * hrr_0100y; - prod_xz = hrr_0001x * trr_11z; - prod_yz = hrr_0100y * trr_11z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * trr_10z; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk 
* vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * 1; - prod_xz = hrr_1001x * hrr_0110z; - prod_yz = 1 * hrr_0110z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1001x * hrr_0100y * dd; + prod_xz = hrr_1001x * trr_01z * dd; + prod_yz = hrr_0100y * trr_01z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * 
hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * hrr_1100y * dd; + prod_xz = hrr_0001x * trr_01z * dd; + prod_yz = hrr_1100y * trr_01z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * hrr_0100y * dd; + prod_xz = hrr_0001x * trr_11z * dd; + prod_yz = hrr_0100y * trr_11z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = 
ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1001x * 1 * dd; + prod_xz = hrr_1001x * hrr_0110z * dd; + prod_yz = 1 * hrr_0110z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0120z = trr_12z - zjzi * trr_02z; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; - fzl = al2 * prod_xy * hrr_0111z; - fxi -= 1 * prod_yz * hrr_0001x; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_0_0; - 
dd_jl = dm_jl_2_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_10y; - prod_xz = hrr_0001x * hrr_0110z; - prod_yz = trr_10y * hrr_0110z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0111z; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi 
* vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * 1; - prod_xz = hrr_0001x * hrr_1110z; - prod_yz = 1 * hrr_1110z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * trr_10y * dd; + prod_xz = hrr_0001x * hrr_0110z * dd; + prod_yz = trr_10y * hrr_0110z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + 
fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * 1 * dd; + prod_xz = hrr_0001x * hrr_1110z * dd; + prod_yz = 1 * hrr_1110z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzi = ai2 * prod_xy * hrr_2110z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2110z; + fzi -= 1 * hrr_0110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; - fzj = aj2 * prod_xy * hrr_1210z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1210z; + fzj -= 1 * trr_11z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; double hrr_1120z = trr_22z - zjzi * trr_12z; - fzk = ak2 * prod_xy * hrr_1120z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1120z; + fzk -= 1 * hrr_1100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2011z = trr_22z - zlzk * trr_21z; double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; - fzl = al2 * prod_xy * hrr_1111z; - fzi -= 1 * prod_xy * hrr_0110z; - fzj -= 1 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * hrr_1100z; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = 
dm_jk_2_2 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1110x * hrr_0001y; - prod_xz = hrr_1110x * wt; - prod_yz = hrr_0001y * wt; + fzl = al2 * hrr_1111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = hrr_1110x * hrr_0001y * dd; + prod_xz = hrr_1110x * wt * dd; + prod_yz = hrr_0001y * wt * dd; double hrr_2110x = trr_31x - xjxi * trr_21x; - fxi = ai2 * prod_yz * hrr_2110x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_10z; + fxi = ai2 * hrr_2110x; + fxi -= 1 * hrr_0110x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; - 
fxj = aj2 * prod_yz * hrr_1210x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0100z; + fxj = aj2 * hrr_1210x; + fxj -= 1 * trr_11x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double hrr_1120x = trr_22x - xjxi * trr_12x; - fxk = ak2 * prod_yz * hrr_1120x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1111x; + fxk = ak2 * hrr_1120x; + fxk -= 1 * hrr_1100x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1111x; + v_lx += fxl * prod_yz; double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0110x; - fxj -= 1 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * hrr_1100x; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = 
hrr_0110x * hrr_1001y; - prod_xz = hrr_0110x * wt; - prod_yz = hrr_1001y * wt; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = hrr_0110x * hrr_1001y * dd; + prod_xz = hrr_0110x * wt * dd; + prod_yz = hrr_1001y * wt * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0100z; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double hrr_0120x = trr_12x - xjxi * trr_02x; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0111x; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0001y; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_0; 
- vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * hrr_0001y; - prod_xz = hrr_0110x * trr_10z; - prod_yz = hrr_0001y * trr_10z; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0111x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * 
vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0101y; - prod_xz = trr_11x * wt; - prod_yz = hrr_0101y * wt; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1101y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1110x; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = hrr_0110x * hrr_0001y * dd; + prod_xz = hrr_0110x * trr_10z * dd; + prod_yz = hrr_0001y * trr_10z * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 
* hrr_0111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_1; + dd += dm_jl_1_1 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_11x * hrr_0101y * dd; + prod_xz = trr_11x * wt * dd; + prod_yz = hrr_0101y * wt * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1101y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_0201y = hrr_1101y - yjyi * hrr_0101y; - fyj = aj2 * prod_xz * hrr_0201y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_0111y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1011x; + fyj = aj2 * hrr_0201y; + fyj -= 1 * hrr_0001y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0111y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_0102y = hrr_1002y - yjyi * hrr_0002y; - fyl = al2 * prod_xz * hrr_0102y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fyj -= 1 * prod_xz * hrr_0001y; - fxk -= 1 * prod_yz * trr_10x; - fyl -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_1; - dd_jl = dm_jl_1_1 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - 
vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1101y; - prod_xz = trr_01x * wt; - prod_yz = hrr_1101y * wt; - fxi = ai2 * prod_yz * trr_11x; + fyl = al2 * hrr_0102y; + fyl -= 1 * hrr_0100y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_1; + dd += dm_jl_1_1 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_1101y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = hrr_1101y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double hrr_3001y = trr_31y - ylyk * trr_30y; double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; - fyi = ai2 * prod_xz * hrr_2101y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0110x; + fyi = ai2 * hrr_2101y; + fyi -= 1 * hrr_0101y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_1201y = hrr_2101y - yjyi * hrr_1101y; - fyj = aj2 * prod_xz * hrr_1201y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz 
* trr_02x; - fyk = ak2 * prod_xz * hrr_1111y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0011x; + fyj = aj2 * hrr_1201y; + fyj -= 1 * hrr_1001y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1111y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_2002y = hrr_2011y - ylyk * hrr_2001y; double hrr_1102y = hrr_2002y - yjyi * hrr_1002y; - fyl = al2 * prod_xz * hrr_1102y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0101y; - fyj -= 1 * prod_xz * hrr_1001y; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_1; - dd_jl = dm_jl_1_1 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0101y; - prod_xz = trr_01x * trr_10z; - prod_yz = hrr_0101y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_1101y; - fzi = ai2 * prod_xy * 
trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0201y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_0111y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0102y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * hrr_0001y; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_1; - dd_jl = dm_jl_1_1 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0001y; - prod_xz = trr_11x * hrr_0100z; - prod_yz = hrr_0001y * hrr_0100z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * 
prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * trr_01x; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_10x; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_1; - dd_jl = dm_jl_2_1 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1001y; - prod_xz = trr_01x * hrr_0100z; - prod_yz = hrr_1001y * hrr_0100z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * hrr_0001y; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_1; - dd_jl = dm_jl_2_1 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * 
dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0001y; - prod_xz = trr_01x * hrr_1100z; - prod_yz = hrr_0001y * hrr_1100z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_1; - dd_jl = dm_jl_2_1 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += 
fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * hrr_0011y; - prod_xz = hrr_1100x * wt; - prod_yz = hrr_0011y * wt; + fyl = al2 * hrr_1102y; + fyl -= 1 * hrr_1100y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_1; + dd += dm_jl_1_1 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_0101y * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = hrr_0101y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1101y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0201y; + fyj -= 1 * hrr_0001y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0111y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0102y; + fyl -= 1 * hrr_0100y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * 
dm_il_0_1; + dd += dm_jl_2_1 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_11x * hrr_0001y * dd; + prod_xz = trr_11x * hrr_0100z * dd; + prod_yz = hrr_0001y * hrr_0100z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_1; + dd += dm_jl_2_1 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_1001y * dd; + prod_xz = trr_01x * hrr_0100z * dd; + prod_yz = hrr_1001y * hrr_0100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += 
fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_2_1; + dd += dm_jl_2_1 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_0001y * dd; + prod_xz = trr_01x * hrr_1100z * dd; + prod_yz = hrr_0001y * hrr_1100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = hrr_1100x * hrr_0011y * dd; + prod_xz = hrr_1100x * wt * dd; + prod_yz = hrr_0011y * wt * dd; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_10z; + fxi 
= ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; double hrr_0021y = trr_03y - ylyk * trr_02y; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1101x; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - 
vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_1011y; - prod_xz = hrr_0100x * wt; - prod_yz = hrr_1011y * wt; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * hrr_2011y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = hrr_0100x * hrr_1011y * dd; + prod_xz = hrr_0100x * wt * dd; + prod_yz = hrr_1011y * wt * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2011y; + fyi -= 1 * hrr_0011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; double hrr_1021y = trr_13y - ylyk * trr_12y; - fyk = ak2 * prod_xz * hrr_1021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0101x; + fyk = ak2 * hrr_1021y; + fyk -= 1 * hrr_1001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * 
prod_yz; double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; - fyl = al2 * prod_xz * hrr_1012y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0011y; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * hrr_1001y; - fyl -= 1 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_0011y; - prod_xz = hrr_0100x * trr_10z; - prod_yz = hrr_0011y * trr_10z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * 
dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0111y; - prod_xz = trr_10x * wt; - prod_yz = hrr_0111y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1111y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1100x; + fyl = al2 * hrr_1012y; + fyl -= 1 * trr_11y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = hrr_0100x * hrr_0011y * dd; + prod_xz = hrr_0100x * trr_10z * dd; + prod_yz = hrr_0011y * trr_10z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj 
= aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_0_1; + dd += dm_jl_1_1 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = trr_10x * hrr_0111y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_0111y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1111y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_0211y = hrr_1111y - yjyi * hrr_0111y; - fyj = aj2 * prod_xz * hrr_0211y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_0211y; + fyj -= 1 * hrr_0011y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double hrr_0121y = hrr_1021y - yjyi * hrr_0021y; - fyk = ak2 * prod_xz * hrr_0121y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_0121y; + fyk -= 1 * hrr_0101y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_0112y = hrr_1012y - yjyi * hrr_0012y; - fyl = al2 * prod_xz * hrr_0112y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * 
hrr_0011y; - fyk -= 1 * prod_xz * hrr_0101y; - fyl -= 1 * prod_xz * hrr_0110y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_0_1; - dd_jl = dm_jl_1_1 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1111y; - prod_xz = fac * wt; - prod_yz = hrr_1111y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_0112y; + fyl -= 1 * hrr_0110y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_1_1; + dd += dm_jl_1_1 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_1111y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_1111y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; double hrr_3011y = trr_32y - ylyk * trr_31y; double hrr_2111y = 
hrr_3011y - yjyi * hrr_2011y; - fyi = ai2 * prod_xz * hrr_2111y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0100x; + fyi = ai2 * hrr_2111y; + fyi -= 1 * hrr_0111y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_1211y = hrr_2111y - yjyi * hrr_1111y; - fyj = aj2 * prod_xz * hrr_1211y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_1211y; + fyj -= 1 * hrr_1011y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_23y = cpy * trr_22y + 2*b01 * trr_21y + 2*b00 * trr_12y; double hrr_2021y = trr_23y - ylyk * trr_22y; double hrr_1121y = hrr_2021y - yjyi * hrr_1021y; - fyk = ak2 * prod_xz * hrr_1121y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_1121y; + fyk -= 1 * hrr_1101y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2012y = hrr_2021y - ylyk * hrr_2011y; double hrr_1112y = hrr_2012y - yjyi * hrr_1012y; - fyl = al2 * prod_xz * hrr_1112y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 1 * prod_xz * hrr_0111y; - fyj -= 1 * prod_xz * hrr_1011y; - fyk -= 1 * prod_xz * hrr_1101y; - fyl -= 1 * prod_xz * hrr_1110y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_1_1; - dd_jl = dm_jl_1_1 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * 
vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0111y; - prod_xz = fac * trr_10z; - prod_yz = hrr_0111y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1111y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0211y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0121y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0112y; - fzl = al2 * prod_xy * hrr_1001z; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * hrr_0011y; - fyk -= 1 * prod_xz * hrr_0101y; - fyl -= 1 * prod_xz * hrr_0110y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_2_1; - dd_jl = dm_jl_1_1 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - 
vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0011y; - prod_xz = trr_10x * hrr_0100z; - prod_yz = hrr_0011y * hrr_0100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_0_1; - dd_jl = dm_jl_2_1 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1011y; - prod_xz = fac * hrr_0100z; - prod_yz = hrr_1011y * hrr_0100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2011y; - fzi = ai2 * prod_xy * 
hrr_1100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1111y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1021y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1012y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 1 * prod_xz * hrr_0011y; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * hrr_1001y; - fyl -= 1 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_1_1; - dd_jl = dm_jl_2_1 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0011y; - prod_xz = fac * hrr_1100z; - prod_yz = hrr_0011y * hrr_1100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * 
prod_xy * hrr_1101z; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_2_1; - dd_jl = dm_jl_2_1 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * hrr_0001y; - prod_xz = hrr_1100x * trr_01z; - prod_yz = hrr_0001y * trr_01z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_1001y; - prod_xz = hrr_0100x * trr_01z; - prod_yz = hrr_1001y * trr_01z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * hrr_0001y; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj 
* vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_0001y; - prod_xz = hrr_0100x * trr_11z; - prod_yz = hrr_0001y * trr_11z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * trr_10z; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * 
vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0101y; - prod_xz = trr_10x * trr_01z; - prod_yz = hrr_0101y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1101y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0201y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0111y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0102y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * hrr_0001y; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_0_1; - dd_jl = dm_jl_1_1 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = 
fac * hrr_1101y; - prod_xz = fac * trr_01z; - prod_yz = hrr_1101y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2101y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1201y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1111y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1102y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 1 * prod_xz * hrr_0101y; - fyj -= 1 * prod_xz * hrr_1001y; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_1_1; - dd_jl = dm_jl_1_1 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0101y; - prod_xz = fac * trr_11z; - prod_yz = hrr_0101y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1101y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0201y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * 
trr_01x; - fyk = ak2 * prod_xz * hrr_0111y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0102y; - fzl = al2 * prod_xy * hrr_1011z; - fzi -= 1 * prod_xy * trr_01z; - fyj -= 1 * prod_xz * hrr_0001y; - fzk -= 1 * prod_xy * trr_10z; - fyl -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_2_1; - dd_jl = dm_jl_1_1 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0001y; - prod_xz = trr_10x * hrr_0110z; - prod_yz = hrr_0001y * hrr_0110z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_0111z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - fyl -= 1 * 
prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_0_1; - dd_jl = dm_jl_2_1 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1001y; - prod_xz = fac * hrr_0110z; - prod_yz = hrr_1001y * hrr_0110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0111z; - fyi -= 1 * prod_xz * hrr_0001y; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_1_1; - dd_jl = dm_jl_2_1 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - 
vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0001y; - prod_xz = fac * hrr_1110z; - prod_yz = hrr_0001y * hrr_1110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * hrr_2110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * hrr_1120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1111z; - fzi -= 1 * prod_xy * hrr_0110z; - fzj -= 1 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * hrr_1100z; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_2_1; - dd_jl = dm_jl_2_1 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * 
vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1110x * 1; - prod_xz = hrr_1110x * hrr_0001z; - prod_yz = 1 * hrr_0001z; - fxi = ai2 * prod_yz * hrr_2110x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1210x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * hrr_1120x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1111x; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_1112y; + fyl -= 1 * hrr_1110y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_2_1; + dd += dm_jl_1_1 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_0111y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_0111y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1111y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0211y; + fyj -= 1 * hrr_0011y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0121y; + fyk -= 1 * hrr_0101y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + 
v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0112y; + fyl -= 1 * hrr_0110y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_0_1; + dd += dm_jl_2_1 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = trr_10x * hrr_0011y * dd; + prod_xz = trr_10x * hrr_0100z * dd; + prod_yz = hrr_0011y * hrr_0100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_1_1; + dd += dm_jl_2_1 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_1011y * dd; + prod_xz = fac * hrr_0100z * dd; + prod_yz = hrr_1011y * hrr_0100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2011y; + fyi -= 1 * hrr_0011y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * 
hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1021y; + fyk -= 1 * hrr_1001y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1012y; + fyl -= 1 * trr_11y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_2_1; + dd += dm_jl_2_1 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_0011y * dd; + prod_xz = fac * hrr_1100z * dd; + prod_yz = hrr_0011y * hrr_1100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_2; 
+ } + prod_xy = hrr_1100x * hrr_0001y * dd; + prod_xz = hrr_1100x * trr_01z * dd; + prod_yz = hrr_0001y * trr_01z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = hrr_0100x * hrr_1001y * dd; + prod_xz = hrr_0100x * trr_01z * dd; + prod_yz = hrr_1001y * trr_01z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * 
dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = hrr_0100x * hrr_0001y * dd; + prod_xz = hrr_0100x * trr_11z * dd; + prod_yz = hrr_0001y * trr_11z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_0_1; + dd += dm_jl_1_1 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = trr_10x * hrr_0101y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = hrr_0101y * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1101y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0201y; + fyj -= 1 * hrr_0001y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0111y; + v_ky += fyk * prod_xz; + fzk = 
ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0102y; + fyl -= 1 * hrr_0100y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_1_1; + dd += dm_jl_1_1 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_1101y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = hrr_1101y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2101y; + fyi -= 1 * hrr_0101y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1201y; + fyj -= 1 * hrr_1001y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1111y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1102y; + fyl -= 1 * hrr_1100y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_2_1; + dd += dm_jl_1_1 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+1)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_0101y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = hrr_0101y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1101y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + 
fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0201y; + fyj -= 1 * hrr_0001y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0111y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0102y; + fyl -= 1 * hrr_0100y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_0_1; + dd += dm_jl_2_1 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = trr_10x * hrr_0001y * dd; + prod_xz = trr_10x * hrr_0110z * dd; + prod_yz = hrr_0001y * hrr_0110z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_1_1; + dd += dm_jl_2_1 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] 
* dm_lk_1_2; + } + prod_xy = fac * hrr_1001y * dd; + prod_xz = fac * hrr_0110z * dd; + prod_yz = hrr_1001y * hrr_0110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_2_1; + dd += dm_jl_2_1 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+2)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_0001y * dd; + prod_xz = fac * hrr_1110z * dd; + prod_yz = hrr_0001y * hrr_1110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2110z; + fzi -= 1 * hrr_0110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1210z; + fzj -= 1 * trr_11z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1120z; + fzk -= 1 * hrr_1100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd 
= dm_jk_0_0 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = hrr_1110x * 1 * dd; + prod_xz = hrr_1110x * hrr_0001z * dd; + prod_yz = 1 * hrr_0001z * dd; + fxi = ai2 * hrr_2110x; + fxi -= 1 * hrr_0110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1210x; + fxj -= 1 * trr_11x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1120x; + fxk -= 1 * hrr_1100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * hrr_0110x; - fxj -= 1 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * hrr_1100x; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * 
vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * trr_10y; - prod_xz = hrr_0110x * hrr_0001z; - prod_yz = trr_10y * hrr_0001z; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0111x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - 
prod_xy = hrr_0110x * 1; - prod_xz = hrr_0110x * hrr_1001z; - prod_yz = 1 * hrr_1001z; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0111x; - fyl = al2 * prod_xz * hrr_0001y; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = hrr_0110x * trr_10y * dd; + prod_xz = hrr_0110x * hrr_0001z * dd; + prod_yz = trr_10y * hrr_0001z * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * 
dm_cache[2*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = hrr_0110x * 1 * dd; + prod_xz = hrr_0110x * hrr_1001z * dd; + prod_yz = 1 * hrr_1001z * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; - fzl = al2 * prod_xy * hrr_1002z; - fzi -= 1 * prod_xy * hrr_0001z; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; 
- vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0100y; - prod_xz = trr_11x * hrr_0001z; - prod_yz = hrr_0100y * hrr_0001z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * trr_01x; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_10x; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_2; - dd_jl = dm_jl_1_2 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1100y; - prod_xz = trr_01x * hrr_0001z; - prod_yz = hrr_1100y * hrr_0001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1200y; - 
fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_2; - dd_jl = dm_jl_1_2 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0100y; - prod_xz = trr_01x * hrr_1001z; - prod_yz = hrr_0100y * hrr_1001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1002z; - fzi -= 1 * prod_xy * hrr_0001z; - fyj -= 1 * prod_xz * 1; - fxk 
-= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_2; - dd_jl = dm_jl_1_2 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * hrr_0101z; - prod_yz = 1 * hrr_0101z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1101z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_2; + dd += dm_jl_1_2 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_11x * hrr_0100y * dd; + prod_xz = trr_11x * hrr_0001z * dd; + prod_yz = hrr_0100y * hrr_0001z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * 
hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_2; + dd += dm_jl_1_2 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * hrr_1100y * dd; + prod_xz = trr_01x * hrr_0001z * dd; + prod_yz = hrr_1100y * hrr_0001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_2; + dd += dm_jl_1_2 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; 
+ } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * hrr_0100y * dd; + prod_xz = trr_01x * hrr_1001z * dd; + prod_yz = hrr_0100y * hrr_1001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_2; + dd += dm_jl_2_2 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * hrr_0101z * dd; + prod_yz = 1 * hrr_0101z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1101z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0201z = hrr_1101z - zjzi * hrr_0101z; - fzj = aj2 * prod_xy * hrr_0201z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_0111z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_0201z; + fzj -= 1 * hrr_0001z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * 
prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0111z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0102z = hrr_1002z - zjzi * hrr_0002z; - fzl = al2 * prod_xy * hrr_0102z; - fxi -= 1 * prod_yz * trr_01x; - fzj -= 1 * prod_xy * hrr_0001z; - fxk -= 1 * prod_yz * trr_10x; - fzl -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_2; - dd_jl = dm_jl_2_2 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * hrr_0101z; - prod_yz = trr_10y * hrr_0101z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1101z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0201z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0111z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0102z; - fyi -= 1 * 
prod_xz * 1; - fzj -= 1 * prod_xy * hrr_0001z; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_2; - dd_jl = dm_jl_2_2 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * hrr_1101z; - prod_yz = 1 * hrr_1101z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0102z; + fzl -= 1 * hrr_0100z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_2; + dd += dm_jl_2_2 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * hrr_0101z * dd; + prod_yz = trr_10y * hrr_0101z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1101z; + v_iz 
+= fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0201z; + fzj -= 1 * hrr_0001z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0111z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0102z; + fzl -= 1 * hrr_0100z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_2_2; + dd += dm_jl_2_2 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * hrr_1101z * dd; + prod_yz = 1 * hrr_1101z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double hrr_3001z = trr_31z - zlzk * trr_30z; double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; - fzi = ai2 * prod_xy * hrr_2101z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2101z; + fzi -= 1 * hrr_0101z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1201z = hrr_2101z - zjzi * hrr_1101z; - fzj = aj2 * prod_xy * hrr_1201z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_1111z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_1201z; + fzj -= 1 * hrr_1001z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1111z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + 
fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2002z = hrr_2011z - zlzk * hrr_2001z; double hrr_1102z = hrr_2002z - zjzi * hrr_1002z; - fzl = al2 * prod_xy * hrr_1102z; - fzi -= 1 * prod_xy * hrr_0101z; - fzj -= 1 * prod_xy * hrr_1001z; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * hrr_1100z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_2; - dd_jl = dm_jl_2_2 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_01y; - prod_xz = hrr_1100x * hrr_0001z; - prod_yz = trr_01y * hrr_0001z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * 
wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_11y; - prod_xz = hrr_0100x * hrr_0001z; - prod_yz = trr_11y * hrr_0001z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 1 * prod_xz * trr_01y; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * trr_10y; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi 
* vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_01y; - prod_xz = hrr_0100x * hrr_1001z; - prod_yz = trr_01y * hrr_1001z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1002z; - fzi -= 1 * prod_xy * hrr_0001z; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - 
vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0110y; - prod_xz = trr_10x * hrr_0001z; - prod_yz = hrr_0110y * hrr_0001z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_0_2; - dd_jl = dm_jl_1_2 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - 
vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1110y; - prod_xz = fac * hrr_0001z; - prod_yz = hrr_1110y * hrr_0001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2110y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1210y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1120y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1111y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 1 * prod_xz * hrr_0110y; - fyj -= 1 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * hrr_1100y; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_1_2; - dd_jl = dm_jl_1_2 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0110y; - prod_xz = fac * hrr_1001z; - prod_yz = hrr_0110y * hrr_1001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1110y; 
- fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_1002z; - fzi -= 1 * prod_xy * hrr_0001z; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_2_2; - dd_jl = dm_jl_1_2 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * hrr_0101z; - prod_yz = trr_01y * hrr_0101z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1101z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0201z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0111z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * 
hrr_0011y; - fzl = al2 * prod_xy * hrr_0102z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * hrr_0001z; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_0_2; - dd_jl = dm_jl_2_2 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * hrr_0101z; - prod_yz = trr_11y * hrr_0101z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1101z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0201z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0111z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0102z; - fyi -= 1 * prod_xz * trr_01y; - fzj -= 1 * prod_xy * hrr_0001z; - fyk -= 1 * prod_xz * trr_10y; - fzl -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_1_2; - dd_jl = dm_jl_2_2 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * hrr_1101z; - prod_yz = trr_01y * hrr_1101z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2101z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1201z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1111z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1102z; - fzi -= 1 * prod_xy * hrr_0101z; - fzj -= 1 * prod_xy * hrr_1001z; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * hrr_1100z; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_2_2; - dd_jl = dm_jl_2_2 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; 
- vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * 1; - prod_xz = hrr_1100x * hrr_0011z; - prod_yz = 1 * hrr_0011z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_01y; + fzl = al2 * hrr_1102z; + fzl -= 1 * hrr_1100z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = hrr_1100x * trr_01y * dd; + prod_xz = hrr_1100x * hrr_0001z * dd; + prod_yz = trr_01y * hrr_0001z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += 
fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = hrr_0100x * trr_11y * dd; + prod_xz = hrr_0100x * hrr_0001z * dd; + prod_yz = trr_11y * hrr_0001z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = hrr_0100x * trr_01y * dd; + prod_xz = hrr_0100x * hrr_1001z * dd; + prod_yz = trr_01y * hrr_1001z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + 
fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_0_2; + dd += dm_jl_1_2 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = trr_10x * hrr_0110y * dd; + prod_xz = trr_10x * hrr_0001z * dd; + prod_yz = hrr_0110y * hrr_0001z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_1_2; + dd += dm_jl_1_2 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * 
hrr_1110y * dd; + prod_xz = fac * hrr_0001z * dd; + prod_yz = hrr_1110y * hrr_0001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2110y; + fyi -= 1 * hrr_0110y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1210y; + fyj -= 1 * trr_11y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1120y; + fyk -= 1 * hrr_1100y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_2_2; + dd += dm_jl_1_2 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * hrr_0110y * dd; + prod_xz = fac * hrr_1001z * dd; + prod_yz = hrr_0110y * hrr_1001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_0_2; + dd += 
dm_jl_2_2 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * hrr_0101z * dd; + prod_yz = trr_01y * hrr_0101z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1101z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0201z; + fzj -= 1 * hrr_0001z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0111z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0102z; + fzl -= 1 * hrr_0100z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_1_2; + dd += dm_jl_2_2 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * hrr_0101z * dd; + prod_yz = trr_11y * hrr_0101z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1101z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0201z; + fzj -= 1 * hrr_0001z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * 
hrr_0111z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0102z; + fzl -= 1 * hrr_0100z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_2_2; + dd += dm_jl_2_2 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * hrr_1101z * dd; + prod_yz = trr_01y * hrr_1101z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2101z; + fzi -= 1 * hrr_0101z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1201z; + fzj -= 1 * hrr_1001z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1111z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1102z; + fzl -= 1 * hrr_1100z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = hrr_1100x * 1 * dd; + prod_xz = hrr_1100x * hrr_0011z * dd; + prod_yz = 1 * hrr_0011z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * 
hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; double hrr_0021z = trr_03z - zlzk * trr_02z; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; - fzl = al2 * prod_xy * hrr_0012z; - fxi -= 1 * prod_yz * hrr_0100x; - fxj -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_10y; - prod_xz = hrr_0100x * hrr_0011z; - prod_yz = trr_10y * 
hrr_0011z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0012z; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * 1; - prod_xz = hrr_0100x * hrr_1011z; - prod_yz = 1 * hrr_1011z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2011z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_1111z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_01y; + fzl = al2 * hrr_0012z; + fzl 
-= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = hrr_0100x * trr_10y * dd; + prod_xz = hrr_0100x * hrr_0011z * dd; + prod_yz = trr_10y * hrr_0011z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = hrr_0100x * 1 * dd; + prod_xz = hrr_0100x * hrr_1011z * dd; + prod_yz = 1 * hrr_1011z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2011z; + fzi -= 1 * hrr_0011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1111z; + v_jz += fzj * prod_xy; + fxk = ak2 
* hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; double hrr_1021z = trr_13z - zlzk * trr_12z; - fzk = ak2 * prod_xy * hrr_1021z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1021z; + fzk -= 1 * hrr_1001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; - fzl = al2 * prod_xy * hrr_1012z; - fzi -= 1 * prod_xy * hrr_0011z; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * hrr_1001z; - fzl -= 1 * prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0100y; - prod_xz = trr_10x * hrr_0011z; - prod_yz = hrr_0100y * hrr_0011z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * 
prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0012z; - fxi -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_0_2; - dd_jl = dm_jl_1_2 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1100y; - prod_xz = fac * hrr_0011z; - prod_yz = hrr_1100y * hrr_0011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0012z; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * 
prod_xz * trr_10y; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_1_2; - dd_jl = dm_jl_1_2 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0100y; - prod_xz = fac * hrr_1011z; - prod_yz = hrr_0100y * hrr_1011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_2011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_1021z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1012z; - fzi -= 1 * prod_xy * hrr_0011z; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * hrr_1001z; - fzl -= 1 * prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_2_2; - dd_jl = dm_jl_1_2 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = 
dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_0111z; - prod_yz = 1 * hrr_0111z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1111z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_1012z; + fzl -= 1 * trr_11z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_0_2; + dd += dm_jl_1_2 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = trr_10x * hrr_0100y * dd; + prod_xz = trr_10x * hrr_0011z * dd; + prod_yz = hrr_0100y * hrr_0011z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * 
trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_1_2; + dd += dm_jl_1_2 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * hrr_1100y * dd; + prod_xz = fac * hrr_0011z * dd; + prod_yz = hrr_1100y * hrr_0011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_2_2; + dd += dm_jl_1_2 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+1)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * hrr_0100y * dd; + prod_xz = fac * hrr_1011z * dd; + prod_yz = hrr_0100y * hrr_1011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = 
ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2011z; + fzi -= 1 * hrr_0011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1021z; + fzk -= 1 * hrr_1001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1012z; + fzl -= 1 * trr_11z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_0_2; + dd += dm_jl_2_2 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_0111z * dd; + prod_yz = 1 * hrr_0111z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1111z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0211z = hrr_1111z - zjzi * hrr_0111z; - fzj = aj2 * prod_xy * hrr_0211z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0211z; + fzj -= 1 * hrr_0011z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0121z = hrr_1021z - zjzi * hrr_0021z; - fzk = ak2 * prod_xy * hrr_0121z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0121z; + fzk -= 1 * hrr_0101z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double 
hrr_0112z = hrr_1012z - zjzi * hrr_0012z; - fzl = al2 * prod_xy * hrr_0112z; - fxi -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * hrr_0011z; - fzk -= 1 * prod_xy * hrr_0101z; - fzl -= 1 * prod_xy * hrr_0110z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_0_2; - dd_jl = dm_jl_2_2 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_0111z; - prod_yz = trr_10y * hrr_0111z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1111z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0211z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0121z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0112z; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * hrr_0011z; - fzk -= 1 * prod_xy * hrr_0101z; - fzl -= 1 * prod_xy * hrr_0110z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_1_2; - dd_jl = dm_jl_2_2 * dm_ik_1_2; - vk_dd = dd_jk + 
dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_1111z; - prod_yz = 1 * hrr_1111z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_0112z; + fzl -= 1 * hrr_0110z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_1_2; + dd += dm_jl_2_2 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_0111z * dd; + prod_yz = trr_10y * hrr_0111z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1111z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0211z; + fzj -= 1 * hrr_0011z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk 
* prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0121z; + fzk -= 1 * hrr_0101z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0112z; + fzl -= 1 * hrr_0110z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_2_2; + dd += dm_jl_2_2 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_1111z * dd; + prod_yz = 1 * hrr_1111z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; double hrr_3011z = trr_32z - zlzk * trr_31z; double hrr_2111z = hrr_3011z - zjzi * hrr_2011z; - fzi = ai2 * prod_xy * hrr_2111z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_2111z; + fzi -= 1 * hrr_0111z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1211z = hrr_2111z - zjzi * hrr_1111z; - fzj = aj2 * prod_xy * hrr_1211z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1211z; + fzj -= 1 * hrr_1011z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_23z = cpz * trr_22z + 2*b01 * trr_21z + 2*b00 * trr_12z; double hrr_2021z = trr_23z - zlzk * trr_22z; double hrr_1121z = hrr_2021z - zjzi * hrr_1021z; - fzk = ak2 * prod_xy * hrr_1121z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1121z; + fzk -= 1 * hrr_1101z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * 
prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2012z = hrr_2021z - zlzk * hrr_2011z; double hrr_1112z = hrr_2012z - zjzi * hrr_1012z; - fzl = al2 * prod_xy * hrr_1112z; - fzi -= 1 * prod_xy * hrr_0111z; - fzj -= 1 * prod_xy * hrr_1011z; - fzk -= 1 * prod_xy * hrr_1101z; - fzl -= 1 * prod_xy * hrr_1110z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_2_2; - dd_jl = dm_jl_2_2 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+2)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_1112z; + fzl -= 1 * hrr_1110z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); 
- atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_1111(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -11404,8 +9435,16 @@ void rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -11424,7 +9463,7 @@ void rys_ejk_ip1_1111(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_2000(RysIntEnvVars 
envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -11439,8 +9478,6 @@ void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 6 * TILE2; @@ -11474,11 +9511,10 @@ void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 6) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 6; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -11525,30 +9561,18 @@ void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + 
double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; if (jk.n_dm > 1) { int nao2 = nao * nao; @@ -11568,7 +9592,7 @@ void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_3_0 = dm[(i0+3)*nao+(l0+0)]; double dm_il_4_0 = dm[(i0+4)*nao+(l0+0)]; double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -11633,17 +9657,25 @@ void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(2, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(2, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 2; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -11652,408 +9684,324 @@ void _rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_10x = c0x * fac; double b10 = .5/aij * (1 - rt_aij); double trr_20x = c0x * trr_10x + 1*b10 * fac; - prod_xy = trr_20x * 1; - prod_xz = trr_20x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - fxi = ai2 * prod_yz * trr_30x; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxj = aj2 * prod_yz * hrr_2100x; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double rt_akl = rt_aa * aij; double cpx = xqc + xpq*rt_akl; double b00 = .5 * rt_aa; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - fxk = ak2 * prod_yz * trr_21x; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2001x = trr_21x - xlxk * trr_20x; - fxl = al2 * prod_yz * hrr_2001x; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * 
dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * trr_20x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * trr_10y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx 
+= fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_11x = cpx * trr_10x + 1*b00 * fac; - fxk = ak2 * prod_yz * trr_11x; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; - fxl = al2 * prod_yz * hrr_1001x; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * trr_10z; - prod_yz = 1 * 
trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - prod_xz = fac * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * trr_10x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * 
hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_01x = cpx * fac; - fxk = ak2 * prod_yz * trr_01x; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; - fxl = al2 * prod_yz * hrr_0001x; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = 
aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * trr_10z * dd; + 
prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk 
* prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != 
NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_2000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -12070,8 +10018,16 @@ void rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -12090,7 +10046,7 @@ void rys_ejk_ip1_2000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, 
ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -12105,8 +10061,6 @@ void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 6 * TILE2; @@ -12140,11 +10094,10 @@ void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 6) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 6; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -12191,30 +10144,18 @@ void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = 
dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -12252,7 +10193,7 @@ void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_3_0 = dm[(i0+3)*nao+(l0+0)]; double dm_il_4_0 = dm[(i0+4)*nao+(l0+0)]; double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -12317,17 +10258,25 @@ void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -12340,1095 +10289,867 @@ void _rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_20x = c0x * trr_10x + 1*b10 * fac; double b00 = .5 * rt_aa; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - prod_xy = trr_21x * 1; - prod_xz = trr_21x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_21x * 1 * dd; + prod_xz = trr_21x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - fxi = ai2 * prod_yz * trr_31x; + fxi = ai2 * trr_31x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2110x = trr_31x - xjxi * trr_21x; - fxj = aj2 * prod_yz * hrr_2110x; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - fxk = ak2 * prod_yz * trr_22x; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2011x = trr_22x - xlxk * trr_21x; - fxl = al2 * prod_yz * hrr_2011x; + fxl = al2 * hrr_2011x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - 
fxi -= 2 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * trr_20x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * trr_10y; - prod_xz = trr_11x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * trr_21x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * trr_10y * dd; + prod_xz = trr_11x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * trr_21x; + double trr_01x = cpx * fac; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi 
* prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxj = aj2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_01x = cpx * fac; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - fxk = ak2 * prod_yz * trr_12x; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1011x = trr_12x - xlxk * trr_11x; - fxl = al2 * prod_yz * hrr_1011x; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - 
vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = 
al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_20y; - prod_xz = trr_01x * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * trr_11x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * trr_20y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + fyi -= 2 * 
trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxj = aj2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_02x = cpx * trr_01x + 1*b01 * fac; - fxk = ak2 * prod_yz * trr_02x; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0011x = trr_02x - xlxk * trr_01x; - fxl = al2 * prod_yz * hrr_0011x; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - 
vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * 
hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * 
prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * trr_01y; - prod_xz = trr_20x * wt; - prod_yz = trr_01y * wt; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + 
fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_20x * trr_01y * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = trr_01y * wt * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxj = aj2 * prod_yz * hrr_2100x; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_21x; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2001x = trr_21x - xlxk * trr_20x; - fxl = al2 * prod_yz * hrr_2001x; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - 
vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_11y; - prod_xz = trr_10x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * trr_11y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * 
trr_11x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; - fxl = al2 * prod_yz * hrr_1001x; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * 
trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_21y; - prod_xz = fac * wt; - prod_yz = trr_21y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 
* trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_21y * dd; + prod_xz = fac * wt * dd; + prod_yz = trr_21y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky += fyk * prod_xz; + fzk = ak2 * 
trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; - fxl = al2 * prod_yz * hrr_0001x; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2011y = trr_22y - ylyk * trr_21y; - fyl = al2 * prod_xz * hrr_2011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * trr_10z; - prod_yz = trr_11y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * trr_01y; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - 
dd_jk = dm_jk_0_1 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * trr_20z; - prod_yz = trr_01y * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += 
fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * 1; - prod_xz = trr_20x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = trr_11y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl 
= al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * trr_20z * dd; + prod_yz = trr_01y * trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * 
trr_21x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = 
ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = 
trr_10x * trr_10y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * 
prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - prod_xz = fac * trr_01z; - prod_yz = trr_20y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; 
- if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * trr_11z; - prod_yz = trr_10y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1011z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx 
+= fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * trr_21z; - prod_yz = 1 * trr_21z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = trr_20y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = trr_10y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * trr_21z * dd; + prod_yz = 1 * trr_21z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * 
hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2011z = trr_22z - zlzk * trr_21z; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, 
vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_2010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -13445,8 +11166,16 @@ void rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, 
batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -13465,7 +11194,7 @@ void rys_ejk_ip1_2010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -13480,8 +11209,6 @@ void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 6 * TILE2; @@ -13515,11 +11242,10 @@ void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 6) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 6; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -13566,30 +11292,18 @@ void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double 
vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -13653,7 +11367,7 @@ void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; double dm_il_5_1 = dm[(i0+5)*nao+(l0+1)]; double dm_il_5_2 = dm[(i0+5)*nao+(l0+2)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -13718,17 +11432,25 @@ void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -13745,3170 +11467,2510 @@ void _rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_11x = cpx * trr_10x + 1*b00 * fac; double 
trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; double hrr_2011x = trr_22x - xlxk * trr_21x; - prod_xy = hrr_2011x * 1; - prod_xz = hrr_2011x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_2011x * 1 * dd; + prod_xz = hrr_2011x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; double hrr_3011x = trr_32x - xlxk * trr_31x; - fxi = ai2 * prod_yz * hrr_3011x; + fxi = ai2 * hrr_3011x; + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + fxi -= 2 * hrr_1011x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2111x = hrr_3011x - xjxi * hrr_2011x; - fxj = aj2 * prod_yz * hrr_2111x; + fxj = aj2 * hrr_2111x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_23x = cpx * trr_22x + 2*b01 * trr_21x + 2*b00 * trr_12x; double hrr_2021x = trr_23x - xlxk * trr_22x; - fxk = ak2 * prod_yz * 
hrr_2021x; + fxk = ak2 * hrr_2021x; + double hrr_2001x = trr_21x - xlxk * trr_20x; + fxk -= 1 * hrr_2001x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2012x = hrr_2021x - xlxk * hrr_2011x; - fxl = al2 * prod_yz * hrr_2012x; + fxl = al2 * hrr_2012x; + fxl -= 1 * trr_21x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_1011x = trr_12x - xlxk * trr_11x; - fxi -= 2 * prod_yz * hrr_1011x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - fxk -= 1 * prod_yz * hrr_2001x; - fxl -= 1 * prod_yz * trr_21x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl 
* vj_dd; - } - prod_xy = hrr_1011x * trr_10y; - prod_xz = hrr_1011x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_2011x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1011x * trr_10y * dd; + prod_xz = hrr_1011x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_2011x; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + fxi -= 1 * hrr_0011x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; - fxj = aj2 * prod_yz * hrr_1111x; + fxj = aj2 * hrr_1111x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_02x = cpx * trr_01x + 1*b01 * fac; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; double hrr_1021x = trr_13x - xlxk * trr_12x; - fxk = ak2 * prod_yz * hrr_1021x; + fxk = ak2 * hrr_1021x; + double hrr_1001x = trr_11x - xlxk * trr_10x; + fxk -= 1 * hrr_1001x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; - fxl = al2 * prod_yz * hrr_1012x; + fxl = 
al2 * hrr_1012x; + fxl -= 1 * trr_11x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0011x = trr_02x - xlxk * trr_01x; - fxi -= 1 * prod_yz * hrr_0011x; - fyi -= 1 * prod_xz * 1; - double hrr_1001x = trr_11x - xlxk * trr_10x; - fxk -= 1 * prod_yz * hrr_1001x; - fxl -= 1 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1011x * 1; - prod_xz = hrr_1011x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_2011x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { 
+ dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1011x * 1 * dd; + prod_xz = hrr_1011x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_2011x; + fxi -= 1 * hrr_0011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1111x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1021x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1021x; + fxk -= 1 * hrr_1001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1012x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1012x; + fxl -= 1 * trr_11x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0011x; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * hrr_1001x; - fxl -= 1 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - 
vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * trr_20y; - prod_xz = hrr_0011x * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * hrr_1011x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * trr_20y * dd; + prod_xz = hrr_0011x * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; - fxj = aj2 * prod_yz * hrr_0111x; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; double hrr_0021x = trr_03x - xlxk * trr_02x; - fxk = ak2 * prod_yz * hrr_0021x; + fxk = ak2 * hrr_0021x; + double hrr_0001x = trr_01x - xlxk * fac; + fxk -= 1 * hrr_0001x; + v_kx 
+= fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; - fxl = al2 * prod_yz * hrr_0012x; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - double hrr_0001x = trr_01x - xlxk * fac; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * trr_10y; - prod_xz = hrr_0011x * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * hrr_1011x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0111x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1100z; 
- fxk = ak2 * prod_yz * hrr_0021x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0012x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0011x * 1; - prod_xz = hrr_0011x * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * hrr_1011x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = 
hrr_0011x * trr_10y * dd; + prod_xz = hrr_0011x * trr_10z * dd; + prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0011x * 1 * dd; + prod_xz = hrr_0011x * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * hrr_1011x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0111x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0111x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0021x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0021x; + fxk -= 1 * hrr_0001x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 
2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0012x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0012x; + fxl -= 1 * trr_01x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * hrr_0001x; - fxl -= 1 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_2001x * trr_01y; - prod_xz = hrr_2001x * wt; - prod_yz = trr_01y * wt; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * 
dm_lk_0_1; + } + prod_xy = hrr_2001x * trr_01y * dd; + prod_xz = hrr_2001x * wt * dd; + prod_yz = trr_01y * wt * dd; double hrr_3001x = trr_31x - xlxk * trr_30x; - fxi = ai2 * prod_yz * hrr_3001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fxi = ai2 * hrr_3001x; + fxi -= 2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; - fxj = aj2 * prod_yz * hrr_2101x; + fxj = aj2 * hrr_2101x; + v_jx += fxj * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_2011x; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_2011x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2002x = hrr_2011x - xlxk * hrr_2001x; - fxl = al2 * prod_yz * hrr_2002x; + fxl = al2 * hrr_2002x; + fxl -= 1 * trr_20x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * hrr_1001x; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * trr_20x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - 
vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * trr_11y; - prod_xz = hrr_1001x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1001x * trr_11y * dd; + prod_xz = hrr_1001x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxj = aj2 * prod_yz * hrr_1101x; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1011x; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * 
prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; - fxl = al2 * prod_yz * hrr_1002x; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0001x; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * trr_01y; - prod_xz = hrr_1001x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - 
fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0001x; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_21y; - prod_xz = hrr_0001x * wt; - prod_yz = trr_21y * wt; - fxi = ai2 * prod_yz * hrr_1001x; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1001x * trr_01y * dd; + prod_xz = hrr_1001x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 
1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_21y * dd; + prod_xz = hrr_0001x * wt * dd; + prod_yz = trr_21y * wt * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxj = aj2 * prod_yz * hrr_0101x; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0011x; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky 
+= fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; - fxl = al2 * prod_yz * hrr_0002x; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; double hrr_2011y = trr_22y - ylyk * trr_21y; - fyl = al2 * prod_xz * hrr_2011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * trr_20y; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_11y; - prod_xz = hrr_0001x * trr_10z; - prod_yz = trr_11y * trr_10z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 
* prod_xz * trr_01y; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * trr_10y; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_01y; - prod_xz = hrr_0001x * trr_20z; - prod_yz = trr_01y * trr_20z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * 1; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_2001x * 1; - prod_xz = hrr_2001x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * hrr_3001x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2101x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_11y * dd; + prod_xz = hrr_0001x * trr_10z * dd; + prod_yz = trr_11y * trr_10z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * 
hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0001x * trr_01y * dd; + prod_xz = hrr_0001x * trr_20z * dd; + prod_yz = trr_01y * trr_20z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_2001x * 1 * dd; + prod_xz = hrr_2001x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * hrr_3001x; + fxi -= 2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += 
fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_2011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_2011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2002x; + fxl -= 1 * trr_20x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * hrr_1001x; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * trr_20x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - 
vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * trr_10y; - prod_xz = hrr_1001x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * hrr_0001x; - fyi -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1001x * 1; - prod_xz = hrr_1001x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * hrr_2001x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1101x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0011z; + 
v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1001x * trr_10y * dd; + prod_xz = hrr_1001x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1001x * 1 * dd; + prod_xz = hrr_1001x * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * hrr_2001x; + fxi -= 1 * hrr_0001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 
* prod_yz * hrr_1011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1002x; + fxl -= 1 * trr_10x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * hrr_0001x; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - fxl -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_20y; - prod_xz = hrr_0001x * trr_01z; - prod_yz = trr_20y * trr_01z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_30y; - 
fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * wt; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * trr_10y; - prod_xz = hrr_0001x * trr_11z; - prod_yz = trr_10y * trr_11z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1011z; - fyi 
-= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0001x * 1; - prod_xz = hrr_0001x * trr_21z; - prod_yz = 1 * trr_21z; - fxi = ai2 * prod_yz * hrr_1001x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * trr_20y * dd; + prod_xz = hrr_0001x * trr_01z * dd; + prod_yz = trr_20y * trr_01z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * 
prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0001x * trr_10y * dd; + prod_xz = hrr_0001x * trr_11z * dd; + prod_yz = trr_10y * trr_11z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = 
hrr_0001x * 1 * dd; + prod_xz = hrr_0001x * trr_21z * dd; + prod_yz = 1 * trr_21z * dd; + fxi = ai2 * hrr_1001x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0101x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0101x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * hrr_0011x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0011x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0002x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0002x; + fxl -= 1 * fac; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2011z = trr_22z - zlzk * trr_21z; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * trr_20z; - fxl -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd 
= dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_21x * hrr_0001y; - prod_xz = trr_21x * wt; - prod_yz = hrr_0001y * wt; - fxi = ai2 * prod_yz * trr_31x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_10z; + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_21x * hrr_0001y * dd; + prod_xz = trr_21x * wt * dd; + prod_yz = hrr_0001y * wt * dd; + fxi = ai2 * trr_31x; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2110x = trr_31x - xjxi * trr_21x; - fxj = aj2 * prod_yz * hrr_2110x; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_22x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2011x; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2011x; + v_lx += fxl * prod_yz; double hrr_0002y = hrr_0011y - ylyk * 
hrr_0001y; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * trr_20x; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_1001y; - prod_xz = trr_11x * wt; - prod_yz = hrr_1001y * wt; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_11x * hrr_1001y * dd; + prod_xz = trr_11x * wt * dd; + prod_yz = hrr_1001y * wt * dd; + fxi = ai2 
* trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxj = aj2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1011x; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * hrr_0001y; - fxk -= 1 * prod_yz * trr_10x; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj 
* vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0001y; - prod_xz = trr_11x * trr_10z; - prod_yz = hrr_0001y * trr_10z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_10x; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_2001y; - prod_xz = trr_01x * wt; - prod_yz = hrr_2001y * wt; - fxi = ai2 * prod_yz * trr_11x; + fyl = al2 
* hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_11x * hrr_0001y * dd; + prod_xz = trr_11x * trr_10z * dd; + prod_yz = hrr_0001y * trr_10z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_1; + dd += dm_jl_0_1 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_2001y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = hrr_2001y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double hrr_3001y = trr_31y - ylyk * trr_30y; - fyi = ai2 * prod_xz * hrr_3001y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * hrr_3001y; + fyi -= 2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double 
hrr_0110x = trr_11x - xjxi * trr_01x; - fxj = aj2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; - fyj = aj2 * prod_xz * hrr_2101y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_2011y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0011x; + fyj = aj2 * hrr_2101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_2011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_2002y = hrr_2011y - ylyk * hrr_2001y; - fyl = al2 * prod_xz * hrr_2002y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1001y; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_1; - dd_jl = dm_jl_0_1 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1001y; - 
prod_xz = trr_01x * trr_10z; - prod_yz = hrr_1001y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0001y; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_1; - dd_jl = dm_jl_0_1 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0001y; - prod_xz = trr_01x * trr_20z; - prod_yz = hrr_0001y * trr_20z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * 
prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * fac; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_1; - dd_jl = dm_jl_0_1 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_1_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0011y; - prod_xz = trr_20x * wt; - prod_yz = hrr_0011y * wt; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_2002y; + fyl -= 1 * trr_20y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_1; + dd += dm_jl_0_1 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_1001y * 
dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = hrr_1001y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_1; + dd += dm_jl_0_1 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_1_0; + } + prod_xy = trr_01x * hrr_0001y * dd; + prod_xz = trr_01x * trr_20z * dd; + prod_yz = hrr_0001y * trr_20z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = trr_20x * hrr_0011y * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = hrr_0011y * wt * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxj = aj2 * prod_yz * hrr_2100x; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_21x; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; double hrr_0021y = trr_03y - ylyk * trr_02y; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2001x; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * 
vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1011y; - prod_xz = trr_10x * wt; - prod_yz = hrr_1011y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_2011y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = trr_10x * hrr_1011y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_1011y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2011y; + fyi -= 1 * hrr_0011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; - fyj = aj2 * prod_xz * hrr_1111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_1111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double trr_13y = cpy * trr_12y + 2*b01 
* trr_11y + 1*b00 * trr_02y; double hrr_1021y = trr_13y - ylyk * trr_12y; - fyk = ak2 * prod_xz * hrr_1021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_1021y; + fyk -= 1 * hrr_1001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; - fyl = al2 * prod_xz * hrr_1012y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0011y; - fyk -= 1 * prod_xz * hrr_1001y; - fyl -= 1 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0011y; - prod_xz = trr_10x * trr_10z; - prod_yz = hrr_0011y * trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 
* prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2011y; - prod_xz = fac * wt; - prod_yz = hrr_2011y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_1012y; + fyl -= 1 * trr_11y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = trr_10x * hrr_0011y * dd; + prod_xz = trr_10x * trr_10z * dd; 
+ prod_yz = hrr_0011y * trr_10z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_3_1; + dd += dm_jl_0_1 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_2011y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_2011y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; double hrr_3011y = trr_32y - ylyk * trr_31y; - fyi = ai2 * prod_xz * hrr_3011y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * hrr_3011y; + fyi -= 2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2111y = hrr_3011y - yjyi * hrr_2011y; - fyj = aj2 * prod_xz * hrr_2111y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_2111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_23y = cpy * trr_22y + 2*b01 * trr_21y + 2*b00 * 
trr_12y; double hrr_2021y = trr_23y - ylyk * trr_22y; - fyk = ak2 * prod_xz * hrr_2021y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_2021y; + fyk -= 1 * hrr_2001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2012y = hrr_2021y - ylyk * hrr_2011y; - fyl = al2 * prod_xz * hrr_2012y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1011y; - fyk -= 1 * prod_xz * hrr_2001y; - fyl -= 1 * prod_xz * trr_21y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_3_1; - dd_jl = dm_jl_0_1 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1011y; - prod_xz = fac * trr_10z; - prod_yz = hrr_1011y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2011y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1111y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1021y; - fzk = ak2 * prod_xy * trr_11z; 
- fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1012y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0011y; - fzi -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * hrr_1001y; - fyl -= 1 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_4_1; - dd_jl = dm_jl_0_1 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0011y; - prod_xz = fac * trr_20z; - prod_yz = hrr_0011y * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1011y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0111y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0021y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0012y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * hrr_0001y; - fyl -= 1 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_5_1; - dd_jl = dm_jl_0_1 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - 
if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_1_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0001y; - prod_xz = trr_20x * trr_01z; - prod_yz = hrr_0001y * trr_01z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_1; - dd_jl = dm_jl_0_1 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - 
vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1001y; - prod_xz = trr_10x * trr_01z; - prod_yz = hrr_1001y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0001y; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_1; - dd_jl = dm_jl_0_1 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz 
+= fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0001y; - prod_xz = trr_10x * trr_11z; - prod_yz = hrr_0001y * trr_11z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_1; - dd_jl = dm_jl_0_1 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2001y; - 
prod_xz = fac * trr_01z; - prod_yz = hrr_2001y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_3001y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2101y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_2011y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2002y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * hrr_1001y; - fzk -= 1 * prod_xy * wt; - fyl -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_3_1; - dd_jl = dm_jl_0_1 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1001y; - prod_xz = fac * trr_11z; - prod_yz = hrr_1001y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2001y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1101y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1011y; - fzk = ak2 * 
prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1002y; - fzl = al2 * prod_xy * hrr_1011z; - fyi -= 1 * prod_xz * hrr_0001y; - fzi -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * trr_10z; - fyl -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_4_1; - dd_jl = dm_jl_0_1 * dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0001y; - prod_xz = fac * trr_21z; - prod_yz = hrr_0001y * trr_21z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1001y; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0101y; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0011y; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0002y; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * trr_20z; - fyl -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_5_1; - dd_jl = dm_jl_0_1 * dm_ik_5_2; - vk_dd = 
dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+1]; - dd_jl = dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_1_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_21x * 1; - prod_xz = trr_21x * hrr_0001z; - prod_yz = 1 * hrr_0001z; - fxi = ai2 * prod_yz * trr_31x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_2110x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2012y; + fyl -= 1 * trr_21y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_4_1; + dd += dm_jl_0_1 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_1011y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_1011y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2011y; + fyi -= 1 * hrr_0011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * 
hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1021y; + fyk -= 1 * hrr_1001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1012y; + fyl -= 1 * trr_11y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_5_1; + dd += dm_jl_0_1 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_1_1; + } + prod_xy = fac * hrr_0011y * dd; + prod_xz = fac * trr_20z * dd; + prod_yz = hrr_0011y * trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1011y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0111y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0021y; + fyk -= 1 * hrr_0001y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0012y; + fyl -= 1 * trr_01y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_1; + dd += dm_jl_0_1 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = trr_20x * hrr_0001y * dd; + prod_xz 
= trr_20x * trr_01z * dd; + prod_yz = hrr_0001y * trr_01z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_1; + dd += dm_jl_0_1 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = trr_10x * hrr_1001y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = hrr_1001y * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_1; + dd += dm_jl_0_1 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = trr_10x * hrr_0001y * dd; + prod_xz = trr_10x * trr_11z * dd; + prod_yz = hrr_0001y * trr_11z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_3_1; + dd += dm_jl_0_1 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_2001y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = hrr_2001y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_3001y; + fyi -= 2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_2011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * 
prod_yz; + fyl = al2 * hrr_2002y; + fyl -= 1 * trr_20y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_4_1; + dd += dm_jl_0_1 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_1001y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = hrr_1001y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2001y; + fyi -= 1 * hrr_0001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1101y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1002y; + fyl -= 1 * trr_10y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_5_1; + dd += dm_jl_0_1 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_1_2; + } + prod_xy = fac * hrr_0001y * dd; + prod_xz = fac * trr_21z * dd; + prod_yz = hrr_0001y * trr_21z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1001y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0101y; + v_jy += fyj * prod_xz; 
+ fzj = aj2 * hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0011y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0002y; + fyl -= 1 * 1; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_21x * 1 * dd; + prod_xz = trr_21x * hrr_0001z * dd; + prod_yz = 1 * hrr_0001z * dd; + fxi = ai2 * trr_31x; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_22x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_2011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 2 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * trr_20x; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * trr_10y; - prod_xz = trr_11x * hrr_0001z; - prod_yz = trr_10y * hrr_0001z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_10x; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - 
vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * hrr_1001z; - prod_yz = 1 * hrr_1001z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_11x * trr_10y * dd; + prod_xz = trr_11x * hrr_0001z * dd; + prod_yz = trr_10y * hrr_0001z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 
* hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * hrr_1001z * dd; + prod_yz = 1 * hrr_1001z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; - fzl = al2 * prod_xy * hrr_1002z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * hrr_0001z; - fxk -= 1 * prod_yz * trr_10x; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - 
vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_20y; - prod_xz = trr_01x * hrr_0001z; - prod_yz = trr_20y * hrr_0001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 2 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_2; - dd_jl = dm_jl_0_2 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += 
fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * hrr_1001z; - prod_yz = trr_10y * hrr_1001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1002z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0001z; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_2; - dd_jl = dm_jl_0_2 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - 
prod_xy = trr_01x * 1; - prod_xz = trr_01x * hrr_2001z; - prod_yz = 1 * hrr_2001z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_2; + dd += dm_jl_0_2 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * trr_20y * dd; + prod_xz = trr_01x * hrr_0001z * dd; + prod_yz = trr_20y * hrr_0001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_2; + dd += dm_jl_0_2 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * hrr_1001z * dd; + prod_yz = trr_10y * hrr_1001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = 
aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_2; + dd += dm_jl_0_2 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_2_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * hrr_2001z * dd; + prod_yz = 1 * hrr_2001z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double hrr_3001z = trr_31z - zlzk * trr_30z; - fzi = ai2 * prod_xy * hrr_3001z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3001z; + fzi -= 2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; - fzj = aj2 * prod_xy * hrr_2101z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; - fzk = ak2 * prod_xy * hrr_2011z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzj = aj2 * hrr_2101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_2011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2002z = hrr_2011z - zlzk * hrr_2001z; - fzl = al2 * prod_xy * 
hrr_2002z; - fzi -= 2 * prod_xy * hrr_1001z; - fxk -= 1 * prod_yz * fac; - fzl -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_2; - dd_jl = dm_jl_0_2 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_2_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * trr_01y; - prod_xz = trr_20x * hrr_0001z; - prod_yz = trr_01y * hrr_0001z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 2 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * 
dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_11y; - prod_xz = trr_10x * hrr_0001z; - prod_yz = trr_11y * hrr_0001z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0002z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; 
- vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * hrr_1001z; - prod_yz = trr_01y * hrr_1001z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1002z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0001z; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz 
+= fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_21y; - prod_xz = fac * hrr_0001z; - prod_yz = trr_21y * hrr_0001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * hrr_1001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * hrr_0011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2011y; - fzl = al2 * prod_xy * hrr_0002z; - fyi -= 2 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * trr_20y; - fzl -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_3_2; - dd_jl = dm_jl_0_2 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * hrr_1001z; - prod_yz = trr_11y * hrr_1001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - 
fzi = ai2 * prod_xy * hrr_2001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_1011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1002z; - fyi -= 1 * prod_xz * trr_01y; - fzi -= 1 * prod_xy * hrr_0001z; - fyk -= 1 * prod_xz * trr_10y; - fzl -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_4_2; - dd_jl = dm_jl_0_2 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * hrr_2001z; - prod_yz = trr_01y * hrr_2001z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_3001z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2101z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_2011z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - 
fzl = al2 * prod_xy * hrr_2002z; - fzi -= 2 * prod_xy * hrr_1001z; - fyk -= 1 * prod_xz * 1; - fzl -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_5_2; - dd_jl = dm_jl_0_2 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_2_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * 1; - prod_xz = trr_20x * hrr_0011z; - prod_yz = 1 * hrr_0011z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_2002z; + fzl -= 1 * trr_20z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = trr_20x * trr_01y * dd; + prod_xz = trr_20x * hrr_0001z * dd; + prod_yz = trr_01y * hrr_0001z * dd; + fxi = ai2 * trr_30x; + 
fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = trr_10x * trr_11y * dd; + prod_xz = trr_10x * hrr_0001z * dd; + prod_yz = trr_11y * hrr_0001z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + 
} + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * hrr_1001z * dd; + prod_yz = trr_01y * hrr_1001z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_3_2; + dd += dm_jl_0_2 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_21y * dd; + prod_xz = fac * hrr_0001z * dd; + prod_yz = trr_21y * hrr_0001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0002z; + fzl -= 1 * wt; + v_lz += 
fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_4_2; + dd += dm_jl_0_2 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * hrr_1001z * dd; + prod_yz = trr_11y * hrr_1001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2001z; + fzi -= 1 * hrr_0001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1002z; + fzl -= 1 * trr_10z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_5_2; + dd += dm_jl_0_2 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_2_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * hrr_2001z * dd; + prod_yz = trr_01y * hrr_2001z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_3001z; + fzi -= 2 * hrr_1001z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2101z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * 
trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_2011z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2002z; + fzl -= 1 * trr_20z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_2; + dd += dm_jl_0_2 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * hrr_0011z * dd; + prod_yz = 1 * hrr_0011z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; double hrr_0021z = trr_03z - zlzk * trr_02z; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; - fzl = al2 * prod_xy * hrr_0012z; - fxi -= 2 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_2; - dd_jl = dm_jl_0_2 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * hrr_0011z; - prod_yz = trr_10y * hrr_0011z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0012z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_2; - dd_jl = dm_jl_0_2 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; 
- vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_1011z; - prod_yz = 1 * hrr_1011z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2011z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_2; + dd += dm_jl_0_2 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = trr_10x * trr_10y * dd; + prod_xz = trr_10x * hrr_0011z * dd; + prod_yz = trr_10y * hrr_0011z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = 
al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_2; + dd += dm_jl_0_2 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_1011z * dd; + prod_yz = 1 * hrr_1011z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2011z; + fzi -= 1 * hrr_0011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; - fzj = aj2 * prod_xy * hrr_1111z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; double hrr_1021z = trr_13z - zlzk * trr_12z; - fzk = ak2 * prod_xy * hrr_1021z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1021z; + fzk -= 1 * hrr_1001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; - fzl = al2 * prod_xy * hrr_1012z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0011z; - fzk -= 1 * prod_xy * hrr_1001z; - fzl -= 1 * prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_2; - dd_jl = dm_jl_0_2 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - 
vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - prod_xz = fac * hrr_0011z; - prod_yz = trr_20y * hrr_0011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * hrr_0021z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0012z; - fyi -= 2 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * hrr_0001z; - fzl -= 1 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_3_2; - dd_jl = dm_jl_0_2 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; 
- } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_1011z; - prod_yz = trr_10y * hrr_1011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1021z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1012z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0011z; - fzk -= 1 * prod_xy * hrr_1001z; - fzl -= 1 * prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_4_2; - dd_jl = dm_jl_0_2 * dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz 
+= fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_2011z; - prod_yz = 1 * hrr_2011z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1012z; + fzl -= 1 * trr_11z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_3_2; + dd += dm_jl_0_2 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * hrr_0011z * dd; + prod_yz = trr_20y * hrr_0011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0021z; + fzk -= 1 * hrr_0001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0012z; + fzl -= 1 * trr_01z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_4_2; + dd += dm_jl_0_2 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_1011z * dd; + prod_yz = trr_10y * hrr_1011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += 
fyi * prod_xz; + fzi = ai2 * hrr_2011z; + fzi -= 1 * hrr_0011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1021z; + fzk -= 1 * hrr_1001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1012z; + fzl -= 1 * trr_11z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_5_2; + dd += dm_jl_0_2 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_2_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_2011z * dd; + prod_yz = 1 * hrr_2011z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; double hrr_3011z = trr_32z - zlzk * trr_31z; - fzi = ai2 * prod_xy * hrr_3011z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3011z; + fzi -= 2 * hrr_1011z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2111z = hrr_3011z - zjzi * hrr_2011z; - fzj = aj2 * prod_xy * hrr_2111z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2111z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_23z = cpz * trr_22z + 2*b01 * trr_21z + 2*b00 * trr_12z; double hrr_2021z = trr_23z - zlzk * trr_22z; - fzk = ak2 * prod_xy * hrr_2021z; - fxl = al2 * prod_yz * hrr_0001x; - 
fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_2021z; + fzk -= 1 * hrr_2001z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2012z = hrr_2021z - zlzk * hrr_2011z; - fzl = al2 * prod_xy * hrr_2012z; - fzi -= 2 * prod_xy * hrr_1011z; - fzk -= 1 * prod_xy * hrr_2001z; - fzl -= 1 * prod_xy * trr_21z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_5_2; - dd_jl = dm_jl_0_2 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+2]; - dd_jl = dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_2_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2012z; + fzl -= 1 * trr_21z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - 
atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_2011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -16925,8 +13987,16 @@ void rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -16945,7 +14015,7 @@ void rys_ejk_ip1_2011(RysIntEnvVars envs, JKMatrix jk, 
BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -16960,8 +14030,6 @@ void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 6 * TILE2; @@ -16995,11 +14063,10 @@ void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 6) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 6; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -17046,30 +14113,18 @@ void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; 
+ double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -17134,7 +14189,7 @@ void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_3_0 = dm[(i0+3)*nao+(l0+0)]; double dm_il_4_0 = dm[(i0+4)*nao+(l0+0)]; double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -17199,17 +14254,25 @@ void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -17225,2121 +14288,1677 @@ void _rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double b01 = .5/akl * (1 - rt_akl); double trr_11x = cpx * trr_10x + 1*b00 * fac; double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - prod_xy = trr_22x * 1; - prod_xz = trr_22x * wt; - prod_yz = 1 * wt; + dd = 0.; + if 
(jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_22x * 1 * dd; + prod_xz = trr_22x * wt * dd; + prod_yz = 1 * wt * dd; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; - fxi = ai2 * prod_yz * trr_32x; + fxi = ai2 * trr_32x; + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + fxi -= 2 * trr_12x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2120x = trr_32x - xjxi * trr_22x; - fxj = aj2 * prod_yz * hrr_2120x; + fxj = aj2 * hrr_2120x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_23x = cpx * trr_22x + 2*b01 * trr_21x + 2*b00 * trr_12x; - fxk = ak2 * prod_yz * trr_23x; + fxk = ak2 * trr_23x; + fxk -= 2 * trr_21x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; 
double hrr_2021x = trr_23x - xlxk * trr_22x; - fxl = al2 * prod_yz * hrr_2021x; + fxl = al2 * hrr_2021x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_12x; - fxk -= 2 * prod_yz * trr_21x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_12x * trr_10y; - prod_xz = trr_12x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * trr_22x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_12x * trr_10y 
* dd; + prod_xz = trr_12x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * trr_22x; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + fxi -= 1 * trr_02x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1120x = trr_22x - xjxi * trr_12x; - fxj = aj2 * prod_yz * hrr_1120x; + fxj = aj2 * hrr_1120x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_02x = cpx * trr_01x + 1*b01 * fac; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; - fxk = ak2 * prod_yz * trr_13x; + fxk = ak2 * trr_13x; + fxk -= 2 * trr_11x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1021x = trr_13x - xlxk * trr_12x; - fxl = al2 * prod_yz * hrr_1021x; + fxl = al2 * hrr_1021x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_02x; - fyi -= 1 * prod_xz * 1; - fxk -= 2 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; 
- vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_12x * 1; - prod_xz = trr_12x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * trr_22x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_12x * 1 * dd; + prod_xz = trr_12x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * trr_22x; + fxi -= 1 * trr_02x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1120x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1120x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_13x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_13x; + fxk -= 2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk 
* prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1021x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1021x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * trr_02x; - fzi -= 1 * prod_xy * wt; - fxk -= 2 * prod_yz * trr_11x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_02x * trr_20y; - prod_xz = trr_02x * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * trr_12x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += 
jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_02x * trr_20y * dd; + prod_xz = trr_02x * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * trr_12x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0120x = trr_12x - xjxi * trr_02x; - fxj = aj2 * prod_yz * hrr_0120x; + fxj = aj2 * hrr_0120x; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; - fxk = ak2 * prod_yz * trr_03x; + fxk = ak2 * trr_03x; + fxk -= 2 * trr_01x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0021x = trr_03x - xlxk * trr_02x; - fxl = al2 * prod_yz * hrr_0021x; + fxl = al2 * hrr_0021x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - fxk -= 2 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; 
- vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_02x * trr_10y; - prod_xz = trr_02x * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * trr_12x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0120x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_03x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0021x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - fxk -= 2 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * 
vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_02x * 1; - prod_xz = trr_02x * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * trr_12x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_02x * trr_10y * dd; + prod_xz = trr_02x * trr_10z * dd; + prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * trr_12x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0120x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_03x; + fxk -= 2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0021x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_02x * 1 * dd; + prod_xz = trr_02x * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * trr_12x; + v_ix += fxi * 
prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0120x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0120x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_03x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_03x; + fxk -= 2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0021x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0021x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxk -= 2 * prod_yz * trr_01x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz 
+= fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_21x * trr_01y; - prod_xz = trr_21x * wt; - prod_yz = trr_01y * wt; - fxi = ai2 * prod_yz * trr_31x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_21x * trr_01y * dd; + prod_xz = trr_21x * wt * dd; + prod_yz = trr_01y * wt * dd; + fxi = ai2 * trr_31x; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2110x = trr_31x - xjxi * trr_21x; - fxj = aj2 * prod_yz * hrr_2110x; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_22x; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2011x = trr_22x - xlxk * trr_21x; - fxl = al2 * prod_yz * hrr_2011x; + fxl = al2 * hrr_2011x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_11x; - fxk 
-= 1 * prod_yz * trr_20x; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * trr_11y; - prod_xz = trr_11x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_11x * trr_11y * dd; + prod_xz = trr_11x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * 
trr_10z; + v_iz += fzi * prod_xy; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxj = aj2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_12x; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1011x = trr_12x - xlxk * trr_11x; - fxl = al2 * prod_yz * hrr_1011x; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * trr_01y; - fxk -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += 
fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * trr_01y; - prod_xz = trr_11x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_21y; - prod_xz = trr_01x * wt; - prod_yz = trr_21y * wt; - fxi = ai2 * prod_yz * trr_11x; + fyl = al2 * hrr_1011y; + v_ly += fyl * 
prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_11x * trr_01y * dd; + prod_xz = trr_11x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_01x * trr_21y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = trr_21y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxj = aj2 * prod_yz * 
hrr_0110x; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_02x; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0011x = trr_02x - xlxk * trr_01x; - fxl = al2 * prod_yz * hrr_0011x; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_2011y = trr_22y - ylyk * trr_21y; - fyl = al2 * prod_xz * hrr_2011y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_11y; - fxk -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz 
+= fzl * vj_dd; - } - prod_xy = trr_01x * trr_11y; - prod_xz = trr_01x * trr_10z; - prod_yz = trr_11y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * trr_01y; - fzi -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_01y; - prod_xz = trr_01x * trr_20z; - prod_yz = trr_01y * trr_20z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = 
ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_21x * 1; - prod_xz = trr_21x * trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * trr_31x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2110x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor 
* dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_01x * trr_11y * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = trr_11y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_01x * trr_01y * dd; + prod_xz = trr_01x * trr_20z * dd; + prod_yz = trr_01y * trr_20z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += 
dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_21x * 1 * dd; + prod_xz = trr_21x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * trr_31x; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_22x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * trr_20x; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx 
+= fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * trr_10y; - prod_xz = trr_11x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - 
vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_11x * trr_10y * dd; + prod_xz = trr_11x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_11x * 1 * dd; + prod_xz 
= trr_11x * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * trr_01z; - fxk -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - 
vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_20y; - prod_xz = trr_01x * trr_01z; - prod_yz = trr_20y * trr_01z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * trr_11z; - prod_yz = trr_10y * trr_11z; 
- fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1011z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * trr_01z; - fxk -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * trr_21z; - prod_yz = 1 * trr_21z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_01x * trr_20y * dd; + prod_xz = trr_01x * trr_01z * dd; + prod_yz = trr_20y * trr_01z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * trr_11z * dd; + prod_yz = trr_10y * trr_11z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; 
+ v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * trr_21z * dd; + prod_yz = 1 * trr_21z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2011z = trr_22z - zlzk * trr_21z; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fxk -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - 
} - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * trr_02y; - prod_xz = trr_20x * wt; - prod_yz = trr_02y * wt; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_12y; - fzi = ai2 * prod_xy * trr_10z; + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_3 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_3; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+3]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_3; + } + prod_xy = trr_20x * trr_02y * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = trr_02y * wt * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_12y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2100x = trr_30x - xjxi * trr_20x; - fxj = aj2 * prod_yz * hrr_2100x; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0120y = trr_12y - yjyi * trr_02y; - fyj = aj2 * prod_xz * hrr_0120y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_21x; + fyj = aj2 * hrr_0120y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * 
prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; - fyk = ak2 * prod_xz * trr_03y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_03y; + fyk -= 2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_2001x = trr_21x - xlxk * trr_20x; - fxl = al2 * prod_yz * hrr_2001x; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0021y = trr_03y - ylyk * trr_02y; - fyl = al2 * prod_xz * hrr_0021y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - fyk -= 2 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_3 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_3; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+3]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_3; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_12y; - prod_xz = trr_10x * wt; - prod_yz = trr_12y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_22y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0021y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_3 * 
dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_3; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+3]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_3; + } + prod_xy = trr_10x * trr_12y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = trr_12y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_22y; + fyi -= 1 * trr_02y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1100x = trr_20x - xjxi * trr_10x; - fxj = aj2 * prod_yz * hrr_1100x; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1120y = trr_22y - yjyi * trr_12y; - fyj = aj2 * prod_xz * hrr_1120y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_1120y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; - fyk = ak2 * prod_xz * trr_13y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_13y; + fyk -= 2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; - fxl = al2 * prod_yz * hrr_1001x; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1021y = trr_13y - ylyk * trr_12y; - fyl = al2 * prod_xz * hrr_1021y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * trr_02y; - fyk -= 2 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_0_3 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_3; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+3]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - 
vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_3; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_02y; - prod_xz = trr_10x * trr_10z; - prod_yz = trr_02y * trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_12y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0120y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_03y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0021y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - fyk -= 2 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_3 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_3; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+3]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_3; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * 
vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_22y; - prod_xz = fac * wt; - prod_yz = trr_22y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_1021y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_3 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_3; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+3]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_3; + } + prod_xy = trr_10x * trr_02y * dd; + prod_xz = trr_10x * trr_10z * dd; + prod_yz = trr_02y * trr_10z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_12y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0120y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_03y; + fyk -= 2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0021y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_3 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_3; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+3]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_3; + } + prod_xy = fac * trr_22y * 
dd; + prod_xz = fac * wt * dd; + prod_yz = trr_22y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; - fyi = ai2 * prod_xz * trr_32y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_32y; + fyi -= 2 * trr_12y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0100x = trr_10x - xjxi * fac; - fxj = aj2 * prod_yz * hrr_0100x; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2120y = trr_32y - yjyi * trr_22y; - fyj = aj2 * prod_xz * hrr_2120y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_2120y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_23y = cpy * trr_22y + 2*b01 * trr_21y + 2*b00 * trr_12y; - fyk = ak2 * prod_xz * trr_23y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_23y; + fyk -= 2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; - fxl = al2 * prod_yz * hrr_0001x; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_2021y = trr_23y - ylyk * trr_22y; - fyl = al2 * prod_xz * hrr_2021y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_12y; - fyk -= 2 * prod_xz * trr_21y; - if (vk != NULL) { - dd_jk = dm_jk_0_3 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_3; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+3]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - 
vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_3; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_12y; - prod_xz = fac * trr_10z; - prod_yz = trr_12y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_22y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1120y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_13y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1021y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * trr_02y; - fzi -= 1 * prod_xy * wt; - fyk -= 2 * prod_xz * trr_11y; - if (vk != NULL) { - dd_jk = dm_jk_0_3 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_3; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+3]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_3; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * 
vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_02y; - prod_xz = fac * trr_20z; - prod_yz = trr_02y * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_12y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0120y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_03y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0021y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyk -= 2 * prod_xz * trr_01y; - if (vk != NULL) { - dd_jk = dm_jk_0_3 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_3; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+3]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_3; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * trr_01y; - prod_xz = trr_20x * trr_01z; - prod_yz = trr_01y * trr_01z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * 
prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_4 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_4; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+4]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_4; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_11y; - prod_xz = trr_10x * trr_01z; - prod_yz = trr_11y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_4 * dm_il_1_0; - dd_jl = dm_jl_0_0 * 
dm_ik_1_4; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+4]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_4; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * trr_11z; - prod_yz = trr_01y * trr_11z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * trr_01z; - fyk -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_4 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_4; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+4]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; 
- vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_4; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_21y; - prod_xz = fac * trr_01z; - prod_yz = trr_21y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2011y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * trr_20y; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_4 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_4; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+4]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_4; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; 
- vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * trr_11z; - prod_yz = trr_11y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1011z; - fyi -= 1 * prod_xz * trr_01y; - fzi -= 1 * prod_xy * trr_01z; - fyk -= 1 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_4 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_4; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+4]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_4; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - 
prod_xz = fac * trr_21z; - prod_yz = trr_01y * trr_21z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fyk -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_0_4 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_4; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+4]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_4; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * 1; - prod_xz = trr_20x * trr_02z; - prod_yz = 1 * trr_02z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_12z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2021y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + 
dd = dm_jk_0_3 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_3; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+3]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_3; + } + prod_xy = fac * trr_12y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = trr_12y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_22y; + fyi -= 1 * trr_02y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1120y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_13y; + fyk -= 2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1021y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_3 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_3; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+3] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+3]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_3; + } + prod_xy = fac * trr_02y * dd; + prod_xz = fac * trr_20z * dd; + prod_yz = trr_02y * trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_12y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0120y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_03y; + fyk -= 2 * trr_01y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * 
prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0021y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_4 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_4; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+4]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_4; + } + prod_xy = trr_20x * trr_01y * dd; + prod_xz = trr_20x * trr_01z * dd; + prod_yz = trr_01y * trr_01z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_4 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_4; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+4]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_4; + } + prod_xy = trr_10x * trr_11y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = trr_11y * trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += 
fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_4 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_4; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+4]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_4; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * trr_11z * dd; + prod_yz = trr_01y * trr_11z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_4 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_4; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+4]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_4; + } + prod_xy = fac * trr_21y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = trr_21y * trr_01z * dd; + fxi = ai2 * 
trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_4 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_4; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+4]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_4; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = trr_11y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_4 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_4; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+4] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+5)*nao+k0+4]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_4; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * trr_21z * dd; + prod_yz = trr_01y * trr_21z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_5 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_5; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+5]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_5; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * trr_02z * dd; + prod_yz = 1 * trr_02z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_12z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0120z = trr_12z - zjzi * trr_02z; - fzj = aj2 * prod_xy * hrr_0120z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0120z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; - fzk = ak2 * prod_xy * trr_03z; - fxl = al2 * 
prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_03z; + fzk -= 2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0021z = trr_03z - zlzk * trr_02z; - fzl = al2 * prod_xy * hrr_0021z; - fxi -= 2 * prod_yz * trr_10x; - fzk -= 2 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_5 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_5; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+5]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_5; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * trr_02z; - prod_yz = trr_10y * trr_02z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_12z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0120z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_03z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0021z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - fzk -= 2 * 
prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_5 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_5; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+5]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_5; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * trr_12z; - prod_yz = 1 * trr_12z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_22z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0021z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_5 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_5; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+5]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_5; + } + prod_xy = trr_10x * trr_10y * dd; + prod_xz = trr_10x * trr_02z * dd; + prod_yz = trr_10y * trr_02z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_12z; + v_iz += 
fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0120z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_03z; + fzk -= 2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0021z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_5 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_5; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+5]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_5; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * trr_12z * dd; + prod_yz = 1 * trr_12z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_22z; + fzi -= 1 * trr_02z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1120z = trr_22z - zjzi * trr_12z; - fzj = aj2 * prod_xy * hrr_1120z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1120z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; - fzk = ak2 * prod_xy * trr_13z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_13z; + fzk -= 2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1021z = trr_13z - zlzk * trr_12z; - fzl = al2 * prod_xy * hrr_1021z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * trr_02z; - fzk -= 2 
* prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_0_5 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_5; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+5]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_5; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - prod_xz = fac * trr_02z; - prod_yz = trr_20y * trr_02z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_12z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0120z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_03z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0021z; - fyi -= 2 * prod_xz * trr_10y; - fzk -= 2 * prod_xy * trr_01z; - if (vk != NULL) { - dd_jk = dm_jk_0_5 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_5; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+5]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * 
vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_5; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * trr_12z; - prod_yz = trr_10y * trr_12z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_22z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1120z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_13z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1021z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * trr_02z; - fzk -= 2 * prod_xy * trr_11z; - if (vk != NULL) { - dd_jk = dm_jk_0_5 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_5; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+5]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_5; - vj_grad_ix += fxi * 
vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * trr_22z; - prod_yz = 1 * trr_22z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1021z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_5 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_5; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+5]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_5; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * trr_02z * dd; + prod_yz = trr_20y * trr_02z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_12z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0120z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_03z; + fzk -= 2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0021z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_5 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_5; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+5]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_5; + } + prod_xy = fac * trr_10y * dd; + 
prod_xz = fac * trr_12z * dd; + prod_yz = trr_10y * trr_12z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_22z; + fzi -= 1 * trr_02z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1120z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_13z; + fzk -= 2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1021z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_5 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_5; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+5]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_5; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * trr_22z * dd; + prod_yz = 1 * trr_22z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; - fzi = ai2 * prod_xy * trr_32z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_32z; + fzi -= 2 * trr_12z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2120z = trr_32z - zjzi * trr_22z; - fzj = aj2 * prod_xy * hrr_2120z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2120z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_23z = cpz * trr_22z + 2*b01 * trr_21z + 2*b00 * trr_12z; - fzk = ak2 * prod_xy * trr_23z; - fxl = al2 
* prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_23z; + fzk -= 2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2021z = trr_23z - zlzk * trr_22z; - fzl = al2 * prod_xy * hrr_2021z; - fzi -= 2 * prod_xy * trr_12z; - fzk -= 2 * prod_xy * trr_21z; - if (vk != NULL) { - dd_jk = dm_jk_0_5 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_5; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+5] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+5]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_5; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2021z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - 
atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_2020(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -19356,8 +15975,16 @@ void rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -19376,7 +16003,7 @@ void rys_ejk_ip1_2020(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void 
_rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -19391,8 +16018,6 @@ void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 18 * TILE2; @@ -19426,11 +16051,10 @@ void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 18) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 18; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -19477,30 +16101,18 @@ void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + 
double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; if (jk.n_dm > 1) { int nao2 = nao * nao; @@ -19524,7 +16136,7 @@ void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_3_0 = dm[(i0+3)*nao+(l0+0)]; double dm_il_4_0 = dm[(i0+4)*nao+(l0+0)]; double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -19589,17 +16201,25 @@ void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -19610,1105 +16230,877 @@ void _rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_20x = c0x * trr_10x + 1*b10 * fac; double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; double hrr_2100x = trr_30x - xjxi * trr_20x; - prod_xy = hrr_2100x * 1; - prod_xz = hrr_2100x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 
1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_2100x * 1 * dd; + prod_xz = hrr_2100x * wt * dd; + prod_yz = 1 * wt * dd; double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; double hrr_3100x = trr_40x - xjxi * trr_30x; - fxi = ai2 * prod_yz * hrr_3100x; + fxi = ai2 * hrr_3100x; + double hrr_1100x = trr_20x - xjxi * trr_10x; + fxi -= 2 * hrr_1100x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2200x = hrr_3100x - xjxi * hrr_2100x; - fxj = aj2 * prod_yz * hrr_2200x; + fxj = aj2 * hrr_2200x; + fxj -= 1 * trr_20x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double rt_akl = rt_aa * aij; double cpx = xqc + xpq*rt_akl; double b00 = .5 * rt_aa; double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; double hrr_2110x = trr_31x - xjxi * trr_21x; - fxk = ak2 * prod_yz * hrr_2110x; + fxk = ak2 * hrr_2110x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_3001x = trr_31x - xlxk * trr_30x; double hrr_2001x = trr_21x - xlxk * trr_20x; double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; - 
fxl = al2 * prod_yz * hrr_2101x; + fxl = al2 * hrr_2101x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_1100x = trr_20x - xjxi * trr_10x; - fxi -= 2 * prod_yz * hrr_1100x; - fxj -= 1 * prod_yz * trr_20x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_10y; - prod_xz = hrr_1100x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_2100x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1100x 
* trr_10y * dd; + prod_xz = hrr_1100x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_2100x; + double hrr_0100x = trr_10x - xjxi * fac; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; - fxj = aj2 * prod_yz * hrr_1200x; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_11x = cpx * trr_10x + 1*b00 * fac; double hrr_1110x = trr_21x - xjxi * trr_11x; - fxk = ak2 * prod_yz * hrr_1110x; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxl = al2 * prod_yz * hrr_1101x; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0100x = trr_10x - xjxi * fac; - fxi -= 1 * prod_yz * hrr_0100x; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * 
vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * 1; - prod_xz = hrr_1100x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1100x * 1 * dd; + prod_xz = hrr_1100x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; 
+ v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0100x; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_20y; - prod_xz = hrr_0100x * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * hrr_1100x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + 
dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * trr_20y * dd; + prod_xz = hrr_0100x * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; - fxj = aj2 * prod_yz * hrr_0200x; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_01x = cpx * fac; double hrr_0110x = trr_11x - xjxi * trr_01x; - fxk = ak2 * prod_yz * hrr_0110x; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxl = al2 * prod_yz * hrr_0101x; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - fxj -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - 
vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_10y; - prod_xz = hrr_0100x * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * 
dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * 1; - prod_xz = hrr_0100x * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * trr_10y * dd; + prod_xz = hrr_0100x * trr_10z * dd; + prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if 
(jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * 1 * dd; + prod_xz = hrr_0100x * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxj -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - 
if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0100y; - prod_xz = trr_20x * wt; - prod_yz = hrr_0100y * wt; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_2100x; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_20x * hrr_0100y * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = hrr_0100y * wt * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_21x; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2001x; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0101y = 
hrr_1001y - yjyi * hrr_0001y; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1100y; - prod_xz = trr_10x * wt; - prod_yz = hrr_1100y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1100x; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_1100y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_1100y * wt * dd; + fxi 
= ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - 
vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0100y; - prod_xz = trr_10x * trr_10z; - prod_yz = hrr_0100y * trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2100y; - prod_xz = fac * wt; - prod_yz = hrr_2100y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if 
(jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_0100y * dd; + prod_xz = trr_10x * trr_10z * dd; + prod_yz = hrr_0100y * trr_10z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_3_0; + dd += dm_jl_1_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[9*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_2100y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_2100y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; double hrr_3100y = trr_40y - yjyi * trr_30y; - fyi = ai2 * prod_xz * hrr_3100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0100x; + fyi = ai2 * hrr_3100y; + fyi -= 2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2200y = hrr_3100y - yjyi * 
hrr_2100y; - fyj = aj2 * prod_xz * hrr_2200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_2200y; + fyj -= 1 * trr_20y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyk = ak2 * prod_xz * hrr_2110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_2110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_3001y = trr_31y - ylyk * trr_30y; double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; - fyl = al2 * prod_xz * hrr_2101y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1100y; - fyj -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_3_0; - dd_jl = dm_jl_1_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[9*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1100y; - prod_xz = fac * trr_10z; - prod_yz = hrr_1100y * 
trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0100y; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_4_0; - dd_jl = dm_jl_1_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[10*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0100y; - prod_xz = fac * trr_20z; - prod_yz = hrr_0100y * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * 
hrr_0001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_5_0; - dd_jl = dm_jl_1_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[11*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * 1; - prod_xz = trr_20x * hrr_0100z; - prod_yz = 1 * hrr_0100z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_4_0; + dd += dm_jl_1_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[10*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_1100y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_1100y * 
trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_5_0; + dd += dm_jl_1_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[11*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_0100y * dd; + prod_xz = fac * trr_20z * dd; + prod_yz = hrr_0100y * trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } 
+ dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[12*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * hrr_0100z * dd; + prod_yz = 1 * hrr_0100z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 2 * prod_yz * trr_10x; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[12*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * 
vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * hrr_0100z; - prod_yz = trr_10y * hrr_0100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[13*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * 
hrr_1100z; - prod_yz = 1 * hrr_1100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[13*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * trr_10y * dd; + prod_xz = trr_10x * hrr_0100z * dd; + prod_yz = trr_10y * hrr_0100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[14*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_1100z * dd; + prod_yz = 1 * hrr_1100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += 
fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzl = al2 * prod_xy * hrr_1101z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[14*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - 
prod_xz = fac * hrr_0100z; - prod_yz = trr_20y * hrr_0100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 2 * prod_xz * trr_10y; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_3_0; - dd_jl = dm_jl_2_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[15*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_1100z; - prod_yz = trr_10y * hrr_1100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * 
prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1101z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_4_0; - dd_jl = dm_jl_2_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[16*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_2100z; - prod_yz = 1 * hrr_2100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_3_0; + dd += dm_jl_2_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[15*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * hrr_0100z * dd; + prod_yz = trr_20y * hrr_0100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi 
* prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_4_0; + dd += dm_jl_2_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[16*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_1100z * dd; + prod_yz = trr_10y * hrr_1100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_5_0; + dd += dm_jl_2_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[17*TILE2+sh_ij] * dm_lk_0_0; + } + 
prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_2100z * dd; + prod_yz = 1 * hrr_2100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; double hrr_3100z = trr_40z - zjzi * trr_30z; - fzi = ai2 * prod_xy * hrr_3100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3100z; + fzi -= 2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2200z = hrr_3100z - zjzi * hrr_2100z; - fzj = aj2 * prod_xy * hrr_2200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2200z; + fzj -= 1 * trr_20z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzk = ak2 * prod_xy * hrr_2110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_2110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_3001z = trr_31z - zlzk * trr_30z; double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; - fzl = al2 * prod_xy * hrr_2101z; - fzi -= 2 * prod_xy * hrr_1100z; - fzj -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_5_0; - dd_jl = dm_jl_2_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - 
vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[17*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2101z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, 
v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_2100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -20725,8 +17117,16 @@ void rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -20745,7 +17145,7 @@ void rys_ejk_ip1_2100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -20760,8 +17160,6 @@ void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 18 * TILE2; @@ -20795,11 +17193,10 @@ void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 18) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 18; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + 
sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -20846,30 +17243,18 @@ void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; double dm_lk_0_1 = dm[(l0+0)*nao+(k0+1)]; double dm_lk_0_2 = dm[(l0+0)*nao+(k0+2)]; @@ -20915,7 +17300,7 @@ void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_3_0 = dm[(i0+3)*nao+(l0+0)]; double dm_il_4_0 = dm[(i0+4)*nao+(l0+0)]; double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -20980,17 +17365,25 @@ void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 
*nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -21006,3174 +17399,2514 @@ void _rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; double hrr_2110x = trr_31x - xjxi * trr_21x; - prod_xy = hrr_2110x * 1; - prod_xz = hrr_2110x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_2110x * 1 * dd; + prod_xz = hrr_2110x * wt * dd; + prod_yz = 1 * wt * dd; double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; double trr_41x = cpx * trr_40x + 4*b00 * trr_30x; double hrr_3110x = trr_41x - xjxi * trr_31x; - fxi = ai2 * prod_yz * hrr_3110x; + fxi = ai2 * hrr_3110x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1110x = trr_21x - xjxi * trr_11x; + fxi -= 2 * hrr_1110x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * 
trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2210x = hrr_3110x - xjxi * hrr_2110x; - fxj = aj2 * prod_yz * hrr_2210x; + fxj = aj2 * hrr_2210x; + fxj -= 1 * trr_21x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double b01 = .5/akl * (1 - rt_akl); double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; double hrr_2120x = trr_32x - xjxi * trr_22x; - fxk = ak2 * prod_yz * hrr_2120x; + fxk = ak2 * hrr_2120x; + double hrr_2100x = trr_30x - xjxi * trr_20x; + fxk -= 1 * hrr_2100x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_3011x = trr_32x - xlxk * trr_31x; double hrr_2011x = trr_22x - xlxk * trr_21x; double hrr_2111x = hrr_3011x - xjxi * hrr_2011x; - fxl = al2 * prod_yz * hrr_2111x; + fxl = al2 * hrr_2111x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_1110x = trr_21x - xjxi * trr_11x; - fxi -= 2 * prod_yz * hrr_1110x; - fxj -= 1 * prod_yz * trr_21x; - double hrr_2100x = trr_30x - xjxi * trr_20x; - fxk -= 1 * prod_yz * hrr_2100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1110x * trr_10y; - prod_xz = hrr_1110x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_2110x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1110x * trr_10y * dd; + prod_xz = hrr_1110x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_2110x; + double trr_01x = cpx * fac; + double hrr_0110x = trr_11x - xjxi * trr_01x; + fxi -= 1 * hrr_0110x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; - fxj = aj2 * prod_yz * hrr_1210x; + fxj = aj2 * hrr_1210x; + fxj -= 1 * trr_11x; + v_jx += fxj 
* prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; - double trr_01x = cpx * fac; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; double hrr_1120x = trr_22x - xjxi * trr_12x; - fxk = ak2 * prod_yz * hrr_1120x; + fxk = ak2 * hrr_1120x; + double hrr_1100x = trr_20x - xjxi * trr_10x; + fxk -= 1 * hrr_1100x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1011x = trr_12x - xlxk * trr_11x; double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; - fxl = al2 * prod_yz * hrr_1111x; + fxl = al2 * hrr_1111x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0110x = trr_11x - xjxi * trr_01x; - fxi -= 1 * prod_yz * hrr_0110x; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * trr_11x; - double hrr_1100x = trr_20x - xjxi * trr_10x; - fxk -= 1 * prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * 
vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1110x * 1; - prod_xz = hrr_1110x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_2110x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1110x * 1 * dd; + prod_xz = hrr_1110x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_2110x; + fxi -= 1 * hrr_0110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1210x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1210x; + fxj -= 1 * trr_11x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1120x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1120x; + fxk -= 1 * hrr_1100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1111x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * 
hrr_1111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0110x; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_11x; - fxk -= 1 * prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * trr_20y; - prod_xz = hrr_0110x * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * hrr_1110x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0110x * trr_20y * dd; + prod_xz = hrr_0110x * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * 
hrr_1110x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; - fxj = aj2 * prod_yz * hrr_0210x; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_02x = cpx * trr_01x + 1*b01 * fac; double hrr_0120x = trr_12x - xjxi * trr_02x; - fxk = ak2 * prod_yz * hrr_0120x; + fxk = ak2 * hrr_0120x; + double hrr_0100x = trr_10x - xjxi * fac; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0011x = trr_02x - xlxk * trr_01x; double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; - fxl = al2 * prod_yz * hrr_0111x; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - fxj -= 1 * prod_yz * trr_01x; - double hrr_0100x = trr_10x - xjxi * fac; - fxk -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - 
vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * trr_10y; - prod_xz = hrr_0110x * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0111x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += 
fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0110x * 1; - prod_xz = hrr_0110x * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * hrr_1110x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0110x * trr_10y * dd; + prod_xz = hrr_0110x * trr_10z * dd; + prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * 
dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0110x * 1 * dd; + prod_xz = hrr_0110x * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * hrr_1110x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0210x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0210x; + fxj -= 1 * trr_01x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0120x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0120x; + fxk -= 1 * hrr_0100x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0111x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0111x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxj -= 1 * prod_yz * trr_01x; - fxk -= 1 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz 
+= fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_21x * hrr_0100y; - prod_xz = trr_21x * wt; - prod_yz = hrr_0100y * wt; - fxi = ai2 * prod_yz * trr_31x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_2110x; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_21x * hrr_0100y * dd; + prod_xz = trr_21x * wt * dd; + prod_yz = hrr_0100y * wt * dd; + fxi = ai2 * trr_31x; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_22x; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2011x; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2011x; + 
v_lx += fxl * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_11x; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_20x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_1100y; - prod_xz = trr_11x * wt; - prod_yz = hrr_1100y * wt; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1110x; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * hrr_1100y * 
dd; + prod_xz = trr_11x * wt * dd; + prod_yz = hrr_1100y * wt * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_12x; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1011x; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - 
vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * hrr_0100y; - prod_xz = trr_11x * trr_10z; - prod_yz = hrr_0100y * trr_10z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_2100y; - prod_xz = trr_01x * 
wt; - prod_yz = hrr_2100y * wt; - fxi = ai2 * prod_yz * trr_11x; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * hrr_0100y * dd; + prod_xz = trr_11x * trr_10z * dd; + prod_yz = hrr_0100y * trr_10z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_3_0; + dd += dm_jl_1_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[9*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * hrr_2100y * dd; + prod_xz = trr_01x * wt * dd; + prod_yz = hrr_2100y * wt * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; double hrr_3100y = trr_40y - yjyi * trr_30y; - fyi = ai2 * prod_xz * hrr_3100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * 
hrr_0110x; + fyi = ai2 * hrr_3100y; + fyi -= 2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; double hrr_2200y = hrr_3100y - yjyi * hrr_2100y; - fyj = aj2 * prod_xz * hrr_2200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_02x; + fyj = aj2 * hrr_2200y; + fyj -= 1 * trr_20y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyk = ak2 * prod_xz * hrr_2110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0011x; + fyk = ak2 * hrr_2110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; double hrr_3001y = trr_31y - ylyk * trr_30y; double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; - fyl = al2 * prod_xz * hrr_2101y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1100y; - fyj -= 1 * prod_xz * trr_20y; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_3_0; - dd_jl = dm_jl_1_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[9*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * 
vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_1100y; - prod_xz = trr_01x * trr_10z; - prod_yz = hrr_1100y * trr_10z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0100y; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * trr_10y; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_4_0; - dd_jl = dm_jl_1_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[10*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * hrr_0100y; - prod_xz = trr_01x * trr_20z; - prod_yz = hrr_0100y * trr_20z; - fxi = ai2 * prod_yz * 
trr_11x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyj -= 1 * prod_xz * 1; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_5_0; - dd_jl = dm_jl_1_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[11*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_21x * 1; - prod_xz = trr_21x * hrr_0100z; - prod_yz = 1 * hrr_0100z; - fxi = ai2 * prod_yz * trr_31x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_2110x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_4_0; + dd += dm_jl_1_0 * dm_ik_4_0; + if (jk.n_dm > 
1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[10*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * hrr_1100y * dd; + prod_xz = trr_01x * trr_10z * dd; + prod_yz = hrr_1100y * trr_10z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_5_0; + dd += dm_jl_1_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[11*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * hrr_0100y * dd; + prod_xz = trr_01x * trr_20z * dd; + prod_yz = hrr_0100y * trr_20z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl 
= al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[12*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_21x * 1 * dd; + prod_xz = trr_21x * hrr_0100z * dd; + prod_yz = 1 * hrr_0100z * dd; + fxi = ai2 * trr_31x; + fxi -= 2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_22x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_22x; + fxk -= 1 * trr_20x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_2011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 2 * prod_yz * trr_11x; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_20x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - 
vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[12*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * trr_10y; - prod_xz = trr_11x * hrr_0100z; - prod_yz = trr_10y * hrr_0100z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * trr_01x; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; 
- } - if (vj != NULL) { - vj_dd = dm_cache[13*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_11x * 1; - prod_xz = trr_11x * hrr_1100z; - prod_yz = 1 * hrr_1100z; - fxi = ai2 * prod_yz * trr_21x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_1110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[13*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * trr_10y * dd; + prod_xz = trr_11x * hrr_0100z * dd; + prod_yz = trr_10y * hrr_0100z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[14*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_11x * 1 * dd; + prod_xz = trr_11x * hrr_1100z * dd; + prod_yz = 1 * hrr_1100z * dd; + fxi = ai2 * trr_21x; + fxi -= 1 * trr_01x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_12x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_12x; + fxk -= 1 * trr_10x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_1011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzl = al2 * prod_xy * hrr_1101z; - fxi -= 1 * prod_yz * trr_01x; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * trr_10x; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - 
vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[14*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_20y; - prod_xz = trr_01x * hrr_0100z; - prod_yz = trr_20y * hrr_0100z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 2 * prod_xz * trr_10y; - fzj -= 1 * prod_xy * wt; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_3_0; - dd_jl = dm_jl_2_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[15*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += 
fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * trr_10y; - prod_xz = trr_01x * hrr_1100z; - prod_yz = trr_10y * hrr_1100z; - fxi = ai2 * prod_yz * trr_11x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1101z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_4_0; - dd_jl = dm_jl_2_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[16*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_01x * 1; - prod_xz = trr_01x * hrr_2100z; - prod_yz = 1 * hrr_2100z; - fxi = ai2 * prod_yz * trr_11x; - fyi 
= ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_3_0; + dd += dm_jl_2_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[15*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * trr_20y * dd; + prod_xz = trr_01x * hrr_0100z * dd; + prod_yz = trr_20y * hrr_0100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_4_0; + dd += dm_jl_2_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[16*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * trr_10y * dd; + prod_xz = trr_01x * hrr_1100z * dd; + prod_yz = trr_10y * hrr_1100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + 
v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_5_0; + dd += dm_jl_2_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[17*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_01x * 1 * dd; + prod_xz = trr_01x * hrr_2100z * dd; + prod_yz = 1 * hrr_2100z * dd; + fxi = ai2 * trr_11x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; double hrr_3100z = trr_40z - zjzi * trr_30z; - fzi = ai2 * prod_xy * hrr_3100z; - fxj = aj2 * prod_yz * hrr_0110x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3100z; + fzi -= 2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0110x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2200z = hrr_3100z - zjzi * hrr_2100z; - fzj = aj2 * prod_xy * hrr_2200z; - fxk = ak2 * prod_yz * trr_02x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2200z; + fzj -= 1 * trr_20z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_02x; + fxk -= 1 * fac; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzk = ak2 * prod_xy * hrr_2110z; - fxl = al2 * prod_yz * hrr_0011x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_2110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0011x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_3001z = trr_31z - zlzk * trr_30z; 
double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; - fzl = al2 * prod_xy * hrr_2101z; - fzi -= 2 * prod_xy * hrr_1100z; - fzj -= 1 * prod_xy * trr_20z; - fxk -= 1 * prod_yz * fac; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_5_0; - dd_jl = dm_jl_2_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[17*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_2100x * trr_01y; - prod_xz = hrr_2100x * wt; - prod_yz = trr_01y * wt; + fzl = al2 * hrr_2101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_2100x * trr_01y * dd; + prod_xz = hrr_2100x * wt * dd; + prod_yz = trr_01y * wt * dd; double hrr_3100x = trr_40x - xjxi * trr_30x; - fxi = ai2 * prod_yz * hrr_3100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_10z; + fxi = ai2 * hrr_3100x; 
+ fxi -= 2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2200x = hrr_3100x - xjxi * hrr_2100x; - fxj = aj2 * prod_yz * hrr_2200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_2110x; + fxj = aj2 * hrr_2200x; + fxj -= 1 * trr_20x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_2110x; + v_kx += fxk * prod_yz; double trr_02y = cpy * trr_01y + 1*b01 * 1; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_3001x = trr_31x - xlxk * trr_30x; double hrr_2001x = trr_21x - xlxk * trr_20x; double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; - fxl = al2 * prod_yz * hrr_2101x; + fxl = al2 * hrr_2101x; + v_lx += fxl * prod_yz; double hrr_0011y = trr_02y - ylyk * trr_01y; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * hrr_1100x; - fxj -= 1 * prod_yz * trr_20x; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; 
- vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_11y; - prod_xz = hrr_1100x * wt; - prod_yz = trr_11y * wt; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1100x * trr_11y * dd; + prod_xz = hrr_1100x * wt * dd; + prod_yz = trr_11y * wt * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1110x; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; - fxl = al2 * prod_yz * hrr_1101x; + fxl 
= al2 * hrr_1101x; + v_lx += fxl * prod_yz; double hrr_1011y = trr_12y - ylyk * trr_11y; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0100x; - fyi -= 1 * prod_xz * trr_01y; - fxj -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_01y; - prod_xz = hrr_1100x * trr_10z; - prod_yz = trr_01y * trr_10z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0100x; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_10x; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk 
= dm_jk_0_1 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_21y; - prod_xz = hrr_0100x * wt; - prod_yz = trr_21y * wt; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * trr_10z; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_1100x * trr_01y * dd; + prod_xz = hrr_1100x * trr_10z * dd; + prod_yz = trr_01y * trr_10z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj 
-= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0100x * trr_21y * dd; + prod_xz = hrr_0100x * wt * dd; + prod_yz = trr_21y * wt * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_0110x; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; - fxl = al2 * prod_yz * hrr_0101x; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; double hrr_2011y = trr_22y - ylyk * trr_21y; - fyl = al2 * prod_xz * hrr_2011y; - fzl = 
al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_11y; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_11y; - prod_xz = hrr_0100x * trr_10z; - prod_yz = trr_11y * trr_10z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * trr_01y; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; - 
dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_01y; - prod_xz = hrr_0100x * trr_20z; - prod_yz = trr_01y * trr_20z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxj -= 1 * prod_yz * fac; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_0_1 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; 
- vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0110y; - prod_xz = trr_20x * wt; - prod_yz = hrr_0110y * wt; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_2100x; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_1 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0100x * trr_11y * dd; + prod_xz = hrr_0100x * trr_10z * dd; + prod_yz = trr_11y * trr_10z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor 
!= 0) { + dd = dm_jk_0_1 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = hrr_0100x * trr_01y * dd; + prod_xz = hrr_0100x * trr_20z * dd; + prod_yz = trr_01y * trr_20z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_20x * hrr_0110y * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = hrr_0110y * wt * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_21x; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * 
hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double hrr_0120y = trr_12y - yjyi * trr_02y; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2001x; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1110y; - prod_xz = trr_10x * wt; - prod_yz = hrr_1110y * wt; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_2110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1100x; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; 
+ dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * hrr_1110y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_1110y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2110y; + fyi -= 1 * hrr_0110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; - fyj = aj2 * prod_xz * hrr_1210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_1210y; + fyj -= 1 * trr_11y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double hrr_1120y = trr_22y - yjyi * trr_12y; - fyk = ak2 * prod_xz * hrr_1120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_1120y; + fyk -= 1 * hrr_1100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; - fyl = al2 * prod_xz * hrr_1111y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0110y; - fyj -= 1 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += 
fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0110y; - prod_xz = trr_10x * trr_10z; - prod_yz = hrr_0110y * trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * 
vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2110y; - prod_xz = fac * wt; - prod_yz = hrr_2110y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_1111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * hrr_0110y * dd; + prod_xz = trr_10x * trr_10z * dd; + prod_yz = hrr_0110y * trr_10z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_3_0; + dd += dm_jl_1_0 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * 
dm_cache[9*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * hrr_2110y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_2110y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_41y = cpy * trr_40y + 4*b00 * trr_30y; double hrr_3110y = trr_41y - yjyi * trr_31y; - fyi = ai2 * prod_xz * hrr_3110y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0100x; + fyi = ai2 * hrr_3110y; + fyi -= 2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2210y = hrr_3110y - yjyi * hrr_2110y; - fyj = aj2 * prod_xz * hrr_2210y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_2210y; + fyj -= 1 * trr_21y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; double hrr_2120y = trr_32y - yjyi * trr_22y; - fyk = ak2 * prod_xz * hrr_2120y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_2120y; + fyk -= 1 * hrr_2100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_3011y = trr_32y - ylyk * trr_31y; double hrr_2111y = hrr_3011y - yjyi * hrr_2011y; - fyl = al2 * prod_xz * hrr_2111y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1110y; - fyj -= 1 * prod_xz * trr_21y; - fyk -= 1 * prod_xz * hrr_2100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_3_0; - dd_jl = dm_jl_1_0 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx 
+= fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[9*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1110y; - prod_xz = fac * trr_10z; - prod_yz = hrr_1110y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2110y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1210y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1120y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1111y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0110y; - fzi -= 1 * prod_xy * wt; - fyj -= 1 * prod_xz * trr_11y; - fyk -= 1 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_4_0; - dd_jl = dm_jl_1_0 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[10*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * 
vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0110y; - prod_xz = fac * trr_20z; - prod_yz = hrr_0110y * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1110y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0210y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0120y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0111y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyj -= 1 * prod_xz * trr_01y; - fyk -= 1 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_1_1 * dm_il_5_0; - dd_jl = dm_jl_1_0 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[11*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * trr_01y; - prod_xz = trr_20x * hrr_0100z; - 
prod_yz = trr_01y * hrr_0100z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 2 * prod_yz * trr_10x; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[12*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_11y; - prod_xz = trr_10x * hrr_0100z; - prod_yz = trr_11y * hrr_0100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 
* prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * trr_01y; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[13*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_01y; - prod_xz = trr_10x * hrr_1100z; - prod_yz = trr_01y * hrr_1100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_1101z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_1; - vk_dd = dd_jk 
+ dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[14*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_21y; - prod_xz = fac * hrr_0100z; - prod_yz = trr_21y * hrr_0100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_31y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2110y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_22y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2011y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 2 * prod_xz * trr_11y; - fzj -= 1 * prod_xy * wt; - fyk -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_3_0; - dd_jl = dm_jl_2_0 * dm_ik_3_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * 
vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[15*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_11y; - prod_xz = fac * hrr_1100z; - prod_yz = trr_11y * hrr_1100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_21y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1110y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_12y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1011y; - fzl = al2 * prod_xy * hrr_1101z; - fyi -= 1 * prod_xz * trr_01y; - fzi -= 1 * prod_xy * hrr_0100z; - fzj -= 1 * prod_xy * trr_10z; - fyk -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_4_0; - dd_jl = dm_jl_2_0 * dm_ik_4_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[16*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - 
vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_01y; - prod_xz = fac * hrr_2100z; - prod_yz = trr_01y * hrr_2100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_11y; - fzi = ai2 * prod_xy * hrr_3100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0110y; - fzj = aj2 * prod_xy * hrr_2200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_02y; - fzk = ak2 * prod_xy * hrr_2110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0011y; - fzl = al2 * prod_xy * hrr_2101z; - fzi -= 2 * prod_xy * hrr_1100z; - fzj -= 1 * prod_xy * trr_20z; - fyk -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_2_1 * dm_il_5_0; - dd_jl = dm_jl_2_0 * dm_ik_5_1; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[17*TILE2+sh_ij] * dm_lk_0_1; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_2100x * 1; - prod_xz = hrr_2100x * 
trr_01z; - prod_yz = 1 * trr_01z; - fxi = ai2 * prod_yz * hrr_3100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_2110x; - fyk = ak2 * prod_xz * trr_01y; + fyl = al2 * hrr_2111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_4_0; + dd += dm_jl_1_0 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[10*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * hrr_1110y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_1110y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2110y; + fyi -= 1 * hrr_0110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1210y; + fyj -= 1 * trr_11y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1120y; + fyk -= 1 * hrr_1100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_1 * dm_il_5_0; + dd += dm_jl_1_0 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[11*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * hrr_0110y * dd; + prod_xz = fac * trr_20z * dd; + prod_yz = hrr_0110y * 
trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1110y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0210y; + fyj -= 1 * trr_01y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0120y; + fyk -= 1 * hrr_0100y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0111y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[12*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_20x * trr_01y * dd; + prod_xz = trr_20x * hrr_0100z * dd; + prod_yz = trr_01y * hrr_0100z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] 
* dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[13*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * trr_11y * dd; + prod_xz = trr_10x * hrr_0100z * dd; + prod_yz = trr_11y * hrr_0100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[14*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = trr_10x * trr_01y * dd; + prod_xz = trr_10x * hrr_1100z * dd; + prod_yz = trr_01y * hrr_1100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + 
v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_3_0; + dd += dm_jl_2_0 * dm_ik_3_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[15*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_21y * dd; + prod_xz = fac * hrr_0100z * dd; + prod_yz = trr_21y * hrr_0100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_31y; + fyi -= 2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_22y; + fyk -= 1 * trr_20y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_4_0; + dd += dm_jl_2_0 * dm_ik_4_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[16*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_11y * dd; + prod_xz = fac * hrr_1100z * dd; + prod_yz = trr_11y * hrr_1100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_21y; + fyi -= 1 * trr_01y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj 
* prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_12y; + fyk -= 1 * trr_10y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_1 * dm_il_5_0; + dd += dm_jl_2_0 * dm_ik_5_1; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+1]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[17*TILE2+sh_ij] * dm_lk_0_1; + } + prod_xy = fac * trr_01y * dd; + prod_xz = fac * hrr_2100z * dd; + prod_yz = trr_01y * hrr_2100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_11y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_3100z; + fzi -= 2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0110y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2200z; + fzj -= 1 * trr_20z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_02y; + fyk -= 1 * 1; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_2110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0011y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_2100x * 1 * dd; + prod_xz = hrr_2100x * trr_01z * dd; + prod_yz = 1 * trr_01z * dd; + fxi = ai2 * hrr_3100x; + fxi -= 2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + 
v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2200x; + fxj -= 1 * trr_20x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_2110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_02z = cpz * trr_01z + 1*b01 * wt; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0011z = trr_02z - zlzk * trr_01z; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * hrr_1100x; - fxj -= 1 * prod_yz * trr_20x; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_10y; - prod_xz = hrr_1100x * trr_01z; - prod_yz = trr_10y * trr_01z; - fxi = ai2 * 
prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * hrr_0100x; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * 1; - prod_xz = hrr_1100x * trr_11z; - prod_yz = 1 * trr_11z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_01y; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if 
(jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1100x * trr_10y * dd; + prod_xz = hrr_1100x * trr_01z * dd; + prod_yz = trr_10y * trr_01z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_1100x * 1 * dd; + prod_xz = hrr_1100x * trr_11z * dd; + prod_yz = 1 * trr_11z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = 
ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1011z = trr_12z - zlzk * trr_11z; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * hrr_0100x; - fzi -= 1 * prod_xy * trr_01z; - fxj -= 1 * prod_yz * trr_10x; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_20y; - prod_xz = hrr_0100x * trr_01z; - prod_yz = trr_20y * trr_01z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * 
prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * trr_10y; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_10y; - prod_xz = hrr_0100x * trr_11z; - prod_yz = trr_10y * trr_11z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1011z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * trr_01z; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_4_0; - dd_jl = dm_jl_0_0 
* dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * 1; - prod_xz = hrr_0100x * trr_21z; - prod_yz = 1 * trr_21z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0100y; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_01y; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0100x * trr_20y * dd; + prod_xz = hrr_0100x * trr_01z * dd; + prod_yz = trr_20y * trr_01z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * 
trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = hrr_0100x * trr_10y * dd; + prod_xz = hrr_0100x * trr_11z * dd; + prod_yz = trr_10y * trr_11z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_2 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * 
dm_lk_0_2; + } + prod_xy = hrr_0100x * 1 * dd; + prod_xz = hrr_0100x * trr_21z * dd; + prod_yz = 1 * trr_21z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2011z = trr_22z - zlzk * trr_21z; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fxj -= 1 * prod_yz * fac; - fzk -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_0_2 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - 
vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0100y; - prod_xz = trr_20x * trr_01z; - prod_yz = hrr_0100y * trr_01z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 2 * prod_yz * trr_10x; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1100y; - prod_xz = trr_10x * trr_01z; - prod_yz = hrr_1100y * trr_01z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_1100x; 
- fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0011z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0100y; - prod_xz = trr_10x * trr_11z; - prod_yz = hrr_0100y * trr_11z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1011z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 
* prod_xy * trr_01z; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2100y; - prod_xz = fac * trr_01z; - prod_yz = hrr_2100y * trr_01z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_3100y; - fzi = ai2 * prod_xy * trr_11z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2200y; - fzj = aj2 * prod_xy * hrr_0110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_2110y; - fzk = ak2 * prod_xy * trr_02z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2101y; - fzl = al2 * prod_xy * hrr_0011z; - fyi -= 2 * prod_xz * hrr_1100y; - fyj -= 1 * prod_xz * trr_20y; - fzk -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_3_0; - dd_jl = dm_jl_1_0 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk 
+ dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[9*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1100y; - prod_xz = fac * trr_11z; - prod_yz = hrr_1100y * trr_11z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_21z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_1110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_12z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_1011z; - fyi -= 1 * prod_xz * hrr_0100y; - fzi -= 1 * prod_xy * trr_01z; - fyj -= 1 * prod_xz * trr_10y; - fzk -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_4_0; - dd_jl = dm_jl_1_0 * dm_ik_4_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - 
vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[10*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0100y; - prod_xz = fac * trr_21z; - prod_yz = hrr_0100y * trr_21z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_31z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_2110z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_22z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_2011z; - fzi -= 2 * prod_xy * trr_11z; - fyj -= 1 * prod_xz * 1; - fzk -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_1_2 * dm_il_5_0; - dd_jl = dm_jl_1_0 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[11*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * 
vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * 1; - prod_xz = trr_20x * hrr_0110z; - prod_yz = 1 * hrr_0110z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_20x * hrr_0100y * dd; + prod_xz = trr_20x * trr_01z * dd; + prod_yz = hrr_0100y * trr_01z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * hrr_1100y * dd; + prod_xz = trr_10x * trr_01z * dd; + prod_yz = hrr_1100y * 
trr_01z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * hrr_0100y * dd; + prod_xz = trr_10x * trr_11z * dd; + prod_yz = hrr_0100y * trr_11z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_3_0; + dd += dm_jl_1_0 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * 
dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[9*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * hrr_2100y * dd; + prod_xz = fac * trr_01z * dd; + prod_yz = hrr_2100y * trr_01z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_3100y; + fyi -= 2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2200y; + fyj -= 1 * trr_20y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_2110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_02z; + fzk -= 1 * wt; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_4_0; + dd += dm_jl_1_0 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[10*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * hrr_1100y * dd; + prod_xz = fac * trr_11z * dd; + prod_yz = hrr_1100y * trr_11z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_21z; + fzi -= 1 * trr_01z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_12z; + fzk -= 1 * trr_10z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * 
prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_2 * dm_il_5_0; + dd += dm_jl_1_0 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[11*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * hrr_0100y * dd; + prod_xz = fac * trr_21z * dd; + prod_yz = hrr_0100y * trr_21z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_31z; + fzi -= 2 * trr_11z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2110z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_22z; + fzk -= 1 * trr_20z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2011z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[12*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * hrr_0110z * dd; + prod_yz = 1 * hrr_0110z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; - fzj 
= aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0120z = trr_12z - zjzi * trr_02z; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; - fzl = al2 * prod_xy * hrr_0111z; - fxi -= 2 * prod_yz * trr_10x; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[12*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * hrr_0110z; - prod_yz = trr_10y * hrr_0110z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 
* prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0120z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0111z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[13*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_1110z; - prod_yz = 1 * hrr_1110z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2110z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd 
+= dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[13*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * trr_10y * dd; + prod_xz = trr_10x * hrr_0110z * dd; + prod_yz = trr_10y * hrr_0110z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[14*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_1110z * dd; + prod_yz = 1 * hrr_1110z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2110z; + fzi -= 1 * hrr_0110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; - fzj = aj2 * prod_xy * hrr_1210z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1210z; + fzj -= 1 * trr_11z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + 
v_ky += fyk * prod_xz; double hrr_1120z = trr_22z - zjzi * trr_12z; - fzk = ak2 * prod_xy * hrr_1120z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1120z; + fzk -= 1 * hrr_1100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; - fzl = al2 * prod_xy * hrr_1111z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0110z; - fzj -= 1 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * hrr_1100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[14*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - prod_xz = fac * hrr_0110z; - prod_yz = trr_20y * hrr_0110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * 
hrr_0120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0111z; - fyi -= 2 * prod_xz * trr_10y; - fzj -= 1 * prod_xy * trr_01z; - fzk -= 1 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_3_0; - dd_jl = dm_jl_2_0 * dm_ik_3_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[15*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_1110z; - prod_yz = trr_10y * hrr_1110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1111z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0110z; - fzj -= 1 * prod_xy * trr_11z; - fzk -= 1 * prod_xy * hrr_1100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_4_0; - dd_jl = dm_jl_2_0 * dm_ik_4_2; - vk_dd = dd_jk 
+ dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[16*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_2110z; - prod_yz = 1 * hrr_2110z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_3_0; + dd += dm_jl_2_0 * dm_ik_3_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[15*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * hrr_0110z * dd; + prod_yz = trr_20y * hrr_0110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0210z; + fzj -= 1 * trr_01z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + 
fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0120z; + fzk -= 1 * hrr_0100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_4_0; + dd += dm_jl_2_0 * dm_ik_4_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[16*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_1110z * dd; + prod_yz = trr_10y * hrr_1110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2110z; + fzi -= 1 * hrr_0110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1210z; + fzj -= 1 * trr_11z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1120z; + fzk -= 1 * hrr_1100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1111z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_2 * dm_il_5_0; + dd += dm_jl_2_0 * dm_ik_5_2; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[17*TILE2+sh_ij] * dm_lk_0_2; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_2110z * dd; + prod_yz = 1 * hrr_2110z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_41z = cpz * trr_40z + 4*b00 * 
trr_30z; double hrr_3110z = trr_41z - zjzi * trr_31z; - fzi = ai2 * prod_xy * hrr_3110z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3110z; + fzi -= 2 * hrr_1110z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2210z = hrr_3110z - zjzi * hrr_2110z; - fzj = aj2 * prod_xy * hrr_2210z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2210z; + fzj -= 1 * trr_21z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; double hrr_2120z = trr_32z - zjzi * trr_22z; - fzk = ak2 * prod_xy * hrr_2120z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_2120z; + fzk -= 1 * hrr_2100z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_3011z = trr_32z - zlzk * trr_31z; double hrr_2111z = hrr_3011z - zjzi * hrr_2011z; - fzl = al2 * prod_xy * hrr_2111z; - fzi -= 2 * prod_xy * hrr_1110z; - fzj -= 1 * prod_xy * trr_21z; - fzk -= 1 * prod_xy * hrr_2100z; - if (vk != NULL) { - dd_jk = dm_jk_2_2 * dm_il_5_0; - dd_jl = dm_jl_2_0 * dm_ik_5_2; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[17*TILE2+sh_ij] * dm_lk_0_2; - vj_grad_ix += fxi * 
vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2111z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } __global__ -void rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void 
rys_ejk_ip1_2110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -24190,8 +19923,16 @@ void rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -24210,7 +19951,7 @@ void rys_ejk_ip1_2110(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } __device__ static -void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) { int sq_id = threadIdx.x + blockDim.x * threadIdx.y; @@ -24225,8 +19966,6 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int *bas = envs.bas; double *env = envs.env; double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; double *dm = jk.dm; extern __shared__ double dm_cache[]; double *Rpa_cicj = dm_cache + 36 * TILE2; @@ -24260,11 +19999,10 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; } - int ij = sq_id / TILE2; - if (ij < 36) { + int sh_ij = sq_id % TILE2; + for (int ij = sq_id / TILE2; ij < 36; ij += nsq_per_block / TILE2) { int i = ij % 6; int j = ij / 6; - int sh_ij = sq_id % TILE2; int ish = ish0 + sh_ij / TILE; int jsh = jsh0 + sh_ij % TILE; int i0 = ao_loc[ish]; @@ -24311,30 +20049,18 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, 
JKMatrix jk, BoundsInfo bounds, double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double vj_grad_ix = 0; - double vj_grad_iy = 0; - double vj_grad_iz = 0; - double vj_grad_jx = 0; - double vj_grad_jy = 0; - double vj_grad_jz = 0; - double vj_grad_kx = 0; - double vj_grad_ky = 0; - double vj_grad_kz = 0; - double vj_grad_lx = 0; - double vj_grad_ly = 0; - double vj_grad_lz = 0; - double vk_grad_ix = 0; - double vk_grad_iy = 0; - double vk_grad_iz = 0; - double vk_grad_jx = 0; - double vk_grad_jy = 0; - double vk_grad_jz = 0; - double vk_grad_kx = 0; - double vk_grad_ky = 0; - double vk_grad_kz = 0; - double vk_grad_lx = 0; - double vk_grad_ly = 0; - double vk_grad_lz = 0; + double v_ix = 0; + double v_iy = 0; + double v_iz = 0; + double v_jx = 0; + double v_jy = 0; + double v_jz = 0; + double v_kx = 0; + double v_ky = 0; + double v_kz = 0; + double v_lx = 0; + double v_ly = 0; + double v_lz = 0; double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; if (jk.n_dm > 1) { int nao2 = nao * nao; @@ -24364,7 +20090,7 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double dm_il_3_0 = dm[(i0+3)*nao+(l0+0)]; double dm_il_4_0 = dm[(i0+4)*nao+(l0+0)]; double dm_il_5_0 = dm[(i0+5)*nao+(l0+0)]; - double dd_jk, dd_jl, vj_dd, vk_dd; + double dd; double prod_xy; double prod_xz; double prod_yz; @@ -24429,17 +20155,25 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double theta_rr = theta * rr; if (omega == 0) { rys_roots(3, theta_rr, rw); - } else { + } else if (omega > 0) { double theta_fac = omega * omega / (omega * omega + theta); rys_roots(3, theta_fac*theta_rr, rw); fac *= sqrt(theta_fac); for (int irys = 0; irys < 3; ++irys) { rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, 
theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } } - __syncthreads(); if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { + for (int irys = 0; irys < bounds.nroots; ++irys) { double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; double rt = rw[sq_id + 2*irys *nsq_per_block]; double rt_aa = rt / (aij + akl); @@ -24453,25 +20187,48 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double hrr_3100x = trr_40x - xjxi * trr_30x; double hrr_2100x = trr_30x - xjxi * trr_20x; double hrr_2200x = hrr_3100x - xjxi * hrr_2100x; - prod_xy = hrr_2200x * 1; - prod_xz = hrr_2200x * wt; - prod_yz = 1 * wt; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_0_0; + dd += dm_jl_0_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_2200x * 1 * dd; + prod_xz = hrr_2200x * wt * dd; + prod_yz = 1 * wt * dd; double trr_50x = c0x * trr_40x + 4*b10 * trr_30x; double hrr_4100x = trr_50x - xjxi * trr_40x; double hrr_3200x = hrr_4100x - xjxi * hrr_3100x; - fxi = ai2 * prod_yz * hrr_3200x; + fxi = ai2 * hrr_3200x; + double hrr_1100x = trr_20x - xjxi * trr_10x; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + fxi -= 2 * hrr_1200x; + v_ix += fxi * prod_yz; double c0y = ypa - ypq*rt_aij; double trr_10y = c0y * 1; - fyi = ai2 * prod_xz * trr_10y; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double c0z = zpa - zpq*rt_aij; double trr_10z = c0z * wt; - fzi = ai2 * prod_xy * trr_10z; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_2300x = hrr_3200x - xjxi * hrr_2200x; - fxj = aj2 * prod_yz * hrr_2300x; + fxj = aj2 * hrr_2300x; + fxj -= 2 * 
hrr_2100x; + v_jx += fxj * prod_yz; double hrr_0100y = trr_10y - yjyi * 1; - fyj = aj2 * prod_xz * hrr_0100y; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0100z = trr_10z - zjzi * wt; - fzj = aj2 * prod_xy * hrr_0100z; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double rt_akl = rt_aa * aij; double cpx = xqc + xpq*rt_akl; double b00 = .5 * rt_aa; @@ -24481,2121 +20238,1654 @@ void _rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; double hrr_2110x = trr_31x - xjxi * trr_21x; double hrr_2210x = hrr_3110x - xjxi * hrr_2110x; - fxk = ak2 * prod_yz * hrr_2210x; + fxk = ak2 * hrr_2210x; + v_kx += fxk * prod_yz; double cpy = yqc + ypq*rt_akl; double trr_01y = cpy * 1; - fyk = ak2 * prod_xz * trr_01y; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double cpz = zqc + zpq*rt_akl; double trr_01z = cpz * wt; - fzk = ak2 * prod_xy * trr_01z; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_4001x = trr_41x - xlxk * trr_40x; double hrr_3001x = trr_31x - xlxk * trr_30x; double hrr_3101x = hrr_4001x - xjxi * hrr_3001x; double hrr_2001x = trr_21x - xlxk * trr_20x; double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; double hrr_2201x = hrr_3101x - xjxi * hrr_2101x; - fxl = al2 * prod_yz * hrr_2201x; + fxl = al2 * hrr_2201x; + v_lx += fxl * prod_yz; double hrr_0001y = trr_01y - ylyk * 1; - fyl = al2 * prod_xz * hrr_0001y; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0001z = trr_01z - zlzk * wt; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_1100x = trr_20x - xjxi * trr_10x; - double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; - fxi -= 2 * prod_yz * hrr_1200x; - fxj -= 2 * prod_yz * hrr_2100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - 
vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1200x * trr_10y; - prod_xz = hrr_1200x * wt; - prod_yz = trr_10y * wt; - fxi = ai2 * prod_yz * hrr_2200x; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_1_0; + dd += dm_jl_0_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1200x * trr_10y * dd; + prod_xz = hrr_1200x * wt * dd; + prod_yz = trr_10y * wt * dd; + fxi = ai2 * hrr_2200x; + double hrr_0100x = trr_10x - xjxi * fac; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + fxi -= 1 * hrr_0200x; + v_ix += fxi * prod_yz; double trr_20y = c0y * trr_10y + 1*b10 * 1; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_1300x = hrr_2200x - xjxi * hrr_1200x; - fxj = aj2 * prod_yz * hrr_1300x; + fxj = aj2 * hrr_1300x; + fxj -= 2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1100y = trr_20y - yjyi * trr_10y; - fyj = 
aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_11x = cpx * trr_10x + 1*b00 * fac; double hrr_1110x = trr_21x - xjxi * trr_11x; double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; - fxk = ak2 * prod_yz * hrr_1210x; + fxk = ak2 * hrr_1210x; + v_kx += fxk * prod_yz; double trr_11y = cpy * trr_10y + 1*b00 * 1; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_1001x = trr_11x - xlxk * trr_10x; double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; double hrr_1201x = hrr_2101x - xjxi * hrr_1101x; - fxl = al2 * prod_yz * hrr_1201x; + fxl = al2 * hrr_1201x; + v_lx += fxl * prod_yz; double hrr_1001y = trr_11y - ylyk * trr_10y; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0001z; - double hrr_0100x = trr_10x - xjxi * fac; - double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; - fxi -= 1 * prod_yz * hrr_0200x; - fyi -= 1 * prod_xz * 1; - fxj -= 2 * prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * 
vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1200x * 1; - prod_xz = hrr_1200x * trr_10z; - prod_yz = 1 * trr_10z; - fxi = ai2 * prod_yz * hrr_2200x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_2_0; + dd += dm_jl_0_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1200x * 1 * dd; + prod_xz = hrr_1200x * trr_10z * dd; + prod_yz = 1 * trr_10z * dd; + fxi = ai2 * hrr_2200x; + fxi -= 1 * hrr_0200x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_20z = c0z * trr_10z + 1*b10 * wt; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1300x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1300x; + fxj -= 2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1100z = trr_20z - zjzi * trr_10z; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1210x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1210x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_11z = cpz * trr_10z + 1*b00 * wt; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1201x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1201x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1001z = trr_11z - zlzk * trr_10z; - fzl = 
al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0200x; - fzi -= 1 * prod_xy * wt; - fxj -= 2 * prod_yz * hrr_1100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0200x * trr_20y; - prod_xz = hrr_0200x * wt; - prod_yz = trr_20y * wt; - fxi = ai2 * prod_yz * hrr_1200x; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_3_0; + dd += dm_jl_0_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0200x * trr_20y * dd; + prod_xz = hrr_0200x * wt * dd; + prod_yz = trr_20y * wt * dd; + fxi = ai2 * hrr_1200x; + v_ix += fxi * prod_yz; double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * trr_10z; + fyi = ai2 * trr_30y; + 
fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; double hrr_0300x = hrr_1200x - xjxi * hrr_0200x; - fxj = aj2 * prod_yz * hrr_0300x; + fxj = aj2 * hrr_0300x; + fxj -= 2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2100y = trr_30y - yjyi * trr_20y; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0100z; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; double trr_01x = cpx * fac; double hrr_0110x = trr_11x - xjxi * trr_01x; double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; - fxk = ak2 * prod_yz * hrr_0210x; + fxk = ak2 * hrr_0210x; + v_kx += fxk * prod_yz; double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * trr_01z; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; double hrr_0001x = trr_01x - xlxk * fac; double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; double hrr_0201x = hrr_1101x - xjxi * hrr_0101x; - fxl = al2 * prod_yz * hrr_0201x; + fxl = al2 * hrr_0201x; + v_lx += fxl * prod_yz; double hrr_2001y = trr_21y - ylyk * trr_20y; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * trr_10y; - fxj -= 2 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_3_0; - dd_jl = dm_jl_0_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm_lk_0_0; - 
vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0200x * trr_10y; - prod_xz = hrr_0200x * trr_10z; - prod_yz = trr_10y * trr_10z; - fxi = ai2 * prod_yz * hrr_1200x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0300x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0210x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0201x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * wt; - fxj -= 2 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_4_0; - dd_jl = dm_jl_0_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - 
} - prod_xy = hrr_0200x * 1; - prod_xz = hrr_0200x * trr_20z; - prod_yz = 1 * trr_20z; - fxi = ai2 * prod_yz * hrr_1200x; - fyi = ai2 * prod_xz * trr_10y; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_4_0; + dd += dm_jl_0_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[4*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0200x * trr_10y * dd; + prod_xz = hrr_0200x * trr_10z * dd; + prod_yz = trr_10y * trr_10z * dd; + fxi = ai2 * hrr_1200x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0300x; + fxj -= 2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0210x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0201x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_0_0 * dm_il_5_0; + dd += dm_jl_0_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0200x * 1 * dd; + prod_xz = hrr_0200x * trr_20z * dd; + prod_yz = 1 * trr_20z * dd; + fxi = ai2 * hrr_1200x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - fzi = ai2 * prod_xy * 
trr_30z; - fxj = aj2 * prod_yz * hrr_0300x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0300x; + fxj -= 2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2100z = trr_30z - zjzi * trr_20z; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0210x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0210x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0201x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0201x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_2001z = trr_21z - zlzk * trr_20z; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxj -= 2 * prod_yz * hrr_0100x; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_5_0; - dd_jl = dm_jl_0_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl 
* vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_2100x * hrr_0100y; - prod_xz = hrr_2100x * wt; - prod_yz = hrr_0100y * wt; - fxi = ai2 * prod_yz * hrr_3100x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_2200x; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_0_0; + dd += dm_jl_1_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_2100x * hrr_0100y * dd; + prod_xz = hrr_2100x * wt * dd; + prod_yz = hrr_0100y * wt * dd; + fxi = ai2 * hrr_3100x; + fxi -= 2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2200x; + fxj -= 1 * trr_20x; + v_jx += fxj * prod_yz; double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_2110x; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_2110x; + v_kx += fxk * prod_yz; double hrr_0110y = trr_11y - yjyi * trr_01y; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2101x; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2101x; + v_lx += fxl * prod_yz; double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * hrr_1100x; - fxj -= 1 * prod_yz * trr_20x; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_0_0; - dd_jl = dm_jl_1_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if 
(jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * hrr_1100y; - prod_xz = hrr_1100x * wt; - prod_yz = hrr_1100y * wt; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1200x; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_1_0; + dd += dm_jl_1_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1100x * hrr_1100y * dd; + prod_xz = hrr_1100x * wt * dd; + prod_yz = hrr_1100y * wt * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; 
double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * hrr_1110x; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; double hrr_1110y = trr_21y - yjyi * trr_11y; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1101x; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * hrr_0100x; - fyi -= 1 * prod_xz * hrr_0100y; - fxj -= 1 * prod_yz * trr_10x; - fyj -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_1_0; - dd_jl = dm_jl_1_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * hrr_0100y; - prod_xz = hrr_1100x * trr_10z; - 
prod_yz = hrr_0100y * trr_10z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * hrr_0100x; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * trr_10x; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_2_0; - dd_jl = dm_jl_1_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_2100y; - prod_xz = hrr_0100x * wt; - prod_yz = hrr_2100y * wt; - fxi = ai2 * prod_yz * hrr_1100x; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_2_0; + dd += dm_jl_1_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * 
dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[8*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1100x * hrr_0100y * dd; + prod_xz = hrr_1100x * trr_10z * dd; + prod_yz = hrr_0100y * trr_10z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_3_0; + dd += dm_jl_1_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[9*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * hrr_2100y * dd; + prod_xz = hrr_0100x * wt * dd; + prod_yz = hrr_2100y * wt * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; double hrr_3100y = trr_40y - yjyi * trr_30y; - fyi = ai2 * prod_xz * hrr_3100y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0200x; + fyi = ai2 * hrr_3100y; + fyi -= 2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; double hrr_2200y = hrr_3100y - yjyi * hrr_2100y; - fyj = aj2 * prod_xz * hrr_2200y; - fzj = aj2 * prod_xy * 
hrr_0100z; - fxk = ak2 * prod_yz * hrr_0110x; + fyj = aj2 * hrr_2200y; + fyj -= 1 * trr_20y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; double hrr_2110y = trr_31y - yjyi * trr_21y; - fyk = ak2 * prod_xz * hrr_2110y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0101x; + fyk = ak2 * hrr_2110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; double hrr_3001y = trr_31y - ylyk * trr_30y; double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; - fyl = al2 * prod_xz * hrr_2101y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1100y; - fxj -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * trr_20y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_3_0; - dd_jl = dm_jl_1_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[9*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_1100y; - prod_xz = hrr_0100x * trr_10z; - prod_yz = hrr_1100y * trr_10z; - fxi = ai2 * prod_yz 
* hrr_1100x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0100y; - fzi -= 1 * prod_xy * wt; - fxj -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * trr_10y; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_4_0; - dd_jl = dm_jl_1_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[10*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * hrr_0100y; - prod_xz = hrr_0100x * trr_20z; - prod_yz = hrr_0100y * trr_20z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * 
prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fxj -= 1 * prod_yz * fac; - fyj -= 1 * prod_xz * 1; - if (vk != NULL) { - dd_jk = dm_jk_1_0 * dm_il_5_0; - dd_jl = dm_jl_1_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[11*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_2100x * 1; - prod_xz = hrr_2100x * hrr_0100z; - prod_yz = 1 * hrr_0100z; - fxi = ai2 * prod_yz * hrr_3100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_2200x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_4_0; + dd += dm_jl_1_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[10*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * hrr_1100y * dd; + 
prod_xz = hrr_0100x * trr_10z * dd; + prod_yz = hrr_1100y * trr_10z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_1_0 * dm_il_5_0; + dd += dm_jl_1_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[11*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * hrr_0100y * dd; + prod_xz = hrr_0100x * trr_20z * dd; + prod_yz = hrr_0100y * trr_20z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_0_0; + dd += dm_jl_2_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd 
+= dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[12*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_2100x * 1 * dd; + prod_xz = hrr_2100x * hrr_0100z * dd; + prod_yz = 1 * hrr_0100z * dd; + fxi = ai2 * hrr_3100x; + fxi -= 2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2200x; + fxj -= 1 * trr_20x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_2110x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_2110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0110z = trr_11z - zjzi * trr_01z; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_2101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 2 * prod_yz * hrr_1100x; - fxj -= 1 * prod_yz * trr_20x; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_0_0; - dd_jl = dm_jl_2_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - 
vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[12*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * trr_10y; - prod_xz = hrr_1100x * hrr_0100z; - prod_yz = trr_10y * hrr_0100z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * hrr_0100x; - fyi -= 1 * prod_xz * 1; - fxj -= 1 * prod_yz * trr_10x; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_1_0; - dd_jl = dm_jl_2_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[13*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - 
vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_1100x * 1; - prod_xz = hrr_1100x * hrr_1100z; - prod_yz = 1 * hrr_1100z; - fxi = ai2 * prod_yz * hrr_2100x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_1200x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_1_0; + dd += dm_jl_2_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[13*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_1100x * trr_10y * dd; + prod_xz = hrr_1100x * hrr_0100z * dd; + prod_yz = trr_10y * hrr_0100z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_2_0; + dd += dm_jl_2_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[14*TILE2+sh_ij] * dm_lk_0_0; + } + 
prod_xy = hrr_1100x * 1 * dd; + prod_xz = hrr_1100x * hrr_1100z * dd; + prod_yz = 1 * hrr_1100z * dd; + fxi = ai2 * hrr_2100x; + fxi -= 1 * hrr_0100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1200x; + fxj -= 1 * trr_10x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * hrr_1110x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_1110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_1110z = trr_21z - zjzi * trr_11z; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_1101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; - fzl = al2 * prod_xy * hrr_1101z; - fxi -= 1 * prod_yz * hrr_0100x; - fzi -= 1 * prod_xy * hrr_0100z; - fxj -= 1 * prod_yz * trr_10x; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_2_0; - dd_jl = dm_jl_2_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[14*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - 
vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * trr_20y; - prod_xz = hrr_0100x * hrr_0100z; - prod_yz = trr_20y * hrr_0100z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 2 * prod_xz * trr_10y; - fxj -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_3_0; - dd_jl = dm_jl_2_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[15*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = 
hrr_0100x * trr_10y; - prod_xz = hrr_0100x * hrr_1100z; - prod_yz = trr_10y * hrr_1100z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1101z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0100z; - fxj -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_4_0; - dd_jl = dm_jl_2_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[16*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = hrr_0100x * 1; - prod_xz = hrr_0100x * hrr_2100z; - prod_yz = 1 * hrr_2100z; - fxi = ai2 * prod_yz * hrr_1100x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_3_0; + dd += dm_jl_2_0 * dm_ik_3_0; + if (jk.n_dm 
> 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[15*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * trr_20y * dd; + prod_xz = hrr_0100x * hrr_0100z * dd; + prod_yz = trr_20y * hrr_0100z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_4_0; + dd += dm_jl_2_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[16*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * trr_10y * dd; + prod_xz = hrr_0100x * hrr_1100z * dd; + prod_yz = trr_10y * hrr_1100z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk 
* prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_2_0 * dm_il_5_0; + dd += dm_jl_2_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[17*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = hrr_0100x * 1 * dd; + prod_xz = hrr_0100x * hrr_2100z * dd; + prod_yz = 1 * hrr_2100z * dd; + fxi = ai2 * hrr_1100x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; double hrr_3100z = trr_40z - zjzi * trr_30z; - fzi = ai2 * prod_xy * hrr_3100z; - fxj = aj2 * prod_yz * hrr_0200x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3100z; + fzi -= 2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0200x; + fxj -= 1 * fac; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2200z = hrr_3100z - zjzi * hrr_2100z; - fzj = aj2 * prod_xy * hrr_2200z; - fxk = ak2 * prod_yz * hrr_0110x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2200z; + fzj -= 1 * trr_20z; + v_jz += fzj * prod_xy; + fxk = ak2 * hrr_0110x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; double hrr_2110z = trr_31z - zjzi * trr_21z; - fzk = ak2 * prod_xy * hrr_2110z; - fxl = al2 * prod_yz * hrr_0101x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_2110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0101x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_3001z = trr_31z - zlzk * trr_30z; double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; - fzl = al2 * prod_xy * hrr_2101z; - fzi -= 2 * prod_xy * hrr_1100z; - fxj -= 1 * prod_yz * fac; - fzj -= 1 * prod_xy * 
trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_2_0 * dm_il_5_0; - dd_jl = dm_jl_2_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[17*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0200y; - prod_xz = trr_20x * wt; - prod_yz = hrr_0200y * wt; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1200y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_2100x; + fzl = al2 * hrr_2101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_3_0 * dm_il_0_0; + dd += dm_jl_3_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[18*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_20x * hrr_0200y * dd; + prod_xz = trr_20x * wt * dd; + prod_yz = hrr_0200y * wt * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1200y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += 
fxj * prod_yz; double hrr_0300y = hrr_1200y - yjyi * hrr_0200y; - fyj = aj2 * prod_xz * hrr_0300y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_21x; + fyj = aj2 * hrr_0300y; + fyj -= 2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; - fyk = ak2 * prod_xz * hrr_0210y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_2001x; + fyk = ak2 * hrr_0210y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; double hrr_0201y = hrr_1101y - yjyi * hrr_0101y; - fyl = al2 * prod_xz * hrr_0201y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 2 * prod_yz * trr_10x; - fyj -= 2 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_3_0 * dm_il_0_0; - dd_jl = dm_jl_3_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[18*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1200y; - prod_xz = trr_10x * wt; - prod_yz = hrr_1200y * wt; - fxi = ai2 * prod_yz * trr_20x; 
- fyi = ai2 * prod_xz * hrr_2200y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_1100x; + fyl = al2 * hrr_0201y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_3_0 * dm_il_1_0; + dd += dm_jl_3_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[19*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_1200y * dd; + prod_xz = trr_10x * wt * dd; + prod_yz = hrr_1200y * wt * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2200y; + fyi -= 1 * hrr_0200y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; double hrr_1300y = hrr_2200y - yjyi * hrr_1200y; - fyj = aj2 * prod_xz * hrr_1300y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_11x; + fyj = aj2 * hrr_1300y; + fyj -= 2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; - fyk = ak2 * prod_xz * hrr_1210y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_1001x; + fyk = ak2 * hrr_1210y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; double hrr_1201y = hrr_2101y - yjyi * hrr_1101y; - fyl = al2 * prod_xz * hrr_1201y; - fzl = al2 * prod_xy * hrr_0001z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0200y; - fyj -= 2 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_3_0 * dm_il_1_0; - dd_jl = dm_jl_3_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd 
+= dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[19*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0200y; - prod_xz = trr_10x * trr_10z; - prod_yz = hrr_0200y * trr_10z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1200y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0300y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0210y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0201y; - fzl = al2 * prod_xy * hrr_1001z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * wt; - fyj -= 2 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_3_0 * dm_il_2_0; - dd_jl = dm_jl_3_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - 
vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[20*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2200y; - prod_xz = fac * wt; - prod_yz = hrr_2200y * wt; - fxi = ai2 * prod_yz * trr_10x; + fyl = al2 * hrr_1201y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_3_0 * dm_il_2_0; + dd += dm_jl_3_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[20*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_0200y * dd; + prod_xz = trr_10x * trr_10z * dd; + prod_yz = hrr_0200y * trr_10z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1200y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0300y; + fyj -= 2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0210y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0201y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_3_0 * dm_il_3_0; + dd += dm_jl_3_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+3)*nao+l0+0] * 
dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[21*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_2200y * dd; + prod_xz = fac * wt * dd; + prod_yz = hrr_2200y * wt * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; double trr_50y = c0y * trr_40y + 4*b10 * trr_30y; double hrr_4100y = trr_50y - yjyi * trr_40y; double hrr_3200y = hrr_4100y - yjyi * hrr_3100y; - fyi = ai2 * prod_xz * hrr_3200y; - fzi = ai2 * prod_xy * trr_10z; - fxj = aj2 * prod_yz * hrr_0100x; + fyi = ai2 * hrr_3200y; + fyi -= 2 * hrr_1200y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; double hrr_2300y = hrr_3200y - yjyi * hrr_2200y; - fyj = aj2 * prod_xz * hrr_2300y; - fzj = aj2 * prod_xy * hrr_0100z; - fxk = ak2 * prod_yz * trr_01x; + fyj = aj2 * hrr_2300y; + fyj -= 2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; double trr_41y = cpy * trr_40y + 4*b00 * trr_30y; double hrr_3110y = trr_41y - yjyi * trr_31y; double hrr_2210y = hrr_3110y - yjyi * hrr_2110y; - fyk = ak2 * prod_xz * hrr_2210y; - fzk = ak2 * prod_xy * trr_01z; - fxl = al2 * prod_yz * hrr_0001x; + fyk = ak2 * hrr_2210y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_01z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; double hrr_4001y = trr_41y - ylyk * trr_40y; double hrr_3101y = hrr_4001y - yjyi * hrr_3001y; double hrr_2201y = hrr_3101y - yjyi * hrr_2101y; - fyl = al2 * prod_xz * hrr_2201y; - fzl = al2 * prod_xy * hrr_0001z; - fyi -= 2 * prod_xz * hrr_1200y; - fyj -= 2 * prod_xz * hrr_2100y; - if (vk != NULL) { - dd_jk = dm_jk_3_0 * dm_il_3_0; - dd_jl = dm_jl_3_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - 
vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[21*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1200y; - prod_xz = fac * trr_10z; - prod_yz = hrr_1200y * trr_10z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2200y; - fzi = ai2 * prod_xy * trr_20z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1300y; - fzj = aj2 * prod_xy * hrr_1100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1210y; - fzk = ak2 * prod_xy * trr_11z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1201y; - fzl = al2 * prod_xy * hrr_1001z; - fyi -= 1 * prod_xz * hrr_0200y; - fzi -= 1 * prod_xy * wt; - fyj -= 2 * prod_xz * hrr_1100y; - if (vk != NULL) { - dd_jk = dm_jk_3_0 * dm_il_4_0; - dd_jl = dm_jl_3_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - 
} - if (vj != NULL) { - vj_dd = dm_cache[22*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0200y; - prod_xz = fac * trr_20z; - prod_yz = hrr_0200y * trr_20z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1200y; - fzi = ai2 * prod_xy * trr_30z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0300y; - fzj = aj2 * prod_xy * hrr_2100z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0210y; - fzk = ak2 * prod_xy * trr_21z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0201y; - fzl = al2 * prod_xy * hrr_2001z; - fzi -= 2 * prod_xy * trr_10z; - fyj -= 2 * prod_xz * hrr_0100y; - if (vk != NULL) { - dd_jk = dm_jk_3_0 * dm_il_5_0; - dd_jl = dm_jl_3_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[23*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += 
fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * hrr_0100y; - prod_xz = trr_20x * hrr_0100z; - prod_yz = hrr_0100y * hrr_0100z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 2 * prod_yz * trr_10x; - fyj -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_4_0 * dm_il_0_0; - dd_jl = dm_jl_4_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[24*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_1100y; - prod_xz = trr_10x * hrr_0100z; - prod_yz = hrr_1100y * hrr_0100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy 
* hrr_0200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_0101z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * hrr_0100y; - fyj -= 1 * prod_xz * trr_10y; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_4_0 * dm_il_1_0; - dd_jl = dm_jl_4_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[25*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * hrr_0100y; - prod_xz = trr_10x * hrr_1100z; - prod_yz = hrr_0100y * hrr_1100z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_1101z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0100z; - fyj -= 1 * prod_xz * 
1; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_4_0 * dm_il_2_0; - dd_jl = dm_jl_4_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[26*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_2100y; - prod_xz = fac * hrr_0100z; - prod_yz = hrr_2100y * hrr_0100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_3100y; - fzi = ai2 * prod_xy * hrr_1100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_2200y; - fzj = aj2 * prod_xy * hrr_0200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_2110y; - fzk = ak2 * prod_xy * hrr_0110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2101y; - fzl = al2 * prod_xy * hrr_0101z; - fyi -= 2 * prod_xz * hrr_1100y; - fyj -= 1 * prod_xz * trr_20y; - fzj -= 1 * prod_xy * wt; - if (vk != NULL) { - dd_jk = dm_jk_4_0 * dm_il_3_0; - dd_jl = dm_jl_4_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * 
vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[27*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_1100y; - prod_xz = fac * hrr_1100z; - prod_yz = hrr_1100y * hrr_1100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_2100y; - fzi = ai2 * prod_xy * hrr_2100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1200y; - fzj = aj2 * prod_xy * hrr_1200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_1110y; - fzk = ak2 * prod_xy * hrr_1110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1101y; - fzl = al2 * prod_xy * hrr_1101z; - fyi -= 1 * prod_xz * hrr_0100y; - fzi -= 1 * prod_xy * hrr_0100z; - fyj -= 1 * prod_xz * trr_10y; - fzj -= 1 * prod_xy * trr_10z; - if (vk != NULL) { - dd_jk = dm_jk_4_0 * dm_il_4_0; - dd_jl = dm_jl_4_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - 
vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[28*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * hrr_0100y; - prod_xz = fac * hrr_2100z; - prod_yz = hrr_0100y * hrr_2100z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * hrr_1100y; - fzi = ai2 * prod_xy * hrr_3100z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0200y; - fzj = aj2 * prod_xy * hrr_2200z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * hrr_0110y; - fzk = ak2 * prod_xy * hrr_2110z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0101y; - fzl = al2 * prod_xy * hrr_2101z; - fzi -= 2 * prod_xy * hrr_1100z; - fyj -= 1 * prod_xz * 1; - fzj -= 1 * prod_xy * trr_20z; - if (vk != NULL) { - dd_jk = dm_jk_4_0 * dm_il_5_0; - dd_jl = dm_jl_4_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[29*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - 
vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_20x * 1; - prod_xz = trr_20x * hrr_0200z; - prod_yz = 1 * hrr_0200z; - fxi = ai2 * prod_yz * trr_30x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_1200z; - fxj = aj2 * prod_yz * hrr_2100x; - fyj = aj2 * prod_xz * hrr_0100y; + fyl = al2 * hrr_2201y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_3_0 * dm_il_4_0; + dd += dm_jl_3_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[22*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_1200y * dd; + prod_xz = fac * trr_10z * dd; + prod_yz = hrr_1200y * trr_10z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2200y; + fyi -= 1 * hrr_0200y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_20z; + fzi -= 1 * wt; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1300y; + fyj -= 2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1210y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_11z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1201y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_3_0 * dm_il_5_0; + dd += dm_jl_3_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+3)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+3)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[23*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_0200y * dd; + prod_xz = fac * 
trr_20z * dd; + prod_yz = hrr_0200y * trr_20z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1200y; + v_iy += fyi * prod_xz; + fzi = ai2 * trr_30z; + fzi -= 2 * trr_10z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0300y; + fyj -= 2 * hrr_0100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0210y; + v_ky += fyk * prod_xz; + fzk = ak2 * trr_21z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0201y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2001z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_4_0 * dm_il_0_0; + dd += dm_jl_4_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[24*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_20x * hrr_0100y * dd; + prod_xz = trr_20x * hrr_0100z * dd; + prod_yz = hrr_0100y * hrr_0100z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_4_0 * dm_il_1_0; + dd += dm_jl_4_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += 
dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[25*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_1100y * dd; + prod_xz = trr_10x * hrr_0100z * dd; + prod_yz = hrr_1100y * hrr_0100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_4_0 * dm_il_2_0; + dd += dm_jl_4_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[26*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * hrr_0100y * dd; + prod_xz = trr_10x * hrr_1100z * dd; + prod_yz = hrr_0100y * hrr_1100z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl 
* prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_4_0 * dm_il_3_0; + dd += dm_jl_4_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[27*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_2100y * dd; + prod_xz = fac * hrr_0100z * dd; + prod_yz = hrr_2100y * hrr_0100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_3100y; + fyi -= 2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2200y; + fyj -= 1 * trr_20y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0200z; + fzj -= 1 * wt; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_2110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_4_0 * dm_il_4_0; + dd += dm_jl_4_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[28*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_1100y * dd; + prod_xz = fac * hrr_1100z * dd; + prod_yz = hrr_1100y * hrr_1100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_2100y; + fyi -= 1 * hrr_0100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2100z; + fzi -= 1 * hrr_0100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1200y; + fyj -= 1 * trr_10y; + v_jy 
+= fyj * prod_xz; + fzj = aj2 * hrr_1200z; + fzj -= 1 * trr_10z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_1110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_4_0 * dm_il_5_0; + dd += dm_jl_4_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+4)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+4)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[29*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * hrr_0100y * dd; + prod_xz = fac * hrr_2100z * dd; + prod_yz = hrr_0100y * hrr_2100z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * hrr_1100y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_3100z; + fzi -= 2 * hrr_1100z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0200y; + fyj -= 1 * 1; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_2200z; + fzj -= 1 * trr_20z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * hrr_0110y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_2110z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0101y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_2101z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_5_0 * dm_il_0_0; + dd += dm_jl_5_0 * dm_ik_0_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[30*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_20x * 1 * dd; + prod_xz = trr_20x * hrr_0200z * dd; + prod_yz = 1 * hrr_0200z * dd; + fxi = ai2 * trr_30x; + fxi -= 2 
* trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1200z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_2100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_0300z = hrr_1200z - zjzi * hrr_0200z; - fzj = aj2 * prod_xy * hrr_0300z; - fxk = ak2 * prod_yz * trr_21x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_0300z; + fzj -= 2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_21x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; - fzk = ak2 * prod_xy * hrr_0210z; - fxl = al2 * prod_yz * hrr_2001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_0210z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_2001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_0201z = hrr_1101z - zjzi * hrr_0101z; - fzl = al2 * prod_xy * hrr_0201z; - fxi -= 2 * prod_yz * trr_10x; - fzj -= 2 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_5_0 * dm_il_0_0; - dd_jl = dm_jl_5_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[30*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * 
vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * trr_10y; - prod_xz = trr_10x * hrr_0200z; - prod_yz = trr_10y * hrr_0200z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_1200z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_0300z; - fxk = ak2 * prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_0210z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_0201z; - fxi -= 1 * prod_yz * fac; - fyi -= 1 * prod_xz * 1; - fzj -= 2 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_5_0 * dm_il_1_0; - dd_jl = dm_jl_5_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[31*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = trr_10x * 1; - prod_xz = trr_10x * hrr_1200z; - prod_yz = 1 * hrr_1200z; - fxi = ai2 * prod_yz * trr_20x; - fyi = ai2 * prod_xz * trr_10y; - fzi = ai2 * prod_xy * hrr_2200z; - fxj = aj2 * prod_yz * hrr_1100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzl = al2 * 
hrr_0201z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_5_0 * dm_il_1_0; + dd += dm_jl_5_0 * dm_ik_1_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[31*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * trr_10y * dd; + prod_xz = trr_10x * hrr_0200z * dd; + prod_yz = trr_10y * hrr_0200z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1200z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0300z; + fzj -= 2 * hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0210z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0201z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_5_0 * dm_il_2_0; + dd += dm_jl_5_0 * dm_ik_2_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[32*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = trr_10x * 1 * dd; + prod_xz = trr_10x * hrr_1200z * dd; + prod_yz = 1 * hrr_1200z * dd; + fxi = ai2 * trr_20x; + fxi -= 1 * fac; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2200z; + fzi -= 1 * hrr_0200z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_1100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_1300z = hrr_2200z - zjzi * hrr_1200z; - fzj = aj2 * prod_xy * hrr_1300z; - fxk = ak2 * 
prod_yz * trr_11x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_1300z; + fzj -= 2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_11x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; - fzk = ak2 * prod_xy * hrr_1210z; - fxl = al2 * prod_yz * hrr_1001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_1210z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_1001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_1201z = hrr_2101z - zjzi * hrr_1101z; - fzl = al2 * prod_xy * hrr_1201z; - fxi -= 1 * prod_yz * fac; - fzi -= 1 * prod_xy * hrr_0200z; - fzj -= 2 * prod_xy * hrr_1100z; - if (vk != NULL) { - dd_jk = dm_jk_5_0 * dm_il_2_0; - dd_jl = dm_jl_5_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[32*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_20y; - prod_xz = fac * hrr_0200z; - prod_yz = trr_20y * hrr_0200z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_30y; - fzi = ai2 * prod_xy * hrr_1200z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 
* prod_xz * hrr_2100y; - fzj = aj2 * prod_xy * hrr_0300z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_21y; - fzk = ak2 * prod_xy * hrr_0210z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_2001y; - fzl = al2 * prod_xy * hrr_0201z; - fyi -= 2 * prod_xz * trr_10y; - fzj -= 2 * prod_xy * hrr_0100z; - if (vk != NULL) { - dd_jk = dm_jk_5_0 * dm_il_3_0; - dd_jl = dm_jl_5_0 * dm_ik_3_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; - dd_jl = dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[33*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * trr_10y; - prod_xz = fac * hrr_1200z; - prod_yz = trr_10y * hrr_1200z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_20y; - fzi = ai2 * prod_xy * hrr_2200z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_1100y; - fzj = aj2 * prod_xy * hrr_1300z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_11y; - fzk = ak2 * prod_xy * hrr_1210z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_1001y; - fzl = al2 * prod_xy * hrr_1201z; - fyi -= 1 * prod_xz * 1; - fzi -= 1 * prod_xy * hrr_0200z; - fzj -= 2 * prod_xy * hrr_1100z; - if (vk != NULL) 
{ - dd_jk = dm_jk_5_0 * dm_il_4_0; - dd_jl = dm_jl_5_0 * dm_ik_4_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; - dd_jl = dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[34*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } - prod_xy = fac * 1; - prod_xz = fac * hrr_2200z; - prod_yz = 1 * hrr_2200z; - fxi = ai2 * prod_yz * trr_10x; - fyi = ai2 * prod_xz * trr_10y; + fzl = al2 * hrr_1201z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_5_0 * dm_il_3_0; + dd += dm_jl_5_0 * dm_ik_3_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+3)*nao+l0+0]; + dd += dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+3)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[33*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_20y * dd; + prod_xz = fac * hrr_0200z * dd; + prod_yz = trr_20y * hrr_0200z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_30y; + fyi -= 2 * trr_10y; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_1200z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_2100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_0300z; + fzj -= 2 * 
hrr_0100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_21y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_0210z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_2001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_0201z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_5_0 * dm_il_4_0; + dd += dm_jl_5_0 * dm_ik_4_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+4)*nao+l0+0]; + dd += dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+4)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[34*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * trr_10y * dd; + prod_xz = fac * hrr_1200z * dd; + prod_yz = trr_10y * hrr_1200z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_20y; + fyi -= 1 * 1; + v_iy += fyi * prod_xz; + fzi = ai2 * hrr_2200z; + fzi -= 1 * hrr_0200z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_1100y; + v_jy += fyj * prod_xz; + fzj = aj2 * hrr_1300z; + fzj -= 2 * hrr_1100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_11y; + v_ky += fyk * prod_xz; + fzk = ak2 * hrr_1210z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_1001y; + v_ly += fyl * prod_xz; + fzl = al2 * hrr_1201z; + v_lz += fzl * prod_xy; + dd = 0.; + if (jk.k_factor != 0) { + dd = dm_jk_5_0 * dm_il_5_0; + dd += dm_jl_5_0 * dm_ik_5_0; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; + dd += dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; + } + dd *= jk.k_factor; + } + if (jk.j_factor != 0) { + dd += jk.j_factor * dm_cache[35*TILE2+sh_ij] * dm_lk_0_0; + } + prod_xy = fac * 1 * dd; + prod_xz = fac * hrr_2200z * dd; + prod_yz = 1 * hrr_2200z * dd; + fxi = ai2 * trr_10x; + v_ix += fxi * prod_yz; + fyi = ai2 * trr_10y; + v_iy += fyi * prod_xz; double 
trr_50z = c0z * trr_40z + 4*b10 * trr_30z; double hrr_4100z = trr_50z - zjzi * trr_40z; double hrr_3200z = hrr_4100z - zjzi * hrr_3100z; - fzi = ai2 * prod_xy * hrr_3200z; - fxj = aj2 * prod_yz * hrr_0100x; - fyj = aj2 * prod_xz * hrr_0100y; + fzi = ai2 * hrr_3200z; + fzi -= 2 * hrr_1200z; + v_iz += fzi * prod_xy; + fxj = aj2 * hrr_0100x; + v_jx += fxj * prod_yz; + fyj = aj2 * hrr_0100y; + v_jy += fyj * prod_xz; double hrr_2300z = hrr_3200z - zjzi * hrr_2200z; - fzj = aj2 * prod_xy * hrr_2300z; - fxk = ak2 * prod_yz * trr_01x; - fyk = ak2 * prod_xz * trr_01y; + fzj = aj2 * hrr_2300z; + fzj -= 2 * hrr_2100z; + v_jz += fzj * prod_xy; + fxk = ak2 * trr_01x; + v_kx += fxk * prod_yz; + fyk = ak2 * trr_01y; + v_ky += fyk * prod_xz; double trr_41z = cpz * trr_40z + 4*b00 * trr_30z; double hrr_3110z = trr_41z - zjzi * trr_31z; double hrr_2210z = hrr_3110z - zjzi * hrr_2110z; - fzk = ak2 * prod_xy * hrr_2210z; - fxl = al2 * prod_yz * hrr_0001x; - fyl = al2 * prod_xz * hrr_0001y; + fzk = ak2 * hrr_2210z; + v_kz += fzk * prod_xy; + fxl = al2 * hrr_0001x; + v_lx += fxl * prod_yz; + fyl = al2 * hrr_0001y; + v_ly += fyl * prod_xz; double hrr_4001z = trr_41z - zlzk * trr_40z; double hrr_3101z = hrr_4001z - zjzi * hrr_3001z; double hrr_2201z = hrr_3101z - zjzi * hrr_2101z; - fzl = al2 * prod_xy * hrr_2201z; - fzi -= 2 * prod_xy * hrr_1200z; - fzj -= 2 * prod_xy * hrr_2100z; - if (vk != NULL) { - dd_jk = dm_jk_5_0 * dm_il_5_0; - dd_jl = dm_jl_5_0 * dm_ik_5_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+5)*nao+k0+0] * dm[(nao+i0+5)*nao+l0+0]; - dd_jl = dm[(nao+j0+5)*nao+l0+0] * dm[(nao+i0+5)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - vk_grad_ix += fxi * vk_dd; - vk_grad_iy += fyi * vk_dd; - vk_grad_iz += fzi * vk_dd; - vk_grad_jx += fxj * vk_dd; - vk_grad_jy += fyj * vk_dd; - vk_grad_jz += fzj * vk_dd; - vk_grad_kx += fxk * vk_dd; - vk_grad_ky += fyk * vk_dd; - vk_grad_kz += fzk * vk_dd; - vk_grad_lx += fxl * vk_dd; - vk_grad_ly += fyl * vk_dd; - vk_grad_lz += 
fzl * vk_dd; - } - if (vj != NULL) { - vj_dd = dm_cache[35*TILE2+sh_ij] * dm_lk_0_0; - vj_grad_ix += fxi * vj_dd; - vj_grad_iy += fyi * vj_dd; - vj_grad_iz += fzi * vj_dd; - vj_grad_jx += fxj * vj_dd; - vj_grad_jy += fyj * vj_dd; - vj_grad_jz += fzj * vj_dd; - vj_grad_kx += fxk * vj_dd; - vj_grad_ky += fyk * vj_dd; - vj_grad_kz += fzk * vj_dd; - vj_grad_lx += fxl * vj_dd; - vj_grad_ly += fyl * vj_dd; - vj_grad_lz += fzl * vj_dd; - } + fzl = al2 * hrr_2201z; + v_lz += fzl * prod_xy; } } } } + if (task_id >= ntasks) { + continue; + } int ia = bas[ish*BAS_SLOTS+ATOM_OF]; int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - if (vj != NULL) { - atomicAdd(vj+ia*3+0, vj_grad_ix); - atomicAdd(vj+ia*3+1, vj_grad_iy); - atomicAdd(vj+ia*3+2, vj_grad_iz); - atomicAdd(vj+ja*3+0, vj_grad_jx); - atomicAdd(vj+ja*3+1, vj_grad_jy); - atomicAdd(vj+ja*3+2, vj_grad_jz); - atomicAdd(vj+ka*3+0, vj_grad_kx); - atomicAdd(vj+ka*3+1, vj_grad_ky); - atomicAdd(vj+ka*3+2, vj_grad_kz); - atomicAdd(vj+la*3+0, vj_grad_lx); - atomicAdd(vj+la*3+1, vj_grad_ly); - atomicAdd(vj+la*3+2, vj_grad_lz); - } - if (vk != NULL) { - atomicAdd(vk+ia*3+0, vk_grad_ix); - atomicAdd(vk+ia*3+1, vk_grad_iy); - atomicAdd(vk+ia*3+2, vk_grad_iz); - atomicAdd(vk+ja*3+0, vk_grad_jx); - atomicAdd(vk+ja*3+1, vk_grad_jy); - atomicAdd(vk+ja*3+2, vk_grad_jz); - atomicAdd(vk+ka*3+0, vk_grad_kx); - atomicAdd(vk+ka*3+1, vk_grad_ky); - atomicAdd(vk+ka*3+2, vk_grad_kz); - atomicAdd(vk+la*3+0, vk_grad_lx); - atomicAdd(vk+la*3+1, vk_grad_ly); - atomicAdd(vk+la*3+2, vk_grad_lz); - } + double *ejk = jk.ejk; + atomicAdd(ejk+ia*3+0, v_ix); + atomicAdd(ejk+ia*3+1, v_iy); + atomicAdd(ejk+ia*3+2, v_iz); + atomicAdd(ejk+ja*3+0, v_jx); + atomicAdd(ejk+ja*3+1, v_jy); + atomicAdd(ejk+ja*3+2, v_jz); + atomicAdd(ejk+ka*3+0, v_kx); + atomicAdd(ejk+ka*3+1, v_ky); + atomicAdd(ejk+ka*3+2, v_kz); + atomicAdd(ejk+la*3+0, v_lx); + atomicAdd(ejk+la*3+1, v_ly); + atomicAdd(ejk+la*3+2, v_lz); } } 
__global__ -void rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, +void rys_ejk_ip1_2200(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, ShellQuartet *pool, uint32_t *batch_head) { int b_id = blockIdx.x; @@ -26612,8 +21902,16 @@ void rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, int batch_ij = batch_id / nbatches_kl; int batch_kl = batch_id % nbatches_kl; int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } if (ntasks > 0) { int tile_ij = bounds.tile_ij_mapping[batch_ij]; int nbas_tiles = nbas / TILE; @@ -26631,7 +21929,7 @@ void rys_ejk_ip1_2200(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, } } -int rys_ejk_ip1_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, +int rys_ejk_ip1_unrolled(RysIntEnvVars *envs, JKEnergy *jk, BoundsInfo *bounds, ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers) { int li = bounds->li; @@ -26639,7 +21937,7 @@ int rys_ejk_ip1_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, int lk = bounds->lk; int ll = bounds->ll; int threads = scheme[0] * scheme[1]; - int nroots = (li + lj + lk + ll + 1) / 2 + 1; + int nroots = bounds->nroots; int iprim = bounds->iprim; int jprim = bounds->jprim; int ij_prims = iprim * jprim; diff --git a/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2.cu b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2.cu deleted file mode 100644 index 650770f7..00000000 --- a/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2.cu +++ /dev/null @@ -1,30942 +0,0 @@ -#include "vhf.cuh" -#include "rys_roots_unrolled.cu" -#include "create_tasks_ip1.cu" -int rys_ejk_ip2_unrolled_lmax = 1; -int rys_ejk_ip2_unrolled_max_order = 2; - - -__device__ static 
-void _rys_ejk_ip2_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) -{ - int sq_id = threadIdx.x + blockDim.x * threadIdx.y; - int nsq_per_block = blockDim.x * blockDim.y; - int iprim = bounds.iprim; - int jprim = bounds.jprim; - int kprim = bounds.kprim; - int lprim = bounds.lprim; - int *ao_loc = envs.ao_loc; - int nbas = envs.nbas; - int nao = ao_loc[nbas]; - int *bas = envs.bas; - double *env = envs.env; - double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; - double *dm = jk.dm; - extern __shared__ double dm_cache[]; - double *Rpa_cicj = dm_cache + 1 * TILE2; - double *rw = Rpa_cicj + iprim*jprim*TILE2*4; - for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { - int ijp = n / TILE2; - int sh_ij = n % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int ip = ijp / jprim; - int jp = ijp % jprim; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; - double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double aj_aij = aj / aij; - double xjxi = rj[0] - ri[0]; - double yjyi = rj[1] - ri[1]; - double zjzi = rj[2] - ri[2]; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; - Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; - Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; - double theta_ij = ai * aj / aij; - double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); - Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; - } - - int ij = sq_id / TILE2; - if (ij < 1) { - int i = ij % 1; - int j = ij / 1; - int sh_ij = sq_id % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - if (jk.n_dm 
== 1) { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i]; - } else { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i] + dm[(nao+j0+j)*nao+i0+i]; - } - } - - for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { - __syncthreads(); - int task_id = task0 + sq_id; - double fac_sym = PI_FAC; - ShellQuartet sq; - if (task_id >= ntasks) { - // To avoid __syncthreads blocking blocking idle warps, all remaining - // threads compute a valid shell quartet with zero normalization factor - sq = shl_quartet_idx[0]; - fac_sym = 0.; - } else { - sq = shl_quartet_idx[task_id]; - } - int ish = sq.i; - int jsh = sq.j; - int ksh = sq.k; - int lsh = sq.l; - int sh_ij = (ish % TILE) * TILE + (jsh % TILE); - if (ish == jsh) fac_sym *= .5; - if (ksh == lsh) fac_sym *= .5; - if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - int k0 = ao_loc[ksh]; - int l0 = ao_loc[lsh]; - int natm = envs.natm; - int ia = bas[ish*BAS_SLOTS+ATOM_OF]; - int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; - int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; - int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; - double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; - double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; - double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; - double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double dd_jk, dd_jl, vj_dd, vk_dd; - double g1, g2, g3, prod; - double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; - if (jk.n_dm > 1) { - int nao2 = nao * nao; - dm_lk_0_0 += dm[nao2+(l0+0)*nao+(k0+0)]; - } - double dm_jk_0_0 = dm[(j0+0)*nao+(k0+0)]; - double dm_jl_0_0 = dm[(j0+0)*nao+(l0+0)]; - double dm_ik_0_0 = dm[(i0+0)*nao+(k0+0)]; - double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; - - 
double vk_00xx = 0; - double vj_00xx = 0; - double vk_00xy = 0; - double vj_00xy = 0; - double vk_00xz = 0; - double vj_00xz = 0; - double vk_00yx = 0; - double vj_00yx = 0; - double vk_00yy = 0; - double vj_00yy = 0; - double vk_00yz = 0; - double vj_00yz = 0; - double vk_00zx = 0; - double vj_00zx = 0; - double vk_00zy = 0; - double vj_00zy = 0; - double vk_00zz = 0; - double vj_00zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } 
- } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * 1 * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * fac * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; 
- g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * fac * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ia)*9 + 0, vk_00xx); - atomicAdd(vk + (ia*natm+ia)*9 + 1, vk_00xy); - atomicAdd(vk + (ia*natm+ia)*9 + 2, vk_00xz); - atomicAdd(vk + (ia*natm+ia)*9 + 3, vk_00yx); - atomicAdd(vk + (ia*natm+ia)*9 + 4, vk_00yy); - atomicAdd(vk + (ia*natm+ia)*9 + 5, vk_00yz); - atomicAdd(vk + (ia*natm+ia)*9 + 6, vk_00zx); - atomicAdd(vk + (ia*natm+ia)*9 + 7, vk_00zy); - atomicAdd(vk + (ia*natm+ia)*9 + 8, vk_00zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ia)*9 + 0, vj_00xx); - atomicAdd(vj + (ia*natm+ia)*9 + 1, vj_00xy); - atomicAdd(vj + (ia*natm+ia)*9 + 2, vj_00xz); - atomicAdd(vj + (ia*natm+ia)*9 + 3, vj_00yx); - atomicAdd(vj + (ia*natm+ia)*9 + 4, vj_00yy); - atomicAdd(vj + (ia*natm+ia)*9 + 5, vj_00yz); - atomicAdd(vj + (ia*natm+ia)*9 + 6, vj_00zx); - atomicAdd(vj + (ia*natm+ia)*9 + 7, vj_00zy); - atomicAdd(vj + (ia*natm+ia)*9 + 8, vj_00zz); - } - - double vk_01xx = 0; - double vj_01xx = 0; - double vk_01xy = 0; - double vj_01xy = 0; - double vk_01xz = 0; - double vj_01xz = 0; - double vk_01yx = 0; - double vj_01yx = 0; - double vk_01yy = 0; - double vj_01yy = 0; - double vk_01yz = 0; - double vj_01yz = 0; - double vk_01zx = 0; - double vj_01zx = 0; - double vk_01zy = 0; - double vj_01zy = 0; - double vk_01zz = 0; - double vj_01zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = 
rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * 1 * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * 
trr_10x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * fac * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * fac * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ja)*9 + 0, vk_01xx); - atomicAdd(vk + (ia*natm+ja)*9 + 1, vk_01xy); - atomicAdd(vk + (ia*natm+ja)*9 + 2, vk_01xz); - atomicAdd(vk + (ia*natm+ja)*9 + 3, vk_01yx); - atomicAdd(vk + (ia*natm+ja)*9 + 4, vk_01yy); - atomicAdd(vk + (ia*natm+ja)*9 + 5, vk_01yz); - atomicAdd(vk + (ia*natm+ja)*9 + 6, vk_01zx); - atomicAdd(vk + (ia*natm+ja)*9 + 7, vk_01zy); - atomicAdd(vk + (ia*natm+ja)*9 + 8, vk_01zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ja)*9 + 0, 
vj_01xx); - atomicAdd(vj + (ia*natm+ja)*9 + 1, vj_01xy); - atomicAdd(vj + (ia*natm+ja)*9 + 2, vj_01xz); - atomicAdd(vj + (ia*natm+ja)*9 + 3, vj_01yx); - atomicAdd(vj + (ia*natm+ja)*9 + 4, vj_01yy); - atomicAdd(vj + (ia*natm+ja)*9 + 5, vj_01yz); - atomicAdd(vj + (ia*natm+ja)*9 + 6, vj_01zx); - atomicAdd(vj + (ia*natm+ja)*9 + 7, vj_01zy); - atomicAdd(vj + (ia*natm+ja)*9 + 8, vj_01zz); - } - - double vk_02xx = 0; - double vj_02xx = 0; - double vk_02xy = 0; - double vj_02xy = 0; - double vk_02xz = 0; - double vj_02xz = 0; - double vk_02yx = 0; - double vj_02yx = 0; - double vk_02yy = 0; - double vj_02yy = 0; - double vk_02yz = 0; - double vj_02yz = 0; - double vk_02zx = 0; - double vj_02zx = 0; - double vk_02zy = 0; - double vj_02zy = 0; - double vk_02zz = 0; - double vj_02zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double 
theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * 1 * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double trr_01x = cpx * fac; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ak*2 * ai*2 * 
trr_11y; - prod = g3 * fac * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * fac * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ka)*9 + 0, vk_02xx); - atomicAdd(vk + (ia*natm+ka)*9 + 1, vk_02xy); - atomicAdd(vk + (ia*natm+ka)*9 + 2, vk_02xz); - atomicAdd(vk + (ia*natm+ka)*9 + 3, vk_02yx); - atomicAdd(vk + (ia*natm+ka)*9 + 4, vk_02yy); - atomicAdd(vk + (ia*natm+ka)*9 + 5, vk_02yz); - atomicAdd(vk + (ia*natm+ka)*9 + 6, vk_02zx); - atomicAdd(vk + (ia*natm+ka)*9 + 7, vk_02zy); - atomicAdd(vk + (ia*natm+ka)*9 + 8, vk_02zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ka)*9 + 0, vj_02xx); - atomicAdd(vj + (ia*natm+ka)*9 + 1, vj_02xy); - atomicAdd(vj + (ia*natm+ka)*9 + 2, vj_02xz); - atomicAdd(vj + (ia*natm+ka)*9 + 3, vj_02yx); - atomicAdd(vj + (ia*natm+ka)*9 + 4, vj_02yy); - atomicAdd(vj + (ia*natm+ka)*9 + 5, vj_02yz); - atomicAdd(vj + (ia*natm+ka)*9 + 6, vj_02zx); - atomicAdd(vj + (ia*natm+ka)*9 + 7, vj_02zy); - atomicAdd(vj + (ia*natm+ka)*9 + 8, vj_02zz); - } - - double vk_03xx = 0; - double vj_03xx = 0; - double vk_03xy = 0; - double vj_03xy = 0; - double vk_03xz = 0; - double vj_03xz = 0; - double vk_03yx = 0; - double vj_03yx = 0; - double vk_03yy = 0; - double vj_03yy = 0; - double vk_03yz = 0; - double vj_03yz = 0; - double vk_03zx = 0; - double vj_03zx = 0; - double vk_03zy = 0; - double vj_03zy = 0; - double vk_03zz = 0; - double vj_03zz = 0; - 
for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * 
dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * 1 * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * fac * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_11z = 
cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * fac * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+la)*9 + 0, vk_03xx); - atomicAdd(vk + (ia*natm+la)*9 + 1, vk_03xy); - atomicAdd(vk + (ia*natm+la)*9 + 2, vk_03xz); - atomicAdd(vk + (ia*natm+la)*9 + 3, vk_03yx); - atomicAdd(vk + (ia*natm+la)*9 + 4, vk_03yy); - atomicAdd(vk + (ia*natm+la)*9 + 5, vk_03yz); - atomicAdd(vk + (ia*natm+la)*9 + 6, vk_03zx); - atomicAdd(vk + (ia*natm+la)*9 + 7, vk_03zy); - atomicAdd(vk + (ia*natm+la)*9 + 8, vk_03zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+la)*9 + 0, vj_03xx); - atomicAdd(vj + (ia*natm+la)*9 + 1, vj_03xy); - atomicAdd(vj + (ia*natm+la)*9 + 2, vj_03xz); - atomicAdd(vj + (ia*natm+la)*9 + 3, vj_03yx); - atomicAdd(vj + (ia*natm+la)*9 + 4, vj_03yy); - atomicAdd(vj + (ia*natm+la)*9 + 5, vj_03yz); - atomicAdd(vj + (ia*natm+la)*9 + 6, vj_03zx); - atomicAdd(vj + (ia*natm+la)*9 + 7, vj_03zy); - atomicAdd(vj + (ia*natm+la)*9 + 8, vj_03zz); - } - - double vk_10xx = 0; - double vj_10xx = 0; - double vk_10xy = 0; - double vj_10xy = 0; - double vk_10xz = 0; - double vj_10xz = 0; - double vk_10yx = 0; - double vj_10yx = 0; - double vk_10yy = 0; - double vj_10yy = 0; - double vk_10yz = 0; - double vj_10yz = 0; - double vk_10zx = 0; - double vj_10zx = 0; - double vk_10zy = 0; - double vj_10zy = 0; - double vk_10zz = 0; - double vj_10zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double 
zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * 1 * wt; - 
vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 = aj*2 * hrr_0100x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * fac * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * fac * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ia)*9 + 0, vk_10xx); - atomicAdd(vk + (ja*natm+ia)*9 + 1, vk_10xy); - atomicAdd(vk + (ja*natm+ia)*9 + 2, vk_10xz); - atomicAdd(vk + (ja*natm+ia)*9 + 3, vk_10yx); - atomicAdd(vk + (ja*natm+ia)*9 + 4, vk_10yy); - atomicAdd(vk + (ja*natm+ia)*9 + 5, vk_10yz); - atomicAdd(vk + (ja*natm+ia)*9 + 6, vk_10zx); - atomicAdd(vk + (ja*natm+ia)*9 + 7, vk_10zy); - atomicAdd(vk + (ja*natm+ia)*9 + 8, 
vk_10zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ia)*9 + 0, vj_10xx); - atomicAdd(vj + (ja*natm+ia)*9 + 1, vj_10xy); - atomicAdd(vj + (ja*natm+ia)*9 + 2, vj_10xz); - atomicAdd(vj + (ja*natm+ia)*9 + 3, vj_10yx); - atomicAdd(vj + (ja*natm+ia)*9 + 4, vj_10yy); - atomicAdd(vj + (ja*natm+ia)*9 + 5, vj_10yz); - atomicAdd(vj + (ja*natm+ia)*9 + 6, vj_10zx); - atomicAdd(vj + (ja*natm+ia)*9 + 7, vj_10zy); - atomicAdd(vj + (ja*natm+ia)*9 + 8, vj_10zz); - } - - double vk_11xx = 0; - double vj_11xx = 0; - double vk_11xy = 0; - double vj_11xy = 0; - double vk_11xz = 0; - double vj_11xz = 0; - double vk_11yx = 0; - double vj_11yx = 0; - double vk_11yy = 0; - double vj_11yy = 0; - double vk_11yz = 0; - double vj_11yz = 0; - double vk_11zx = 0; - double vj_11zx = 0; - double vk_11zy = 0; - double vj_11zy = 0; - double vk_11zz = 0; - double vj_11zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij 
- xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * 1 * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - 
vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * fac * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * fac * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ja)*9 + 0, vk_11xx); - atomicAdd(vk + (ja*natm+ja)*9 + 1, vk_11xy); - atomicAdd(vk + (ja*natm+ja)*9 + 2, vk_11xz); - atomicAdd(vk + (ja*natm+ja)*9 + 3, vk_11yx); - atomicAdd(vk + (ja*natm+ja)*9 + 4, vk_11yy); - atomicAdd(vk + (ja*natm+ja)*9 + 5, vk_11yz); - atomicAdd(vk + (ja*natm+ja)*9 + 6, vk_11zx); - atomicAdd(vk + (ja*natm+ja)*9 + 7, vk_11zy); - atomicAdd(vk + (ja*natm+ja)*9 + 8, vk_11zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ja)*9 + 0, vj_11xx); - atomicAdd(vj + (ja*natm+ja)*9 + 1, vj_11xy); - atomicAdd(vj + (ja*natm+ja)*9 + 2, vj_11xz); - atomicAdd(vj + (ja*natm+ja)*9 + 3, vj_11yx); - atomicAdd(vj + (ja*natm+ja)*9 + 4, vj_11yy); - atomicAdd(vj + (ja*natm+ja)*9 + 5, vj_11yz); - atomicAdd(vj + (ja*natm+ja)*9 + 6, vj_11zx); - atomicAdd(vj + (ja*natm+ja)*9 + 7, vj_11zy); - 
atomicAdd(vj + (ja*natm+ja)*9 + 8, vj_11zz); - } - - double vk_12xx = 0; - double vj_12xx = 0; - double vk_12xy = 0; - double vj_12xy = 0; - double vk_12xz = 0; - double vj_12xz = 0; - double vk_12yx = 0; - double vj_12yx = 0; - double vk_12yy = 0; - double vj_12yy = 0; - double vk_12yz = 0; - double vj_12yz = 0; - double vk_12zx = 0; - double vj_12zx = 0; - double vk_12zy = 0; - double vj_12zy = 0; - double vk_12zz = 0; - double vj_12zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { 
- rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * 1 * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 = aj*2 * hrr_0100x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * fac * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; 
- g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * fac * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ka)*9 + 0, vk_12xx); - atomicAdd(vk + (ja*natm+ka)*9 + 1, vk_12xy); - atomicAdd(vk + (ja*natm+ka)*9 + 2, vk_12xz); - atomicAdd(vk + (ja*natm+ka)*9 + 3, vk_12yx); - atomicAdd(vk + (ja*natm+ka)*9 + 4, vk_12yy); - atomicAdd(vk + (ja*natm+ka)*9 + 5, vk_12yz); - atomicAdd(vk + (ja*natm+ka)*9 + 6, vk_12zx); - atomicAdd(vk + (ja*natm+ka)*9 + 7, vk_12zy); - atomicAdd(vk + (ja*natm+ka)*9 + 8, vk_12zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ka)*9 + 0, vj_12xx); - atomicAdd(vj + (ja*natm+ka)*9 + 1, vj_12xy); - atomicAdd(vj + (ja*natm+ka)*9 + 2, vj_12xz); - atomicAdd(vj + (ja*natm+ka)*9 + 3, vj_12yx); - atomicAdd(vj + (ja*natm+ka)*9 + 4, vj_12yy); - atomicAdd(vj + (ja*natm+ka)*9 + 5, vj_12yz); - atomicAdd(vj + (ja*natm+ka)*9 + 6, vj_12zx); - atomicAdd(vj + (ja*natm+ka)*9 + 7, vj_12zy); - atomicAdd(vj + (ja*natm+ka)*9 + 8, vj_12zz); - } - - double vk_13xx = 0; - double vj_13xx = 0; - double vk_13xy = 0; - double vj_13xy = 0; - double vk_13xz = 0; - double vj_13xz = 0; - double vk_13yx = 0; - double vj_13yx = 0; - double vk_13yy = 0; - double vj_13yy = 0; - double vk_13yz = 0; - double vj_13yz = 0; - double vk_13zx = 0; - double vj_13zx = 0; - double vk_13zy = 0; - double vj_13zy = 0; - double vk_13zz = 0; - double vj_13zz = 
0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * 1 * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 = aj*2 * hrr_0100x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * fac * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - 
double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * fac * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+la)*9 + 0, vk_13xx); - atomicAdd(vk + (ja*natm+la)*9 + 1, vk_13xy); - atomicAdd(vk + (ja*natm+la)*9 + 2, vk_13xz); - atomicAdd(vk + (ja*natm+la)*9 + 3, vk_13yx); - atomicAdd(vk + (ja*natm+la)*9 + 4, vk_13yy); - atomicAdd(vk + (ja*natm+la)*9 + 5, vk_13yz); - atomicAdd(vk + (ja*natm+la)*9 + 6, vk_13zx); - atomicAdd(vk + (ja*natm+la)*9 + 7, vk_13zy); - atomicAdd(vk + (ja*natm+la)*9 + 8, vk_13zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+la)*9 + 0, vj_13xx); - atomicAdd(vj + (ja*natm+la)*9 + 1, vj_13xy); - atomicAdd(vj + (ja*natm+la)*9 + 2, vj_13xz); - atomicAdd(vj + (ja*natm+la)*9 + 3, vj_13yx); - atomicAdd(vj + (ja*natm+la)*9 + 4, vj_13yy); - atomicAdd(vj + (ja*natm+la)*9 + 5, vj_13yz); - atomicAdd(vj + (ja*natm+la)*9 + 6, vj_13zx); - atomicAdd(vj + (ja*natm+la)*9 + 7, vj_13zy); - atomicAdd(vj + (ja*natm+la)*9 + 8, vj_13zz); - } - - double vk_20xx = 0; - double vj_20xx = 0; - double vk_20xy = 0; - double vj_20xy = 0; - double vk_20xz = 0; - double vj_20xz = 0; - double vk_20yx = 0; - double vj_20yx = 0; - double vk_20yy = 0; - double vj_20yy = 0; - double vk_20yz = 0; - double vj_20yz = 0; - double vk_20zx = 0; - double vj_20zx = 0; - double vk_20zy = 0; - double vj_20zy = 0; - double vk_20zz = 0; - double vj_20zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - 
double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - 
if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * 1 * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - double trr_01x = cpx * fac; - g1 = ak*2 * trr_01x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * fac * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * fac * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ia)*9 + 0, vk_20xx); - atomicAdd(vk + (ka*natm+ia)*9 + 1, vk_20xy); - atomicAdd(vk + (ka*natm+ia)*9 + 2, vk_20xz); - atomicAdd(vk + (ka*natm+ia)*9 + 3, vk_20yx); 
- atomicAdd(vk + (ka*natm+ia)*9 + 4, vk_20yy); - atomicAdd(vk + (ka*natm+ia)*9 + 5, vk_20yz); - atomicAdd(vk + (ka*natm+ia)*9 + 6, vk_20zx); - atomicAdd(vk + (ka*natm+ia)*9 + 7, vk_20zy); - atomicAdd(vk + (ka*natm+ia)*9 + 8, vk_20zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ia)*9 + 0, vj_20xx); - atomicAdd(vj + (ka*natm+ia)*9 + 1, vj_20xy); - atomicAdd(vj + (ka*natm+ia)*9 + 2, vj_20xz); - atomicAdd(vj + (ka*natm+ia)*9 + 3, vj_20yx); - atomicAdd(vj + (ka*natm+ia)*9 + 4, vj_20yy); - atomicAdd(vj + (ka*natm+ia)*9 + 5, vj_20yz); - atomicAdd(vj + (ka*natm+ia)*9 + 6, vj_20zx); - atomicAdd(vj + (ka*natm+ia)*9 + 7, vj_20zy); - atomicAdd(vj + (ka*natm+ia)*9 + 8, vj_20zz); - } - - double vk_21xx = 0; - double vj_21xx = 0; - double vk_21xy = 0; - double vj_21xy = 0; - double vk_21xz = 0; - double vj_21xz = 0; - double vk_21yx = 0; - double vj_21yx = 0; - double vk_21yy = 0; - double vj_21yy = 0; - double vk_21yz = 0; - double vj_21yz = 0; - double vk_21zx = 0; - double vj_21zx = 0; - double vk_21zy = 0; - double vj_21zy = 0; - double vk_21zz = 0; - double vj_21zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - 
double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * 1 * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - 
double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * fac * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * fac * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ja)*9 + 0, vk_21xx); - atomicAdd(vk + (ka*natm+ja)*9 + 1, vk_21xy); - atomicAdd(vk + (ka*natm+ja)*9 + 2, vk_21xz); - atomicAdd(vk + (ka*natm+ja)*9 + 3, vk_21yx); - atomicAdd(vk + (ka*natm+ja)*9 + 4, vk_21yy); - atomicAdd(vk + (ka*natm+ja)*9 + 5, vk_21yz); - atomicAdd(vk + (ka*natm+ja)*9 + 6, vk_21zx); - atomicAdd(vk + (ka*natm+ja)*9 + 7, vk_21zy); - atomicAdd(vk + (ka*natm+ja)*9 + 8, vk_21zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ja)*9 + 0, vj_21xx); - atomicAdd(vj + (ka*natm+ja)*9 + 1, vj_21xy); - atomicAdd(vj + (ka*natm+ja)*9 + 2, vj_21xz); - atomicAdd(vj + (ka*natm+ja)*9 + 3, 
vj_21yx); - atomicAdd(vj + (ka*natm+ja)*9 + 4, vj_21yy); - atomicAdd(vj + (ka*natm+ja)*9 + 5, vj_21yz); - atomicAdd(vj + (ka*natm+ja)*9 + 6, vj_21zx); - atomicAdd(vj + (ka*natm+ja)*9 + 7, vj_21zy); - atomicAdd(vj + (ka*natm+ja)*9 + 8, vj_21zz); - } - - double vk_22xx = 0; - double vj_22xx = 0; - double vk_22xy = 0; - double vj_22xy = 0; - double vk_22xz = 0; - double vj_22xz = 0; - double vk_22yx = 0; - double vj_22yx = 0; - double vk_22yy = 0; - double vj_22yy = 0; - double vk_22yz = 0; - double vj_22yz = 0; - double vk_22zx = 0; - double vj_22zx = 0; - double vk_22zy = 0; - double vj_22zy = 0; - double vk_22zz = 0; - double vj_22zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - 
rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double trr_01x = cpx * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_02x = cpx * trr_01x + 1*b01 * fac; - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * 1 * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * fac * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - 
vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * fac * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ka)*9 + 0, vk_22xx); - atomicAdd(vk + (ka*natm+ka)*9 + 1, vk_22xy); - atomicAdd(vk + (ka*natm+ka)*9 + 2, vk_22xz); - atomicAdd(vk + (ka*natm+ka)*9 + 3, vk_22yx); - atomicAdd(vk + (ka*natm+ka)*9 + 4, vk_22yy); - atomicAdd(vk + (ka*natm+ka)*9 + 5, vk_22yz); - atomicAdd(vk + (ka*natm+ka)*9 + 6, vk_22zx); - atomicAdd(vk + (ka*natm+ka)*9 + 7, vk_22zy); - atomicAdd(vk + (ka*natm+ka)*9 + 8, vk_22zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ka)*9 + 0, vj_22xx); - atomicAdd(vj + (ka*natm+ka)*9 + 1, vj_22xy); - atomicAdd(vj + (ka*natm+ka)*9 + 2, vj_22xz); - atomicAdd(vj + (ka*natm+ka)*9 + 3, vj_22yx); - atomicAdd(vj + (ka*natm+ka)*9 + 4, vj_22yy); - atomicAdd(vj + (ka*natm+ka)*9 + 5, vj_22yz); - atomicAdd(vj + (ka*natm+ka)*9 + 6, vj_22zx); - atomicAdd(vj + (ka*natm+ka)*9 + 7, vj_22zy); - atomicAdd(vj + (ka*natm+ka)*9 + 8, vj_22zz); - } - - double vk_23xx = 0; - double vj_23xx = 0; - double vk_23xy = 0; - double vj_23xy = 0; - double vk_23xz = 0; - double vj_23xz = 0; - double vk_23yx = 0; - double vj_23yx = 0; - double vk_23yy = 0; - double vj_23yy = 0; - double vk_23yz = 0; - double vj_23yz = 0; - double vk_23zx = 0; - double vj_23zx = 0; - double vk_23zy = 0; - double vj_23zy = 0; - double vk_23zz = 0; - double vj_23zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = 
exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double trr_01x = cpx * fac; - double b01 = .5/akl * (1 - rt_akl); - double 
trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g3 = al*2 * ak*2 * hrr_0011x; - prod = g3 * 1 * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - double hrr_0001x = trr_01x - xlxk * fac; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * fac * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * fac * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+la)*9 + 0, vk_23xx); - atomicAdd(vk + (ka*natm+la)*9 + 1, vk_23xy); - atomicAdd(vk + (ka*natm+la)*9 + 2, vk_23xz); - atomicAdd(vk + (ka*natm+la)*9 + 3, vk_23yx); - atomicAdd(vk + (ka*natm+la)*9 + 4, vk_23yy); - atomicAdd(vk + (ka*natm+la)*9 + 5, vk_23yz); - atomicAdd(vk + (ka*natm+la)*9 + 6, vk_23zx); - atomicAdd(vk + (ka*natm+la)*9 + 
7, vk_23zy); - atomicAdd(vk + (ka*natm+la)*9 + 8, vk_23zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+la)*9 + 0, vj_23xx); - atomicAdd(vj + (ka*natm+la)*9 + 1, vj_23xy); - atomicAdd(vj + (ka*natm+la)*9 + 2, vj_23xz); - atomicAdd(vj + (ka*natm+la)*9 + 3, vj_23yx); - atomicAdd(vj + (ka*natm+la)*9 + 4, vj_23yy); - atomicAdd(vj + (ka*natm+la)*9 + 5, vj_23yz); - atomicAdd(vj + (ka*natm+la)*9 + 6, vj_23zx); - atomicAdd(vj + (ka*natm+la)*9 + 7, vj_23zy); - atomicAdd(vj + (ka*natm+la)*9 + 8, vj_23zz); - } - - double vk_30xx = 0; - double vj_30xx = 0; - double vk_30xy = 0; - double vj_30xy = 0; - double vk_30xz = 0; - double vj_30xz = 0; - double vk_30yx = 0; - double vj_30yx = 0; - double vk_30yy = 0; - double vj_30yy = 0; - double vk_30yz = 0; - double vj_30yz = 0; - double vk_30zx = 0; - double vj_30zx = 0; - double vk_30zy = 0; - double vj_30zy = 0; - double vk_30zz = 0; - double vj_30zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + 
ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * 1 * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - g1 = al*2 * hrr_0001x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - double cpy = yqc 
+ ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * fac * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * fac * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ia)*9 + 0, vk_30xx); - atomicAdd(vk + (la*natm+ia)*9 + 1, vk_30xy); - atomicAdd(vk + (la*natm+ia)*9 + 2, vk_30xz); - atomicAdd(vk + (la*natm+ia)*9 + 3, vk_30yx); - atomicAdd(vk + (la*natm+ia)*9 + 4, vk_30yy); - atomicAdd(vk + (la*natm+ia)*9 + 5, vk_30yz); - atomicAdd(vk + (la*natm+ia)*9 + 6, vk_30zx); - atomicAdd(vk + (la*natm+ia)*9 + 7, vk_30zy); - atomicAdd(vk + (la*natm+ia)*9 + 8, vk_30zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ia)*9 + 0, vj_30xx); - atomicAdd(vj + (la*natm+ia)*9 + 1, vj_30xy); - atomicAdd(vj + (la*natm+ia)*9 + 2, vj_30xz); - atomicAdd(vj + (la*natm+ia)*9 + 3, vj_30yx); - atomicAdd(vj + (la*natm+ia)*9 + 4, vj_30yy); - atomicAdd(vj + (la*natm+ia)*9 + 5, vj_30yz); - atomicAdd(vj + (la*natm+ia)*9 + 6, vj_30zx); - atomicAdd(vj + (la*natm+ia)*9 + 7, vj_30zy); - atomicAdd(vj + (la*natm+ia)*9 + 8, vj_30zz); 
- } - - double vk_31xx = 0; - double vj_31xx = 0; - double vk_31xy = 0; - double vj_31xy = 0; - double vk_31xz = 0; - double vj_31xz = 0; - double vk_31yx = 0; - double vj_31yx = 0; - double vk_31yy = 0; - double vj_31yy = 0; - double vk_31yz = 0; - double vj_31yz = 0; - double vk_31zx = 0; - double vj_31zx = 0; - double vk_31zy = 0; - double vj_31zy = 0; - double vk_31zz = 0; - double vj_31zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= 
theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * 1 * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = 
trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * fac * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * fac * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ja)*9 + 0, vk_31xx); - atomicAdd(vk + (la*natm+ja)*9 + 1, vk_31xy); - atomicAdd(vk + (la*natm+ja)*9 + 2, vk_31xz); - atomicAdd(vk + (la*natm+ja)*9 + 3, vk_31yx); - atomicAdd(vk + (la*natm+ja)*9 + 4, vk_31yy); - atomicAdd(vk + (la*natm+ja)*9 + 5, vk_31yz); - atomicAdd(vk + (la*natm+ja)*9 + 6, vk_31zx); - atomicAdd(vk + (la*natm+ja)*9 + 7, vk_31zy); - atomicAdd(vk + (la*natm+ja)*9 + 8, vk_31zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ja)*9 + 0, vj_31xx); - atomicAdd(vj + (la*natm+ja)*9 + 1, vj_31xy); - atomicAdd(vj + (la*natm+ja)*9 + 2, vj_31xz); - atomicAdd(vj + (la*natm+ja)*9 + 3, vj_31yx); - atomicAdd(vj + (la*natm+ja)*9 + 4, vj_31yy); - atomicAdd(vj + (la*natm+ja)*9 + 5, vj_31yz); - atomicAdd(vj + (la*natm+ja)*9 + 6, vj_31zx); - atomicAdd(vj + (la*natm+ja)*9 + 7, vj_31zy); - atomicAdd(vj + (la*natm+ja)*9 + 8, vj_31zz); - } - - double vk_32xx = 0; - double vj_32xx = 0; - double vk_32xy = 0; - double vj_32xy = 0; - double vk_32xz = 0; - double vj_32xz = 0; - 
double vk_32yx = 0; - double vj_32yx = 0; - double vk_32yy = 0; - double vj_32yy = 0; - double vk_32yz = 0; - double vj_32yz = 0; - double vk_32zx = 0; - double vj_32zx = 0; - double vk_32zy = 0; - double vj_32zy = 0; - double vk_32zz = 0; - double vj_32zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + 
(2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double trr_01x = cpx * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * 1 * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_0001x = trr_01x - xlxk * fac; - g1 = al*2 * hrr_0001x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * fac * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * 
vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * fac * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ka)*9 + 0, vk_32xx); - atomicAdd(vk + (la*natm+ka)*9 + 1, vk_32xy); - atomicAdd(vk + (la*natm+ka)*9 + 2, vk_32xz); - atomicAdd(vk + (la*natm+ka)*9 + 3, vk_32yx); - atomicAdd(vk + (la*natm+ka)*9 + 4, vk_32yy); - atomicAdd(vk + (la*natm+ka)*9 + 5, vk_32yz); - atomicAdd(vk + (la*natm+ka)*9 + 6, vk_32zx); - atomicAdd(vk + (la*natm+ka)*9 + 7, vk_32zy); - atomicAdd(vk + (la*natm+ka)*9 + 8, vk_32zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ka)*9 + 0, vj_32xx); - atomicAdd(vj + (la*natm+ka)*9 + 1, vj_32xy); - atomicAdd(vj + (la*natm+ka)*9 + 2, vj_32xz); - atomicAdd(vj + (la*natm+ka)*9 + 3, vj_32yx); - atomicAdd(vj + (la*natm+ka)*9 + 4, vj_32yy); - atomicAdd(vj + (la*natm+ka)*9 + 5, vj_32yz); - atomicAdd(vj + (la*natm+ka)*9 + 6, vj_32zx); - atomicAdd(vj + (la*natm+ka)*9 + 7, vj_32zy); - atomicAdd(vj + (la*natm+ka)*9 + 8, vj_32zz); - } - - double vk_33xx = 0; - double vj_33xx = 0; - double vk_33xy = 0; - double vj_33xy = 0; - double vk_33xz = 0; - double vj_33xz = 0; - double vk_33yx = 0; - double vj_33yx = 0; - double vk_33yy = 0; - double vj_33yy = 0; - double vk_33yz = 0; - double vj_33yz = 0; - double vk_33zx = 0; - double vj_33zx = 0; - double vk_33zy = 0; - double vj_33zy = 0; - double vk_33zz = 0; - double vj_33zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc 
= ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double trr_01x = cpx * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0002x 
= hrr_0011x - xlxk * hrr_0001x; - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * 1 * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * fac * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * fac * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+la)*9 + 0, vk_33xx); - atomicAdd(vk + (la*natm+la)*9 + 1, vk_33xy); - atomicAdd(vk + (la*natm+la)*9 + 2, vk_33xz); - atomicAdd(vk + (la*natm+la)*9 + 3, vk_33yx); - atomicAdd(vk + (la*natm+la)*9 + 4, vk_33yy); - atomicAdd(vk + (la*natm+la)*9 + 5, vk_33yz); - atomicAdd(vk + (la*natm+la)*9 
+ 6, vk_33zx); - atomicAdd(vk + (la*natm+la)*9 + 7, vk_33zy); - atomicAdd(vk + (la*natm+la)*9 + 8, vk_33zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+la)*9 + 0, vj_33xx); - atomicAdd(vj + (la*natm+la)*9 + 1, vj_33xy); - atomicAdd(vj + (la*natm+la)*9 + 2, vj_33xz); - atomicAdd(vj + (la*natm+la)*9 + 3, vj_33yx); - atomicAdd(vj + (la*natm+la)*9 + 4, vj_33yy); - atomicAdd(vj + (la*natm+la)*9 + 5, vj_33yz); - atomicAdd(vj + (la*natm+la)*9 + 6, vj_33zx); - atomicAdd(vj + (la*natm+la)*9 + 7, vj_33zy); - atomicAdd(vj + (la*natm+la)*9 + 8, vj_33zz); - } - } -} -__global__ -void rys_ejk_ip2_0000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *pool, uint32_t *batch_head) -{ - int b_id = blockIdx.x; - int t_id = threadIdx.x + blockDim.x * threadIdx.y; - ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; - __shared__ int batch_id; - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - } - __syncthreads(); - int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; - int nbatches = bounds.ntile_ij_pairs * nbatches_kl; - while (batch_id < nbatches) { - int batch_ij = batch_id / nbatches_kl; - int batch_kl = batch_id % nbatches_kl; - int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, - batch_ij, batch_kl); - if (ntasks > 0) { - int tile_ij = bounds.tile_ij_mapping[batch_ij]; - int nbas_tiles = nbas / TILE; - int tile_i = tile_ij / nbas_tiles; - int tile_j = tile_ij % nbas_tiles; - int ish0 = tile_i * TILE; - int jsh0 = tile_j * TILE; - _rys_ejk_ip2_0000(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); - } - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - atomicAdd(batch_head+1, ntasks); - } - __syncthreads(); - } -} - -__device__ static -void _rys_ejk_ip2_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) -{ - int sq_id = threadIdx.x + blockDim.x * threadIdx.y; - int nsq_per_block = blockDim.x 
* blockDim.y; - int iprim = bounds.iprim; - int jprim = bounds.jprim; - int kprim = bounds.kprim; - int lprim = bounds.lprim; - int *ao_loc = envs.ao_loc; - int nbas = envs.nbas; - int nao = ao_loc[nbas]; - int *bas = envs.bas; - double *env = envs.env; - double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; - double *dm = jk.dm; - extern __shared__ double dm_cache[]; - double *Rpa_cicj = dm_cache + 3 * TILE2; - double *rw = Rpa_cicj + iprim*jprim*TILE2*4; - for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { - int ijp = n / TILE2; - int sh_ij = n % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int ip = ijp / jprim; - int jp = ijp % jprim; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; - double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double aj_aij = aj / aij; - double xjxi = rj[0] - ri[0]; - double yjyi = rj[1] - ri[1]; - double zjzi = rj[2] - ri[2]; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; - Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; - Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; - double theta_ij = ai * aj / aij; - double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); - Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; - } - - int ij = sq_id / TILE2; - if (ij < 3) { - int i = ij % 3; - int j = ij / 3; - int sh_ij = sq_id % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - if (jk.n_dm == 1) { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i]; - } else { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i] + dm[(nao+j0+j)*nao+i0+i]; - } - } - - for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { - 
__syncthreads(); - int task_id = task0 + sq_id; - double fac_sym = PI_FAC; - ShellQuartet sq; - if (task_id >= ntasks) { - // To avoid __syncthreads blocking blocking idle warps, all remaining - // threads compute a valid shell quartet with zero normalization factor - sq = shl_quartet_idx[0]; - fac_sym = 0.; - } else { - sq = shl_quartet_idx[task_id]; - } - int ish = sq.i; - int jsh = sq.j; - int ksh = sq.k; - int lsh = sq.l; - int sh_ij = (ish % TILE) * TILE + (jsh % TILE); - if (ish == jsh) fac_sym *= .5; - if (ksh == lsh) fac_sym *= .5; - if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - int k0 = ao_loc[ksh]; - int l0 = ao_loc[lsh]; - int natm = envs.natm; - int ia = bas[ish*BAS_SLOTS+ATOM_OF]; - int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; - int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; - int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; - double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; - double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; - double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; - double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double dd_jk, dd_jl, vj_dd, vk_dd; - double g1, g2, g3, prod; - double dm_lk_0_0 = dm[(l0+0)*nao+(k0+0)]; - if (jk.n_dm > 1) { - int nao2 = nao * nao; - dm_lk_0_0 += dm[nao2+(l0+0)*nao+(k0+0)]; - } - double dm_jk_0_0 = dm[(j0+0)*nao+(k0+0)]; - double dm_jl_0_0 = dm[(j0+0)*nao+(l0+0)]; - double dm_ik_0_0 = dm[(i0+0)*nao+(k0+0)]; - double dm_ik_1_0 = dm[(i0+1)*nao+(k0+0)]; - double dm_ik_2_0 = dm[(i0+2)*nao+(k0+0)]; - double dm_il_0_0 = dm[(i0+0)*nao+(l0+0)]; - double dm_il_1_0 = dm[(i0+1)*nao+(l0+0)]; - double dm_il_2_0 = dm[(i0+2)*nao+(l0+0)]; - - double vk_00xx = 0; - double vj_00xx = 0; - 
double vk_00xy = 0; - double vj_00xy = 0; - double vk_00xz = 0; - double vj_00xz = 0; - double vk_00yx = 0; - double vj_00yx = 0; - double vk_00yy = 0; - double vj_00yy = 0; - double vk_00yz = 0; - double vj_00yz = 0; - double vk_00zx = 0; - double vj_00zx = 0; - double vk_00zy = 0; - double vj_00zy = 0; - double vk_00zz = 0; - double vj_00zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < 
ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - g3 = ai*2 * (ai*2 * trr_30x - 3 * trr_10x); - prod = g3 * 1 * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * trr_10x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - 
vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * trr_10x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * trr_10y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - g3 = ai*2 * (ai*2 * trr_30y - 3 * trr_10y); - prod = g3 * fac * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * fac * trr_10y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * 
dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * 1 * trr_10z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * fac * trr_10z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - g3 = ai*2 * (ai*2 * trr_30z - 3 * trr_10z); - prod = g3 * fac * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ia)*9 + 0, vk_00xx); - atomicAdd(vk + (ia*natm+ia)*9 + 1, vk_00xy); - atomicAdd(vk + (ia*natm+ia)*9 + 2, vk_00xz); - atomicAdd(vk + (ia*natm+ia)*9 + 3, vk_00yx); - atomicAdd(vk + (ia*natm+ia)*9 + 4, vk_00yy); - atomicAdd(vk + (ia*natm+ia)*9 + 5, vk_00yz); - atomicAdd(vk + (ia*natm+ia)*9 + 6, vk_00zx); - atomicAdd(vk + (ia*natm+ia)*9 + 7, vk_00zy); - atomicAdd(vk + (ia*natm+ia)*9 + 8, vk_00zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ia)*9 + 0, vj_00xx); - atomicAdd(vj + (ia*natm+ia)*9 + 1, 
vj_00xy); - atomicAdd(vj + (ia*natm+ia)*9 + 2, vj_00xz); - atomicAdd(vj + (ia*natm+ia)*9 + 3, vj_00yx); - atomicAdd(vj + (ia*natm+ia)*9 + 4, vj_00yy); - atomicAdd(vj + (ia*natm+ia)*9 + 5, vj_00yz); - atomicAdd(vj + (ia*natm+ia)*9 + 6, vj_00zx); - atomicAdd(vj + (ia*natm+ia)*9 + 7, vj_00zy); - atomicAdd(vj + (ia*natm+ia)*9 + 8, vj_00zz); - } - - double vk_01xx = 0; - double vj_01xx = 0; - double vk_01xy = 0; - double vj_01xy = 0; - double vk_01xz = 0; - double vj_01xz = 0; - double vk_01yx = 0; - double vj_01yx = 0; - double vk_01yy = 0; - double vj_01yy = 0; - double vk_01yz = 0; - double vj_01yz = 0; - double vk_01zx = 0; - double vj_01zx = 0; - double vk_01zy = 0; - double vj_01zy = 0; - double vk_01zz = 0; - double vj_01zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = 
xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 = aj*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - prod = g3 * 1 * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - double 
hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * trr_10x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * trr_10x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * trr_10y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g3 = aj*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); 
- prod = g3 * fac * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * fac * trr_10y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * 1 * trr_10z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * fac * trr_10z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_30z = c0z * 
trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = aj*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * fac * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ja)*9 + 0, vk_01xx); - atomicAdd(vk + (ia*natm+ja)*9 + 1, vk_01xy); - atomicAdd(vk + (ia*natm+ja)*9 + 2, vk_01xz); - atomicAdd(vk + (ia*natm+ja)*9 + 3, vk_01yx); - atomicAdd(vk + (ia*natm+ja)*9 + 4, vk_01yy); - atomicAdd(vk + (ia*natm+ja)*9 + 5, vk_01yz); - atomicAdd(vk + (ia*natm+ja)*9 + 6, vk_01zx); - atomicAdd(vk + (ia*natm+ja)*9 + 7, vk_01zy); - atomicAdd(vk + (ia*natm+ja)*9 + 8, vk_01zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ja)*9 + 0, vj_01xx); - atomicAdd(vj + (ia*natm+ja)*9 + 1, vj_01xy); - atomicAdd(vj + (ia*natm+ja)*9 + 2, vj_01xz); - atomicAdd(vj + (ia*natm+ja)*9 + 3, vj_01yx); - atomicAdd(vj + (ia*natm+ja)*9 + 4, vj_01yy); - atomicAdd(vj + (ia*natm+ja)*9 + 5, vj_01yz); - atomicAdd(vj + (ia*natm+ja)*9 + 6, vj_01zx); - atomicAdd(vj + (ia*natm+ja)*9 + 7, vj_01zy); - atomicAdd(vj + (ia*natm+ja)*9 + 8, vj_01zz); - } - - double vk_02xx = 0; - double vj_02xx = 0; - double vk_02xy = 0; - double vj_02xy = 0; - double vk_02xz = 0; - double vj_02xz = 0; - double vk_02yx = 0; - double vj_02yx = 0; - double vk_02yy = 0; - double vj_02yy = 0; - double vk_02yz = 0; - double vj_02yz = 0; - double vk_02zx = 0; - double vj_02zx = 0; - double vk_02zy = 0; - double vj_02zy = 0; - double vk_02zz = 0; - double vj_02zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double 
yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; 
- double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double trr_01x = cpx * fac; - g3 = ak*2 * (ai*2 * trr_21x - 1 * trr_01x); - prod = g3 * 1 * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * trr_10x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * trr_10x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ak*2 * 
ai*2 * trr_11x; - prod = g3 * trr_10y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ak*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * fac * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * fac * trr_10y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * 1 * trr_10z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 
trr_10z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * fac * trr_10z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ak*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * fac * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ka)*9 + 0, vk_02xx); - atomicAdd(vk + (ia*natm+ka)*9 + 1, vk_02xy); - atomicAdd(vk + (ia*natm+ka)*9 + 2, vk_02xz); - atomicAdd(vk + (ia*natm+ka)*9 + 3, vk_02yx); - atomicAdd(vk + (ia*natm+ka)*9 + 4, vk_02yy); - atomicAdd(vk + (ia*natm+ka)*9 + 5, vk_02yz); - atomicAdd(vk + (ia*natm+ka)*9 + 6, vk_02zx); - atomicAdd(vk + (ia*natm+ka)*9 + 7, vk_02zy); - atomicAdd(vk + (ia*natm+ka)*9 + 8, vk_02zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ka)*9 + 0, vj_02xx); - atomicAdd(vj + (ia*natm+ka)*9 + 1, vj_02xy); - atomicAdd(vj + (ia*natm+ka)*9 + 2, vj_02xz); - atomicAdd(vj + (ia*natm+ka)*9 + 3, vj_02yx); - atomicAdd(vj + (ia*natm+ka)*9 + 4, vj_02yy); - atomicAdd(vj + (ia*natm+ka)*9 + 5, vj_02yz); - atomicAdd(vj + (ia*natm+ka)*9 + 6, vj_02zx); - atomicAdd(vj + (ia*natm+ka)*9 + 7, vj_02zy); - atomicAdd(vj + (ia*natm+ka)*9 + 8, vj_02zz); - } - - double vk_03xx = 0; - double vj_03xx = 0; - double vk_03xy = 0; - double vj_03xy = 0; - double vk_03xz = 0; - double vj_03xz = 0; - double vk_03yx = 0; - double vj_03yx = 0; - double vk_03yy = 0; - double vj_03yy = 0; - double vk_03yz = 0; - double vj_03yz = 0; - double vk_03zx = 0; - 
double vj_03zx = 0; - double vk_03zy = 0; - double vj_03zy = 0; - double vk_03zz = 0; - double vj_03zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - 
dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - g3 = al*2 * (ai*2 * hrr_2001x - 1 * hrr_0001x); - prod = g3 * 1 * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * trr_10x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_03yz += prod * vk_dd; - vj_03yz += 
prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * trr_10x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * trr_10y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - g3 = al*2 * (ai*2 * hrr_2001y - 1 * hrr_0001y); - prod = g3 * fac * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 
* hrr_1001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * fac * trr_10y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * 1 * trr_10z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * fac * trr_10z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - g3 = al*2 * (ai*2 * hrr_2001z - 1 * hrr_0001z); - prod = g3 * fac * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+la)*9 + 0, vk_03xx); - atomicAdd(vk + (ia*natm+la)*9 + 1, vk_03xy); - 
atomicAdd(vk + (ia*natm+la)*9 + 2, vk_03xz); - atomicAdd(vk + (ia*natm+la)*9 + 3, vk_03yx); - atomicAdd(vk + (ia*natm+la)*9 + 4, vk_03yy); - atomicAdd(vk + (ia*natm+la)*9 + 5, vk_03yz); - atomicAdd(vk + (ia*natm+la)*9 + 6, vk_03zx); - atomicAdd(vk + (ia*natm+la)*9 + 7, vk_03zy); - atomicAdd(vk + (ia*natm+la)*9 + 8, vk_03zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+la)*9 + 0, vj_03xx); - atomicAdd(vj + (ia*natm+la)*9 + 1, vj_03xy); - atomicAdd(vj + (ia*natm+la)*9 + 2, vj_03xz); - atomicAdd(vj + (ia*natm+la)*9 + 3, vj_03yx); - atomicAdd(vj + (ia*natm+la)*9 + 4, vj_03yy); - atomicAdd(vj + (ia*natm+la)*9 + 5, vj_03yz); - atomicAdd(vj + (ia*natm+la)*9 + 6, vj_03zx); - atomicAdd(vj + (ia*natm+la)*9 + 7, vj_03zy); - atomicAdd(vj + (ia*natm+la)*9 + 8, vj_03zz); - } - - double vk_10xx = 0; - double vj_10xx = 0; - double vk_10xy = 0; - double vj_10xy = 0; - double vk_10xz = 0; - double vj_10xz = 0; - double vk_10yx = 0; - double vj_10yx = 0; - double vk_10yy = 0; - double vj_10yy = 0; - double vk_10yz = 0; - double vj_10yz = 0; - double vk_10zx = 0; - double vj_10zx = 0; - double vk_10zy = 0; - double vj_10zy = 0; - double vk_10zz = 0; - double vj_10zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - 
double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g3 = ai*2 * aj*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 -= 1 * aj*2 * hrr_0100x; - prod = g3 * 1 * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g1 = aj*2 * hrr_1100x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - 
double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * trr_10x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * trr_10x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * trr_10y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - 
vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g3 = ai*2 * aj*2 * hrr_2100y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * fac * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * fac * trr_10y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * 1 * trr_10z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * fac * trr_10z; - vk_10yy += prod * 
vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = ai*2 * aj*2 * hrr_2100z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * fac * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ia)*9 + 0, vk_10xx); - atomicAdd(vk + (ja*natm+ia)*9 + 1, vk_10xy); - atomicAdd(vk + (ja*natm+ia)*9 + 2, vk_10xz); - atomicAdd(vk + (ja*natm+ia)*9 + 3, vk_10yx); - atomicAdd(vk + (ja*natm+ia)*9 + 4, vk_10yy); - atomicAdd(vk + (ja*natm+ia)*9 + 5, vk_10yz); - atomicAdd(vk + (ja*natm+ia)*9 + 6, vk_10zx); - atomicAdd(vk + (ja*natm+ia)*9 + 7, vk_10zy); - atomicAdd(vk + (ja*natm+ia)*9 + 8, vk_10zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ia)*9 + 0, vj_10xx); - atomicAdd(vj + (ja*natm+ia)*9 + 1, vj_10xy); - atomicAdd(vj + (ja*natm+ia)*9 + 2, vj_10xz); - atomicAdd(vj + (ja*natm+ia)*9 + 3, vj_10yx); - atomicAdd(vj + (ja*natm+ia)*9 + 4, vj_10yy); - atomicAdd(vj + (ja*natm+ia)*9 + 5, vj_10yz); - atomicAdd(vj + (ja*natm+ia)*9 + 6, vj_10zx); - atomicAdd(vj + (ja*natm+ia)*9 + 7, vj_10zy); - atomicAdd(vj + (ja*natm+ia)*9 + 8, vj_10zz); - } - - double vk_11xx = 0; - double vj_11xx = 0; - double vk_11xy = 0; - double vj_11xy = 0; - double vk_11xz = 0; - double vj_11xz = 0; - double vk_11yx = 0; - double vj_11yx = 0; - double vk_11yy = 0; - double vj_11yy = 0; - double vk_11yz = 0; - double vj_11yz = 0; - double vk_11zx = 0; - double vj_11zx = 0; - double vk_11zy = 0; - double vj_11zy = 0; - double vk_11zz = 0; - double vj_11zz = 0; - for (int klp = 
0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * 
dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g3 = aj*2 * (aj*2 * hrr_1200x - 1 * trr_10x); - prod = g3 * 1 * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * trr_10x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * 
trr_10x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * trr_10x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * trr_10y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g3 = aj*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * fac * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; 
- vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * fac * trr_10y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * 1 * trr_10z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * fac * trr_10z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g3 = aj*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * fac * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ja)*9 + 0, vk_11xx); - atomicAdd(vk + (ja*natm+ja)*9 + 1, vk_11xy); - atomicAdd(vk + (ja*natm+ja)*9 + 2, 
vk_11xz); - atomicAdd(vk + (ja*natm+ja)*9 + 3, vk_11yx); - atomicAdd(vk + (ja*natm+ja)*9 + 4, vk_11yy); - atomicAdd(vk + (ja*natm+ja)*9 + 5, vk_11yz); - atomicAdd(vk + (ja*natm+ja)*9 + 6, vk_11zx); - atomicAdd(vk + (ja*natm+ja)*9 + 7, vk_11zy); - atomicAdd(vk + (ja*natm+ja)*9 + 8, vk_11zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ja)*9 + 0, vj_11xx); - atomicAdd(vj + (ja*natm+ja)*9 + 1, vj_11xy); - atomicAdd(vj + (ja*natm+ja)*9 + 2, vj_11xz); - atomicAdd(vj + (ja*natm+ja)*9 + 3, vj_11yx); - atomicAdd(vj + (ja*natm+ja)*9 + 4, vj_11yy); - atomicAdd(vj + (ja*natm+ja)*9 + 5, vj_11yz); - atomicAdd(vj + (ja*natm+ja)*9 + 6, vj_11zx); - atomicAdd(vj + (ja*natm+ja)*9 + 7, vj_11zy); - atomicAdd(vj + (ja*natm+ja)*9 + 8, vj_11zz); - } - - double vk_12xx = 0; - double vj_12xx = 0; - double vk_12xy = 0; - double vj_12xy = 0; - double vk_12xz = 0; - double vj_12xz = 0; - double vk_12yx = 0; - double vj_12yx = 0; - double vk_12yy = 0; - double vj_12yy = 0; - double vk_12yz = 0; - double vj_12yz = 0; - double vk_12zx = 0; - double vj_12zx = 0; - double vk_12zy = 0; - double vj_12zy = 0; - double vk_12zz = 0; - double vj_12zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; 
- double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g3 = ak*2 * aj*2 * hrr_1110x; - prod = g3 * 1 * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g1 = aj*2 * hrr_1100x; - double cpy = yqc + 
ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * trr_10x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * trr_10x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 = ak*2 * aj*2 * 
hrr_0110x; - prod = g3 * trr_10y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = ak*2 * aj*2 * hrr_1110y; - prod = g3 * fac * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * fac * trr_10y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * 1 * trr_10z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_11z; - 
prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * fac * trr_10z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = ak*2 * aj*2 * hrr_1110z; - prod = g3 * fac * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ka)*9 + 0, vk_12xx); - atomicAdd(vk + (ja*natm+ka)*9 + 1, vk_12xy); - atomicAdd(vk + (ja*natm+ka)*9 + 2, vk_12xz); - atomicAdd(vk + (ja*natm+ka)*9 + 3, vk_12yx); - atomicAdd(vk + (ja*natm+ka)*9 + 4, vk_12yy); - atomicAdd(vk + (ja*natm+ka)*9 + 5, vk_12yz); - atomicAdd(vk + (ja*natm+ka)*9 + 6, vk_12zx); - atomicAdd(vk + (ja*natm+ka)*9 + 7, vk_12zy); - atomicAdd(vk + (ja*natm+ka)*9 + 8, vk_12zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ka)*9 + 0, vj_12xx); - atomicAdd(vj + (ja*natm+ka)*9 + 1, vj_12xy); - atomicAdd(vj + (ja*natm+ka)*9 + 2, vj_12xz); - atomicAdd(vj + (ja*natm+ka)*9 + 3, vj_12yx); - atomicAdd(vj + (ja*natm+ka)*9 + 4, vj_12yy); - atomicAdd(vj + (ja*natm+ka)*9 + 5, vj_12yz); - atomicAdd(vj + (ja*natm+ka)*9 + 6, vj_12zx); - atomicAdd(vj + (ja*natm+ka)*9 + 7, vj_12zy); - atomicAdd(vj + (ja*natm+ka)*9 + 8, vj_12zz); - } - - double vk_13xx = 0; - double vj_13xx = 0; - double vk_13xy = 0; - 
double vj_13xy = 0; - double vk_13xz = 0; - double vj_13xz = 0; - double vk_13yx = 0; - double vj_13yx = 0; - double vk_13yy = 0; - double vj_13yy = 0; - double vk_13yz = 0; - double vj_13yz = 0; - double vk_13zx = 0; - double vj_13zx = 0; - double vk_13zy = 0; - double vj_13zy = 0; - double vk_13zz = 0; - double vj_13zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys 
= 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g3 = al*2 * aj*2 * hrr_1101x; - prod = g3 * 1 * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g1 = aj*2 * hrr_1100x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double 
hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * trr_10x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * trr_10x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * trr_10y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y 
- (rj[1] - ri[1]) * trr_10y; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = al*2 * aj*2 * hrr_1101y; - prod = g3 * fac * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * fac * trr_10y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * 1 * trr_10z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * fac * trr_10z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_13yz += prod 
* vk_dd; - vj_13yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = al*2 * aj*2 * hrr_1101z; - prod = g3 * fac * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+la)*9 + 0, vk_13xx); - atomicAdd(vk + (ja*natm+la)*9 + 1, vk_13xy); - atomicAdd(vk + (ja*natm+la)*9 + 2, vk_13xz); - atomicAdd(vk + (ja*natm+la)*9 + 3, vk_13yx); - atomicAdd(vk + (ja*natm+la)*9 + 4, vk_13yy); - atomicAdd(vk + (ja*natm+la)*9 + 5, vk_13yz); - atomicAdd(vk + (ja*natm+la)*9 + 6, vk_13zx); - atomicAdd(vk + (ja*natm+la)*9 + 7, vk_13zy); - atomicAdd(vk + (ja*natm+la)*9 + 8, vk_13zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+la)*9 + 0, vj_13xx); - atomicAdd(vj + (ja*natm+la)*9 + 1, vj_13xy); - atomicAdd(vj + (ja*natm+la)*9 + 2, vj_13xz); - atomicAdd(vj + (ja*natm+la)*9 + 3, vj_13yx); - atomicAdd(vj + (ja*natm+la)*9 + 4, vj_13yy); - atomicAdd(vj + (ja*natm+la)*9 + 5, vj_13yz); - atomicAdd(vj + (ja*natm+la)*9 + 6, vj_13zx); - atomicAdd(vj + (ja*natm+la)*9 + 7, vj_13zy); - atomicAdd(vj + (ja*natm+la)*9 + 8, vj_13zz); - } - - double vk_20xx = 0; - double vj_20xx = 0; - double vk_20xy = 0; - double vj_20xy = 0; - double vk_20xz = 0; - double vj_20xz = 0; - double vk_20yx = 0; - double vj_20yx = 0; - double vk_20yy = 0; - double vj_20yy = 0; - double vk_20yz = 0; - double vj_20yz = 0; - double vk_20zx = 0; - double vj_20zx = 0; - double vk_20zy = 0; - double vj_20zy = 0; - double vk_20zz = 0; - double vj_20zz = 0; - for (int klp = 0; klp < 
kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; 
- dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - g3 = ai*2 * ak*2 * trr_21x; - double trr_01x = cpx * fac; - g3 -= 1 * ak*2 * trr_01x; - prod = g3 * 1 * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - g1 = ak*2 * trr_11x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * trr_10x * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ai*2 * ak*2 * 
trr_11z; - prod = g3 * trr_10x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * trr_10y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ai*2 * ak*2 * trr_21y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * fac * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * fac * trr_10y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - 
vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * 1 * trr_10z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * fac * trr_10z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ai*2 * ak*2 * trr_21z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * fac * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ia)*9 + 0, vk_20xx); - atomicAdd(vk + (ka*natm+ia)*9 + 1, vk_20xy); - atomicAdd(vk + (ka*natm+ia)*9 + 2, vk_20xz); - atomicAdd(vk + (ka*natm+ia)*9 + 3, vk_20yx); - atomicAdd(vk + (ka*natm+ia)*9 + 4, vk_20yy); - atomicAdd(vk + (ka*natm+ia)*9 + 5, vk_20yz); - atomicAdd(vk + (ka*natm+ia)*9 + 6, vk_20zx); - atomicAdd(vk + (ka*natm+ia)*9 + 7, vk_20zy); - atomicAdd(vk + (ka*natm+ia)*9 + 8, vk_20zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ia)*9 + 0, vj_20xx); - atomicAdd(vj + (ka*natm+ia)*9 + 1, vj_20xy); - atomicAdd(vj + (ka*natm+ia)*9 + 2, vj_20xz); - atomicAdd(vj + (ka*natm+ia)*9 + 3, vj_20yx); - atomicAdd(vj + (ka*natm+ia)*9 + 4, 
vj_20yy); - atomicAdd(vj + (ka*natm+ia)*9 + 5, vj_20yz); - atomicAdd(vj + (ka*natm+ia)*9 + 6, vj_20zx); - atomicAdd(vj + (ka*natm+ia)*9 + 7, vj_20zy); - atomicAdd(vj + (ka*natm+ia)*9 + 8, vj_20zz); - } - - double vk_21xx = 0; - double vj_21xx = 0; - double vk_21xy = 0; - double vj_21xy = 0; - double vk_21xz = 0; - double vj_21xz = 0; - double vk_21yx = 0; - double vj_21yx = 0; - double vk_21yy = 0; - double vj_21yy = 0; - double vk_21yz = 0; - double vj_21yz = 0; - double vk_21zx = 0; - double vj_21zx = 0; - double vk_21zy = 0; - double vj_21zy = 0; - double vk_21zz = 0; - double vj_21zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double 
theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g3 = aj*2 * ak*2 * hrr_1110x; - prod = g3 * 1 * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; 
- g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * trr_10x * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * trr_10x * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * trr_10y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 = aj*2 * 
hrr_0100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = aj*2 * ak*2 * hrr_1110y; - prod = g3 * fac * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * fac * trr_10y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * 1 * trr_10z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * fac * trr_10z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * 
vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = aj*2 * ak*2 * hrr_1110z; - prod = g3 * fac * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ja)*9 + 0, vk_21xx); - atomicAdd(vk + (ka*natm+ja)*9 + 1, vk_21xy); - atomicAdd(vk + (ka*natm+ja)*9 + 2, vk_21xz); - atomicAdd(vk + (ka*natm+ja)*9 + 3, vk_21yx); - atomicAdd(vk + (ka*natm+ja)*9 + 4, vk_21yy); - atomicAdd(vk + (ka*natm+ja)*9 + 5, vk_21yz); - atomicAdd(vk + (ka*natm+ja)*9 + 6, vk_21zx); - atomicAdd(vk + (ka*natm+ja)*9 + 7, vk_21zy); - atomicAdd(vk + (ka*natm+ja)*9 + 8, vk_21zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ja)*9 + 0, vj_21xx); - atomicAdd(vj + (ka*natm+ja)*9 + 1, vj_21xy); - atomicAdd(vj + (ka*natm+ja)*9 + 2, vj_21xz); - atomicAdd(vj + (ka*natm+ja)*9 + 3, vj_21yx); - atomicAdd(vj + (ka*natm+ja)*9 + 4, vj_21yy); - atomicAdd(vj + (ka*natm+ja)*9 + 5, vj_21yz); - atomicAdd(vj + (ka*natm+ja)*9 + 6, vj_21zx); - atomicAdd(vj + (ka*natm+ja)*9 + 7, vj_21zy); - atomicAdd(vj + (ka*natm+ja)*9 + 8, vj_21zz); - } - - double vk_22xx = 0; - double vj_22xx = 0; - double vk_22xy = 0; - double vj_22xy = 0; - double vk_22xz = 0; - double vj_22xz = 0; - double vk_22yx = 0; - double vj_22yx = 0; - double vk_22yy = 0; - double vj_22yy = 0; - double vk_22yz = 0; - double vj_22yz = 0; - double vk_22zx = 0; - double vj_22zx = 0; - double vk_22zy = 0; - double vj_22zy = 0; - double vk_22zz = 0; - double vj_22zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = 
rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = 
rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - g3 = ak*2 * (ak*2 * trr_12x - 1 * trr_10x); - prod = g3 * 1 * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * trr_10x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * trr_10x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != 
NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - g3 = ak*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * fac * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * fac * trr_10y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * 
trr_01x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * fac * trr_10z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - g3 = ak*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * fac * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ka)*9 + 0, vk_22xx); - atomicAdd(vk + (ka*natm+ka)*9 + 1, vk_22xy); - atomicAdd(vk + (ka*natm+ka)*9 + 2, vk_22xz); - atomicAdd(vk + (ka*natm+ka)*9 + 3, vk_22yx); - atomicAdd(vk + (ka*natm+ka)*9 + 4, vk_22yy); - atomicAdd(vk + (ka*natm+ka)*9 + 5, vk_22yz); - atomicAdd(vk + (ka*natm+ka)*9 + 6, vk_22zx); - atomicAdd(vk + (ka*natm+ka)*9 + 7, vk_22zy); - atomicAdd(vk + (ka*natm+ka)*9 + 8, vk_22zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ka)*9 + 0, vj_22xx); - atomicAdd(vj + (ka*natm+ka)*9 + 1, vj_22xy); - atomicAdd(vj + (ka*natm+ka)*9 + 2, vj_22xz); - atomicAdd(vj + (ka*natm+ka)*9 + 3, vj_22yx); - atomicAdd(vj + (ka*natm+ka)*9 + 4, vj_22yy); - atomicAdd(vj + (ka*natm+ka)*9 + 5, vj_22yz); - atomicAdd(vj + (ka*natm+ka)*9 + 6, vj_22zx); - atomicAdd(vj + (ka*natm+ka)*9 + 7, vj_22zy); - atomicAdd(vj + (ka*natm+ka)*9 + 8, 
vj_22zz); - } - - double vk_23xx = 0; - double vj_23xx = 0; - double vk_23xy = 0; - double vj_23xy = 0; - double vk_23xz = 0; - double vj_23xz = 0; - double vk_23yx = 0; - double vj_23yx = 0; - double vk_23yy = 0; - double vj_23yy = 0; - double vk_23yz = 0; - double vj_23yz = 0; - double vk_23zx = 0; - double vj_23zx = 0; - double vk_23zy = 0; - double vj_23zy = 0; - double vk_23zz = 0; - double vj_23zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] 
*= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - g3 = al*2 * ak*2 * hrr_1011x; - prod = g3 * 1 * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * trr_10x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * 
trr_01y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * trr_10x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g3 = al*2 * ak*2 * hrr_0011x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - double hrr_0001x = trr_01x - xlxk * fac; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = al*2 * ak*2 * hrr_1011y; - prod = g3 * fac * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 
* hrr_0001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * fac * trr_10y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = al*2 * ak*2 * hrr_0011x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * fac * trr_10z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_12z = 
cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = al*2 * ak*2 * hrr_1011z; - prod = g3 * fac * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+la)*9 + 0, vk_23xx); - atomicAdd(vk + (ka*natm+la)*9 + 1, vk_23xy); - atomicAdd(vk + (ka*natm+la)*9 + 2, vk_23xz); - atomicAdd(vk + (ka*natm+la)*9 + 3, vk_23yx); - atomicAdd(vk + (ka*natm+la)*9 + 4, vk_23yy); - atomicAdd(vk + (ka*natm+la)*9 + 5, vk_23yz); - atomicAdd(vk + (ka*natm+la)*9 + 6, vk_23zx); - atomicAdd(vk + (ka*natm+la)*9 + 7, vk_23zy); - atomicAdd(vk + (ka*natm+la)*9 + 8, vk_23zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+la)*9 + 0, vj_23xx); - atomicAdd(vj + (ka*natm+la)*9 + 1, vj_23xy); - atomicAdd(vj + (ka*natm+la)*9 + 2, vj_23xz); - atomicAdd(vj + (ka*natm+la)*9 + 3, vj_23yx); - atomicAdd(vj + (ka*natm+la)*9 + 4, vj_23yy); - atomicAdd(vj + (ka*natm+la)*9 + 5, vj_23yz); - atomicAdd(vj + (ka*natm+la)*9 + 6, vj_23zx); - atomicAdd(vj + (ka*natm+la)*9 + 7, vj_23zy); - atomicAdd(vj + (ka*natm+la)*9 + 8, vj_23zz); - } - - double vk_30xx = 0; - double vj_30xx = 0; - double vk_30xy = 0; - double vj_30xy = 0; - double vk_30xz = 0; - double vj_30xz = 0; - double vk_30yx = 0; - double vj_30yx = 0; - double vk_30yy = 0; - double vj_30yy = 0; - double vk_30yz = 0; - double vj_30yz = 0; - double vk_30zx = 0; - double vj_30zx = 0; - double vk_30zy = 0; - double vj_30zy = 0; - double vk_30zz = 0; - double vj_30zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = 
ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - 
double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - g3 = ai*2 * al*2 * hrr_2001x; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - g3 -= 1 * al*2 * hrr_0001x; - prod = g3 * 1 * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g1 = al*2 * hrr_1001x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * trr_10x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * trr_10x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = 
dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * trr_10y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - g3 = ai*2 * al*2 * hrr_2001y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * fac * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * fac * trr_10y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 
= ai*2 * al*2 * hrr_1001x; - prod = g3 * 1 * trr_10z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * fac * trr_10z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - g3 = ai*2 * al*2 * hrr_2001z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * fac * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ia)*9 + 0, vk_30xx); - atomicAdd(vk + (la*natm+ia)*9 + 1, vk_30xy); - atomicAdd(vk + (la*natm+ia)*9 + 2, vk_30xz); - atomicAdd(vk + (la*natm+ia)*9 + 3, vk_30yx); - atomicAdd(vk + (la*natm+ia)*9 + 4, vk_30yy); - atomicAdd(vk + (la*natm+ia)*9 + 5, vk_30yz); - atomicAdd(vk + (la*natm+ia)*9 + 6, vk_30zx); - atomicAdd(vk + (la*natm+ia)*9 + 7, vk_30zy); - atomicAdd(vk + (la*natm+ia)*9 + 8, vk_30zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ia)*9 + 0, vj_30xx); - atomicAdd(vj + (la*natm+ia)*9 + 1, vj_30xy); - atomicAdd(vj + (la*natm+ia)*9 + 2, vj_30xz); - atomicAdd(vj + (la*natm+ia)*9 + 3, vj_30yx); - atomicAdd(vj + 
(la*natm+ia)*9 + 4, vj_30yy); - atomicAdd(vj + (la*natm+ia)*9 + 5, vj_30yz); - atomicAdd(vj + (la*natm+ia)*9 + 6, vj_30zx); - atomicAdd(vj + (la*natm+ia)*9 + 7, vj_30zy); - atomicAdd(vj + (la*natm+ia)*9 + 8, vj_30zz); - } - - double vk_31xx = 0; - double vj_31xx = 0; - double vk_31xy = 0; - double vj_31xy = 0; - double vk_31xz = 0; - double vj_31xz = 0; - double vk_31yx = 0; - double vj_31yx = 0; - double vk_31yy = 0; - double vj_31yy = 0; - double vk_31yz = 0; - double vj_31yz = 0; - double vk_31zx = 0; - double vj_31zx = 0; - double vk_31zy = 0; - double vj_31zy = 0; - double vk_31zz = 0; - double vj_31zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } 
else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g3 = aj*2 * al*2 * hrr_1101x; - prod = g3 * 1 * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - double cpy = yqc + 
ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * trr_10x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * trr_10x * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * trr_10y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = 
trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = aj*2 * al*2 * hrr_1101y; - prod = g3 * fac * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * fac * trr_10y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * 1 * trr_10z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = aj*2 * hrr_1100z; - prod = g1 * 
g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * fac * trr_10z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = aj*2 * al*2 * hrr_1101z; - prod = g3 * fac * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ja)*9 + 0, vk_31xx); - atomicAdd(vk + (la*natm+ja)*9 + 1, vk_31xy); - atomicAdd(vk + (la*natm+ja)*9 + 2, vk_31xz); - atomicAdd(vk + (la*natm+ja)*9 + 3, vk_31yx); - atomicAdd(vk + (la*natm+ja)*9 + 4, vk_31yy); - atomicAdd(vk + (la*natm+ja)*9 + 5, vk_31yz); - atomicAdd(vk + (la*natm+ja)*9 + 6, vk_31zx); - atomicAdd(vk + (la*natm+ja)*9 + 7, vk_31zy); - atomicAdd(vk + (la*natm+ja)*9 + 8, vk_31zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ja)*9 + 0, vj_31xx); - atomicAdd(vj + (la*natm+ja)*9 + 1, vj_31xy); - atomicAdd(vj + (la*natm+ja)*9 + 2, vj_31xz); - atomicAdd(vj + (la*natm+ja)*9 + 3, vj_31yx); - atomicAdd(vj + (la*natm+ja)*9 + 4, vj_31yy); - atomicAdd(vj + (la*natm+ja)*9 + 5, vj_31yz); - atomicAdd(vj + (la*natm+ja)*9 + 6, vj_31zx); - atomicAdd(vj + (la*natm+ja)*9 + 7, vj_31zy); - atomicAdd(vj + (la*natm+ja)*9 + 8, vj_31zz); - } - - double vk_32xx = 0; - double vj_32xx = 0; - double vk_32xy = 0; - double vj_32xy = 0; - double vk_32xz = 0; - double vj_32xz 
= 0; - double vk_32yx = 0; - double vj_32yx = 0; - double vk_32yy = 0; - double vj_32yy = 0; - double vk_32yz = 0; - double vj_32yz = 0; - double vk_32zx = 0; - double vj_32zx = 0; - double vk_32zy = 0; - double vj_32zy = 0; - double vk_32zz = 0; - double vj_32zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + 
(2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - g3 = ak*2 * al*2 * hrr_1011x; - prod = g3 * 1 * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g1 = al*2 * hrr_1001x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * trr_10x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; 
- g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * trr_10x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g3 = ak*2 * al*2 * hrr_0011x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_0001x = trr_01x - xlxk * fac; - g1 = al*2 * hrr_0001x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = ak*2 * al*2 * hrr_1011y; - prod = g3 * fac * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01x; - 
prod = g1 * g2 * trr_10y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * fac * trr_10y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = ak*2 * al*2 * hrr_0011x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * fac * trr_10z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = ak*2 * al*2 * hrr_1011z; - prod = g3 
* fac * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ka)*9 + 0, vk_32xx); - atomicAdd(vk + (la*natm+ka)*9 + 1, vk_32xy); - atomicAdd(vk + (la*natm+ka)*9 + 2, vk_32xz); - atomicAdd(vk + (la*natm+ka)*9 + 3, vk_32yx); - atomicAdd(vk + (la*natm+ka)*9 + 4, vk_32yy); - atomicAdd(vk + (la*natm+ka)*9 + 5, vk_32yz); - atomicAdd(vk + (la*natm+ka)*9 + 6, vk_32zx); - atomicAdd(vk + (la*natm+ka)*9 + 7, vk_32zy); - atomicAdd(vk + (la*natm+ka)*9 + 8, vk_32zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ka)*9 + 0, vj_32xx); - atomicAdd(vj + (la*natm+ka)*9 + 1, vj_32xy); - atomicAdd(vj + (la*natm+ka)*9 + 2, vj_32xz); - atomicAdd(vj + (la*natm+ka)*9 + 3, vj_32yx); - atomicAdd(vj + (la*natm+ka)*9 + 4, vj_32yy); - atomicAdd(vj + (la*natm+ka)*9 + 5, vj_32yz); - atomicAdd(vj + (la*natm+ka)*9 + 6, vj_32zx); - atomicAdd(vj + (la*natm+ka)*9 + 7, vj_32zy); - atomicAdd(vj + (la*natm+ka)*9 + 8, vj_32zz); - } - - double vk_33xx = 0; - double vj_33xx = 0; - double vk_33xy = 0; - double vj_33xy = 0; - double vk_33xz = 0; - double vj_33xz = 0; - double vk_33yx = 0; - double vj_33yx = 0; - double vk_33yy = 0; - double vj_33yy = 0; - double vk_33yz = 0; - double vj_33yz = 0; - double vk_33zx = 0; - double vj_33zx = 0; - double vk_33zy = 0; - double vj_33zy = 0; - double vk_33zz = 0; - double vj_33zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp 
= 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(2, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(2, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 2; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 2; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_0_0; - dd_jl = dm_jl_0_0 * dm_ik_0_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm_lk_0_0; - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - 
double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; - g3 = al*2 * (al*2 * hrr_1002x - 1 * trr_10x); - prod = g3 * 1 * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * trr_10x * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * trr_10x * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_1_0; - dd_jl = dm_jl_0_0 * dm_ik_1_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += 
dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm_lk_0_0; - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; - g3 = al*2 * (al*2 * hrr_1002y - 1 * trr_10y); - prod = g3 * fac * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * fac * trr_10y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm_jk_0_0 * dm_il_2_0; - dd_jl = dm_jl_0_0 * dm_ik_2_0; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] 
* dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm_lk_0_0; - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * fac * trr_10z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; - g3 = al*2 * (al*2 * hrr_1002z - 1 * trr_10z); - prod = g3 * fac * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+la)*9 + 0, vk_33xx); - atomicAdd(vk + (la*natm+la)*9 + 1, vk_33xy); - atomicAdd(vk + (la*natm+la)*9 + 2, vk_33xz); - atomicAdd(vk + (la*natm+la)*9 + 3, vk_33yx); - atomicAdd(vk + (la*natm+la)*9 + 4, vk_33yy); - atomicAdd(vk + (la*natm+la)*9 + 5, vk_33yz); - atomicAdd(vk + (la*natm+la)*9 + 6, vk_33zx); - atomicAdd(vk + 
(la*natm+la)*9 + 7, vk_33zy); - atomicAdd(vk + (la*natm+la)*9 + 8, vk_33zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+la)*9 + 0, vj_33xx); - atomicAdd(vj + (la*natm+la)*9 + 1, vj_33xy); - atomicAdd(vj + (la*natm+la)*9 + 2, vj_33xz); - atomicAdd(vj + (la*natm+la)*9 + 3, vj_33yx); - atomicAdd(vj + (la*natm+la)*9 + 4, vj_33yy); - atomicAdd(vj + (la*natm+la)*9 + 5, vj_33yz); - atomicAdd(vj + (la*natm+la)*9 + 6, vj_33zx); - atomicAdd(vj + (la*natm+la)*9 + 7, vj_33zy); - atomicAdd(vj + (la*natm+la)*9 + 8, vj_33zz); - } - } -} -__global__ -void rys_ejk_ip2_1000(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *pool, uint32_t *batch_head) -{ - int b_id = blockIdx.x; - int t_id = threadIdx.x + blockDim.x * threadIdx.y; - ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; - __shared__ int batch_id; - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - } - __syncthreads(); - int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; - int nbatches = bounds.ntile_ij_pairs * nbatches_kl; - while (batch_id < nbatches) { - int batch_ij = batch_id / nbatches_kl; - int batch_kl = batch_id % nbatches_kl; - int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, - batch_ij, batch_kl); - if (ntasks > 0) { - int tile_ij = bounds.tile_ij_mapping[batch_ij]; - int nbas_tiles = nbas / TILE; - int tile_i = tile_ij / nbas_tiles; - int tile_j = tile_ij % nbas_tiles; - int ish0 = tile_i * TILE; - int jsh0 = tile_j * TILE; - _rys_ejk_ip2_1000(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); - } - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - atomicAdd(batch_head+1, ntasks); - } - __syncthreads(); - } -} - -__device__ static -void _rys_ejk_ip2_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) -{ - int sq_id = threadIdx.x + blockDim.x * threadIdx.y; - int nsq_per_block = blockDim.x * blockDim.y; - int iprim = 
bounds.iprim; - int jprim = bounds.jprim; - int kprim = bounds.kprim; - int lprim = bounds.lprim; - int *ao_loc = envs.ao_loc; - int nbas = envs.nbas; - int nao = ao_loc[nbas]; - int *bas = envs.bas; - double *env = envs.env; - double omega = env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; - double *dm = jk.dm; - extern __shared__ double dm_cache[]; - double *Rpa_cicj = dm_cache + 3 * TILE2; - double *rw = Rpa_cicj + iprim*jprim*TILE2*4; - for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { - int ijp = n / TILE2; - int sh_ij = n % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int ip = ijp / jprim; - int jp = ijp % jprim; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; - double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double aj_aij = aj / aij; - double xjxi = rj[0] - ri[0]; - double yjyi = rj[1] - ri[1]; - double zjzi = rj[2] - ri[2]; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; - Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; - Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; - double theta_ij = ai * aj / aij; - double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); - Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; - } - - int ij = sq_id / TILE2; - if (ij < 3) { - int i = ij % 3; - int j = ij / 3; - int sh_ij = sq_id % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - if (jk.n_dm == 1) { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i]; - } else { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i] + dm[(nao+j0+j)*nao+i0+i]; - } - } - - for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { - __syncthreads(); - int task_id = task0 
+ sq_id; - double fac_sym = PI_FAC; - ShellQuartet sq; - if (task_id >= ntasks) { - // To avoid __syncthreads blocking blocking idle warps, all remaining - // threads compute a valid shell quartet with zero normalization factor - sq = shl_quartet_idx[0]; - fac_sym = 0.; - } else { - sq = shl_quartet_idx[task_id]; - } - int ish = sq.i; - int jsh = sq.j; - int ksh = sq.k; - int lsh = sq.l; - int sh_ij = (ish % TILE) * TILE + (jsh % TILE); - if (ish == jsh) fac_sym *= .5; - if (ksh == lsh) fac_sym *= .5; - if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - int k0 = ao_loc[ksh]; - int l0 = ao_loc[lsh]; - int natm = envs.natm; - int ia = bas[ish*BAS_SLOTS+ATOM_OF]; - int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; - int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; - int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; - double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; - double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; - double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; - double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double dd_jk, dd_jl, vj_dd, vk_dd; - double g1, g2, g3, prod; - - - double vk_00xx = 0; - double vj_00xx = 0; - double vk_00xy = 0; - double vj_00xy = 0; - double vk_00xz = 0; - double vj_00xz = 0; - double vk_00yx = 0; - double vj_00yx = 0; - double vk_00yy = 0; - double vj_00yy = 0; - double vk_00yz = 0; - double vj_00yz = 0; - double vk_00zx = 0; - double vj_00zx = 0; - double vk_00zy = 0; - double vj_00zy = 0; - double vk_00zz = 0; - double vj_00zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - 
double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj 
!= NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - g3 = ai*2 * (ai*2 * trr_31x - 3 * trr_11x); - prod = g3 * 1 * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - g1 = ai*2 * trr_21x; - double trr_01x = cpx * fac; - g1 -= 1 * trr_01x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_21x; - g1 -= 1 * trr_01x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * trr_11x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double 
trr_20z = c0z * trr_10z + 1*b10 * wt; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * trr_11x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_21x - 1 * trr_01x); - prod = g3 * trr_10y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - g3 = ai*2 * (ai*2 * trr_30y - 3 * trr_10y); - prod = g3 * trr_01x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_01x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * trr_01x * trr_10y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * 
dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_21x - 1 * trr_01x); - prod = g3 * 1 * trr_10z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * trr_01x * trr_10z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_01x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - g3 = ai*2 * (ai*2 * trr_30z - 3 * trr_10z); - prod = g3 * trr_01x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] 
* dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * (ai*2 * trr_30x - 3 * trr_10x); - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - prod = g3 * trr_01y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_01y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ai*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * trr_10x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * trr_10x * trr_01y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 
1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * trr_11y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - g3 = ai*2 * (ai*2 * trr_31y - 3 * trr_11y); - prod = g3 * fac * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * fac * trr_11y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - 
prod = g3 * trr_01y * trr_10z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * fac * trr_10z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_30z - 3 * trr_10z); - prod = g3 * fac * trr_01y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * (ai*2 * trr_30x - 3 * trr_10x); - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - prod = g3 * 1 * trr_01z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_01z; - vk_00xy += prod * 
vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * trr_10x * trr_01z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ai*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * trr_10x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * trr_10y * trr_01z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - 
g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_30y - 3 * trr_10y); - prod = g3 * fac * trr_01z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * fac * trr_10y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * 1 * trr_11z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * fac * trr_11z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * 
trr_10y; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - g3 = ai*2 * (ai*2 * trr_31z - 3 * trr_11z); - prod = g3 * fac * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ia)*9 + 0, vk_00xx); - atomicAdd(vk + (ia*natm+ia)*9 + 1, vk_00xy); - atomicAdd(vk + (ia*natm+ia)*9 + 2, vk_00xz); - atomicAdd(vk + (ia*natm+ia)*9 + 3, vk_00yx); - atomicAdd(vk + (ia*natm+ia)*9 + 4, vk_00yy); - atomicAdd(vk + (ia*natm+ia)*9 + 5, vk_00yz); - atomicAdd(vk + (ia*natm+ia)*9 + 6, vk_00zx); - atomicAdd(vk + (ia*natm+ia)*9 + 7, vk_00zy); - atomicAdd(vk + (ia*natm+ia)*9 + 8, vk_00zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ia)*9 + 0, vj_00xx); - atomicAdd(vj + (ia*natm+ia)*9 + 1, vj_00xy); - atomicAdd(vj + (ia*natm+ia)*9 + 2, vj_00xz); - atomicAdd(vj + (ia*natm+ia)*9 + 3, vj_00yx); - atomicAdd(vj + (ia*natm+ia)*9 + 4, vj_00yy); - atomicAdd(vj + (ia*natm+ia)*9 + 5, vj_00yz); - atomicAdd(vj + (ia*natm+ia)*9 + 6, vj_00zx); - atomicAdd(vj + (ia*natm+ia)*9 + 7, vj_00zy); - atomicAdd(vj + (ia*natm+ia)*9 + 8, vj_00zz); - } - - double vk_01xx = 0; - double vj_01xx = 0; - double vk_01xy = 0; - double vj_01xy = 0; - double vk_01xz = 0; - double vj_01xz = 0; - double vk_01yx = 0; - double vj_01yx = 0; - double vk_01yy = 0; - double vj_01yy = 0; - double vk_01yz = 0; - double vj_01yz = 0; - double vk_01zx = 0; - double vj_01zx = 0; - double vk_01zy = 0; - double vj_01zy = 0; - double vk_01zz = 0; - double vj_01zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - 
double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - (rj[0] - ri[0]) * trr_21x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 = aj*2 * (ai*2 * hrr_2110x - 1 * hrr_0110x); - prod = g3 * 1 * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_21x; - g1 -= 1 * trr_01x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_21x; - g1 -= 1 * trr_01x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * trr_11x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; 
- g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * trr_11x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1110x; - prod = g3 * trr_10y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g3 = aj*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); - prod = g3 * trr_01x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01x; - vk_01yz += prod * 
vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * trr_01x * trr_10y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1110x; - prod = g3 * 1 * trr_10z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * trr_01x * trr_10z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_30z = c0z 
* trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = aj*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * trr_01x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 = aj*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - prod = g3 * trr_01y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = aj*2 * ai*2 * hrr_1110y; - prod = g3 * trr_10x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * 
trr_10z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * trr_10x * trr_01y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * trr_11y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - g3 = aj*2 * (ai*2 * hrr_2110y - 1 * hrr_0110y); - prod = g3 * fac * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * fac; - 
vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * fac * trr_11y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * trr_01y * trr_10z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1110y; - prod = g3 * fac * trr_10z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * fac * trr_01y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - 
vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - prod = g3 * 1 * trr_01z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * trr_10x * trr_01z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = aj*2 * ai*2 * hrr_1110z; - prod = g3 * trr_10x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - 
if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * trr_10y * trr_01z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); - prod = g3 * fac * trr_01z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1110z; - prod = g3 * fac * trr_10y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = 
dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * 1 * trr_11z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * fac * trr_11z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - g3 = aj*2 * (ai*2 * hrr_2110z - 1 * hrr_0110z); - prod = g3 * fac * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ja)*9 + 0, vk_01xx); - atomicAdd(vk + (ia*natm+ja)*9 + 1, vk_01xy); - atomicAdd(vk + (ia*natm+ja)*9 + 2, vk_01xz); - atomicAdd(vk + (ia*natm+ja)*9 + 3, vk_01yx); - atomicAdd(vk + (ia*natm+ja)*9 + 4, vk_01yy); - atomicAdd(vk + (ia*natm+ja)*9 + 5, vk_01yz); - atomicAdd(vk + (ia*natm+ja)*9 + 6, vk_01zx); - atomicAdd(vk + (ia*natm+ja)*9 + 7, vk_01zy); - atomicAdd(vk + (ia*natm+ja)*9 + 8, vk_01zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ja)*9 + 0, vj_01xx); - atomicAdd(vj + (ia*natm+ja)*9 + 1, vj_01xy); - atomicAdd(vj + (ia*natm+ja)*9 + 2, vj_01xz); - atomicAdd(vj + 
(ia*natm+ja)*9 + 3, vj_01yx); - atomicAdd(vj + (ia*natm+ja)*9 + 4, vj_01yy); - atomicAdd(vj + (ia*natm+ja)*9 + 5, vj_01yz); - atomicAdd(vj + (ia*natm+ja)*9 + 6, vj_01zx); - atomicAdd(vj + (ia*natm+ja)*9 + 7, vj_01zy); - atomicAdd(vj + (ia*natm+ja)*9 + 8, vj_01zz); - } - - double vk_02xx = 0; - double vj_02xx = 0; - double vk_02xy = 0; - double vj_02xy = 0; - double vk_02xz = 0; - double vj_02xz = 0; - double vk_02yx = 0; - double vj_02yx = 0; - double vk_02yy = 0; - double vj_02yy = 0; - double vk_02yz = 0; - double vj_02yz = 0; - double vk_02zx = 0; - double vj_02zx = 0; - double vk_02zy = 0; - double vj_02zy = 0; - double vk_02zz = 0; - double vj_02zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if 
(omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double trr_01x = cpx * fac; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - g3 = ak*2 * (ai*2 * trr_22x - 1 * trr_02x); - g3 -= 1 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * 1 * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_21x; - g1 -= 1 * trr_01x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_21x; - 
g1 -= 1 * trr_01x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * trr_11x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * trr_11x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * trr_12x; - g3 -= 1 * ai*2 * trr_10x; - prod = g3 * trr_10y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * 
wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ak*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * trr_01x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * trr_01x * trr_10y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * trr_12x; - g3 -= 1 * ai*2 * trr_10x; - prod = g3 * 1 * trr_10z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - 
vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * trr_01x * trr_10z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ak*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * trr_01x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * (ai*2 * trr_21x - 1 * trr_01x); - prod = g3 * trr_01y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ak*2 * trr_11x; - prod = 
g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - g3 = ak*2 * ai*2 * trr_12y; - g3 -= 1 * ai*2 * trr_10y; - prod = g3 * trr_10x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * trr_10x * trr_01y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * trr_11y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - g3 = ak*2 * (ai*2 * trr_22y - 1 * trr_02y); - g3 -= 1 * (ai*2 * trr_20y - 
1 * 1); - prod = g3 * fac * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * fac * trr_11y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * trr_01y * trr_10z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_12y; - g3 -= 1 * ai*2 * trr_10y; - prod = g3 * fac * trr_10z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01x; - prod = 
g1 * g2 * trr_01y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * fac * trr_01y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * (ai*2 * trr_21x - 1 * trr_01x); - prod = g3 * 1 * trr_01z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * trr_10x * trr_01z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * 
vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - g3 = ak*2 * ai*2 * trr_12z; - g3 -= 1 * ai*2 * trr_10z; - prod = g3 * trr_10x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * trr_10y * trr_01z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * fac * trr_01z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_12z; - g3 -= 1 * ai*2 * trr_10z; - prod = g3 * fac * trr_10y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = 
dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * 1 * trr_11z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * fac * trr_11z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - g3 = ak*2 * (ai*2 * trr_22z - 1 * trr_02z); - g3 -= 1 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * fac * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ka)*9 + 0, vk_02xx); - atomicAdd(vk + (ia*natm+ka)*9 + 1, vk_02xy); - atomicAdd(vk + (ia*natm+ka)*9 + 2, vk_02xz); - atomicAdd(vk + (ia*natm+ka)*9 + 
3, vk_02yx); - atomicAdd(vk + (ia*natm+ka)*9 + 4, vk_02yy); - atomicAdd(vk + (ia*natm+ka)*9 + 5, vk_02yz); - atomicAdd(vk + (ia*natm+ka)*9 + 6, vk_02zx); - atomicAdd(vk + (ia*natm+ka)*9 + 7, vk_02zy); - atomicAdd(vk + (ia*natm+ka)*9 + 8, vk_02zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ka)*9 + 0, vj_02xx); - atomicAdd(vj + (ia*natm+ka)*9 + 1, vj_02xy); - atomicAdd(vj + (ia*natm+ka)*9 + 2, vj_02xz); - atomicAdd(vj + (ia*natm+ka)*9 + 3, vj_02yx); - atomicAdd(vj + (ia*natm+ka)*9 + 4, vj_02yy); - atomicAdd(vj + (ia*natm+ka)*9 + 5, vj_02yz); - atomicAdd(vj + (ia*natm+ka)*9 + 6, vj_02zx); - atomicAdd(vj + (ia*natm+ka)*9 + 7, vj_02zy); - atomicAdd(vj + (ia*natm+ka)*9 + 8, vj_02zz); - } - - double vk_03xx = 0; - double vj_03xx = 0; - double vk_03xy = 0; - double vj_03xy = 0; - double vk_03xz = 0; - double vj_03xz = 0; - double vk_03yx = 0; - double vj_03yx = 0; - double vk_03yy = 0; - double vj_03yy = 0; - double vk_03yz = 0; - double vj_03yz = 0; - double vk_03zx = 0; - double vj_03zx = 0; - double vk_03zy = 0; - double vj_03zy = 0; - double vk_03zz = 0; - double vj_03zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / 
(aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = trr_22x 
- xlxk * trr_21x; - double trr_01x = cpx * fac; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g3 = al*2 * (ai*2 * hrr_2011x - 1 * hrr_0011x); - prod = g3 * 1 * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_21x; - g1 -= 1 * trr_01x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_21x; - g1 -= 1 * trr_01x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * trr_11x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * trr_11x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = 
dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1011x; - prod = g3 * trr_10y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - g3 = al*2 * (ai*2 * hrr_2001y - 1 * hrr_0001y); - prod = g3 * trr_01x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * trr_01x * trr_10y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1011x; - prod = g3 * 1 * trr_10z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_11x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * trr_01x * trr_10z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - g3 = al*2 * (ai*2 * hrr_2001z - 1 * hrr_0001z); - prod = g3 * trr_01x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != 
NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_0001x = trr_01x - xlxk * fac; - g3 = al*2 * (ai*2 * hrr_2001x - 1 * hrr_0001x); - prod = g3 * trr_01y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = al*2 * ai*2 * hrr_1011y; - prod = g3 * trr_10x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * trr_10x * trr_01y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * trr_11y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; - g3 = al*2 * (ai*2 * hrr_2011y - 1 * hrr_0011y); - prod = g3 * fac * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_21y; - g1 -= 1 * trr_01y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * fac * trr_11y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = 
dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * trr_01y * trr_10z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1011y; - prod = g3 * fac * trr_10z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_11y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * (ai*2 * hrr_2001z - 1 * hrr_0001z); - prod = g3 * fac * trr_01y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * (ai*2 * hrr_2001x - 1 * hrr_0001x); - prod = g3 * 1 * trr_01z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * 
trr_01z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * trr_10x * trr_01z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = al*2 * ai*2 * hrr_1011z; - prod = g3 * trr_10x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * trr_10y * trr_01z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0011z; - prod = 
g1 * g2 * trr_10y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * (ai*2 * hrr_2001y - 1 * hrr_0001y); - prod = g3 * fac * trr_01z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_11z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1011z; - prod = g3 * fac * trr_10y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * 1 * trr_11z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * fac * trr_11z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * 
vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_21z; - g1 -= 1 * trr_01z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_2011z = trr_22z - zlzk * trr_21z; - g3 = al*2 * (ai*2 * hrr_2011z - 1 * hrr_0011z); - prod = g3 * fac * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+la)*9 + 0, vk_03xx); - atomicAdd(vk + (ia*natm+la)*9 + 1, vk_03xy); - atomicAdd(vk + (ia*natm+la)*9 + 2, vk_03xz); - atomicAdd(vk + (ia*natm+la)*9 + 3, vk_03yx); - atomicAdd(vk + (ia*natm+la)*9 + 4, vk_03yy); - atomicAdd(vk + (ia*natm+la)*9 + 5, vk_03yz); - atomicAdd(vk + (ia*natm+la)*9 + 6, vk_03zx); - atomicAdd(vk + (ia*natm+la)*9 + 7, vk_03zy); - atomicAdd(vk + (ia*natm+la)*9 + 8, vk_03zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+la)*9 + 0, vj_03xx); - atomicAdd(vj + (ia*natm+la)*9 + 1, vj_03xy); - atomicAdd(vj + (ia*natm+la)*9 + 2, vj_03xz); - atomicAdd(vj + (ia*natm+la)*9 + 3, vj_03yx); - atomicAdd(vj + (ia*natm+la)*9 + 4, vj_03yy); - atomicAdd(vj + (ia*natm+la)*9 + 5, vj_03yz); - atomicAdd(vj + (ia*natm+la)*9 + 6, vj_03zx); - atomicAdd(vj + (ia*natm+la)*9 + 7, vj_03zy); - atomicAdd(vj + (ia*natm+la)*9 + 8, vj_03zz); - } - - double vk_10xx = 0; - double vj_10xx = 0; - double vk_10xy = 0; - double vj_10xy = 0; - double vk_10xz = 0; - double vj_10xz = 0; - double vk_10yx = 0; - double vj_10yx = 0; - double vk_10yy = 0; - double vj_10yy = 0; - double vk_10yz = 0; - double vj_10yz = 0; - double vk_10zx = 0; - double vj_10zx = 0; - double vk_10zy = 0; - double vj_10zy = 0; - double vk_10zz = 0; - double vj_10zz = 0; - for (int klp = 0; 
klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - (rj[0] - ri[0]) * trr_21x; - g3 = ai*2 * aj*2 * hrr_2110x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 -= 1 * aj*2 * hrr_0110x; - prod = g3 * 1 * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g1 = aj*2 * hrr_1110x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1110x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * trr_11x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; 
- g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * trr_11x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_1110x; - prod = g3 * trr_10y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g3 = ai*2 * aj*2 * hrr_2100y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * trr_01x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 
= ai*2 * trr_10z; - prod = g1 * g2 * trr_01x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * trr_01x * trr_10y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_1110x; - prod = g3 * 1 * trr_10z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * trr_01x * trr_10z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * 
trr_01x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = ai*2 * aj*2 * hrr_2100z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * trr_01x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g3 = ai*2 * aj*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 -= 1 * aj*2 * hrr_0100x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - prod = g3 * trr_01y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g1 = aj*2 * hrr_1100x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_01y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g1 = aj*2 * hrr_0110y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = ai*2 * aj*2 * hrr_1110y; - prod = g3 * trr_10x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = ai*2 * trr_10z; - 
prod = g1 * g2 * trr_10x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * trr_10x * trr_01y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * trr_11y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - g3 = ai*2 * aj*2 * hrr_2110y; - g3 -= 1 * aj*2 * hrr_0110y; - prod = g3 * fac * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11y; - vk_10zx += prod * 
vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * fac * trr_11y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * trr_01y * trr_10z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1110y; - prod = g3 * fac * trr_10z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_2100z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * fac * trr_01y; - vk_10zz += prod * vk_dd; - vj_10zz += 
prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * aj*2 * hrr_2100x; - g3 -= 1 * aj*2 * hrr_0100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - prod = g3 * 1 * trr_01z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_01z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * trr_10x * trr_01z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g1 = aj*2 * hrr_0110z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = ai*2 * aj*2 * hrr_1110z; - prod = g3 * trr_10x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; 
- if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * trr_10y * trr_01z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_2100y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * fac * trr_01z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1110z; - prod = g3 * fac * trr_10y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * 1 * trr_11z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * fac * trr_11z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - g3 = ai*2 * aj*2 * hrr_2110z; - g3 -= 1 * aj*2 * hrr_0110z; - prod = g3 * fac * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ia)*9 + 0, vk_10xx); - atomicAdd(vk + (ja*natm+ia)*9 + 1, vk_10xy); - atomicAdd(vk + (ja*natm+ia)*9 + 2, vk_10xz); - atomicAdd(vk + (ja*natm+ia)*9 + 3, vk_10yx); - atomicAdd(vk + (ja*natm+ia)*9 + 4, vk_10yy); - atomicAdd(vk + (ja*natm+ia)*9 + 5, vk_10yz); - atomicAdd(vk + (ja*natm+ia)*9 + 6, vk_10zx); - atomicAdd(vk + (ja*natm+ia)*9 + 7, vk_10zy); - atomicAdd(vk + (ja*natm+ia)*9 + 8, vk_10zz); 
- } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ia)*9 + 0, vj_10xx); - atomicAdd(vj + (ja*natm+ia)*9 + 1, vj_10xy); - atomicAdd(vj + (ja*natm+ia)*9 + 2, vj_10xz); - atomicAdd(vj + (ja*natm+ia)*9 + 3, vj_10yx); - atomicAdd(vj + (ja*natm+ia)*9 + 4, vj_10yy); - atomicAdd(vj + (ja*natm+ia)*9 + 5, vj_10yz); - atomicAdd(vj + (ja*natm+ia)*9 + 6, vj_10zx); - atomicAdd(vj + (ja*natm+ia)*9 + 7, vj_10zy); - atomicAdd(vj + (ja*natm+ia)*9 + 8, vj_10zz); - } - - double vk_11xx = 0; - double vj_11xx = 0; - double vk_11xy = 0; - double vj_11xy = 0; - double vk_11xz = 0; - double vj_11xz = 0; - double vk_11yx = 0; - double vj_11yx = 0; - double vk_11yy = 0; - double vj_11yy = 0; - double vk_11yz = 0; - double vj_11yz = 0; - double vk_11zx = 0; - double vj_11zx = 0; - double vk_11zy = 0; - double vj_11zy = 0; - double vk_11zz = 0; - double vj_11zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - 
double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - (rj[0] - ri[0]) * trr_21x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - double hrr_1210x = hrr_2110x - (rj[0] - ri[0]) * hrr_1110x; - g3 = aj*2 * (aj*2 * hrr_1210x - 1 * trr_11x); - prod = g3 * 1 * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * 
vj_dd; - g1 = aj*2 * hrr_1110x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_1110x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * trr_11x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * trr_11x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = 
dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - double hrr_0210x = hrr_1110x - (rj[0] - ri[0]) * hrr_0110x; - g3 = aj*2 * (aj*2 * hrr_0210x - 1 * trr_01x); - prod = g3 * trr_10y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g3 = aj*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * trr_01x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * trr_01x * trr_10y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if 
(jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_0210x - 1 * trr_01x); - prod = g3 * 1 * trr_10z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * trr_01x * trr_10z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g3 = aj*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * trr_01x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * 
(dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g3 = aj*2 * (aj*2 * hrr_1200x - 1 * trr_10x); - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - prod = g3 * trr_01y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - double hrr_0210y = hrr_1110y - (rj[1] - ri[1]) * hrr_0110y; - g3 = aj*2 * (aj*2 * hrr_0210y - 1 * trr_01y); - prod = g3 * trr_10x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * trr_10x * trr_01y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * trr_11y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - double hrr_1210y = hrr_2110y - (rj[1] - ri[1]) * hrr_1110y; - g3 = aj*2 * (aj*2 * hrr_1210y - 1 * trr_11y); - prod = g3 * fac * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * fac * trr_11y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * trr_01y * trr_10z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0210y - 1 * trr_01y); - prod = g3 * fac * trr_10z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * fac * trr_01y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * (aj*2 * 
hrr_1200x - 1 * trr_10x); - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - prod = g3 * 1 * trr_01z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * trr_10x * trr_01z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - double hrr_0210z = hrr_1110z - (rj[2] - ri[2]) * hrr_0110z; - g3 = aj*2 * (aj*2 * hrr_0210z - 1 * trr_01z); - prod = g3 * trr_10x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + 
dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * trr_10y * trr_01z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * fac * trr_01z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0210z - 1 * trr_01z); - prod = g3 * fac * trr_10y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * 1 * trr_11z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; 
- g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * fac * trr_11z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - double hrr_1210z = hrr_2110z - (rj[2] - ri[2]) * hrr_1110z; - g3 = aj*2 * (aj*2 * hrr_1210z - 1 * trr_11z); - prod = g3 * fac * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ja)*9 + 0, vk_11xx); - atomicAdd(vk + (ja*natm+ja)*9 + 1, vk_11xy); - atomicAdd(vk + (ja*natm+ja)*9 + 2, vk_11xz); - atomicAdd(vk + (ja*natm+ja)*9 + 3, vk_11yx); - atomicAdd(vk + (ja*natm+ja)*9 + 4, vk_11yy); - atomicAdd(vk + (ja*natm+ja)*9 + 5, vk_11yz); - atomicAdd(vk + (ja*natm+ja)*9 + 6, vk_11zx); - atomicAdd(vk + (ja*natm+ja)*9 + 7, vk_11zy); - atomicAdd(vk + (ja*natm+ja)*9 + 8, vk_11zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ja)*9 + 0, vj_11xx); - atomicAdd(vj + (ja*natm+ja)*9 + 1, vj_11xy); - atomicAdd(vj + (ja*natm+ja)*9 + 2, vj_11xz); - atomicAdd(vj + (ja*natm+ja)*9 + 3, vj_11yx); - atomicAdd(vj + (ja*natm+ja)*9 + 4, vj_11yy); - atomicAdd(vj + (ja*natm+ja)*9 + 5, vj_11yz); - atomicAdd(vj + (ja*natm+ja)*9 + 6, vj_11zx); - atomicAdd(vj + (ja*natm+ja)*9 + 7, vj_11zy); - atomicAdd(vj + (ja*natm+ja)*9 + 8, vj_11zz); - } - - double vk_12xx = 0; - 
double vj_12xx = 0; - double vk_12xy = 0; - double vj_12xy = 0; - double vk_12xz = 0; - double vj_12xz = 0; - double vk_12yx = 0; - double vj_12yx = 0; - double vk_12yy = 0; - double vj_12yy = 0; - double vk_12yz = 0; - double vj_12yz = 0; - double vk_12zx = 0; - double vj_12zx = 0; - double vk_12zy = 0; - double vj_12zy = 0; - double vk_12zz = 0; - double vj_12zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - 
__syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1120x = trr_22x - (rj[0] - ri[0]) * trr_12x; - g3 = ak*2 * aj*2 * hrr_1120x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 -= 1 * aj*2 * hrr_1100x; - prod = g3 * 1 * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g1 = aj*2 * hrr_1110x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1110x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_12xz += prod * 
vk_dd; - vj_12xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * trr_11x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * trr_11x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0120x = trr_12x - (rj[0] - ri[0]) * trr_02x; - g3 = ak*2 * aj*2 * hrr_0120x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * 
fac; - g3 -= 1 * aj*2 * hrr_0100x; - prod = g3 * trr_10y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g1 = aj*2 * hrr_0110x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = ak*2 * aj*2 * hrr_1110y; - prod = g3 * trr_01x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * trr_01x * trr_10y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_0120x; - g3 -= 1 
* aj*2 * hrr_0100x; - prod = g3 * 1 * trr_10z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * trr_01x * trr_10z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = ak*2 * aj*2 * hrr_1110z; - prod = g3 * trr_01x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * aj*2 * hrr_1110x; - prod = g3 * trr_01y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 
= aj*2 * hrr_1100x; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_0120y = trr_12y - (rj[1] - ri[1]) * trr_02y; - g3 = ak*2 * aj*2 * hrr_0120y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * trr_10x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * trr_10x * trr_01y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * trr_11y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - 
vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_1120y = trr_22y - (rj[1] - ri[1]) * trr_12y; - g3 = ak*2 * aj*2 * hrr_1120y; - g3 -= 1 * aj*2 * hrr_1100y; - prod = g3 * fac * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * fac * trr_11y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * trr_01y * trr_10z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = 
aj*2 * hrr_0110y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0120y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * fac * trr_10z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_1110z; - prod = g3 * fac * trr_01y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * aj*2 * hrr_1110x; - prod = g3 * 1 * trr_01z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * trr_10x * trr_01z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = 
aj*2 * hrr_0100y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_0120z = trr_12z - (rj[2] - ri[2]) * trr_02z; - g3 = ak*2 * aj*2 * hrr_0120z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * trr_10x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * trr_10y * trr_01z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_1110y; - prod = g3 * fac * trr_01z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * 
hrr_0110z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0120z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * fac * trr_10y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * 1 * trr_11z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * fac * trr_11z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z 
+ 2*b00 * trr_11z; - double hrr_1120z = trr_22z - (rj[2] - ri[2]) * trr_12z; - g3 = ak*2 * aj*2 * hrr_1120z; - g3 -= 1 * aj*2 * hrr_1100z; - prod = g3 * fac * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ka)*9 + 0, vk_12xx); - atomicAdd(vk + (ja*natm+ka)*9 + 1, vk_12xy); - atomicAdd(vk + (ja*natm+ka)*9 + 2, vk_12xz); - atomicAdd(vk + (ja*natm+ka)*9 + 3, vk_12yx); - atomicAdd(vk + (ja*natm+ka)*9 + 4, vk_12yy); - atomicAdd(vk + (ja*natm+ka)*9 + 5, vk_12yz); - atomicAdd(vk + (ja*natm+ka)*9 + 6, vk_12zx); - atomicAdd(vk + (ja*natm+ka)*9 + 7, vk_12zy); - atomicAdd(vk + (ja*natm+ka)*9 + 8, vk_12zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ka)*9 + 0, vj_12xx); - atomicAdd(vj + (ja*natm+ka)*9 + 1, vj_12xy); - atomicAdd(vj + (ja*natm+ka)*9 + 2, vj_12xz); - atomicAdd(vj + (ja*natm+ka)*9 + 3, vj_12yx); - atomicAdd(vj + (ja*natm+ka)*9 + 4, vj_12yy); - atomicAdd(vj + (ja*natm+ka)*9 + 5, vj_12yz); - atomicAdd(vj + (ja*natm+ka)*9 + 6, vj_12zx); - atomicAdd(vj + (ja*natm+ka)*9 + 7, vj_12zy); - atomicAdd(vj + (ja*natm+ka)*9 + 8, vj_12zz); - } - - double vk_13xx = 0; - double vj_13xx = 0; - double vk_13xy = 0; - double vj_13xy = 0; - double vk_13xz = 0; - double vj_13xz = 0; - double vk_13yx = 0; - double vj_13yx = 0; - double vk_13yy = 0; - double vj_13yy = 0; - double vk_13yz = 0; - double vj_13yz = 0; - double vk_13zx = 0; - double vj_13zx = 0; - double vk_13zy = 0; - double vj_13zy = 0; - double vk_13zz = 0; - double vj_13zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - 
double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double 
c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = trr_22x - xlxk * trr_21x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - double hrr_1111x = hrr_2011x - (rj[0] - ri[0]) * hrr_1011x; - g3 = al*2 * aj*2 * hrr_1111x; - prod = g3 * 1 * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g1 = aj*2 * hrr_1110x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_1110x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * trr_11x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * 
wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * trr_11x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0111x = hrr_1011x - (rj[0] - ri[0]) * hrr_0011x; - g3 = al*2 * aj*2 * hrr_0111x; - prod = g3 * trr_10y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g1 = aj*2 * hrr_0110x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double 
hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = al*2 * aj*2 * hrr_1101y; - prod = g3 * trr_01x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * trr_01x * trr_10y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_0111x; - prod = g3 * 1 * trr_10z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0110x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * trr_01x * trr_10z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01x; - vk_13yz += prod * vk_dd; - vj_13yz += 
prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = al*2 * aj*2 * hrr_1101z; - prod = g3 * trr_01x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g3 = al*2 * aj*2 * hrr_1101x; - prod = g3 * trr_01y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g1 = aj*2 * hrr_1100x; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g1 = aj*2 * hrr_0110y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - 
vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - double hrr_0111y = hrr_1011y - (rj[1] - ri[1]) * hrr_0011y; - g3 = al*2 * aj*2 * hrr_0111y; - prod = g3 * trr_10x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * trr_10x * trr_01y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * trr_11y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g1 = aj*2 * 
hrr_1110y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; - double hrr_1111y = hrr_2011y - (rj[1] - ri[1]) * hrr_1011y; - g3 = al*2 * aj*2 * hrr_1111y; - prod = g3 * fac * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1110y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * fac * trr_11y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * trr_01y * trr_10z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0111y; - prod = g3 * 
fac * trr_10z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0110y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_1101z; - prod = g3 * fac * trr_01y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * aj*2 * hrr_1101x; - prod = g3 * 1 * trr_01z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * trr_10x * trr_01z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * 
trr_01z; - g1 = aj*2 * hrr_0110z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - double hrr_0111z = hrr_1011z - (rj[2] - ri[2]) * hrr_0011z; - g3 = al*2 * aj*2 * hrr_0111z; - prod = g3 * trr_10x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * trr_10y * trr_01z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_1101y; - prod = g3 * fac * trr_01z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0110z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * 
hrr_0110z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0111z; - prod = g3 * fac * trr_10y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * 1 * trr_11z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * fac * trr_11z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g1 = aj*2 * hrr_1110z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1110z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_2011z = trr_22z - zlzk * trr_21z; - double hrr_1111z = hrr_2011z - (rj[2] - ri[2]) * hrr_1011z; - 
g3 = al*2 * aj*2 * hrr_1111z; - prod = g3 * fac * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+la)*9 + 0, vk_13xx); - atomicAdd(vk + (ja*natm+la)*9 + 1, vk_13xy); - atomicAdd(vk + (ja*natm+la)*9 + 2, vk_13xz); - atomicAdd(vk + (ja*natm+la)*9 + 3, vk_13yx); - atomicAdd(vk + (ja*natm+la)*9 + 4, vk_13yy); - atomicAdd(vk + (ja*natm+la)*9 + 5, vk_13yz); - atomicAdd(vk + (ja*natm+la)*9 + 6, vk_13zx); - atomicAdd(vk + (ja*natm+la)*9 + 7, vk_13zy); - atomicAdd(vk + (ja*natm+la)*9 + 8, vk_13zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+la)*9 + 0, vj_13xx); - atomicAdd(vj + (ja*natm+la)*9 + 1, vj_13xy); - atomicAdd(vj + (ja*natm+la)*9 + 2, vj_13xz); - atomicAdd(vj + (ja*natm+la)*9 + 3, vj_13yx); - atomicAdd(vj + (ja*natm+la)*9 + 4, vj_13yy); - atomicAdd(vj + (ja*natm+la)*9 + 5, vj_13yz); - atomicAdd(vj + (ja*natm+la)*9 + 6, vj_13zx); - atomicAdd(vj + (ja*natm+la)*9 + 7, vj_13zy); - atomicAdd(vj + (ja*natm+la)*9 + 8, vj_13zz); - } - - double vk_20xx = 0; - double vj_20xx = 0; - double vk_20xy = 0; - double vj_20xy = 0; - double vk_20xz = 0; - double vj_20xz = 0; - double vk_20yx = 0; - double vj_20yx = 0; - double vk_20yy = 0; - double vj_20yy = 0; - double vk_20yz = 0; - double vj_20yz = 0; - double vk_20zx = 0; - double vj_20zx = 0; - double vk_20zy = 0; - double vj_20zy = 0; - double vk_20zz = 0; - double vj_20zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; 
- double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double 
trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - g3 = ai*2 * (ak*2 * trr_22x - 1 * trr_20x); - double trr_01x = cpx * fac; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - g3 -= 1 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * 1 * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * trr_11x * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * trr_11x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * 
vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ak*2 * trr_12x - 1 * trr_10x); - prod = g3 * trr_10y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ai*2 * ak*2 * trr_21y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * trr_01x * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_01x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * trr_01x * trr_10y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if 
(jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ak*2 * trr_12x - 1 * trr_10x); - prod = g3 * 1 * trr_10z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * trr_01x * trr_10z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_01x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ai*2 * ak*2 * trr_21z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * trr_01x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; 
- } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * ak*2 * trr_21x; - g3 -= 1 * ak*2 * trr_01x; - prod = g3 * trr_01y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_01y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - g3 = ai*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * trr_10x * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * trr_10x * trr_01y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = 
dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * trr_11y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - g3 = ai*2 * (ak*2 * trr_22y - 1 * trr_20y); - g3 -= 1 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * fac * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * fac * trr_11y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * trr_01y * trr_10z; - vk_20xx += prod * vk_dd; - 
vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * fac * trr_10z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_21z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * fac * trr_01y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * ak*2 * trr_21x; - g3 -= 1 * ak*2 * trr_01x; - prod = g3 * 1 * trr_01z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_01z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * 1; - vk_20xz += 
prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * trr_10x * trr_01z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - g3 = ai*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * trr_10x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * trr_10y * trr_01z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01z; - vk_20yx += prod * vk_dd; - 
vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_21y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * fac * trr_01z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * fac * trr_10y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * 1 * trr_11z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * fac * trr_11z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz 
+= prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - g3 = ai*2 * (ak*2 * trr_22z - 1 * trr_20z); - g3 -= 1 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * fac * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ia)*9 + 0, vk_20xx); - atomicAdd(vk + (ka*natm+ia)*9 + 1, vk_20xy); - atomicAdd(vk + (ka*natm+ia)*9 + 2, vk_20xz); - atomicAdd(vk + (ka*natm+ia)*9 + 3, vk_20yx); - atomicAdd(vk + (ka*natm+ia)*9 + 4, vk_20yy); - atomicAdd(vk + (ka*natm+ia)*9 + 5, vk_20yz); - atomicAdd(vk + (ka*natm+ia)*9 + 6, vk_20zx); - atomicAdd(vk + (ka*natm+ia)*9 + 7, vk_20zy); - atomicAdd(vk + (ka*natm+ia)*9 + 8, vk_20zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ia)*9 + 0, vj_20xx); - atomicAdd(vj + (ka*natm+ia)*9 + 1, vj_20xy); - atomicAdd(vj + (ka*natm+ia)*9 + 2, vj_20xz); - atomicAdd(vj + (ka*natm+ia)*9 + 3, vj_20yx); - atomicAdd(vj + (ka*natm+ia)*9 + 4, vj_20yy); - atomicAdd(vj + (ka*natm+ia)*9 + 5, vj_20yz); - atomicAdd(vj + (ka*natm+ia)*9 + 6, vj_20zx); - atomicAdd(vj + (ka*natm+ia)*9 + 7, vj_20zy); - atomicAdd(vj + (ka*natm+ia)*9 + 8, vj_20zz); - } - - double vk_21xx = 0; - double vj_21xx = 0; - double vk_21xy = 0; - double vj_21xy = 0; - double vk_21xz = 0; - double vj_21xz = 0; - double vk_21yx = 0; - double vj_21yx = 0; - double vk_21yy = 0; - double vj_21yy = 0; - double vk_21yz = 0; - double vj_21yz = 0; - double vk_21zx = 0; - double vj_21zx = 0; - double vk_21zy = 0; - double vj_21zy = 0; - double vk_21zz = 0; - double vj_21zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double 
akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; 
- } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1120x = trr_22x - (rj[0] - ri[0]) * trr_12x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 = aj*2 * (ak*2 * hrr_1120x - 1 * hrr_1100x); - prod = g3 * 1 * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * trr_11x * 
wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * trr_11x * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0120x = trr_12x - (rj[0] - ri[0]) * trr_02x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 = aj*2 * (ak*2 * hrr_0120x - 1 * hrr_0100x); - prod = g3 * trr_10y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - double hrr_0110x = trr_11x - (rj[0] 
- ri[0]) * trr_01x; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = aj*2 * ak*2 * hrr_1110y; - prod = g3 * trr_01x * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * trr_01x * trr_10y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (ak*2 * hrr_0120x - 1 * hrr_0100x); - prod = g3 * 1 * trr_10z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_21yx += 
prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * trr_01x * trr_10z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = aj*2 * ak*2 * hrr_1110z; - prod = g3 * trr_01x * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * ak*2 * hrr_1110x; - prod = g3 * trr_01y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_0120y = trr_12y - (rj[1] - ri[1]) * trr_02y; - g3 = aj*2 * 
(ak*2 * hrr_0120y - 1 * hrr_0100y); - prod = g3 * trr_10x * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * trr_10x * trr_01y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * trr_11y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_1120y = trr_22y - (rj[1] - ri[1]) * trr_12y; - g3 = aj*2 * (ak*2 * hrr_1120y - 1 * hrr_1100y); - prod = g3 * fac * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = aj*2 
* hrr_0100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * fac * trr_11y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * trr_01y * trr_10z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * (ak*2 * hrr_0120y - 1 * hrr_0100y); - prod = g3 * fac * trr_10z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * fac; - 
vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_1110z; - prod = g3 * fac * trr_01y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * ak*2 * hrr_1110x; - prod = g3 * 1 * trr_01z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * trr_10x * trr_01z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_0120z = trr_12z - (rj[2] - ri[2]) * trr_02z; - g3 = aj*2 * (ak*2 * hrr_0120z - 1 * hrr_0100z); - prod = g3 * trr_10x * 1; - vk_21zz += prod 
* vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * trr_10y * trr_01z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_1110y; - prod = g3 * fac * trr_01z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * (ak*2 * hrr_0120z - 1 * hrr_0100z); - prod = g3 * fac * trr_10y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * 1 * trr_11z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * fac * trr_11z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_1120z = trr_22z - (rj[2] - ri[2]) * trr_12z; - g3 = aj*2 * (ak*2 * hrr_1120z - 1 * hrr_1100z); - prod = g3 * fac * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ja)*9 + 0, vk_21xx); - atomicAdd(vk + (ka*natm+ja)*9 + 1, vk_21xy); - atomicAdd(vk + (ka*natm+ja)*9 + 2, vk_21xz); - atomicAdd(vk + (ka*natm+ja)*9 + 3, vk_21yx); - atomicAdd(vk + (ka*natm+ja)*9 + 4, vk_21yy); - atomicAdd(vk + (ka*natm+ja)*9 + 5, vk_21yz); - atomicAdd(vk + (ka*natm+ja)*9 + 6, vk_21zx); - atomicAdd(vk + (ka*natm+ja)*9 + 7, vk_21zy); - atomicAdd(vk 
+ (ka*natm+ja)*9 + 8, vk_21zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ja)*9 + 0, vj_21xx); - atomicAdd(vj + (ka*natm+ja)*9 + 1, vj_21xy); - atomicAdd(vj + (ka*natm+ja)*9 + 2, vj_21xz); - atomicAdd(vj + (ka*natm+ja)*9 + 3, vj_21yx); - atomicAdd(vj + (ka*natm+ja)*9 + 4, vj_21yy); - atomicAdd(vj + (ka*natm+ja)*9 + 5, vj_21yz); - atomicAdd(vj + (ka*natm+ja)*9 + 6, vj_21zx); - atomicAdd(vj + (ka*natm+ja)*9 + 7, vj_21zy); - atomicAdd(vj + (ka*natm+ja)*9 + 8, vj_21zz); - } - - double vk_22xx = 0; - double vj_22xx = 0; - double vk_22xy = 0; - double vj_22xy = 0; - double vk_22xz = 0; - double vj_22xz = 0; - double vk_22yx = 0; - double vj_22yx = 0; - double vk_22yy = 0; - double vj_22yy = 0; - double vk_22yz = 0; - double vj_22yz = 0; - double vk_22zx = 0; - double vj_22zx = 0; - double vk_22zy = 0; - double vj_22zy = 0; - double vk_22zz = 0; - double vj_22zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + 
zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; - g3 = ak*2 * (ak*2 * trr_13x - 3 * trr_11x); - prod = g3 * 1 * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; 
- prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * trr_11x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * trr_11x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; - g3 = ak*2 * (ak*2 * trr_03x - 3 * trr_01x); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double 
trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - g3 = ak*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * trr_01x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * trr_01x * trr_10y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_03x - 3 * trr_01x); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_22xy += prod * 
vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * trr_01x * trr_10z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - g3 = ak*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * trr_01x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * (ak*2 * trr_12x - 1 * trr_10x); - prod = g3 * trr_01y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01y; - vk_22xz += prod * vk_dd; - vj_22xz += prod 
* vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; - g3 = ak*2 * (ak*2 * trr_03y - 3 * trr_01y); - prod = g3 * trr_10x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * trr_10x * trr_01y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * trr_11y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 
* trr_02y; - g3 = ak*2 * (ak*2 * trr_13y - 3 * trr_11y); - prod = g3 * fac * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * fac * trr_11y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * trr_01y * trr_10z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_03y - 3 * trr_01y); - prod = g3 * fac * trr_10z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - 
vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * fac * trr_01y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * (ak*2 * trr_12x - 1 * trr_10x); - prod = g3 * 1 * trr_01z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * trr_10x * trr_01z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_22zy += prod * vk_dd; 
- vj_22zy += prod * vj_dd; - double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; - g3 = ak*2 * (ak*2 * trr_03z - 3 * trr_01z); - prod = g3 * trr_10x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * trr_10y * trr_01z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * fac * trr_01z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_03z - 3 * trr_01z); - prod = g3 * fac * trr_10y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * 
dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * 1 * trr_11z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * fac * trr_11z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; - g3 = ak*2 * (ak*2 * trr_13z - 3 * trr_11z); - prod = g3 * fac * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ka)*9 + 0, vk_22xx); - atomicAdd(vk + (ka*natm+ka)*9 + 1, vk_22xy); - atomicAdd(vk + (ka*natm+ka)*9 + 2, vk_22xz); - atomicAdd(vk + (ka*natm+ka)*9 + 3, vk_22yx); - atomicAdd(vk + 
(ka*natm+ka)*9 + 4, vk_22yy); - atomicAdd(vk + (ka*natm+ka)*9 + 5, vk_22yz); - atomicAdd(vk + (ka*natm+ka)*9 + 6, vk_22zx); - atomicAdd(vk + (ka*natm+ka)*9 + 7, vk_22zy); - atomicAdd(vk + (ka*natm+ka)*9 + 8, vk_22zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ka)*9 + 0, vj_22xx); - atomicAdd(vj + (ka*natm+ka)*9 + 1, vj_22xy); - atomicAdd(vj + (ka*natm+ka)*9 + 2, vj_22xz); - atomicAdd(vj + (ka*natm+ka)*9 + 3, vj_22yx); - atomicAdd(vj + (ka*natm+ka)*9 + 4, vj_22yy); - atomicAdd(vj + (ka*natm+ka)*9 + 5, vj_22yz); - atomicAdd(vj + (ka*natm+ka)*9 + 6, vj_22zx); - atomicAdd(vj + (ka*natm+ka)*9 + 7, vj_22zy); - atomicAdd(vj + (ka*natm+ka)*9 + 8, vj_22zz); - } - - double vk_23xx = 0; - double vj_23xx = 0; - double vk_23xy = 0; - double vj_23xy = 0; - double vk_23xz = 0; - double vj_23xz = 0; - double vk_23yx = 0; - double vj_23yx = 0; - double vk_23yy = 0; - double vj_23yy = 0; - double vk_23yz = 0; - double vj_23yz = 0; - double vk_23zx = 0; - double vj_23zx = 0; - double vk_23zy = 0; - double vj_23zy = 0; - double vk_23zz = 0; - double vj_23zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = 
Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; - double hrr_1021x = trr_13x - xlxk * trr_12x; - double 
hrr_1001x = trr_11x - xlxk * trr_10x; - g3 = al*2 * (ak*2 * hrr_1021x - 1 * hrr_1001x); - prod = g3 * 1 * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_12x; - g1 -= 1 * trr_10x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - double hrr_1011x = trr_12x - xlxk * trr_11x; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * trr_11x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * trr_11x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) 
{ - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; - double hrr_0021x = trr_03x - xlxk * trr_02x; - double hrr_0001x = trr_01x - xlxk * fac; - g3 = al*2 * (ak*2 * hrr_0021x - 1 * hrr_0001x); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = al*2 * ak*2 * hrr_1011y; - prod = g3 * trr_01x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * trr_01x * trr_10y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (ak*2 * hrr_0021x - 1 * hrr_0001x); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_02x; - g1 -= 1 * fac; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * trr_01x * trr_10z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = al*2 * ak*2 * hrr_1011z; - prod = g3 * trr_01x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * ak*2 * hrr_1011x; - prod = g3 * trr_01y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; - double hrr_0021y = trr_03y - ylyk * trr_02y; - g3 = al*2 * (ak*2 * hrr_0021y - 1 * hrr_0001y); - prod = g3 * trr_10x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * trr_10x * trr_01y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = 
dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - prod = g3 * trr_11y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; - double hrr_1021y = trr_13y - ylyk * trr_12y; - g3 = al*2 * (ak*2 * hrr_1021y - 1 * hrr_1001y); - prod = g3 * fac * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_12y; - g1 -= 1 * trr_10y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * fac * trr_11y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - prod 
= g3 * trr_01y * trr_10z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * (ak*2 * hrr_0021y - 1 * hrr_0001y); - prod = g3 * fac * trr_10z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_02y; - g1 -= 1 * 1; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_1011z; - prod = g3 * fac * trr_01y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * ak*2 * hrr_1011x; - prod = g3 * 1 * trr_01z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * 1; - vk_23xz += prod * 
vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * trr_10x * trr_01z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; - double hrr_0021z = trr_03z - zlzk * trr_02z; - g3 = al*2 * (ak*2 * hrr_0021z - 1 * hrr_0001z); - prod = g3 * trr_10x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - prod = g3 * trr_10y * trr_01z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * 
hrr_1011y; - prod = g3 * fac * trr_01z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_02z; - g1 -= 1 * wt; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * (ak*2 * hrr_0021z - 1 * hrr_0001z); - prod = g3 * fac * trr_10y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - prod = g3 * 1 * trr_11z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * fac * trr_11z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 
1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_12z; - g1 -= 1 * trr_10z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; - double hrr_1021z = trr_13z - zlzk * trr_12z; - g3 = al*2 * (ak*2 * hrr_1021z - 1 * hrr_1001z); - prod = g3 * fac * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+la)*9 + 0, vk_23xx); - atomicAdd(vk + (ka*natm+la)*9 + 1, vk_23xy); - atomicAdd(vk + (ka*natm+la)*9 + 2, vk_23xz); - atomicAdd(vk + (ka*natm+la)*9 + 3, vk_23yx); - atomicAdd(vk + (ka*natm+la)*9 + 4, vk_23yy); - atomicAdd(vk + (ka*natm+la)*9 + 5, vk_23yz); - atomicAdd(vk + (ka*natm+la)*9 + 6, vk_23zx); - atomicAdd(vk + (ka*natm+la)*9 + 7, vk_23zy); - atomicAdd(vk + (ka*natm+la)*9 + 8, vk_23zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+la)*9 + 0, vj_23xx); - atomicAdd(vj + (ka*natm+la)*9 + 1, vj_23xy); - atomicAdd(vj + (ka*natm+la)*9 + 2, vj_23xz); - atomicAdd(vj + (ka*natm+la)*9 + 3, vj_23yx); - atomicAdd(vj + (ka*natm+la)*9 + 4, vj_23yy); - atomicAdd(vj + (ka*natm+la)*9 + 5, vj_23yz); - atomicAdd(vj + (ka*natm+la)*9 + 6, vj_23zx); - atomicAdd(vj + (ka*natm+la)*9 + 7, vj_23zy); - atomicAdd(vj + (ka*natm+la)*9 + 8, vj_23zz); - } - - double vk_30xx = 0; - double vj_30xx = 0; - double vk_30xy = 0; - double vj_30xy = 0; - double vk_30xz = 0; - double vj_30xz = 0; - double vk_30yx = 0; - double vj_30yx = 0; - double vk_30yy = 0; - double vj_30yy = 0; - double vk_30yz = 0; - double vj_30yz = 0; - double vk_30zx = 0; - double vj_30zx = 0; - double vk_30zy = 0; - double vj_30zy = 0; - double vk_30zz = 0; - double vj_30zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = 
rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * 
dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = trr_22x - xlxk * trr_21x; - g3 = ai*2 * al*2 * hrr_2011x; - double trr_01x = cpx * fac; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g3 -= 1 * al*2 * hrr_0011x; - prod = g3 * 1 * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - g1 = al*2 * hrr_1011x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1011x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * trr_11x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11x; - vk_30yz += prod * 
vk_dd; - vj_30yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_21x; - g2 -= 1 * trr_01x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * trr_11x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1011x; - prod = g3 * trr_10y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - g3 = ai*2 * al*2 * hrr_2001y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * trr_01x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_10z; - 
prod = g1 * g2 * trr_01x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * trr_01x * trr_10y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1011x; - prod = g3 * 1 * trr_10z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * trr_10z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * trr_01x * trr_10z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_11x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * 
trr_10y; - prod = g1 * g2 * trr_01x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - g3 = ai*2 * al*2 * hrr_2001z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * trr_01x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2001x = trr_21x - xlxk * trr_20x; - g3 = ai*2 * al*2 * hrr_2001x; - double hrr_0001x = trr_01x - xlxk * fac; - g3 -= 1 * al*2 * hrr_0001x; - prod = g3 * trr_01y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g1 = al*2 * hrr_1001x; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_01y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g1 = al*2 * hrr_0011y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = ai*2 * al*2 * hrr_1011y; - prod = g3 * trr_10x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - 
g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * trr_10x * trr_01y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * trr_11y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_11y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; - g3 = ai*2 * al*2 * hrr_2011y; - g3 -= 1 * al*2 * hrr_0011y; - prod = g3 * fac * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * 
trr_21y; - g2 -= 1 * trr_01y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * fac * trr_11y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * trr_01y * trr_10z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * trr_10z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_01y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1011y; - prod = g3 * fac * trr_10z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_11y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_2001z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * fac * trr_01y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * 
dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * al*2 * hrr_2001x; - g3 -= 1 * al*2 * hrr_0001x; - prod = g3 * 1 * trr_01z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_01z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_01z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * trr_10x * trr_01z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g1 = al*2 * hrr_0011z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = ai*2 * al*2 * hrr_1011z; - prod = g3 * trr_10x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - 
vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * trr_10y * trr_01z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_01z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * trr_10y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_01z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_2001y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * fac * trr_01z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_11z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1011z; - prod = g3 * fac * trr_10y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * 
dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * 1 * trr_11z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_11z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_11z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * fac * trr_11z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_21z; - g2 -= 1 * trr_01z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_2011z = trr_22z - zlzk * trr_21z; - g3 = ai*2 * al*2 * hrr_2011z; - g3 -= 1 * al*2 * hrr_0011z; - prod = g3 * fac * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ia)*9 + 0, vk_30xx); - atomicAdd(vk + (la*natm+ia)*9 + 1, vk_30xy); - atomicAdd(vk + (la*natm+ia)*9 + 2, vk_30xz); - atomicAdd(vk + (la*natm+ia)*9 + 3, vk_30yx); - atomicAdd(vk + (la*natm+ia)*9 + 4, vk_30yy); - atomicAdd(vk + (la*natm+ia)*9 + 5, vk_30yz); - atomicAdd(vk + (la*natm+ia)*9 + 6, vk_30zx); - atomicAdd(vk + (la*natm+ia)*9 + 7, vk_30zy); - atomicAdd(vk + (la*natm+ia)*9 + 8, vk_30zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ia)*9 + 0, vj_30xx); - atomicAdd(vj + (la*natm+ia)*9 + 1, vj_30xy); - 
atomicAdd(vj + (la*natm+ia)*9 + 2, vj_30xz); - atomicAdd(vj + (la*natm+ia)*9 + 3, vj_30yx); - atomicAdd(vj + (la*natm+ia)*9 + 4, vj_30yy); - atomicAdd(vj + (la*natm+ia)*9 + 5, vj_30yz); - atomicAdd(vj + (la*natm+ia)*9 + 6, vj_30zx); - atomicAdd(vj + (la*natm+ia)*9 + 7, vj_30zy); - atomicAdd(vj + (la*natm+ia)*9 + 8, vj_30zz); - } - - double vk_31xx = 0; - double vj_31xx = 0; - double vk_31xy = 0; - double vj_31xy = 0; - double vk_31xz = 0; - double vj_31xz = 0; - double vk_31yx = 0; - double vj_31yx = 0; - double vk_31yy = 0; - double vj_31yy = 0; - double vk_31yz = 0; - double vj_31yz = 0; - double vk_31zx = 0; - double vj_31zx = 0; - double vk_31zy = 0; - double vj_31zy = 0; - double vk_31zz = 0; - double vj_31zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + 
ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = trr_22x - xlxk * trr_21x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - double hrr_1111x = hrr_2011x - (rj[0] - ri[0]) * hrr_1011x; - g3 = aj*2 * al*2 * hrr_1111x; - prod = g3 * 1 * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1011x; - double c0y = 
Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1011x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * trr_11x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * trr_11x * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * 
dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0111x = hrr_1011x - (rj[0] - ri[0]) * hrr_0011x; - g3 = aj*2 * al*2 * hrr_0111x; - prod = g3 * trr_10y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = aj*2 * al*2 * hrr_1101y; - prod = g3 * trr_01x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * trr_01x * trr_10y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * 
dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_0111x; - prod = g3 * 1 * trr_10z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * trr_01x * trr_10z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = aj*2 * al*2 * hrr_1101z; - prod = g3 * trr_01x * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - 
vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g3 = aj*2 * al*2 * hrr_1101x; - prod = g3 * trr_01y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_01y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g1 = al*2 * hrr_0011y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - double hrr_0111y = hrr_1011y - (rj[1] - ri[1]) * hrr_0011y; - g3 = aj*2 * al*2 * hrr_0111y; - prod = g3 * trr_10x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = 
g3 * trr_10x * trr_01y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * trr_11y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_11y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1011y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; - double hrr_1111y = hrr_2011y - (rj[1] - ri[1]) * hrr_1011y; - g3 = aj*2 * al*2 * hrr_1111y; - prod = g3 * fac * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * 
vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * fac * trr_11y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * trr_01y * trr_10z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * trr_01y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0111y; - prod = g3 * fac * trr_10z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_1101z; - prod = g3 * fac * trr_01y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * 
dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * al*2 * hrr_1101x; - prod = g3 * 1 * trr_01z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_01z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * trr_01z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * trr_10x * trr_01z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g1 = al*2 * hrr_0011z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - double hrr_0111z = hrr_1011z - (rj[2] - ri[2]) * hrr_0011z; - g3 = aj*2 * al*2 * hrr_0111z; - prod = g3 * trr_10x * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * 
dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * trr_10y * trr_01z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * trr_01z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_01z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_1101y; - prod = g3 * fac * trr_01z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0111z; - prod = g3 * fac * trr_10y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = aj*2 
* al*2 * hrr_0101x; - prod = g3 * 1 * trr_11z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_11z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_11z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * fac * trr_11z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_2011z = trr_22z - zlzk * trr_21z; - double hrr_1111z = hrr_2011z - (rj[2] - ri[2]) * hrr_1011z; - g3 = aj*2 * al*2 * hrr_1111z; - prod = g3 * fac * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ja)*9 + 0, vk_31xx); - atomicAdd(vk + (la*natm+ja)*9 + 1, vk_31xy); - atomicAdd(vk + (la*natm+ja)*9 + 2, vk_31xz); - atomicAdd(vk + (la*natm+ja)*9 + 3, vk_31yx); - atomicAdd(vk + (la*natm+ja)*9 + 4, vk_31yy); - atomicAdd(vk + (la*natm+ja)*9 + 5, vk_31yz); - atomicAdd(vk + (la*natm+ja)*9 + 6, vk_31zx); - atomicAdd(vk + (la*natm+ja)*9 + 7, vk_31zy); - atomicAdd(vk + (la*natm+ja)*9 + 8, vk_31zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ja)*9 + 0, vj_31xx); - atomicAdd(vj + (la*natm+ja)*9 + 1, vj_31xy); - atomicAdd(vj + (la*natm+ja)*9 + 2, vj_31xz); - atomicAdd(vj + (la*natm+ja)*9 + 3, 
vj_31yx); - atomicAdd(vj + (la*natm+ja)*9 + 4, vj_31yy); - atomicAdd(vj + (la*natm+ja)*9 + 5, vj_31yz); - atomicAdd(vj + (la*natm+ja)*9 + 6, vj_31zx); - atomicAdd(vj + (la*natm+ja)*9 + 7, vj_31zy); - atomicAdd(vj + (la*natm+ja)*9 + 8, vj_31zz); - } - - double vk_32xx = 0; - double vj_32xx = 0; - double vk_32xy = 0; - double vj_32xy = 0; - double vk_32xz = 0; - double vj_32xz = 0; - double vk_32yx = 0; - double vj_32yx = 0; - double vk_32yy = 0; - double vj_32yy = 0; - double vk_32yz = 0; - double vj_32yz = 0; - double vk_32zx = 0; - double vj_32zx = 0; - double vk_32zy = 0; - double vj_32zy = 0; - double vk_32zz = 0; - double vj_32zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - 
rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; - double hrr_1021x = trr_13x - xlxk * trr_12x; - g3 = ak*2 * al*2 * hrr_1021x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - g3 -= 1 * al*2 * hrr_1001x; - prod = g3 * 1 * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_1011x = trr_12x - xlxk * trr_11x; - g1 = al*2 * hrr_1011x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 
* hrr_1011x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * trr_11x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_12x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * trr_11x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; - double hrr_0021x = trr_03x - xlxk * trr_02x; - g3 = ak*2 * al*2 * hrr_0021x; - double hrr_0001x = trr_01x - xlxk * fac; - g3 -= 1 * al*2 * hrr_0001x; - double c0y = Rpa[1*TILE2+sh_ij] - 
ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_0011x = trr_02x - xlxk * trr_01x; - g1 = al*2 * hrr_0011x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = ak*2 * al*2 * hrr_1011y; - prod = g3 * trr_01x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * trr_01x * trr_10y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_0021x; - g3 -= 1 * al*2 * 
hrr_0001x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * trr_01x * trr_10z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_02x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = ak*2 * al*2 * hrr_1011z; - prod = g3 * trr_01x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * al*2 * hrr_1011x; - prod = g3 * trr_01y 
* wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_01y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; - double hrr_0021y = trr_03y - ylyk * trr_02y; - g3 = ak*2 * al*2 * hrr_0021y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * trr_10x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * trr_10x * trr_01y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * trr_11y * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += 
prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_11y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; - double hrr_1021y = trr_13y - ylyk * trr_12y; - g3 = ak*2 * al*2 * hrr_1021y; - g3 -= 1 * al*2 * hrr_1001y; - prod = g3 * fac * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_12y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * fac * trr_11y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * trr_01y * trr_10z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * trr_01y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 
= ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0021y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * fac * trr_10z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_02y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_1011z; - prod = g3 * fac * trr_01y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * al*2 * hrr_1011x; - prod = g3 * 1 * trr_01z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_01z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * trr_01z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * trr_10x * trr_01z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * 
g2 * trr_10x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; - double hrr_0021z = trr_03z - zlzk * trr_02z; - g3 = ak*2 * al*2 * hrr_0021z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * trr_10x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * trr_10y * trr_01z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * trr_01z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_01z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_1011y; - prod = g3 * fac * trr_01z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_02z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * 
vj_dd; - g1 = al*2 * hrr_0011z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0021z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * fac * trr_10y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * 1 * trr_11z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_11z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_11z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * fac * trr_11z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_12z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; - double hrr_1021z = trr_13z - zlzk * trr_12z; - g3 = ak*2 * al*2 * hrr_1021z; - g3 -= 1 * 
al*2 * hrr_1001z; - prod = g3 * fac * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ka)*9 + 0, vk_32xx); - atomicAdd(vk + (la*natm+ka)*9 + 1, vk_32xy); - atomicAdd(vk + (la*natm+ka)*9 + 2, vk_32xz); - atomicAdd(vk + (la*natm+ka)*9 + 3, vk_32yx); - atomicAdd(vk + (la*natm+ka)*9 + 4, vk_32yy); - atomicAdd(vk + (la*natm+ka)*9 + 5, vk_32yz); - atomicAdd(vk + (la*natm+ka)*9 + 6, vk_32zx); - atomicAdd(vk + (la*natm+ka)*9 + 7, vk_32zy); - atomicAdd(vk + (la*natm+ka)*9 + 8, vk_32zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ka)*9 + 0, vj_32xx); - atomicAdd(vj + (la*natm+ka)*9 + 1, vj_32xy); - atomicAdd(vj + (la*natm+ka)*9 + 2, vj_32xz); - atomicAdd(vj + (la*natm+ka)*9 + 3, vj_32yx); - atomicAdd(vj + (la*natm+ka)*9 + 4, vj_32yy); - atomicAdd(vj + (la*natm+ka)*9 + 5, vj_32yz); - atomicAdd(vj + (la*natm+ka)*9 + 6, vj_32zx); - atomicAdd(vj + (la*natm+ka)*9 + 7, vj_32zy); - atomicAdd(vj + (la*natm+ka)*9 + 8, vj_32zz); - } - - double vk_33xx = 0; - double vj_33xx = 0; - double vk_33xy = 0; - double vj_33xy = 0; - double vk_33xz = 0; - double vj_33xz = 0; - double vk_33yx = 0; - double vj_33yx = 0; - double vk_33yy = 0; - double vj_33yy = 0; - double vk_33yz = 0; - double vj_33yz = 0; - double vk_33zx = 0; - double vj_33zx = 0; - double vk_33zy = 0; - double vj_33zy = 0; - double vk_33zz = 0; - double vj_33zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl 
= rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b00 = .5 * rt_aa; - double trr_11x = cpx * trr_10x 
+ 1*b00 * fac; - double b01 = .5/akl * (1 - rt_akl); - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; - double hrr_1021x = trr_13x - xlxk * trr_12x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; - g3 = al*2 * (al*2 * hrr_1012x - 1 * trr_11x); - prod = g3 * 1 * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_1011x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1011x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * trr_11x * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1011x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * trr_11x * 1; 
- vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; - double hrr_0021x = trr_03x - xlxk * trr_02x; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; - g3 = al*2 * (al*2 * hrr_0012x - 1 * trr_01x); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; - g3 = al*2 * (al*2 * hrr_1002y - 1 * trr_10y); - prod = g3 * trr_01x * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10y; - vk_33zx += prod * 
vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * trr_01x * trr_10y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_0012x - 1 * trr_01x); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0011x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0011x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * trr_10z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * trr_01x * trr_10z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0011x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01x; - vk_33zy += prod 
* vk_dd; - vj_33zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; - g3 = al*2 * (al*2 * hrr_1002z - 1 * trr_10z); - prod = g3 * trr_01x * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; - g3 = al*2 * (al*2 * hrr_1002x - 1 * trr_10x); - prod = g3 * trr_01y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_01y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; - double hrr_0021y = trr_03y - ylyk * trr_02y; - double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; - g3 = al*2 * (al*2 * hrr_0012y - 1 * trr_01y); - prod = g3 * trr_10x * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01y; - vk_33zx += prod * vk_dd; - 
vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * trr_10x * trr_01y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * trr_11y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_11y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; - double hrr_1021y = trr_13y - ylyk * trr_12y; - double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; - g3 = al*2 * (al*2 * hrr_1012y - 1 * trr_11y); - prod = g3 * fac * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1011y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * 
hrr_0001z; - g2 = al*2 * hrr_1011y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * fac * trr_11y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+1]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+1] + dm[(nao+l0+0)*nao+k0+1]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * trr_01y * trr_10z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * trr_10z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * trr_01y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0012y - 1 * trr_01y); - prod = g3 * fac * trr_10z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0011y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0011y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_1002z - 1 * trr_10z); - prod = g3 * fac * trr_01y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * 
dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * (al*2 * hrr_1002x - 1 * trr_10x); - prod = g3 * 1 * trr_01z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_01z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * trr_01z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * trr_10x * trr_01z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; - double hrr_0021z = trr_03z - zlzk * trr_02z; - double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; - g3 = al*2 * (al*2 * hrr_0012z - 1 * trr_01z); - prod = g3 * trr_10x * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * trr_10y * trr_01z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * trr_01z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * trr_10y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_01z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_1002y - 1 * trr_10y); - prod = g3 * fac * trr_01z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0011z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0011z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0012z - 1 * trr_01z); - prod = g3 * fac * trr_10y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+2]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * 
(dm[(l0+0)*nao+k0+2] + dm[(nao+l0+0)*nao+k0+2]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * 1 * trr_11z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_11z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_11z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * fac * trr_11z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1011z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1011z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; - double hrr_1021z = trr_13z - zlzk * trr_12z; - double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; - g3 = al*2 * (al*2 * hrr_1012z - 1 * trr_11z); - prod = g3 * fac * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+la)*9 + 0, vk_33xx); - atomicAdd(vk + (la*natm+la)*9 + 1, vk_33xy); - atomicAdd(vk + (la*natm+la)*9 + 2, vk_33xz); - atomicAdd(vk + (la*natm+la)*9 + 3, vk_33yx); - atomicAdd(vk + (la*natm+la)*9 + 4, vk_33yy); - atomicAdd(vk + (la*natm+la)*9 + 5, vk_33yz); - atomicAdd(vk + (la*natm+la)*9 + 6, vk_33zx); - atomicAdd(vk + (la*natm+la)*9 + 7, vk_33zy); - atomicAdd(vk + (la*natm+la)*9 + 8, vk_33zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+la)*9 + 0, vj_33xx); - atomicAdd(vj + (la*natm+la)*9 + 1, vj_33xy); - atomicAdd(vj + (la*natm+la)*9 + 2, vj_33xz); - 
atomicAdd(vj + (la*natm+la)*9 + 3, vj_33yx); - atomicAdd(vj + (la*natm+la)*9 + 4, vj_33yy); - atomicAdd(vj + (la*natm+la)*9 + 5, vj_33yz); - atomicAdd(vj + (la*natm+la)*9 + 6, vj_33zx); - atomicAdd(vj + (la*natm+la)*9 + 7, vj_33zy); - atomicAdd(vj + (la*natm+la)*9 + 8, vj_33zz); - } - } -} -__global__ -void rys_ejk_ip2_1010(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *pool, uint32_t *batch_head) -{ - int b_id = blockIdx.x; - int t_id = threadIdx.x + blockDim.x * threadIdx.y; - ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; - __shared__ int batch_id; - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - } - __syncthreads(); - int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; - int nbatches = bounds.ntile_ij_pairs * nbatches_kl; - while (batch_id < nbatches) { - int batch_ij = batch_id / nbatches_kl; - int batch_kl = batch_id % nbatches_kl; - int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, - batch_ij, batch_kl); - if (ntasks > 0) { - int tile_ij = bounds.tile_ij_mapping[batch_ij]; - int nbas_tiles = nbas / TILE; - int tile_i = tile_ij / nbas_tiles; - int tile_j = tile_ij % nbas_tiles; - int ish0 = tile_i * TILE; - int jsh0 = tile_j * TILE; - _rys_ejk_ip2_1010(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); - } - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - atomicAdd(batch_head+1, ntasks); - } - __syncthreads(); - } -} - -__device__ static -void _rys_ejk_ip2_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) -{ - int sq_id = threadIdx.x + blockDim.x * threadIdx.y; - int nsq_per_block = blockDim.x * blockDim.y; - int iprim = bounds.iprim; - int jprim = bounds.jprim; - int kprim = bounds.kprim; - int lprim = bounds.lprim; - int *ao_loc = envs.ao_loc; - int nbas = envs.nbas; - int nao = ao_loc[nbas]; - int *bas = envs.bas; - double *env = envs.env; - double omega = 
env[PTR_RANGE_OMEGA]; - double *vj = jk.vj; - double *vk = jk.vk; - double *dm = jk.dm; - extern __shared__ double dm_cache[]; - double *Rpa_cicj = dm_cache + 9 * TILE2; - double *rw = Rpa_cicj + iprim*jprim*TILE2*4; - for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { - int ijp = n / TILE2; - int sh_ij = n % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int ip = ijp / jprim; - int jp = ijp % jprim; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; - double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double aj_aij = aj / aij; - double xjxi = rj[0] - ri[0]; - double yjyi = rj[1] - ri[1]; - double zjzi = rj[2] - ri[2]; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; - Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; - Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; - double theta_ij = ai * aj / aij; - double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); - Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; - } - - int ij = sq_id / TILE2; - if (ij < 9) { - int i = ij % 3; - int j = ij / 3; - int sh_ij = sq_id % TILE2; - int ish = ish0 + sh_ij / TILE; - int jsh = jsh0 + sh_ij % TILE; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - if (jk.n_dm == 1) { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i]; - } else { - dm_cache[sh_ij+ij*TILE2] = dm[(j0+j)*nao+i0+i] + dm[(nao+j0+j)*nao+i0+i]; - } - } - - for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { - __syncthreads(); - int task_id = task0 + sq_id; - double fac_sym = PI_FAC; - ShellQuartet sq; - if (task_id >= ntasks) { - // To avoid __syncthreads blocking blocking idle warps, all remaining - // threads compute a valid shell quartet with zero normalization factor - sq = 
shl_quartet_idx[0]; - fac_sym = 0.; - } else { - sq = shl_quartet_idx[task_id]; - } - int ish = sq.i; - int jsh = sq.j; - int ksh = sq.k; - int lsh = sq.l; - int sh_ij = (ish % TILE) * TILE + (jsh % TILE); - if (ish == jsh) fac_sym *= .5; - if (ksh == lsh) fac_sym *= .5; - if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; - int i0 = ao_loc[ish]; - int j0 = ao_loc[jsh]; - int k0 = ao_loc[ksh]; - int l0 = ao_loc[lsh]; - int natm = envs.natm; - int ia = bas[ish*BAS_SLOTS+ATOM_OF]; - int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; - int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; - int la = bas[lsh*BAS_SLOTS+ATOM_OF]; - double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; - double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; - double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; - double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; - double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; - double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; - double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; - double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; - double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; - double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; - double dd_jk, dd_jl, vj_dd, vk_dd; - double g1, g2, g3, prod; - - - double vk_00xx = 0; - double vj_00xx = 0; - double vk_00xy = 0; - double vj_00xy = 0; - double vk_00xz = 0; - double vj_00xz = 0; - double vk_00yx = 0; - double vj_00yx = 0; - double vk_00yy = 0; - double vj_00yy = 0; - double vk_00yz = 0; - double vj_00yz = 0; - double vk_00zx = 0; - double vj_00zx = 0; - double vk_00zy = 0; - double vj_00zy = 0; - double vk_00zz = 0; - double vj_00zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = 
fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_aij = rt_aa * akl; - double 
c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; - double hrr_3100x = trr_40x - (rj[0] - ri[0]) * trr_30x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 = ai*2 * (ai*2 * hrr_3100x - 3 * hrr_1100x); - prod = g3 * 1 * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g1 = ai*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 -= 1 * hrr_0100x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * hrr_2100x; - g1 -= 1 * hrr_0100x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * hrr_2100x; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * hrr_1100x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * hrr_2100x; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * hrr_1100x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = 
dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - prod = g3 * trr_10y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - g3 = ai*2 * (ai*2 * trr_30y - 3 * trr_10y); - prod = g3 * hrr_0100x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * hrr_0100x * trr_10y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * 
dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - prod = g3 * 1 * trr_10z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * hrr_0100x * trr_10z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - g3 = ai*2 * (ai*2 * trr_30z - 3 * trr_10z); - prod = g3 * hrr_0100x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] 
* dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_30x - 3 * trr_10x); - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g3 = ai*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); - prod = g3 * trr_10x * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * trr_10x * hrr_0100y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * 
(dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * hrr_1100y * wt; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * wt; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; - double hrr_3100y = trr_40y - (rj[1] - ri[1]) * trr_30y; - g3 = ai*2 * (ai*2 * hrr_3100y - 3 * hrr_1100y); - prod = g3 * fac * wt; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * fac * hrr_1100y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * hrr_0100y * 
trr_10z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100y; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); - prod = g3 * fac * trr_10z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_30z - 3 * trr_10z); - prod = g3 * fac * hrr_0100y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_30x - 3 * trr_10x); - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100z; - vk_00xy += prod * vk_dd; - 
vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * trr_10x * hrr_0100z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10x; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = ai*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * trr_10x * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * trr_10y * hrr_0100z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10y; - vk_00xz += prod * vk_dd; 
- vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_30y - 3 * trr_10y); - prod = g3 * fac * hrr_0100z; - vk_00yy += prod * vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - g3 = ai*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * fac * trr_10y; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * 1 * hrr_1100z; - vk_00xx += prod * vk_dd; - vj_00xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100z; - vk_00xy += prod * vk_dd; - vj_00xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * 1; - vk_00xz += prod * vk_dd; - vj_00xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100z; - vk_00yx += prod * vk_dd; - vj_00yx += prod * vj_dd; - g3 = ai*2 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * fac * hrr_1100z; - vk_00yy += prod 
* vk_dd; - vj_00yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * fac; - vk_00yz += prod * vk_dd; - vj_00yz += prod * vj_dd; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_00zx += prod * vk_dd; - vj_00zx += prod * vj_dd; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_00zy += prod * vk_dd; - vj_00zy += prod * vj_dd; - double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; - double hrr_3100z = trr_40z - (rj[2] - ri[2]) * trr_30z; - g3 = ai*2 * (ai*2 * hrr_3100z - 3 * hrr_1100z); - prod = g3 * fac * 1; - vk_00zz += prod * vk_dd; - vj_00zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ia)*9 + 0, vk_00xx); - atomicAdd(vk + (ia*natm+ia)*9 + 1, vk_00xy); - atomicAdd(vk + (ia*natm+ia)*9 + 2, vk_00xz); - atomicAdd(vk + (ia*natm+ia)*9 + 3, vk_00yx); - atomicAdd(vk + (ia*natm+ia)*9 + 4, vk_00yy); - atomicAdd(vk + (ia*natm+ia)*9 + 5, vk_00yz); - atomicAdd(vk + (ia*natm+ia)*9 + 6, vk_00zx); - atomicAdd(vk + (ia*natm+ia)*9 + 7, vk_00zy); - atomicAdd(vk + (ia*natm+ia)*9 + 8, vk_00zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ia)*9 + 0, vj_00xx); - atomicAdd(vj + (ia*natm+ia)*9 + 1, vj_00xy); - atomicAdd(vj + (ia*natm+ia)*9 + 2, vj_00xz); - atomicAdd(vj + (ia*natm+ia)*9 + 3, vj_00yx); - atomicAdd(vj + (ia*natm+ia)*9 + 4, vj_00yy); - atomicAdd(vj + (ia*natm+ia)*9 + 5, vj_00yz); - atomicAdd(vj + (ia*natm+ia)*9 + 6, vj_00zx); - atomicAdd(vj + (ia*natm+ia)*9 + 7, vj_00zy); - atomicAdd(vj + (ia*natm+ia)*9 + 8, vj_00zz); - } - - double vk_01xx = 0; - double vj_01xx = 0; - double vk_01xy = 0; - double vj_01xy = 0; - double vk_01xz = 0; - double vj_01xz = 0; - double vk_01yx = 0; - double vj_01yx = 0; - double vk_01yy = 0; - double vj_01yy = 0; - double vk_01yz = 0; - double vj_01yz = 0; - double vk_01zx = 0; - double vj_01zx = 0; - double vk_01zy = 0; - double vj_01zy = 0; - double vk_01zz 
= 0; - double vj_01zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = 
dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; - double hrr_3100x = trr_40x - (rj[0] - ri[0]) * trr_30x; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_2200x = hrr_3100x - (rj[0] - ri[0]) * hrr_2100x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g3 = aj*2 * (ai*2 * hrr_2200x - 1 * hrr_0200x); - g3 -= 1 * (ai*2 * trr_20x - 1 * fac); - prod = g3 * 1 * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * hrr_2100x; - g1 -= 1 * hrr_0100x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * hrr_2100x; - g1 -= 1 * hrr_0100x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_20y = c0y 
* trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * hrr_1100x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * hrr_1100x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1200x; - g3 -= 1 * ai*2 * trr_10x; - prod = g3 * trr_10y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - 
(rj[1] - ri[1]) * trr_20y; - g3 = aj*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); - prod = g3 * hrr_0100x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * hrr_0100x * trr_10y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1200x; - g3 -= 1 * ai*2 * trr_10x; - prod = g3 * 1 * trr_10z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * hrr_0100x * trr_10z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100x; - vk_01yz += 
prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = aj*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * hrr_0100x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - prod = g3 * hrr_0100y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g3 = aj*2 * ai*2 * hrr_1200y; - g3 -= 1 * ai*2 * trr_10y; - prod = g3 * trr_10x * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = aj*2 * 
hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * trr_10x * hrr_0100y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * hrr_1100y * wt; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; - double hrr_3100y = trr_40y - (rj[1] - ri[1]) * trr_30y; - double hrr_2200y = hrr_3100y - (rj[1] - ri[1]) * hrr_2100y; - g3 = aj*2 * (ai*2 * hrr_2200y - 1 * hrr_0200y); - g3 -= 1 * (ai*2 * trr_20y - 1 * 1); - prod = g3 * fac * wt; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 
fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100z; - prod = g3 * fac * hrr_1100y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * hrr_0100y * trr_10z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1200y; - g3 -= 1 * ai*2 * trr_10y; - prod = g3 * fac * trr_10z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0200y; 
- g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * (ai*2 * hrr_2100z - 1 * hrr_0100z); - prod = g3 * fac * hrr_0100y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (ai*2 * hrr_2100x - 1 * hrr_0100x); - prod = g3 * 1 * hrr_0100z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * trr_10x * hrr_0100z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g3 = aj*2 * ai*2 * 
hrr_1200z; - g3 -= 1 * ai*2 * trr_10z; - prod = g3 * trr_10x * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * trr_10y * hrr_0100z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * (ai*2 * hrr_2100y - 1 * hrr_0100y); - prod = g3 * fac * hrr_0100z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1200z; - g3 -= 1 * ai*2 * trr_10z; - prod = g3 * fac * trr_10y; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * 
dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ai*2 * hrr_1100x; - prod = g3 * 1 * hrr_1100z; - vk_01xx += prod * vk_dd; - vj_01xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100z; - vk_01xy += prod * vk_dd; - vj_01xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_01xz += prod * vk_dd; - vj_01xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100z; - vk_01yx += prod * vk_dd; - vj_01yx += prod * vj_dd; - g3 = aj*2 * ai*2 * hrr_1100y; - prod = g3 * fac * hrr_1100z; - vk_01yy += prod * vk_dd; - vj_01yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_01yz += prod * vk_dd; - vj_01yz += prod * vj_dd; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_01zx += prod * vk_dd; - vj_01zx += prod * vj_dd; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_01zy += prod * vk_dd; - vj_01zy += prod * vj_dd; - double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; - double hrr_3100z = trr_40z - (rj[2] - ri[2]) * trr_30z; - double hrr_2200z = hrr_3100z - (rj[2] - ri[2]) * hrr_2100z; - g3 = aj*2 * (ai*2 * hrr_2200z - 1 * hrr_0200z); - g3 -= 1 * (ai*2 * trr_20z - 1 * wt); - prod = g3 * fac * 1; - vk_01zz += prod * vk_dd; - vj_01zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ja)*9 + 0, vk_01xx); - atomicAdd(vk + (ia*natm+ja)*9 + 1, vk_01xy); - atomicAdd(vk + 
(ia*natm+ja)*9 + 2, vk_01xz); - atomicAdd(vk + (ia*natm+ja)*9 + 3, vk_01yx); - atomicAdd(vk + (ia*natm+ja)*9 + 4, vk_01yy); - atomicAdd(vk + (ia*natm+ja)*9 + 5, vk_01yz); - atomicAdd(vk + (ia*natm+ja)*9 + 6, vk_01zx); - atomicAdd(vk + (ia*natm+ja)*9 + 7, vk_01zy); - atomicAdd(vk + (ia*natm+ja)*9 + 8, vk_01zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ja)*9 + 0, vj_01xx); - atomicAdd(vj + (ia*natm+ja)*9 + 1, vj_01xy); - atomicAdd(vj + (ia*natm+ja)*9 + 2, vj_01xz); - atomicAdd(vj + (ia*natm+ja)*9 + 3, vj_01yx); - atomicAdd(vj + (ia*natm+ja)*9 + 4, vj_01yy); - atomicAdd(vj + (ia*natm+ja)*9 + 5, vj_01yz); - atomicAdd(vj + (ia*natm+ja)*9 + 6, vj_01zx); - atomicAdd(vj + (ia*natm+ja)*9 + 7, vj_01zy); - atomicAdd(vj + (ia*natm+ja)*9 + 8, vj_01zz); - } - - double vk_02xx = 0; - double vj_02xx = 0; - double vk_02xy = 0; - double vj_02xy = 0; - double vk_02xz = 0; - double vj_02xz = 0; - double vk_02yx = 0; - double vj_02yx = 0; - double vk_02yy = 0; - double vj_02yy = 0; - double vk_02yz = 0; - double vj_02yz = 0; - double vk_02zx = 0; - double vj_02zx = 0; - double vk_02zy = 0; - double vj_02zy = 0; - double vk_02zz = 0; - double vj_02zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = 
Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - 
(rj[0] - ri[0]) * trr_21x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 = ak*2 * (ai*2 * hrr_2110x - 1 * hrr_0110x); - prod = g3 * 1 * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g1 = ai*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 -= 1 * hrr_0100x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * hrr_2100x; - g1 -= 1 * hrr_0100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ak*2 * ai*2 * trr_11y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - prod = g3 * hrr_1100x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * hrr_1100x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = 
dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * hrr_1110x; - prod = g3 * trr_10y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ak*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * hrr_0100x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * hrr_0100x * trr_10y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * hrr_1110x; - prod = g3 * 1 * trr_10z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * hrr_0100x * trr_10z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ak*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * hrr_0100x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * 
(dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ai*2 * trr_21x - 1 * trr_01x); - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = ai*2 * hrr_1100y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = ak*2 * ai*2 * hrr_1110y; - prod = g3 * trr_10x * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * trr_10x * hrr_0100y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 
* trr_11x; - prod = g3 * hrr_1100y * wt; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - g3 = ak*2 * (ai*2 * hrr_2110y - 1 * hrr_0110y); - prod = g3 * fac * wt; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11z; - prod = g3 * fac * hrr_1100y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * hrr_0100y * trr_10z; - vk_02xx += prod * vk_dd; - vj_02xx 
+= prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * hrr_1110y; - prod = g3 * fac * trr_10z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * (ai*2 * trr_21z - 1 * trr_01z); - prod = g3 * fac * hrr_0100y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ai*2 * trr_21x - 1 * trr_01x); - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_0110z = trr_11z - 
(rj[2] - ri[2]) * trr_01z; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * trr_10x * hrr_0100z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = ai*2 * hrr_1100z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = ak*2 * ai*2 * hrr_1110z; - prod = g3 * trr_10x * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * trr_10y * hrr_0100z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100z; - 
vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * (ai*2 * trr_21y - 1 * trr_01y); - prod = g3 * fac * hrr_0100z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - g3 = ak*2 * ai*2 * hrr_1110z; - prod = g3 * fac * trr_10y; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * ai*2 * trr_11x; - prod = g3 * 1 * hrr_1100z; - vk_02xx += prod * vk_dd; - vj_02xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100z; - vk_02xy += prod * vk_dd; - vj_02xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_02xz += prod * vk_dd; - vj_02xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100z; - vk_02yx += prod * vk_dd; - vj_02yx += prod * vj_dd; - g3 = ak*2 * ai*2 * trr_11y; - prod = g3 * fac * hrr_1100z; - vk_02yy += prod * vk_dd; - vj_02yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_02yz += prod * vk_dd; - vj_02yz += prod * vj_dd; - double trr_30z = c0z * trr_20z + 
2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_02zx += prod * vk_dd; - vj_02zx += prod * vj_dd; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_02zy += prod * vk_dd; - vj_02zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - g3 = ak*2 * (ai*2 * hrr_2110z - 1 * hrr_0110z); - prod = g3 * fac * 1; - vk_02zz += prod * vk_dd; - vj_02zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+ka)*9 + 0, vk_02xx); - atomicAdd(vk + (ia*natm+ka)*9 + 1, vk_02xy); - atomicAdd(vk + (ia*natm+ka)*9 + 2, vk_02xz); - atomicAdd(vk + (ia*natm+ka)*9 + 3, vk_02yx); - atomicAdd(vk + (ia*natm+ka)*9 + 4, vk_02yy); - atomicAdd(vk + (ia*natm+ka)*9 + 5, vk_02yz); - atomicAdd(vk + (ia*natm+ka)*9 + 6, vk_02zx); - atomicAdd(vk + (ia*natm+ka)*9 + 7, vk_02zy); - atomicAdd(vk + (ia*natm+ka)*9 + 8, vk_02zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+ka)*9 + 0, vj_02xx); - atomicAdd(vj + (ia*natm+ka)*9 + 1, vj_02xy); - atomicAdd(vj + (ia*natm+ka)*9 + 2, vj_02xz); - atomicAdd(vj + (ia*natm+ka)*9 + 3, vj_02yx); - atomicAdd(vj + (ia*natm+ka)*9 + 4, vj_02yy); - atomicAdd(vj + (ia*natm+ka)*9 + 5, vj_02yz); - atomicAdd(vj + (ia*natm+ka)*9 + 6, vj_02zx); - atomicAdd(vj + (ia*natm+ka)*9 + 7, vj_02zy); - atomicAdd(vj + (ia*natm+ka)*9 + 8, vj_02zz); - } - - double vk_03xx = 0; - double vj_03xx = 0; - double vk_03xy = 0; - double vj_03xy = 0; - double vk_03xz = 0; - double vj_03xz = 0; - double vk_03yx = 0; - double vj_03yx = 0; - double vk_03yy = 0; - double vj_03yy = 0; - double vk_03yz = 0; - double vj_03yz = 0; - double vk_03zx = 0; - double vj_03zx = 0; - double vk_03zy = 0; - double vj_03zy = 0; - double vk_03zz = 0; - double vj_03zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = 
klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double hrr_3001x = trr_31x - xlxk * trr_30x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_2101x = hrr_3001x - (rj[0] - ri[0]) * hrr_2001x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 = al*2 * (ai*2 * hrr_2101x - 1 * hrr_0101x); - prod = g3 * 1 * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g1 = ai*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g1 -= 1 * hrr_0100x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * hrr_2100x; - g1 -= 1 * hrr_0100x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g1 = ai*2 * trr_10y; - double hrr_1101x = hrr_2001x - (rj[0] - 
ri[0]) * hrr_1001x; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = al*2 * ai*2 * hrr_1001y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - prod = g3 * hrr_1100x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * hrr_1100x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1101x; - prod = g3 * trr_10y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double trr_20y = c0y * trr_10y 
+ 1*b10 * 1; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - g3 = al*2 * (ai*2 * hrr_2001y - 1 * hrr_0001y); - prod = g3 * hrr_0100x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * hrr_0100x * trr_10y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1101x; - prod = g3 * 1 * trr_10z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * hrr_1100x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * 
hrr_0100x * trr_10z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - g3 = al*2 * (ai*2 * hrr_2001z - 1 * hrr_0001z); - prod = g3 * hrr_0100x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (ai*2 * hrr_2001x - 1 * hrr_0001x); - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = ai*2 * hrr_1100y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_03yx += prod * 
vk_dd; - vj_03yx += prod * vj_dd; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = al*2 * ai*2 * hrr_1101y; - prod = g3 * trr_10x * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * trr_10x * hrr_0100y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * hrr_1100y * wt; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * wt; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_3001y = trr_31y - ylyk * trr_30y; - double 
hrr_2101y = hrr_3001y - (rj[1] - ri[1]) * hrr_2001y; - g3 = al*2 * (ai*2 * hrr_2101y - 1 * hrr_0101y); - prod = g3 * fac * wt; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * hrr_2100y; - g1 -= 1 * hrr_0100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_10z; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001z; - prod = g3 * fac * hrr_1100y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * hrr_0100y * trr_10z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1101y; - prod = g3 * fac * trr_10z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * hrr_1100y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - 
g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * trr_20z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * (ai*2 * hrr_2001z - 1 * hrr_0001z); - prod = g3 * fac * hrr_0100y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (ai*2 * hrr_2001x - 1 * hrr_0001x); - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_20x; - g1 -= 1 * fac; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * trr_10x * hrr_0100z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10x; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = ai*2 * hrr_1100z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 
* 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = al*2 * ai*2 * hrr_1101z; - prod = g3 * trr_10x * 1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * trr_10y * hrr_0100z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10y; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * (ai*2 * hrr_2001y - 1 * hrr_0001y); - prod = g3 * fac * hrr_0100z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_20y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * hrr_1100z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1101z; - 
prod = g3 * fac * trr_10y; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ai*2 * hrr_1001x; - prod = g3 * 1 * hrr_1100z; - vk_03xx += prod * vk_dd; - vj_03xx += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100z; - vk_03xy += prod * vk_dd; - vj_03xy += prod * vj_dd; - g1 = ai*2 * trr_10x; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * 1; - vk_03xz += prod * vk_dd; - vj_03xz += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100z; - vk_03yx += prod * vk_dd; - vj_03yx += prod * vj_dd; - g3 = al*2 * ai*2 * hrr_1001y; - prod = g3 * fac * hrr_1100z; - vk_03yy += prod * vk_dd; - vj_03yy += prod * vj_dd; - g1 = ai*2 * trr_10y; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * fac; - vk_03yz += prod * vk_dd; - vj_03yz += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_03zx += prod * vk_dd; - vj_03zx += prod * vj_dd; - g1 = ai*2 * hrr_2100z; - g1 -= 1 * hrr_0100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_03zy += prod * vk_dd; - vj_03zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_3001z = trr_31z - zlzk * trr_30z; - double hrr_2101z = hrr_3001z - (rj[2] - ri[2]) * hrr_2001z; - g3 = al*2 * (ai*2 * hrr_2101z - 1 * hrr_0101z); - prod = g3 * fac * 
1; - vk_03zz += prod * vk_dd; - vj_03zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ia*natm+la)*9 + 0, vk_03xx); - atomicAdd(vk + (ia*natm+la)*9 + 1, vk_03xy); - atomicAdd(vk + (ia*natm+la)*9 + 2, vk_03xz); - atomicAdd(vk + (ia*natm+la)*9 + 3, vk_03yx); - atomicAdd(vk + (ia*natm+la)*9 + 4, vk_03yy); - atomicAdd(vk + (ia*natm+la)*9 + 5, vk_03yz); - atomicAdd(vk + (ia*natm+la)*9 + 6, vk_03zx); - atomicAdd(vk + (ia*natm+la)*9 + 7, vk_03zy); - atomicAdd(vk + (ia*natm+la)*9 + 8, vk_03zz); - } - if (vj != NULL) { - atomicAdd(vj + (ia*natm+la)*9 + 0, vj_03xx); - atomicAdd(vj + (ia*natm+la)*9 + 1, vj_03xy); - atomicAdd(vj + (ia*natm+la)*9 + 2, vj_03xz); - atomicAdd(vj + (ia*natm+la)*9 + 3, vj_03yx); - atomicAdd(vj + (ia*natm+la)*9 + 4, vj_03yy); - atomicAdd(vj + (ia*natm+la)*9 + 5, vj_03yz); - atomicAdd(vj + (ia*natm+la)*9 + 6, vj_03zx); - atomicAdd(vj + (ia*natm+la)*9 + 7, vj_03zy); - atomicAdd(vj + (ia*natm+la)*9 + 8, vj_03zz); - } - - double vk_10xx = 0; - double vj_10xx = 0; - double vk_10xy = 0; - double vj_10xy = 0; - double vk_10xz = 0; - double vj_10xz = 0; - double vk_10yx = 0; - double vj_10yx = 0; - double vk_10yy = 0; - double vj_10yy = 0; - double vk_10yz = 0; - double vj_10yz = 0; - double vk_10zx = 0; - double vj_10zx = 0; - double vk_10zy = 0; - double vj_10zy = 0; - double vk_10zz = 0; - double vj_10zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; 
ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double trr_40x = 
c0x * trr_30x + 3*b10 * trr_20x; - double hrr_3100x = trr_40x - (rj[0] - ri[0]) * trr_30x; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_2200x = hrr_3100x - (rj[0] - ri[0]) * hrr_2100x; - g3 = ai*2 * (aj*2 * hrr_2200x - 1 * trr_20x); - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g3 -= 1 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * 1 * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * hrr_2100x; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * hrr_1100x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * hrr_2100x; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - 
double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * hrr_1100x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (aj*2 * hrr_1200x - 1 * trr_10x); - prod = g3 * trr_10y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g3 = ai*2 * aj*2 * hrr_2100y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * hrr_0100x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * hrr_0100x * trr_10y; - 
vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * (aj*2 * hrr_1200x - 1 * trr_10x); - prod = g3 * 1 * trr_10z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * hrr_0100x * trr_10z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g3 = ai*2 * aj*2 * hrr_2100z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * hrr_0100x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * 
dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_2100x; - g3 -= 1 * aj*2 * hrr_0100x; - prod = g3 * hrr_0100y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g3 = ai*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * trr_10x * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * trr_10x * hrr_0100y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if 
(jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * hrr_1100y * wt; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * wt; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; - double hrr_3100y = trr_40y - (rj[1] - ri[1]) * trr_30y; - double hrr_2200y = hrr_3100y - (rj[1] - ri[1]) * hrr_2100y; - g3 = ai*2 * (aj*2 * hrr_2200y - 1 * trr_20y); - g3 -= 1 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * fac * wt; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100z; - prod = g3 * fac * hrr_1100y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * hrr_0100y * trr_10z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * fac * trr_10z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_2100z; - g3 -= 1 * aj*2 * hrr_0100z; - prod = g3 * fac * hrr_0100y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * 
dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_2100x; - g3 -= 1 * aj*2 * hrr_0100x; - prod = g3 * 1 * hrr_0100z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * trr_10x * hrr_0100z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10x; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g3 = ai*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * trr_10x * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + 
dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * trr_10y * hrr_0100z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100z; - vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10y; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_2100y; - g3 -= 1 * aj*2 * hrr_0100y; - prod = g3 * fac * hrr_0100z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - g3 = ai*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * fac * trr_10y; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * aj*2 * hrr_1100x; - prod = g3 * 1 * hrr_1100z; - vk_10xx += prod * vk_dd; - vj_10xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100z; 
- vk_10xy += prod * vk_dd; - vj_10xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * 1; - vk_10xz += prod * vk_dd; - vj_10xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100z; - vk_10yx += prod * vk_dd; - vj_10yx += prod * vj_dd; - g3 = ai*2 * aj*2 * hrr_1100y; - prod = g3 * fac * hrr_1100z; - vk_10yy += prod * vk_dd; - vj_10yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * fac; - vk_10yz += prod * vk_dd; - vj_10yz += prod * vj_dd; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_10zx += prod * vk_dd; - vj_10zx += prod * vj_dd; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_10zy += prod * vk_dd; - vj_10zy += prod * vj_dd; - double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; - double hrr_3100z = trr_40z - (rj[2] - ri[2]) * trr_30z; - double hrr_2200z = hrr_3100z - (rj[2] - ri[2]) * hrr_2100z; - g3 = ai*2 * (aj*2 * hrr_2200z - 1 * trr_20z); - g3 -= 1 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * fac * 1; - vk_10zz += prod * vk_dd; - vj_10zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ia)*9 + 0, vk_10xx); - atomicAdd(vk + (ja*natm+ia)*9 + 1, vk_10xy); - atomicAdd(vk + (ja*natm+ia)*9 + 2, vk_10xz); - atomicAdd(vk + (ja*natm+ia)*9 + 3, vk_10yx); - atomicAdd(vk + (ja*natm+ia)*9 + 4, vk_10yy); - atomicAdd(vk + (ja*natm+ia)*9 + 5, vk_10yz); - atomicAdd(vk + (ja*natm+ia)*9 + 6, vk_10zx); - atomicAdd(vk + (ja*natm+ia)*9 + 7, vk_10zy); - atomicAdd(vk + (ja*natm+ia)*9 + 8, vk_10zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ia)*9 + 0, vj_10xx); - atomicAdd(vj + (ja*natm+ia)*9 + 1, vj_10xy); - atomicAdd(vj + (ja*natm+ia)*9 + 2, vj_10xz); - atomicAdd(vj + (ja*natm+ia)*9 + 3, vj_10yx); - atomicAdd(vj + (ja*natm+ia)*9 + 4, vj_10yy); - atomicAdd(vj + (ja*natm+ia)*9 + 5, vj_10yz); - 
atomicAdd(vj + (ja*natm+ia)*9 + 6, vj_10zx); - atomicAdd(vj + (ja*natm+ia)*9 + 7, vj_10zy); - atomicAdd(vj + (ja*natm+ia)*9 + 8, vj_10zz); - } - - double vk_11xx = 0; - double vj_11xx = 0; - double vk_11xy = 0; - double vj_11xy = 0; - double vk_11xz = 0; - double vj_11xz = 0; - double vk_11yx = 0; - double vj_11yx = 0; - double vk_11yy = 0; - double vj_11yy = 0; - double vk_11yz = 0; - double vj_11yz = 0; - double vk_11zx = 0; - double vj_11zx = 0; - double vk_11zy = 0; - double vj_11zy = 0; - double vk_11zz = 0; - double vj_11zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - 
rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; - double hrr_3100x = trr_40x - (rj[0] - ri[0]) * trr_30x; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_2200x = hrr_3100x - (rj[0] - ri[0]) * hrr_2100x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - double hrr_1300x = hrr_2200x - (rj[0] - ri[0]) * hrr_1200x; - g3 = aj*2 * (aj*2 * hrr_1300x - 3 * hrr_1100x); - prod = g3 * 1 * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * 
hrr_1200x; - g1 -= 1 * trr_10x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * hrr_1100x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * hrr_1100x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - 
double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - double hrr_0300x = hrr_1200x - (rj[0] - ri[0]) * hrr_0200x; - g3 = aj*2 * (aj*2 * hrr_0300x - 3 * hrr_0100x); - prod = g3 * trr_10y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g3 = aj*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * hrr_0100x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * hrr_0100x * trr_10y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = 
dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_0300x - 3 * hrr_0100x); - prod = g3 * 1 * trr_10z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * hrr_0100x * trr_10z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g3 = aj*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * hrr_0100x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + 
dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_1200x - 1 * trr_10x); - prod = g3 * hrr_0100y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double hrr_0300y = hrr_1200y - (rj[1] - ri[1]) * hrr_0200y; - g3 = aj*2 * (aj*2 * hrr_0300y - 3 * hrr_0100y); - prod = g3 * trr_10x * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * trr_10x * hrr_0100y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * hrr_1100y * wt; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - 
g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; - double hrr_3100y = trr_40y - (rj[1] - ri[1]) * trr_30y; - double hrr_2200y = hrr_3100y - (rj[1] - ri[1]) * hrr_2100y; - double hrr_1300y = hrr_2200y - (rj[1] - ri[1]) * hrr_1200y; - g3 = aj*2 * (aj*2 * hrr_1300y - 3 * hrr_1100y); - prod = g3 * fac * wt; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200z - 1 * wt); - prod = g3 * fac * hrr_1100y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * hrr_0100y * trr_10z; - vk_11xx += prod * vk_dd; - vj_11xx += 
prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0300y - 3 * hrr_0100y); - prod = g3 * fac * trr_10z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_1200z - 1 * trr_10z); - prod = g3 * fac * hrr_0100y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_1200x - 1 * trr_10x); - prod = g3 * 1 * hrr_0100z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = 
g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * trr_10x * hrr_0100z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double hrr_0300z = hrr_1200z - (rj[2] - ri[2]) * hrr_0200z; - g3 = aj*2 * (aj*2 * hrr_0300z - 3 * hrr_0100z); - prod = g3 * trr_10x * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * trr_10y * hrr_0100z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100z; - vk_11yx 
+= prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_1200y - 1 * trr_10y); - prod = g3 * fac * hrr_0100z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0300z - 3 * hrr_0100z); - prod = g3 * fac * trr_10y; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * (aj*2 * hrr_0200x - 1 * fac); - prod = g3 * 1 * hrr_1100z; - vk_11xx += prod * vk_dd; - vj_11xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100z; - vk_11xy += prod * vk_dd; - vj_11xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_11xz += prod * vk_dd; - vj_11xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100z; - vk_11yx += prod * vk_dd; - vj_11yx += prod * vj_dd; - g3 = aj*2 * (aj*2 * hrr_0200y - 1 * 1); - prod = g3 * fac * hrr_1100z; - vk_11yy += prod * vk_dd; - vj_11yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = aj*2 * hrr_1200z; - 
g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_11yz += prod * vk_dd; - vj_11yz += prod * vj_dd; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_11zx += prod * vk_dd; - vj_11zx += prod * vj_dd; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_11zy += prod * vk_dd; - vj_11zy += prod * vj_dd; - double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; - double hrr_3100z = trr_40z - (rj[2] - ri[2]) * trr_30z; - double hrr_2200z = hrr_3100z - (rj[2] - ri[2]) * hrr_2100z; - double hrr_1300z = hrr_2200z - (rj[2] - ri[2]) * hrr_1200z; - g3 = aj*2 * (aj*2 * hrr_1300z - 3 * hrr_1100z); - prod = g3 * fac * 1; - vk_11zz += prod * vk_dd; - vj_11zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ja)*9 + 0, vk_11xx); - atomicAdd(vk + (ja*natm+ja)*9 + 1, vk_11xy); - atomicAdd(vk + (ja*natm+ja)*9 + 2, vk_11xz); - atomicAdd(vk + (ja*natm+ja)*9 + 3, vk_11yx); - atomicAdd(vk + (ja*natm+ja)*9 + 4, vk_11yy); - atomicAdd(vk + (ja*natm+ja)*9 + 5, vk_11yz); - atomicAdd(vk + (ja*natm+ja)*9 + 6, vk_11zx); - atomicAdd(vk + (ja*natm+ja)*9 + 7, vk_11zy); - atomicAdd(vk + (ja*natm+ja)*9 + 8, vk_11zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ja)*9 + 0, vj_11xx); - atomicAdd(vj + (ja*natm+ja)*9 + 1, vj_11xy); - atomicAdd(vj + (ja*natm+ja)*9 + 2, vj_11xz); - atomicAdd(vj + (ja*natm+ja)*9 + 3, vj_11yx); - atomicAdd(vj + (ja*natm+ja)*9 + 4, vj_11yy); - atomicAdd(vj + (ja*natm+ja)*9 + 5, vj_11yz); - atomicAdd(vj + (ja*natm+ja)*9 + 6, vj_11zx); - atomicAdd(vj + (ja*natm+ja)*9 + 7, vj_11zy); - atomicAdd(vj + (ja*natm+ja)*9 + 8, vj_11zz); - } - - double vk_12xx = 0; - double vj_12xx = 0; - double vk_12xy = 0; - double vj_12xy = 0; - double vk_12xz = 0; - double vj_12xz = 0; - double vk_12yx = 0; - double vj_12yx = 0; - double vk_12yy = 0; - double vj_12yy = 0; - double vk_12yz = 0; - double vj_12yz = 0; - double vk_12zx = 0; - double vj_12zx = 0; - double vk_12zy = 0; - 
double vj_12zy = 0; - double vk_12zz = 0; - double vj_12zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] 
* dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - (rj[0] - ri[0]) * trr_21x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - double hrr_1210x = hrr_2110x - (rj[0] - ri[0]) * hrr_1110x; - g3 = ak*2 * (aj*2 * hrr_1210x - 1 * trr_11x); - prod = g3 * 1 * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * hrr_1110x; - prod = 
g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * hrr_1100x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * hrr_1100x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - double hrr_0210x = hrr_1110x - (rj[0] - ri[0]) * hrr_0110x; - g3 = ak*2 * (aj*2 * hrr_0210x - 1 * trr_01x); - prod = g3 * trr_10y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - 
g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = ak*2 * aj*2 * hrr_1110y; - prod = g3 * hrr_0100x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * hrr_0100x * trr_10y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (aj*2 * hrr_0210x - 1 * trr_01x); - prod = g3 * 1 * trr_10z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_12xy += prod * vk_dd; 
- vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * hrr_0100x * trr_10z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = ak*2 * aj*2 * hrr_1110z; - prod = g3 * hrr_0100x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_1110x; - prod = g3 * hrr_0100y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100y; - 
vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double hrr_0210y = hrr_1110y - (rj[1] - ri[1]) * hrr_0110y; - g3 = ak*2 * (aj*2 * hrr_0210y - 1 * trr_01y); - prod = g3 * trr_10x * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * trr_10x * hrr_0100y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * hrr_1100y * wt; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - 
double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - double hrr_1210y = hrr_2110y - (rj[1] - ri[1]) * hrr_1110y; - g3 = ak*2 * (aj*2 * hrr_1210y - 1 * trr_11y); - prod = g3 * fac * wt; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110z; - prod = g3 * fac * hrr_1100y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * hrr_0100y * trr_10z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ak*2 * 
trr_01x; - prod = g1 * g2 * trr_10z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * (aj*2 * hrr_0210y - 1 * trr_01y); - prod = g3 * fac * trr_10z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100y; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_1110z; - prod = g3 * fac * hrr_0100y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_1110x; - prod = g3 * 1 * hrr_0100z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * trr_10x * hrr_0100z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_12yz += prod * 
vk_dd; - vj_12yz += prod * vj_dd; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - double hrr_0210z = hrr_1110z - (rj[2] - ri[2]) * hrr_0110z; - g3 = ak*2 * (aj*2 * hrr_0210z - 1 * trr_01z); - prod = g3 * trr_10x * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * trr_10y * hrr_0100z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_1110y; - prod = g3 * fac * hrr_0100z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_12zx += prod * vk_dd; - vj_12zx += 
prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_12zy += prod * vk_dd; - vj_12zy += prod * vj_dd; - g3 = ak*2 * (aj*2 * hrr_0210z - 1 * trr_01z); - prod = g3 * fac * trr_10y; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * aj*2 * hrr_0110x; - prod = g3 * 1 * hrr_1100z; - vk_12xx += prod * vk_dd; - vj_12xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100z; - vk_12xy += prod * vk_dd; - vj_12xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_12xz += prod * vk_dd; - vj_12xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100z; - vk_12yx += prod * vk_dd; - vj_12yx += prod * vj_dd; - g3 = ak*2 * aj*2 * hrr_0110y; - prod = g3 * fac * hrr_1100z; - vk_12yy += prod * vk_dd; - vj_12yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_12yz += prod * vk_dd; - vj_12yz += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_12zx += prod * vk_dd; - vj_12zx += prod * vj_dd; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_12zy += prod * 
vk_dd; - vj_12zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - double hrr_1210z = hrr_2110z - (rj[2] - ri[2]) * hrr_1110z; - g3 = ak*2 * (aj*2 * hrr_1210z - 1 * trr_11z); - prod = g3 * fac * 1; - vk_12zz += prod * vk_dd; - vj_12zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+ka)*9 + 0, vk_12xx); - atomicAdd(vk + (ja*natm+ka)*9 + 1, vk_12xy); - atomicAdd(vk + (ja*natm+ka)*9 + 2, vk_12xz); - atomicAdd(vk + (ja*natm+ka)*9 + 3, vk_12yx); - atomicAdd(vk + (ja*natm+ka)*9 + 4, vk_12yy); - atomicAdd(vk + (ja*natm+ka)*9 + 5, vk_12yz); - atomicAdd(vk + (ja*natm+ka)*9 + 6, vk_12zx); - atomicAdd(vk + (ja*natm+ka)*9 + 7, vk_12zy); - atomicAdd(vk + (ja*natm+ka)*9 + 8, vk_12zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+ka)*9 + 0, vj_12xx); - atomicAdd(vj + (ja*natm+ka)*9 + 1, vj_12xy); - atomicAdd(vj + (ja*natm+ka)*9 + 2, vj_12xz); - atomicAdd(vj + (ja*natm+ka)*9 + 3, vj_12yx); - atomicAdd(vj + (ja*natm+ka)*9 + 4, vj_12yy); - atomicAdd(vj + (ja*natm+ka)*9 + 5, vj_12yz); - atomicAdd(vj + (ja*natm+ka)*9 + 6, vj_12zx); - atomicAdd(vj + (ja*natm+ka)*9 + 7, vj_12zy); - atomicAdd(vj + (ja*natm+ka)*9 + 8, vj_12zz); - } - - double vk_13xx = 0; - double vj_13xx = 0; - double vk_13xy = 0; - double vj_13xy = 0; - double vk_13xz = 0; - double vj_13xz = 0; - double vk_13yx = 0; - double vj_13yx = 0; - double vk_13yy = 0; - double vj_13yy = 0; - double vk_13yz = 0; - double vj_13yz = 0; - double vk_13zx = 0; - double vj_13zx = 0; - double vk_13zy = 0; - double vj_13zy = 0; - double vk_13zz = 0; - double vj_13zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * 
(xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); 
- } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double hrr_3001x = trr_31x - xlxk * trr_30x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_2101x = hrr_3001x - (rj[0] - ri[0]) * hrr_2001x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - double hrr_1201x = hrr_2101x - (rj[0] - ri[0]) * hrr_1101x; - g3 = al*2 * (aj*2 * hrr_1201x - 1 * hrr_1001x); - prod = g3 * 1 * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_1200x; - g1 -= 1 * trr_10x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = 
hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * hrr_1100x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * hrr_1100x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - double hrr_0201x = hrr_1101x - (rj[0] - ri[0]) * hrr_0101x; - g3 = al*2 * (aj*2 * hrr_0201x - 1 * hrr_0001x); - prod = g3 * trr_10y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = al*2 * hrr_1001y; - 
prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = al*2 * aj*2 * hrr_1101y; - prod = g3 * hrr_0100x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * hrr_0100x * trr_10y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (aj*2 * hrr_0201x - 1 * hrr_0001x); - prod = g3 * 1 * trr_10z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 
* trr_10z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0200x; - g1 -= 1 * fac; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * hrr_0100x * trr_10z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = al*2 * aj*2 * hrr_1101z; - prod = g3 * hrr_0100x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_1101x; - prod = g3 * hrr_0100y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += 
prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double hrr_0201y = hrr_1101y - (rj[1] - ri[1]) * hrr_0101y; - g3 = al*2 * (aj*2 * hrr_0201y - 1 * hrr_0001y); - prod = g3 * trr_10x * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * trr_10x * hrr_0100y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * hrr_1100y * wt; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * wt; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - double 
trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_3001y = trr_31y - ylyk * trr_30y; - double hrr_2101y = hrr_3001y - (rj[1] - ri[1]) * hrr_2001y; - double hrr_1201y = hrr_2101y - (rj[1] - ri[1]) * hrr_1101y; - g3 = al*2 * (aj*2 * hrr_1201y - 1 * hrr_1001y); - prod = g3 * fac * wt; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1200y; - g1 -= 1 * trr_10y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0100z; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101z; - prod = g3 * fac * hrr_1100y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * hrr_0100y * trr_10z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; 
- g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * (aj*2 * hrr_0201y - 1 * hrr_0001y); - prod = g3 * fac * trr_10z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0200y; - g1 -= 1 * 1; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1100z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_1101z; - prod = g3 * fac * hrr_0100y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_1101x; - prod = g3 * 1 * hrr_0100z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_1100x; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * 
trr_10x * hrr_0100z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10x; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double hrr_0201z = hrr_1101z - (rj[2] - ri[2]) * hrr_0101z; - g3 = al*2 * (aj*2 * hrr_0201z - 1 * hrr_0001z); - prod = g3 * trr_10x * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * trr_10y * hrr_0100z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10y; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_1101y; - prod = g3 * fac * hrr_0100z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_1100y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * fac; - 
vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_0200z; - g1 -= 1 * wt; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - g3 = al*2 * (aj*2 * hrr_0201z - 1 * hrr_0001z); - prod = g3 * fac * trr_10y; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * aj*2 * hrr_0101x; - prod = g3 * 1 * hrr_1100z; - vk_13xx += prod * vk_dd; - vj_13xx += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100z; - vk_13xy += prod * vk_dd; - vj_13xy += prod * vj_dd; - g1 = aj*2 * hrr_0100x; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * 1; - vk_13xz += prod * vk_dd; - vj_13xz += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100z; - vk_13yx += prod * vk_dd; - vj_13yx += prod * vj_dd; - g3 = al*2 * aj*2 * hrr_0101y; - prod = g3 * fac * hrr_1100z; - vk_13yy += prod * vk_dd; - vj_13yy += prod * vj_dd; - g1 = aj*2 * hrr_0100y; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * fac; - vk_13yz += prod * vk_dd; - vj_13yz += prod * vj_dd; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = al*2 * 
hrr_0001x; - prod = g1 * g2 * 1; - vk_13zx += prod * vk_dd; - vj_13zx += prod * vj_dd; - g1 = aj*2 * hrr_1200z; - g1 -= 1 * trr_10z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_13zy += prod * vk_dd; - vj_13zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_3001z = trr_31z - zlzk * trr_30z; - double hrr_2101z = hrr_3001z - (rj[2] - ri[2]) * hrr_2001z; - double hrr_1201z = hrr_2101z - (rj[2] - ri[2]) * hrr_1101z; - g3 = al*2 * (aj*2 * hrr_1201z - 1 * hrr_1001z); - prod = g3 * fac * 1; - vk_13zz += prod * vk_dd; - vj_13zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ja*natm+la)*9 + 0, vk_13xx); - atomicAdd(vk + (ja*natm+la)*9 + 1, vk_13xy); - atomicAdd(vk + (ja*natm+la)*9 + 2, vk_13xz); - atomicAdd(vk + (ja*natm+la)*9 + 3, vk_13yx); - atomicAdd(vk + (ja*natm+la)*9 + 4, vk_13yy); - atomicAdd(vk + (ja*natm+la)*9 + 5, vk_13yz); - atomicAdd(vk + (ja*natm+la)*9 + 6, vk_13zx); - atomicAdd(vk + (ja*natm+la)*9 + 7, vk_13zy); - atomicAdd(vk + (ja*natm+la)*9 + 8, vk_13zz); - } - if (vj != NULL) { - atomicAdd(vj + (ja*natm+la)*9 + 0, vj_13xx); - atomicAdd(vj + (ja*natm+la)*9 + 1, vj_13xy); - atomicAdd(vj + (ja*natm+la)*9 + 2, vj_13xz); - atomicAdd(vj + (ja*natm+la)*9 + 3, vj_13yx); - atomicAdd(vj + (ja*natm+la)*9 + 4, vj_13yy); - atomicAdd(vj + (ja*natm+la)*9 + 5, vj_13yz); - atomicAdd(vj + (ja*natm+la)*9 + 6, vj_13zx); - atomicAdd(vj + (ja*natm+la)*9 + 7, vj_13zy); - atomicAdd(vj + (ja*natm+la)*9 + 8, vj_13zz); - } - - double vk_20xx = 0; - double vj_20xx = 0; - double vk_20xy = 0; - double vj_20xy = 0; - double vk_20xz = 0; - double vj_20xz = 0; - double vk_20yx = 0; - double vj_20yx = 0; - double vk_20yy = 0; - double vj_20yy = 0; - double vk_20yz = 0; - double vj_20yz = 0; - double vk_20zx = 0; - double vj_20zx = 0; - double vk_20zy = 0; - double vj_20zy = 0; - double vk_20zz = 0; - double vj_20zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double 
ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - (rj[0] - ri[0]) * trr_21x; - g3 = ai*2 * ak*2 * hrr_2110x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g3 -= 1 * ak*2 * hrr_0110x; - prod = g3 * 1 * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g1 = ak*2 * hrr_1110x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * hrr_1110x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g2 = ai*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g3 = ai*2 * ak*2 * trr_11y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - prod = g3 * hrr_1100x * wt; - vk_20yy += prod 
* vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = ai*2 * hrr_2100x; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * hrr_1100x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * hrr_1110x; - prod = g3 * trr_10y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - g3 = ai*2 * ak*2 * trr_21y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * hrr_0100x * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_10z; - prod = 
g1 * g2 * hrr_0100x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * hrr_0100x * trr_10y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * hrr_1110x; - prod = g3 * 1 * trr_10z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * hrr_0100x * trr_10z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_10y; - 
prod = g1 * g2 * hrr_0100x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - g3 = ai*2 * ak*2 * trr_21z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * hrr_0100x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * trr_21x; - g3 -= 1 * ak*2 * trr_01x; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g1 = ak*2 * hrr_0110y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = ai*2 * ak*2 * hrr_1110y; - prod = g3 * trr_10x * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * 
hrr_1100y; - prod = g1 * g2 * trr_10x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * trr_10x * hrr_0100y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * hrr_1100y * wt; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * wt; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - g3 = ai*2 * ak*2 * hrr_2110y; - g3 -= 1 * ak*2 * hrr_0110y; - prod = g3 * fac * wt; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - 
vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11z; - prod = g3 * fac * hrr_1100y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * hrr_0100y * trr_10z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * hrr_1110y; - prod = g3 * fac * trr_10z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_21z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * fac * hrr_0100y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + 
dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * trr_21x; - g3 -= 1 * ak*2 * trr_01x; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * trr_10x * hrr_0100z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10x; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g1 = ak*2 * hrr_0110z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = ai*2 * ak*2 * hrr_1110z; - prod = g3 * trr_10x * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * 
dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * trr_10y * hrr_0100z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10y; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_21y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * fac * hrr_0100z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - g3 = ai*2 * ak*2 * hrr_1110z; - prod = g3 * fac * trr_10y; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + 
dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * ak*2 * trr_11x; - prod = g3 * 1 * hrr_1100z; - vk_20xx += prod * vk_dd; - vj_20xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100z; - vk_20xy += prod * vk_dd; - vj_20xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * 1; - vk_20xz += prod * vk_dd; - vj_20xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100z; - vk_20yx += prod * vk_dd; - vj_20yx += prod * vj_dd; - g3 = ai*2 * ak*2 * trr_11y; - prod = g3 * fac * hrr_1100z; - vk_20yy += prod * vk_dd; - vj_20yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * fac; - vk_20yz += prod * vk_dd; - vj_20yz += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_20zx += prod * vk_dd; - vj_20zx += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_20zy += prod * vk_dd; - vj_20zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - g3 = ai*2 * ak*2 * hrr_2110z; - g3 -= 1 * ak*2 * hrr_0110z; - prod = g3 * fac * 1; - vk_20zz += prod * vk_dd; - vj_20zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ia)*9 + 0, vk_20xx); - atomicAdd(vk + (ka*natm+ia)*9 + 1, vk_20xy); - atomicAdd(vk + (ka*natm+ia)*9 + 2, vk_20xz); - atomicAdd(vk + (ka*natm+ia)*9 + 3, vk_20yx); - atomicAdd(vk + (ka*natm+ia)*9 + 4, vk_20yy); - atomicAdd(vk + (ka*natm+ia)*9 + 5, vk_20yz); - atomicAdd(vk + (ka*natm+ia)*9 + 6, vk_20zx); - atomicAdd(vk + (ka*natm+ia)*9 + 7, vk_20zy); - atomicAdd(vk + (ka*natm+ia)*9 + 8, vk_20zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ia)*9 + 0, vj_20xx); - atomicAdd(vj + (ka*natm+ia)*9 + 1, 
vj_20xy); - atomicAdd(vj + (ka*natm+ia)*9 + 2, vj_20xz); - atomicAdd(vj + (ka*natm+ia)*9 + 3, vj_20yx); - atomicAdd(vj + (ka*natm+ia)*9 + 4, vj_20yy); - atomicAdd(vj + (ka*natm+ia)*9 + 5, vj_20yz); - atomicAdd(vj + (ka*natm+ia)*9 + 6, vj_20zx); - atomicAdd(vj + (ka*natm+ia)*9 + 7, vj_20zy); - atomicAdd(vj + (ka*natm+ia)*9 + 8, vj_20zz); - } - - double vk_21xx = 0; - double vj_21xx = 0; - double vk_21xy = 0; - double vj_21xy = 0; - double vk_21xz = 0; - double vj_21xz = 0; - double vk_21yx = 0; - double vj_21yx = 0; - double vk_21yy = 0; - double vj_21yy = 0; - double vk_21yz = 0; - double vj_21yz = 0; - double vk_21zx = 0; - double vj_21zx = 0; - double vk_21zy = 0; - double vj_21zy = 0; - double vk_21zz = 0; - double vj_21zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = 
xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2110x = trr_31x - (rj[0] - ri[0]) * trr_21x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - double hrr_1210x = hrr_2110x - (rj[0] - ri[0]) * hrr_1110x; - g3 = aj*2 * ak*2 * hrr_1210x; - g3 -= 1 * ak*2 * trr_11x; - prod = g3 * 1 * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * hrr_1110x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double 
trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * hrr_1110x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g1 = ak*2 * trr_01y; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * hrr_1100x * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * hrr_1100x * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_01x = cpx * fac; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - double hrr_0210x = hrr_1110x - (rj[0] - ri[0]) * hrr_0110x; - g3 = aj*2 * ak*2 * hrr_0210x; - g3 -= 1 * ak*2 * trr_01x; - prod = g3 * trr_10y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g3 = aj*2 * ak*2 * hrr_1110y; - prod = g3 * hrr_0100x * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * hrr_0100x * trr_10y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * 
dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_0210x; - g3 -= 1 * ak*2 * trr_01x; - prod = g3 * 1 * trr_10z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * hrr_0100x * trr_10z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g3 = aj*2 * ak*2 * hrr_1110z; - prod = g3 * hrr_0100x * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * 
dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_1110x; - prod = g3 * hrr_0100y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double hrr_0210y = hrr_1110y - (rj[1] - ri[1]) * hrr_0110y; - g3 = aj*2 * ak*2 * hrr_0210y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * trr_10x * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * trr_10x * hrr_0100y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = 
dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * hrr_1100y * wt; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_2110y = trr_31y - (rj[1] - ri[1]) * trr_21y; - double hrr_1210y = hrr_2110y - (rj[1] - ri[1]) * hrr_1110y; - g3 = aj*2 * ak*2 * hrr_1210y; - g3 -= 1 * ak*2 * trr_11y; - prod = g3 * fac * wt; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110z; - prod = g3 * fac * hrr_1100y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = 
dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * hrr_0100y * trr_10z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0210y; - g3 -= 1 * ak*2 * trr_01y; - prod = g3 * fac * trr_10z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_1110z; - prod = g3 * fac * hrr_0100y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = 
dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_1110x; - prod = g3 * 1 * hrr_0100z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * trr_10x * hrr_0100z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double hrr_0210z = hrr_1110z - (rj[2] - ri[2]) * hrr_0110z; - g3 = aj*2 * ak*2 * hrr_0210z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * trr_10x * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * trr_10y * hrr_0100z; - vk_21xx 
+= prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_1110y; - prod = g3 * fac * hrr_0100z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0210z; - g3 -= 1 * ak*2 * trr_01z; - prod = g3 * fac * trr_10y; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * ak*2 * hrr_0110x; - prod = g3 * 1 * hrr_1100z; - vk_21xx += prod * vk_dd; - vj_21xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100z; - vk_21xy += prod * vk_dd; - vj_21xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - 
(rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_21xz += prod * vk_dd; - vj_21xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100z; - vk_21yx += prod * vk_dd; - vj_21yx += prod * vj_dd; - g3 = aj*2 * ak*2 * hrr_0110y; - prod = g3 * fac * hrr_1100z; - vk_21yy += prod * vk_dd; - vj_21yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_21yz += prod * vk_dd; - vj_21yz += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_21zx += prod * vk_dd; - vj_21zx += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_21zy += prod * vk_dd; - vj_21zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_2110z = trr_31z - (rj[2] - ri[2]) * trr_21z; - double hrr_1210z = hrr_2110z - (rj[2] - ri[2]) * hrr_1110z; - g3 = aj*2 * ak*2 * hrr_1210z; - g3 -= 1 * ak*2 * trr_11z; - prod = g3 * fac * 1; - vk_21zz += prod * vk_dd; - vj_21zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ja)*9 + 0, vk_21xx); - atomicAdd(vk + (ka*natm+ja)*9 + 1, vk_21xy); - atomicAdd(vk + (ka*natm+ja)*9 + 2, vk_21xz); - atomicAdd(vk + (ka*natm+ja)*9 + 3, vk_21yx); - atomicAdd(vk + (ka*natm+ja)*9 + 4, vk_21yy); - atomicAdd(vk + (ka*natm+ja)*9 + 5, vk_21yz); - atomicAdd(vk + (ka*natm+ja)*9 + 6, vk_21zx); - atomicAdd(vk + (ka*natm+ja)*9 + 7, vk_21zy); - atomicAdd(vk + (ka*natm+ja)*9 + 8, vk_21zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ja)*9 + 0, vj_21xx); - atomicAdd(vj + (ka*natm+ja)*9 + 1, vj_21xy); - atomicAdd(vj + (ka*natm+ja)*9 + 2, vj_21xz); - atomicAdd(vj + (ka*natm+ja)*9 + 3, vj_21yx); - atomicAdd(vj + (ka*natm+ja)*9 + 4, vj_21yy); - atomicAdd(vj + (ka*natm+ja)*9 + 5, vj_21yz); - atomicAdd(vj + (ka*natm+ja)*9 + 6, vj_21zx); - atomicAdd(vj + 
(ka*natm+ja)*9 + 7, vj_21zy); - atomicAdd(vj + (ka*natm+ja)*9 + 8, vj_21zz); - } - - double vk_22xx = 0; - double vj_22xx = 0; - double vk_22xy = 0; - double vj_22xy = 0; - double vk_22xz = 0; - double vj_22xz = 0; - double vk_22yx = 0; - double vj_22yx = 0; - double vk_22yy = 0; - double vj_22yy = 0; - double vk_22yz = 0; - double vj_22yz = 0; - double vk_22zx = 0; - double vj_22zx = 0; - double vk_22zy = 0; - double vj_22zy = 0; - double vk_22zz = 0; - double vj_22zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for 
(int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1120x = trr_22x - (rj[0] - ri[0]) * trr_12x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 = ak*2 * (ak*2 * hrr_1120x - 1 * hrr_1100x); - prod = g3 * 1 * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g1 = ak*2 * hrr_1110x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * hrr_1110x; - double cpz = zqc + zpq*rt_akl; - double trr_01z 
= cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * hrr_1100x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * hrr_1100x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0120x = trr_12x - (rj[0] - ri[0]) * trr_02x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 = ak*2 * (ak*2 * hrr_0120x - 1 * hrr_0100x); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g1 = ak*2 * hrr_0110x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = 
ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - g3 = ak*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * hrr_0100x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * hrr_0100x * trr_10y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * hrr_0120x - 1 * hrr_0100x); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - double trr_11z = cpz * trr_10z + 
1*b00 * wt; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * hrr_0100x * trr_10z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - g3 = ak*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * hrr_0100x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_12x - 1 * trr_10x); - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * 
hrr_0110y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double hrr_0120y = trr_12y - (rj[1] - ri[1]) * trr_02y; - g3 = ak*2 * (ak*2 * hrr_0120y - 1 * hrr_0100y); - prod = g3 * trr_10x * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * trr_10x * hrr_0100y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - prod = g3 * hrr_1100y * wt; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = ak*2 * trr_01x; - 
prod = g1 * g2 * wt; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_1120y = trr_22y - (rj[1] - ri[1]) * trr_12y; - g3 = ak*2 * (ak*2 * hrr_1120y - 1 * hrr_1100y); - prod = g3 * fac * wt; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02z - 1 * wt); - prod = g3 * fac * hrr_1100y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * hrr_0100y * trr_10z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * hrr_0120y - 1 * hrr_0100y); - prod = g3 * fac * trr_10z; - vk_22yy += prod * vk_dd; - 
vj_22yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_12z - 1 * trr_10z); - prod = g3 * fac * hrr_0100y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_12x - 1 * trr_10x); - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * trr_10x * hrr_0100z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ak*2 * trr_11x; 
- prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double hrr_0120z = trr_12z - (rj[2] - ri[2]) * trr_02z; - g3 = ak*2 * (ak*2 * hrr_0120z - 1 * hrr_0100z); - prod = g3 * trr_10x * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - prod = g3 * trr_10y * hrr_0100z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_12y - 1 * trr_10y); - prod = g3 * fac * hrr_0100z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - g3 = ak*2 * (ak*2 * hrr_0120z - 1 * 
hrr_0100z); - prod = g3 * fac * trr_10y; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * (ak*2 * trr_02x - 1 * fac); - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - prod = g3 * 1 * hrr_1100z; - vk_22xx += prod * vk_dd; - vj_22xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100z; - vk_22xy += prod * vk_dd; - vj_22xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_22xz += prod * vk_dd; - vj_22xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100z; - vk_22yx += prod * vk_dd; - vj_22yx += prod * vj_dd; - g3 = ak*2 * (ak*2 * trr_02y - 1 * 1); - prod = g3 * fac * hrr_1100z; - vk_22yy += prod * vk_dd; - vj_22yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_22yz += prod * vk_dd; - vj_22yz += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_22zx += prod * vk_dd; - vj_22zx += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_22zy += prod * vk_dd; - vj_22zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_1120z = trr_22z - (rj[2] - ri[2]) * trr_12z; - g3 = ak*2 * (ak*2 
* hrr_1120z - 1 * hrr_1100z); - prod = g3 * fac * 1; - vk_22zz += prod * vk_dd; - vj_22zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+ka)*9 + 0, vk_22xx); - atomicAdd(vk + (ka*natm+ka)*9 + 1, vk_22xy); - atomicAdd(vk + (ka*natm+ka)*9 + 2, vk_22xz); - atomicAdd(vk + (ka*natm+ka)*9 + 3, vk_22yx); - atomicAdd(vk + (ka*natm+ka)*9 + 4, vk_22yy); - atomicAdd(vk + (ka*natm+ka)*9 + 5, vk_22yz); - atomicAdd(vk + (ka*natm+ka)*9 + 6, vk_22zx); - atomicAdd(vk + (ka*natm+ka)*9 + 7, vk_22zy); - atomicAdd(vk + (ka*natm+ka)*9 + 8, vk_22zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+ka)*9 + 0, vj_22xx); - atomicAdd(vj + (ka*natm+ka)*9 + 1, vj_22xy); - atomicAdd(vj + (ka*natm+ka)*9 + 2, vj_22xz); - atomicAdd(vj + (ka*natm+ka)*9 + 3, vj_22yx); - atomicAdd(vj + (ka*natm+ka)*9 + 4, vj_22yy); - atomicAdd(vj + (ka*natm+ka)*9 + 5, vj_22yz); - atomicAdd(vj + (ka*natm+ka)*9 + 6, vj_22zx); - atomicAdd(vj + (ka*natm+ka)*9 + 7, vj_22zy); - atomicAdd(vj + (ka*natm+ka)*9 + 8, vj_22zz); - } - - double vk_23xx = 0; - double vj_23xx = 0; - double vk_23xy = 0; - double vj_23xy = 0; - double vk_23xz = 0; - double vj_23xz = 0; - double vk_23yx = 0; - double vj_23yx = 0; - double vk_23yy = 0; - double vj_23yy = 0; - double vk_23yz = 0; - double vj_23yz = 0; - double vk_23zx = 0; - double vj_23zx = 0; - double vk_23zy = 0; - double vj_23zy = 0; - double vk_23zz = 0; - double vj_23zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; 
- double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double 
trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = trr_22x - xlxk * trr_21x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - double hrr_1111x = hrr_2011x - (rj[0] - ri[0]) * hrr_1011x; - g3 = al*2 * ak*2 * hrr_1111x; - prod = g3 * 1 * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g1 = ak*2 * hrr_1110x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * hrr_1110x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = al*2 * ak*2 * hrr_0011y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - prod = g3 * hrr_1100x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * 
hrr_0001y; - prod = g1 * g2 * hrr_1100x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * hrr_1100x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0111x = hrr_1011x - (rj[0] - ri[0]) * hrr_0011x; - g3 = al*2 * ak*2 * hrr_0111x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g1 = ak*2 * hrr_0110x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = al*2 * ak*2 * hrr_1011y; - double hrr_0100x = trr_10x 
- (rj[0] - ri[0]) * fac; - prod = g3 * hrr_0100x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * hrr_0100x * trr_10y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_0111x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * hrr_0110x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * hrr_0100x * trr_10z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1001z; - 
prod = g1 * g2 * hrr_0100x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = al*2 * ak*2 * hrr_1011z; - prod = g3 * hrr_0100x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_1011x; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g1 = ak*2 * hrr_0110y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double hrr_0111y = hrr_1011y - (rj[1] - ri[1]) * hrr_0011y; - g3 = al*2 * ak*2 * hrr_0111y; - prod = g3 * trr_10x * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = al*2 * hrr_0001z; 
- prod = g1 * g2 * trr_10x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * trr_10x * hrr_0100y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - prod = g3 * hrr_1100y * wt; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * wt; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g1 = ak*2 * hrr_1110y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; - double hrr_1111y = hrr_2011y - (rj[1] - ri[1]) * hrr_1011y; - 
g3 = al*2 * ak*2 * hrr_1111y; - prod = g3 * fac * wt; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * hrr_1110y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_01z; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011z; - prod = g3 * fac * hrr_1100y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - prod = g3 * hrr_0100y * trr_10z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0111y; - prod = g3 * fac * trr_10z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * hrr_0110y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100y; - vk_23zx += prod * 
vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * trr_11z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_1011z; - prod = g3 * fac * hrr_0100y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_1011x; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_11x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_11x; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * trr_10x * hrr_0100z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10x; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g1 = ak*2 * hrr_0110z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - 
double hrr_0111z = hrr_1011z - (rj[2] - ri[2]) * hrr_0011z; - g3 = al*2 * ak*2 * hrr_0111z; - prod = g3 * trr_10x * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - prod = g3 * trr_10y * hrr_0100z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10y; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_1011y; - prod = g3 * fac * hrr_0100z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_11y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * hrr_0110z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0111z; - prod = g3 * fac * trr_10y; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if 
(jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * ak*2 * hrr_0011x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - prod = g3 * 1 * hrr_1100z; - vk_23xx += prod * vk_dd; - vj_23xx += prod * vj_dd; - g1 = ak*2 * trr_01x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100z; - vk_23xy += prod * vk_dd; - vj_23xy += prod * vj_dd; - g1 = ak*2 * trr_01x; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * 1; - vk_23xz += prod * vk_dd; - vj_23xz += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100z; - vk_23yx += prod * vk_dd; - vj_23yx += prod * vj_dd; - g3 = al*2 * ak*2 * hrr_0011y; - prod = g3 * fac * hrr_1100z; - vk_23yy += prod * vk_dd; - vj_23yy += prod * vj_dd; - g1 = ak*2 * trr_01y; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * fac; - vk_23yz += prod * vk_dd; - vj_23yz += prod * vj_dd; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g1 = ak*2 * hrr_1110z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_23zx += prod * vk_dd; - vj_23zx += prod * vj_dd; - g1 = ak*2 * hrr_1110z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_23zy += prod * vk_dd; - vj_23zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_2011z = trr_22z - zlzk * trr_21z; - double hrr_1111z = hrr_2011z - (rj[2] - ri[2]) * hrr_1011z; - g3 = al*2 * ak*2 * hrr_1111z; - prod = g3 * fac * 1; - vk_23zz += prod * vk_dd; - vj_23zz += prod * vj_dd; - } - } - } 
- } - if (vk != NULL) { - atomicAdd(vk + (ka*natm+la)*9 + 0, vk_23xx); - atomicAdd(vk + (ka*natm+la)*9 + 1, vk_23xy); - atomicAdd(vk + (ka*natm+la)*9 + 2, vk_23xz); - atomicAdd(vk + (ka*natm+la)*9 + 3, vk_23yx); - atomicAdd(vk + (ka*natm+la)*9 + 4, vk_23yy); - atomicAdd(vk + (ka*natm+la)*9 + 5, vk_23yz); - atomicAdd(vk + (ka*natm+la)*9 + 6, vk_23zx); - atomicAdd(vk + (ka*natm+la)*9 + 7, vk_23zy); - atomicAdd(vk + (ka*natm+la)*9 + 8, vk_23zz); - } - if (vj != NULL) { - atomicAdd(vj + (ka*natm+la)*9 + 0, vj_23xx); - atomicAdd(vj + (ka*natm+la)*9 + 1, vj_23xy); - atomicAdd(vj + (ka*natm+la)*9 + 2, vj_23xz); - atomicAdd(vj + (ka*natm+la)*9 + 3, vj_23yx); - atomicAdd(vj + (ka*natm+la)*9 + 4, vj_23yy); - atomicAdd(vj + (ka*natm+la)*9 + 5, vj_23yz); - atomicAdd(vj + (ka*natm+la)*9 + 6, vj_23zx); - atomicAdd(vj + (ka*natm+la)*9 + 7, vj_23zy); - atomicAdd(vj + (ka*natm+la)*9 + 8, vj_23zz); - } - - double vk_30xx = 0; - double vj_30xx = 0; - double vk_30xy = 0; - double vj_30xy = 0; - double vk_30xz = 0; - double vj_30xz = 0; - double vk_30yx = 0; - double vj_30yx = 0; - double vk_30yy = 0; - double vj_30yy = 0; - double vk_30yz = 0; - double vj_30yz = 0; - double vk_30zx = 0; - double vj_30zx = 0; - double vk_30zy = 0; - double vj_30zy = 0; - double vk_30zz = 0; - double vj_30zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % 
jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; 
- double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double hrr_3001x = trr_31x - xlxk * trr_30x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_2101x = hrr_3001x - (rj[0] - ri[0]) * hrr_2001x; - g3 = ai*2 * al*2 * hrr_2101x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g3 -= 1 * al*2 * hrr_0101x; - prod = g3 * 1 * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g1 = al*2 * hrr_1101x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1101x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - g2 = ai*2 * hrr_2100x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g3 = ai*2 * al*2 * hrr_1001y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - prod = g3 * hrr_1100x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * 
hrr_2100x; - g2 -= 1 * hrr_0100x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * hrr_1100x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1101x; - prod = g3 * trr_10y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0101x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - g3 = ai*2 * al*2 * hrr_2001y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * hrr_0100x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10y; - 
vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * hrr_0100x * trr_10y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1101x; - prod = g3 * 1 * trr_10z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * trr_10z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * hrr_0100x * trr_10z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - 
double hrr_2001z = trr_21z - zlzk * trr_20z; - g3 = ai*2 * al*2 * hrr_2001z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * hrr_0100x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_2001x; - g3 -= 1 * al*2 * hrr_0001x; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_0100y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g1 = al*2 * hrr_0101y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = ai*2 * al*2 * hrr_1101y; - prod = g3 * trr_10x * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * trr_10x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10x; - vk_30zy += prod * vk_dd; 
- vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * trr_10x * hrr_0100y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * hrr_1100y * wt; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * wt; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * hrr_1100y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * wt; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_3001y = trr_31y - ylyk * trr_30y; - double hrr_2101y = hrr_3001y - (rj[1] - ri[1]) * hrr_2001y; - g3 = ai*2 * al*2 * hrr_2101y; - g3 -= 1 * al*2 * hrr_0101y; - prod = g3 * fac * wt; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = ai*2 * trr_10z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ai*2 * hrr_2100y; - g2 -= 1 * hrr_0100y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - 
vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001z; - prod = g3 * fac * hrr_1100y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * hrr_0100y * trr_10z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * trr_10z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * hrr_0100y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1101y; - prod = g3 * fac * trr_10z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = ai*2 * trr_20z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ai*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_2001z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * fac * hrr_0100y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - 
vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_2001x; - g3 -= 1 * al*2 * hrr_0001x; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_0100z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * hrr_0100z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * trr_10x * hrr_0100z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10x; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g1 = al*2 * hrr_0101z; - g2 = ai*2 * trr_20x; - g2 -= 1 * fac; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * trr_10x; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = ai*2 * al*2 * hrr_1101z; - prod = g3 * trr_10x * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { 
- dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * trr_10y * hrr_0100z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * hrr_0100z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * trr_10y; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_0100z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_2001y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * fac * hrr_0100z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ai*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * trr_10y; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = ai*2 * trr_20y; - g2 -= 1 * 1; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1101z; - prod = g3 * fac * trr_10y; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = 
dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ai*2 * al*2 * hrr_1001x; - prod = g3 * 1 * hrr_1100z; - vk_30xx += prod * vk_dd; - vj_30xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * hrr_1100z; - vk_30xy += prod * vk_dd; - vj_30xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * 1; - vk_30xz += prod * vk_dd; - vj_30xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * hrr_1100z; - vk_30yx += prod * vk_dd; - vj_30yx += prod * vj_dd; - g3 = ai*2 * al*2 * hrr_1001y; - prod = g3 * fac * hrr_1100z; - vk_30yy += prod * vk_dd; - vj_30yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ai*2 * hrr_2100z; - g2 -= 1 * hrr_0100z; - prod = g1 * g2 * fac; - vk_30yz += prod * vk_dd; - vj_30yz += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = ai*2 * trr_10x; - prod = g1 * g2 * 1; - vk_30zx += prod * vk_dd; - vj_30zx += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = ai*2 * trr_10y; - prod = g1 * g2 * fac; - vk_30zy += prod * vk_dd; - vj_30zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_3001z = trr_31z - zlzk * trr_30z; - double hrr_2101z = hrr_3001z - (rj[2] - ri[2]) * hrr_2001z; - g3 = ai*2 * al*2 * hrr_2101z; - g3 -= 1 * al*2 * hrr_0101z; - prod = g3 * fac * 1; - vk_30zz += prod * vk_dd; - vj_30zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ia)*9 + 0, vk_30xx); - atomicAdd(vk + (la*natm+ia)*9 + 1, vk_30xy); - atomicAdd(vk + (la*natm+ia)*9 + 2, vk_30xz); - atomicAdd(vk + (la*natm+ia)*9 + 3, vk_30yx); - atomicAdd(vk + (la*natm+ia)*9 + 4, vk_30yy); - atomicAdd(vk + (la*natm+ia)*9 + 5, vk_30yz); - atomicAdd(vk + (la*natm+ia)*9 + 6, vk_30zx); - atomicAdd(vk + (la*natm+ia)*9 + 7, vk_30zy); - atomicAdd(vk + (la*natm+ia)*9 + 8, vk_30zz); - 
} - if (vj != NULL) { - atomicAdd(vj + (la*natm+ia)*9 + 0, vj_30xx); - atomicAdd(vj + (la*natm+ia)*9 + 1, vj_30xy); - atomicAdd(vj + (la*natm+ia)*9 + 2, vj_30xz); - atomicAdd(vj + (la*natm+ia)*9 + 3, vj_30yx); - atomicAdd(vj + (la*natm+ia)*9 + 4, vj_30yy); - atomicAdd(vj + (la*natm+ia)*9 + 5, vj_30yz); - atomicAdd(vj + (la*natm+ia)*9 + 6, vj_30zx); - atomicAdd(vj + (la*natm+ia)*9 + 7, vj_30zy); - atomicAdd(vj + (la*natm+ia)*9 + 8, vj_30zz); - } - - double vk_31xx = 0; - double vj_31xx = 0; - double vk_31xy = 0; - double vj_31xy = 0; - double vk_31xz = 0; - double vj_31xz = 0; - double vk_31yx = 0; - double vj_31yx = 0; - double vk_31yy = 0; - double vj_31yy = 0; - double vk_31yz = 0; - double vj_31yz = 0; - double vk_31zx = 0; - double vj_31zx = 0; - double vk_31zy = 0; - double vj_31zy = 0; - double vk_31zz = 0; - double vj_31zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - 
double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; - double b00 = .5 * rt_aa; - double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; - double hrr_3001x = trr_31x - xlxk * trr_30x; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_2101x = hrr_3001x - (rj[0] - ri[0]) * hrr_2001x; - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - double hrr_1201x = hrr_2101x 
- (rj[0] - ri[0]) * hrr_1101x; - g3 = aj*2 * al*2 * hrr_1201x; - g3 -= 1 * al*2 * hrr_1001x; - prod = g3 * 1 * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1101x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1101x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - double hrr_2100x = trr_30x - (rj[0] - ri[0]) * trr_20x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - double hrr_1200x = hrr_2100x - (rj[0] - ri[0]) * hrr_1100x; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * hrr_1100x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1200x; - g2 -= 1 * trr_10x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - 
double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * hrr_1100x * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_01x = cpx * fac; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - double hrr_0201x = hrr_1101x - (rj[0] - ri[0]) * hrr_0101x; - g3 = aj*2 * al*2 * hrr_0201x; - g3 -= 1 * al*2 * hrr_0001x; - prod = g3 * trr_10y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0101x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - double hrr_0200x = hrr_1100x - (rj[0] - ri[0]) * hrr_0100x; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g3 = aj*2 * al*2 * hrr_1101y; - prod = g3 * hrr_0100x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0100z; - 
prod = g1 * g2 * hrr_0100x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * hrr_0100x * trr_10y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_0201x; - g3 -= 1 * al*2 * hrr_0001x; - prod = g3 * 1 * trr_10z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - prod = g1 * g2 * trr_10z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * hrr_0100x * trr_10z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0200x; - g2 -= 1 * fac; - 
prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g3 = aj*2 * al*2 * hrr_1101z; - prod = g3 * hrr_0100x * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_1101x; - prod = g3 * hrr_0100y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0200y = hrr_1100y - (rj[1] - ri[1]) * hrr_0100y; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_0100y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double hrr_0201y = hrr_1101y - (rj[1] - ri[1]) * hrr_0101y; - g3 = aj*2 * al*2 * hrr_0201y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * trr_10x * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * trr_10x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1100x; - prod = g1 * 
g2 * hrr_0100y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * trr_10x * hrr_0100y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * hrr_1100y * wt; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; - double hrr_2100y = trr_30y - (rj[1] - ri[1]) * trr_20y; - double hrr_1200y = hrr_2100y - (rj[1] - ri[1]) * hrr_1100y; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * wt; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * hrr_1100y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * wt; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; - double hrr_3001y = trr_31y - ylyk * trr_30y; - double hrr_2101y = hrr_3001y - (rj[1] - ri[1]) * hrr_2001y; - double hrr_1201y = hrr_2101y - (rj[1] - ri[1]) * hrr_1101y; - g3 = aj*2 * al*2 * hrr_1201y; - g3 -= 1 * al*2 * hrr_1001y; - prod = g3 * fac * wt; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = aj*2 * hrr_0100z; - prod = g1 * g2 * 
fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = aj*2 * hrr_1200y; - g2 -= 1 * trr_10y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101z; - prod = g3 * fac * hrr_1100y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * hrr_0100y * trr_10z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - prod = g1 * g2 * trr_10z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * hrr_0100y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0201y; - g3 -= 1 * al*2 * hrr_0001y; - prod = g3 * fac * trr_10z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = aj*2 * hrr_1100z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = aj*2 * hrr_0200y; - g2 -= 1 * 1; - 
prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_1101z; - prod = g3 * fac * hrr_0100y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_1101x; - prod = g3 * 1 * hrr_0100z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_0100z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0200z = hrr_1100z - (rj[2] - ri[2]) * hrr_0100z; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * hrr_0100z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * trr_10x * hrr_0100z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10x; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = aj*2 * hrr_1100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * trr_10x; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double hrr_0201z = hrr_1101z - (rj[2] - ri[2]) * hrr_0101z; - g3 = aj*2 * al*2 * hrr_0201z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * trr_10x * 1; - vk_31zz 
+= prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * trr_10y * hrr_0100z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * hrr_0100z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * trr_10y; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_0100z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_1101y; - prod = g3 * fac * hrr_0100z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = aj*2 * hrr_0200z; - g2 -= 1 * wt; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * trr_10y; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = aj*2 * hrr_1100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0201z; - g3 -= 1 * al*2 * hrr_0001z; - prod = g3 * fac * trr_10y; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * 
dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = aj*2 * al*2 * hrr_0101x; - prod = g3 * 1 * hrr_1100z; - vk_31xx += prod * vk_dd; - vj_31xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * hrr_1100z; - vk_31xy += prod * vk_dd; - vj_31xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; - double hrr_2100z = trr_30z - (rj[2] - ri[2]) * trr_20z; - double hrr_1200z = hrr_2100z - (rj[2] - ri[2]) * hrr_1100z; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * 1; - vk_31xz += prod * vk_dd; - vj_31xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * hrr_1100z; - vk_31yx += prod * vk_dd; - vj_31yx += prod * vj_dd; - g3 = aj*2 * al*2 * hrr_0101y; - prod = g3 * fac * hrr_1100z; - vk_31yy += prod * vk_dd; - vj_31yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = aj*2 * hrr_1200z; - g2 -= 1 * trr_10z; - prod = g1 * g2 * fac; - vk_31yz += prod * vk_dd; - vj_31yz += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = aj*2 * hrr_0100x; - prod = g1 * g2 * 1; - vk_31zx += prod * vk_dd; - vj_31zx += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = aj*2 * hrr_0100y; - prod = g1 * g2 * fac; - vk_31zy += prod * vk_dd; - vj_31zy += prod * vj_dd; - double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; - double hrr_3001z = trr_31z - zlzk * trr_30z; - double hrr_2101z = hrr_3001z - (rj[2] - ri[2]) * hrr_2001z; - double hrr_1201z = hrr_2101z - (rj[2] - ri[2]) * hrr_1101z; - g3 = aj*2 * al*2 * hrr_1201z; - g3 -= 1 * al*2 * hrr_1001z; - prod = g3 * fac * 1; - vk_31zz += prod * vk_dd; - vj_31zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ja)*9 + 0, vk_31xx); - 
atomicAdd(vk + (la*natm+ja)*9 + 1, vk_31xy); - atomicAdd(vk + (la*natm+ja)*9 + 2, vk_31xz); - atomicAdd(vk + (la*natm+ja)*9 + 3, vk_31yx); - atomicAdd(vk + (la*natm+ja)*9 + 4, vk_31yy); - atomicAdd(vk + (la*natm+ja)*9 + 5, vk_31yz); - atomicAdd(vk + (la*natm+ja)*9 + 6, vk_31zx); - atomicAdd(vk + (la*natm+ja)*9 + 7, vk_31zy); - atomicAdd(vk + (la*natm+ja)*9 + 8, vk_31zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ja)*9 + 0, vj_31xx); - atomicAdd(vj + (la*natm+ja)*9 + 1, vj_31xy); - atomicAdd(vj + (la*natm+ja)*9 + 2, vj_31xz); - atomicAdd(vj + (la*natm+ja)*9 + 3, vj_31yx); - atomicAdd(vj + (la*natm+ja)*9 + 4, vj_31yy); - atomicAdd(vj + (la*natm+ja)*9 + 5, vj_31yz); - atomicAdd(vj + (la*natm+ja)*9 + 6, vj_31zx); - atomicAdd(vj + (la*natm+ja)*9 + 7, vj_31zy); - atomicAdd(vj + (la*natm+ja)*9 + 8, vj_31zz); - } - - double vk_32xx = 0; - double vj_32xx = 0; - double vk_32xy = 0; - double vj_32xy = 0; - double vk_32xz = 0; - double vj_32xz = 0; - double vk_32yx = 0; - double vj_32yx = 0; - double vk_32yy = 0; - double vj_32yy = 0; - double vk_32yz = 0; - double vj_32yz = 0; - double vk_32zx = 0; - double vj_32zx = 0; - double vk_32zy = 0; - double vj_32zy = 0; - double vk_32zz = 0; - double vj_32zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + 
aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 
1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = trr_22x - xlxk * trr_21x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - double hrr_1111x = hrr_2011x - (rj[0] - ri[0]) * hrr_1011x; - g3 = ak*2 * al*2 * hrr_1111x; - prod = g3 * 1 * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g1 = al*2 * hrr_1101x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_1101x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_0001y = trr_01y - ylyk * 1; - g1 = al*2 * hrr_0001y; - double hrr_1110x = trr_21x - (rj[0] - ri[0]) * trr_11x; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - g3 = ak*2 * al*2 * hrr_0011y; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - prod = g3 * hrr_1100x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_0001z = trr_01z - zlzk * wt; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * hrr_1110x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = trr_02z - zlzk * trr_01z; - g3 
= ak*2 * al*2 * hrr_0011z; - prod = g3 * hrr_1100x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0111x = hrr_1011x - (rj[0] - ri[0]) * hrr_0011x; - g3 = ak*2 * al*2 * hrr_0111x; - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g1 = al*2 * hrr_0101x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g1 = al*2 * hrr_1001y; - double hrr_0110x = trr_11x - (rj[0] - ri[0]) * trr_01x; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - g3 = ak*2 * al*2 * hrr_1011y; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - prod = g3 * hrr_0100x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100x; - 
vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * hrr_0100x * trr_10y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_0111x; - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * trr_10z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * hrr_0100x * trr_10z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * hrr_0110x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx 
+= prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - g3 = ak*2 * al*2 * hrr_1011z; - prod = g3 * hrr_0100x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_1011x; - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0110y = trr_11y - (rj[1] - ri[1]) * trr_01y; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_0100y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g1 = al*2 * hrr_0101y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double hrr_0111y = hrr_1011y - (rj[1] - ri[1]) * hrr_0011y; - g3 = ak*2 * al*2 * hrr_0111y; - prod = g3 * trr_10x * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * trr_10x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100y; - vk_32zx += prod * vk_dd; - 
vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * trr_10x * hrr_0100y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - prod = g3 * hrr_1100y * wt; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_1110y = trr_21y - (rj[1] - ri[1]) * trr_11y; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * wt; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * hrr_1100y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g1 = al*2 * hrr_1101y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * wt; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; - double hrr_1111y = hrr_2011y - (rj[1] - ri[1]) * hrr_1011y; - g3 = ak*2 * al*2 * hrr_1111y; - prod = g3 * fac * wt; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = ak*2 * trr_01z; - prod = g1 * g2 * fac; - 
vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = ak*2 * hrr_1110y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011z; - prod = g3 * fac * hrr_1100y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * hrr_0100y * trr_10z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * trr_10z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * hrr_0100y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0111y; - prod = g3 * fac * trr_10z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = ak*2 * trr_11z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = ak*2 * hrr_0110y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * 
hrr_1011z; - prod = g3 * fac * hrr_0100y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_1011x; - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_0100z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0110z = trr_11z - (rj[2] - ri[2]) * trr_01z; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * hrr_0100z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * trr_10x * hrr_0100z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10x; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g1 = al*2 * hrr_0101z; - g2 = ak*2 * trr_11x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * trr_10x; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double hrr_0111z = hrr_1011z - (rj[2] - ri[2]) * hrr_0011z; - g3 = ak*2 * al*2 * hrr_0111z; - prod = g3 * trr_10x * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != 
NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - prod = g3 * trr_10y * hrr_0100z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * hrr_0100z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * trr_10y; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_0100z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_1011y; - prod = g3 * fac * hrr_0100z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = ak*2 * hrr_0110z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * trr_10y; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = ak*2 * trr_11y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0111z; - prod = g3 * fac * trr_10y; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { 
- if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = ak*2 * al*2 * hrr_0011x; - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - prod = g3 * 1 * hrr_1100z; - vk_32xx += prod * vk_dd; - vj_32xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * hrr_1100z; - vk_32xy += prod * vk_dd; - vj_32xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_1110z = trr_21z - (rj[2] - ri[2]) * trr_11z; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * 1; - vk_32xz += prod * vk_dd; - vj_32xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * hrr_1100z; - vk_32yx += prod * vk_dd; - vj_32yx += prod * vj_dd; - g3 = ak*2 * al*2 * hrr_0011y; - prod = g3 * fac * hrr_1100z; - vk_32yy += prod * vk_dd; - vj_32yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = ak*2 * hrr_1110z; - prod = g1 * g2 * fac; - vk_32yz += prod * vk_dd; - vj_32yz += prod * vj_dd; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g1 = al*2 * hrr_1101z; - g2 = ak*2 * trr_01x; - prod = g1 * g2 * 1; - vk_32zx += prod * vk_dd; - vj_32zx += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = ak*2 * trr_01y; - prod = g1 * g2 * fac; - vk_32zy += prod * vk_dd; - vj_32zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - double hrr_2011z = trr_22z - zlzk * trr_21z; - double hrr_1111z = hrr_2011z - (rj[2] - ri[2]) * hrr_1011z; - g3 = ak*2 * al*2 * hrr_1111z; - prod = g3 * fac * 1; - vk_32zz += prod * vk_dd; - vj_32zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+ka)*9 + 0, vk_32xx); - atomicAdd(vk + (la*natm+ka)*9 + 1, vk_32xy); - atomicAdd(vk + (la*natm+ka)*9 + 2, vk_32xz); - atomicAdd(vk + 
(la*natm+ka)*9 + 3, vk_32yx); - atomicAdd(vk + (la*natm+ka)*9 + 4, vk_32yy); - atomicAdd(vk + (la*natm+ka)*9 + 5, vk_32yz); - atomicAdd(vk + (la*natm+ka)*9 + 6, vk_32zx); - atomicAdd(vk + (la*natm+ka)*9 + 7, vk_32zy); - atomicAdd(vk + (la*natm+ka)*9 + 8, vk_32zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+ka)*9 + 0, vj_32xx); - atomicAdd(vj + (la*natm+ka)*9 + 1, vj_32xy); - atomicAdd(vj + (la*natm+ka)*9 + 2, vj_32xz); - atomicAdd(vj + (la*natm+ka)*9 + 3, vj_32yx); - atomicAdd(vj + (la*natm+ka)*9 + 4, vj_32yy); - atomicAdd(vj + (la*natm+ka)*9 + 5, vj_32yz); - atomicAdd(vj + (la*natm+ka)*9 + 6, vj_32zx); - atomicAdd(vj + (la*natm+ka)*9 + 7, vj_32zy); - atomicAdd(vj + (la*natm+ka)*9 + 8, vj_32zz); - } - - double vk_33xx = 0; - double vj_33xx = 0; - double vk_33xy = 0; - double vj_33xy = 0; - double vk_33xz = 0; - double vj_33xz = 0; - double vk_33yx = 0; - double vj_33yx = 0; - double vk_33yy = 0; - double vj_33yy = 0; - double vk_33yz = 0; - double vj_33yz = 0; - double vk_33zx = 0; - double vj_33zx = 0; - double vk_33zy = 0; - double vj_33zy = 0; - double vk_33zz = 0; - double vj_33zz = 0; - for (int klp = 0; klp < kprim*lprim; ++klp) { - int kp = klp / lprim; - int lp = klp % lprim; - double ak = expk[kp]; - double al = expl[lp]; - double akl = ak + al; - double al_akl = al / akl; - double xlxk = rl[0] - rk[0]; - double ylyk = rl[1] - rk[1]; - double zlzk = rl[2] - rk[2]; - double theta_kl = ak * al / akl; - double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); - double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; - double xqc = xlxk * al_akl; - double yqc = ylyk * al_akl; - double zqc = zlzk * al_akl; - double xkl = rk[0] + xqc; - double ykl = rk[1] + yqc; - double zkl = rk[2] + zqc; - for (int ijp = 0; ijp < iprim*jprim; ++ijp) { - int ip = ijp / jprim; - int jp = ijp % jprim; - double ai = expi[ip]; - double aj = expj[jp]; - double aij = ai + aj; - double *Rpa = Rpa_cicj + ijp * TILE2*4; - double cicj = Rpa[sh_ij+3*TILE2]; - double fac = cicj * ckcl 
/ (aij*akl*sqrt(aij+akl)); - double xpa = Rpa[sh_ij+0*TILE2]; - double ypa = Rpa[sh_ij+1*TILE2]; - double zpa = Rpa[sh_ij+2*TILE2]; - double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij - double yij = ri[1] + ypa; - double zij = ri[2] + zpa; - double xpq = xij - xkl; - double ypq = yij - ykl; - double zpq = zij - zkl; - double theta = aij * akl / (aij + akl); - double rr = xpq * xpq + ypq * ypq + zpq * zpq; - double theta_rr = theta * rr; - if (omega == 0) { - rys_roots(3, theta_rr, rw); - } else { - double theta_fac = omega * omega / (omega * omega + theta); - rys_roots(3, theta_fac*theta_rr, rw); - fac *= sqrt(theta_fac); - for (int irys = 0; irys < 3; ++irys) { - rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; - } - } - __syncthreads(); - if (task_id < ntasks) { - for (int irys = 0; irys < 3; ++irys) { - double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; - double rt = rw[sq_id + 2*irys *nsq_per_block]; - double rt_aa = rt / (aij + akl); - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[0*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[0*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double rt_akl = rt_aa * aij; - double cpx = xqc + xpq*rt_akl; - double rt_aij = rt_aa * akl; - double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; - double trr_10x = c0x * fac; - double b10 = .5/aij * (1 - rt_aij); - double trr_20x = c0x * trr_10x + 1*b10 * fac; - double b00 = .5 * rt_aa; - double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; - double b01 = .5/akl * (1 - rt_akl); - double trr_11x = cpx * trr_10x + 1*b00 * fac; - double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; - double hrr_2011x = 
trr_22x - xlxk * trr_21x; - double hrr_2001x = trr_21x - xlxk * trr_20x; - double hrr_2002x = hrr_2011x - xlxk * hrr_2001x; - double trr_01x = cpx * fac; - double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; - double hrr_1011x = trr_12x - xlxk * trr_11x; - double hrr_1001x = trr_11x - xlxk * trr_10x; - double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; - double hrr_1102x = hrr_2002x - (rj[0] - ri[0]) * hrr_1002x; - double hrr_1100x = trr_20x - (rj[0] - ri[0]) * trr_10x; - g3 = al*2 * (al*2 * hrr_1102x - 1 * hrr_1100x); - prod = g3 * 1 * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - double hrr_1101x = hrr_2001x - (rj[0] - ri[0]) * hrr_1001x; - g1 = al*2 * hrr_1101x; - double cpy = yqc + ypq*rt_akl; - double trr_01y = cpy * 1; - double hrr_0001y = trr_01y - ylyk * 1; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1101x; - double cpz = zqc + zpq*rt_akl; - double trr_01z = cpz * wt; - double hrr_0001z = trr_01z - zlzk * wt; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_02y = cpy * trr_01y + 1*b01 * 1; - double hrr_0011y = trr_02y - ylyk * trr_01y; - double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * hrr_1100x * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1101x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_02z = cpz * trr_01z + 1*b01 * wt; - double hrr_0011z = 
trr_02z - zlzk * trr_01z; - double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * hrr_1100x * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[1*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[1*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - double trr_02x = cpx * trr_01x + 1*b01 * fac; - double hrr_0011x = trr_02x - xlxk * trr_01x; - double hrr_0001x = trr_01x - xlxk * fac; - double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; - double hrr_0102x = hrr_1002x - (rj[0] - ri[0]) * hrr_0002x; - double hrr_0100x = trr_10x - (rj[0] - ri[0]) * fac; - g3 = al*2 * (al*2 * hrr_0102x - 1 * hrr_0100x); - double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; - double trr_10y = c0y * 1; - prod = g3 * trr_10y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - double hrr_0101x = hrr_1001x - (rj[0] - ri[0]) * hrr_0001x; - g1 = al*2 * hrr_0101x; - double trr_11y = cpy * trr_10y + 1*b00 * 1; - double hrr_1001y = trr_11y - ylyk * trr_10y; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; - double hrr_1011y = trr_12y - ylyk * trr_11y; - double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; - g3 = al*2 * (al*2 * hrr_1002y - 1 * 
trr_10y); - prod = g3 * hrr_0100x * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * hrr_0100x * trr_10y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[2*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[2*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_0102x - 1 * hrr_0100x); - double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; - double trr_10z = c0z * wt; - prod = g3 * 1 * trr_10z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0101x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0101x; - double trr_11z = cpz * trr_10z + 1*b00 * wt; - double hrr_1001z = trr_11z - zlzk * trr_10z; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * trr_10z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * hrr_0100x * trr_10z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * 
hrr_0001y; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0101x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; - double hrr_1011z = trr_12z - zlzk * trr_11z; - double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; - g3 = al*2 * (al*2 * hrr_1002z - 1 * trr_10z); - prod = g3 * hrr_0100x * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[3*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[3*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_1002x - 1 * trr_10x); - double hrr_0100y = trr_10y - (rj[1] - ri[1]) * 1; - prod = g3 * hrr_0100y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0101y = hrr_1001y - (rj[1] - ri[1]) * hrr_0001y; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_0100y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double hrr_0102y = hrr_1002y - (rj[1] - ri[1]) * hrr_0002y; - g3 = al*2 * (al*2 * hrr_0102y - 1 * hrr_0100y); - prod = g3 * trr_10x * wt; - vk_33yy += 
prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * trr_10x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * trr_10x * hrr_0100y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[4*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[4*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - double trr_20y = c0y * trr_10y + 1*b10 * 1; - double hrr_1100y = trr_20y - (rj[1] - ri[1]) * trr_10y; - prod = g3 * hrr_1100y * wt; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; - double hrr_2001y = trr_21y - ylyk * trr_20y; - double hrr_1101y = hrr_2001y - (rj[1] - ri[1]) * hrr_1001y; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * wt; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * hrr_1100y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * wt; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; - double hrr_2011y = trr_22y - ylyk * trr_21y; 
- double hrr_2002y = hrr_2011y - ylyk * hrr_2001y; - double hrr_1102y = hrr_2002y - (rj[1] - ri[1]) * hrr_1002y; - g3 = al*2 * (al*2 * hrr_1102y - 1 * hrr_1100y); - prod = g3 * fac * wt; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1101y; - g2 = al*2 * hrr_0001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0001z; - g2 = al*2 * hrr_1101y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002z - 1 * wt); - prod = g3 * fac * hrr_1100y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[5*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[5*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * hrr_0100y * trr_10z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * trr_10z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_1001z; - prod = g1 * g2 * hrr_0100y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0102y - 1 * hrr_0100y); - prod = g3 * fac * trr_10z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0101y; - g2 = al*2 * 
hrr_1001z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1001z; - g2 = al*2 * hrr_0101y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_1002z - 1 * trr_10z); - prod = g3 * fac * hrr_0100y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[6*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[6*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_1002x - 1 * trr_10x); - double hrr_0100z = trr_10z - (rj[2] - ri[2]) * wt; - prod = g3 * 1 * hrr_0100z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_1001x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_0100z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_1001x; - double hrr_0101z = hrr_1001z - (rj[2] - ri[2]) * hrr_0001z; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * hrr_0100z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * trr_10x * hrr_0100z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10x; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = al*2 * hrr_1001x; - prod = g1 * g2 * 1; - vk_33zx 
+= prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * trr_10x; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double hrr_0102z = hrr_1002z - (rj[2] - ri[2]) * hrr_0002z; - g3 = al*2 * (al*2 * hrr_0102z - 1 * hrr_0100z); - prod = g3 * trr_10x * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[7*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[7*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - prod = g3 * trr_10y * hrr_0100z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * hrr_0100z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * trr_10y; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_0100z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_1002y - 1 * trr_10y); - prod = g3 * fac * hrr_0100z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_1001y; - g2 = al*2 * hrr_0101z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * trr_10y; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_0101z; - g2 = al*2 * hrr_1001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0102z - 1 * 
hrr_0100z); - prod = g3 * fac * trr_10y; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - if (vk != NULL) { - dd_jk = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; - dd_jl = dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; - vk_dd = dd_jk + dd_jl; - if (jk.n_dm > 1) { - dd_jk = dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; - dd_jl = dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; - vk_dd += dd_jk + dd_jl; - } - } - if (vj != NULL) { - if (jk.n_dm == 1) { - vj_dd = dm_cache[8*TILE2+sh_ij] * dm[(l0+0)*nao+k0+0]; - } else { - vj_dd = dm_cache[8*TILE2+sh_ij] * (dm[(l0+0)*nao+k0+0] + dm[(nao+l0+0)*nao+k0+0]); - } - } - g3 = al*2 * (al*2 * hrr_0002x - 1 * fac); - double trr_20z = c0z * trr_10z + 1*b10 * wt; - double hrr_1100z = trr_20z - (rj[2] - ri[2]) * trr_10z; - prod = g3 * 1 * hrr_1100z; - vk_33xx += prod * vk_dd; - vj_33xx += prod * vj_dd; - g1 = al*2 * hrr_0001x; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * hrr_1100z; - vk_33xy += prod * vk_dd; - vj_33xy += prod * vj_dd; - g1 = al*2 * hrr_0001x; - double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; - double hrr_2001z = trr_21z - zlzk * trr_20z; - double hrr_1101z = hrr_2001z - (rj[2] - ri[2]) * hrr_1001z; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * 1; - vk_33xz += prod * vk_dd; - vj_33xz += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * hrr_1100z; - vk_33yx += prod * vk_dd; - vj_33yx += prod * vj_dd; - g3 = al*2 * (al*2 * hrr_0002y - 1 * 1); - prod = g3 * fac * hrr_1100z; - vk_33yy += prod * vk_dd; - vj_33yy += prod * vj_dd; - g1 = al*2 * hrr_0001y; - g2 = al*2 * hrr_1101z; - prod = g1 * g2 * fac; - vk_33yz += prod * vk_dd; - vj_33yz += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = al*2 * hrr_0001x; - prod = g1 * g2 * 1; - vk_33zx += prod * vk_dd; - vj_33zx += prod * vj_dd; - g1 = al*2 * hrr_1101z; - g2 = al*2 * hrr_0001y; - prod = g1 * g2 * fac; - vk_33zy += prod * vk_dd; - vj_33zy += prod * vj_dd; - double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; - 
double hrr_2011z = trr_22z - zlzk * trr_21z; - double hrr_2002z = hrr_2011z - zlzk * hrr_2001z; - double hrr_1102z = hrr_2002z - (rj[2] - ri[2]) * hrr_1002z; - g3 = al*2 * (al*2 * hrr_1102z - 1 * hrr_1100z); - prod = g3 * fac * 1; - vk_33zz += prod * vk_dd; - vj_33zz += prod * vj_dd; - } - } - } - } - if (vk != NULL) { - atomicAdd(vk + (la*natm+la)*9 + 0, vk_33xx); - atomicAdd(vk + (la*natm+la)*9 + 1, vk_33xy); - atomicAdd(vk + (la*natm+la)*9 + 2, vk_33xz); - atomicAdd(vk + (la*natm+la)*9 + 3, vk_33yx); - atomicAdd(vk + (la*natm+la)*9 + 4, vk_33yy); - atomicAdd(vk + (la*natm+la)*9 + 5, vk_33yz); - atomicAdd(vk + (la*natm+la)*9 + 6, vk_33zx); - atomicAdd(vk + (la*natm+la)*9 + 7, vk_33zy); - atomicAdd(vk + (la*natm+la)*9 + 8, vk_33zz); - } - if (vj != NULL) { - atomicAdd(vj + (la*natm+la)*9 + 0, vj_33xx); - atomicAdd(vj + (la*natm+la)*9 + 1, vj_33xy); - atomicAdd(vj + (la*natm+la)*9 + 2, vj_33xz); - atomicAdd(vj + (la*natm+la)*9 + 3, vj_33yx); - atomicAdd(vj + (la*natm+la)*9 + 4, vj_33yy); - atomicAdd(vj + (la*natm+la)*9 + 5, vj_33yz); - atomicAdd(vj + (la*natm+la)*9 + 6, vj_33zx); - atomicAdd(vj + (la*natm+la)*9 + 7, vj_33zy); - atomicAdd(vj + (la*natm+la)*9 + 8, vj_33zz); - } - } -} -__global__ -void rys_ejk_ip2_1100(RysIntEnvVars envs, JKMatrix jk, BoundsInfo bounds, - ShellQuartet *pool, uint32_t *batch_head) -{ - int b_id = blockIdx.x; - int t_id = threadIdx.x + blockDim.x * threadIdx.y; - ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; - __shared__ int batch_id; - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - } - __syncthreads(); - int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; - int nbatches = bounds.ntile_ij_pairs * nbatches_kl; - while (batch_id < nbatches) { - int batch_ij = batch_id / nbatches_kl; - int batch_kl = batch_id % nbatches_kl; - int nbas = envs.nbas; - int ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, - batch_ij, batch_kl); - if (ntasks > 0) { - int tile_ij = 
bounds.tile_ij_mapping[batch_ij]; - int nbas_tiles = nbas / TILE; - int tile_i = tile_ij / nbas_tiles; - int tile_j = tile_ij % nbas_tiles; - int ish0 = tile_i * TILE; - int jsh0 = tile_j * TILE; - _rys_ejk_ip2_1100(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); - } - if (t_id == 0) { - batch_id = atomicAdd(batch_head, 1); - atomicAdd(batch_head+1, ntasks); - } - __syncthreads(); - } -} - -int rys_ejk_ip2_unrolled(RysIntEnvVars *envs, JKMatrix *jk, BoundsInfo *bounds, - ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers) -{ - int li = bounds->li; - int lj = bounds->lj; - int lk = bounds->lk; - int ll = bounds->ll; - int threads = scheme[0] * scheme[1]; - int nroots = (li + lj + lk + ll + 2) / 2 + 1; - int iprim = bounds->iprim; - int jprim = bounds->jprim; - int ij_prims = iprim * jprim; - int nfi = (li + 1) * (li + 2) / 2; - int nfj = (lj + 1) * (lj + 2) / 2; - int buflen = nroots*2 * threads + nfi*nfj*TILE2 + ij_prims*TILE2*4; - int ijkl = li*125 + lj*25 + lk*5 + ll; - switch (ijkl) { - case 0: rys_ejk_ip2_0000<<>>(*envs, *jk, *bounds, pool, batch_head); break; - case 125: rys_ejk_ip2_1000<<>>(*envs, *jk, *bounds, pool, batch_head); break; - case 130: rys_ejk_ip2_1010<<>>(*envs, *jk, *bounds, pool, batch_head); break; - case 150: rys_ejk_ip2_1100<<>>(*envs, *jk, *bounds, pool, batch_head); break; - default: return 0; - } - return 1; -} diff --git a/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2_type12.cu b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2_type12.cu new file mode 100644 index 00000000..6c14c0a3 --- /dev/null +++ b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2_type12.cu @@ -0,0 +1,12900 @@ +#include "vhf.cuh" +#include "rys_roots_unrolled.cu" +#include "create_tasks_ip1.cu" +int rys_ejk_ip2_type12_unrolled_lmax = 1; +int rys_ejk_ip2_type12_unrolled_max_order = 3; + + +__device__ static +void _rys_ejk_ip2_type12_0000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ + int sq_id 
= threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = 
shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double g1x, g1y, g1z; + double g2x, g2y, g2z; + double g3x, g3y, g3z; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; + double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx = 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + 
double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(2, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= 
sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double hrr_0100x = trr_10x - xjxi * fac; + g1x = aj2 * hrr_0100x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + double hrr_0100y = trr_10y - yjyi * 1; + g1y = aj2 * hrr_0100y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + double hrr_0100z = trr_10z - zjzi * wt; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_10z; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double hrr_1100x = trr_20x - xjxi * trr_10x; + g3x = ai2 * hrr_1100x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + g3y = ai2 * hrr_1100y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + g3z = ai2 * hrr_1100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * 
g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double trr_01x = cpx * fac; + double hrr_0001x = trr_01x - xlxk * fac; + g1x = al2 * hrr_0001x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + g1y = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_01z; + double b01 = .5/akl * (1 - rt_akl); + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + g3x = ak2 * hrr_0011x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + g3y = ak2 * hrr_0011y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * 
(ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, v2xx); + atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy); + atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz); + atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx); + atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy); + atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz); + atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx); + atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy); + atomicAdd(ejk + (ka*natm+la)*9 + 8, v2zz); + atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy); + atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz); + atomicAdd(ejk + 
(ia*natm+ia)*9 + 7, v_iyz);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 6, v_jxz);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 4, v_kyy*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy);
+        atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz);
+        atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz);
+        atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5);
+    }
+}
+/*
+ * Kernel entry point that feeds the _rys_ejk_ip2_type12_0000 worker.
+ *
+ * Each thread block repeatedly claims a batch index from the global counter
+ * batch_head[0], fills its own slice of `pool` with the shell quartets of
+ * that batch and hands them to the worker.  The loop ends once the claimed
+ * index reaches nbatches.
+ *
+ * envs       - integral environment; nbas and env are read here
+ * jk         - forwarded unchanged to the task-fill routine and the worker
+ * bounds     - supplies ntile_ij_pairs, ntile_kl_pairs and tile_ij_mapping
+ * pool       - task queue storage; block b uses the entries starting at
+ *              b*QUEUE_DEPTH
+ * batch_head - [0] next batch index to hand out, [1] running sum of the
+ *              ntasks values returned by the fill routines
+ */ __global__
+void rys_ejk_ip2_type12_0000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *pool, uint32_t *batch_head)
+{
+    int b_id = blockIdx.x;
+    int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index inside the block */
+    ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's task queue */
+    __shared__ int batch_id; /* current batch; written by thread 0 only */
+    if (t_id == 0) {
+        batch_id = atomicAdd(batch_head, 1); /* claim the first batch */
+    }
+    __syncthreads(); /* publish batch_id to every thread of the block */
+    int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceiling division */
+    int nbatches = bounds.ntile_ij_pairs * nbatches_kl;
+    while (batch_id < nbatches) {
+        int batch_ij = batch_id / nbatches_kl; /* batch_id == batch_ij * nbatches_kl + batch_kl */
+        int batch_kl = batch_id % nbatches_kl;
+        int nbas = envs.nbas;
+        double *env = envs.env;
+        double omega = env[PTR_RANGE_OMEGA];
+        int ntasks;
+        if (omega >= 0) { /* NOTE(review): negative omega selects the "sr" filter, presumably short-range; confirm */
+            ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                     batch_ij, batch_kl);
+        } else {
+            ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                        batch_ij, batch_kl);
+        }
+        if (ntasks > 0) {
+            int tile_ij = bounds.tile_ij_mapping[batch_ij];
+            int nbas_tiles = nbas / TILE;
+            int tile_i = tile_ij / nbas_tiles; /* tile_ij == tile_i * nbas_tiles + tile_j */
+            int tile_j = tile_ij % nbas_tiles;
+            int ish0 = tile_i * TILE; /* first i-shell of the tile */
+            int jsh0 = tile_j * TILE; /* first j-shell of the tile */
+            _rys_ejk_ip2_type12_0000(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0);
+        }
+        if (t_id == 0) {
+            batch_id = atomicAdd(batch_head, 1); /* claim the next batch */
+            atomicAdd(batch_head+1, ntasks); /* add this batch's task count to batch_head[1] */
+        }
+        __syncthreads(); /* all threads must see the new batch_id before re-testing the loop */
+    }
+}
+
+/*
+ * Worker for one batch of shell quartets.
+ *
+ * Thread sq_id handles the quartets shl_quartet_idx[task0 + sq_id] for
+ * task0 = 0, nsq_per_block, ...  For each quartet it sums, over all
+ * primitive combinations and quadrature roots, the 3x3 blocks v1*, v2* and
+ * the symmetric blocks v_i*, v_j*, v_k*, v_l*, then adds them atomically
+ * into jk.ejk at the atom pairs (ia,ja), (ka,la), (ia,ia), (ja,ja), (ka,ka)
+ * and (la,la).
+ *
+ * NOTE(review): the body reads three i-components (i0+0 .. i0+2) and a
+ * single component for j, k and l, which suggests a (p,s,s,s) quartet;
+ * confirm against the code generator.
+ *
+ * ish0, jsh0 - first i- and j-shell of the TILE x TILE tile whose pair data
+ *              is staged in shared memory below
+ */ __device__ static
+void _rys_ejk_ip2_type12_1000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0)
+{
+    int sq_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index inside the block */
+    int nsq_per_block = blockDim.x * blockDim.y;
+    int iprim = bounds.iprim;
+    int jprim = bounds.jprim;
+    int kprim = bounds.kprim;
+    int lprim = bounds.lprim;
+    int *ao_loc = envs.ao_loc;
+    int nbas = envs.nbas;
+    int nao = ao_loc[nbas];
+    int *bas = envs.bas;
+    double *env = envs.env;
+    double omega = env[PTR_RANGE_OMEGA];
+    int do_j = jk.j_factor != 0.; /* a zero factor disables the J term */
+    int do_k = jk.k_factor != 0.; /* a zero factor disables the K term */
+    double *dm = jk.dm;
+    extern __shared__ double Rpa_cicj[]; /* 4 values per (primitive pair, shell pair of the tile) */
+    double *rw = Rpa_cicj + iprim*jprim*TILE2*4; /* rys_roots scratch, placed right after the pair table */
+    for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { /* block-wide fill of the pair table */
+        int ijp = n / TILE2; /* primitive-pair index */
+        int sh_ij = n % TILE2; /* shell-pair index inside the tile */
+        int ish = ish0 + sh_ij / TILE;
+        int jsh = jsh0 + sh_ij % TILE;
+        int ip = ijp / jprim;
+        int jp = ijp % jprim;
+        double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP];
+        double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP];
+        double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF];
+        double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF];
+        double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD];
+        double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD];
+        double ai = expi[ip];
+        double aj = expj[jp];
+        double aij = ai + aj;
+        double aj_aij = aj / aij;
+        double xjxi = rj[0] - ri[0];
+        double yjyi = rj[1] - ri[1];
+        double zjzi = rj[2] - ri[2];
+        double *Rpa = Rpa_cicj + ijp * TILE2*4;
+        Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; /* slots 0..2: (rj - ri) * aj/aij */
+        Rpa[sh_ij+1*TILE2] = yjyi * aj_aij;
+        Rpa[sh_ij+2*TILE2] = zjzi * aj_aij;
+        double theta_ij = ai * aj_aij; /* ai*aj/aij */
+        double Kab =
exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double g1x, g1y, g1z; + double g2x, g2y, g2z; + double g3x, g3y, g3z; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; 
+ double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx = 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(2, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + 
rys_roots(2, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double hrr_1100x = trr_20x - xjxi * trr_10x; + g1x = aj2 * hrr_1100x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + double hrr_0100y = trr_10y - yjyi * 1; + g1y = aj2 * hrr_0100y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + double hrr_0100z = trr_10z - zjzi * wt; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_20x; + g2y = 
ai2 * trr_10y; + g2z = ai2 * trr_10z; + g2x -= 1 * fac; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double hrr_2100x = trr_30x - xjxi * trr_20x; + g3x = ai2 * hrr_2100x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + g3y = ai2 * hrr_1100y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + g3z = ai2 * hrr_1100z; + double hrr_0100x = trr_10x - xjxi * fac; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1001x = trr_11x - xlxk * trr_10x; + g1x = al2 * hrr_1001x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + g1y = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_01y; + 
g2z = ak2 * trr_01z; + double b01 = .5/akl * (1 - rt_akl); + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + g3x = ak2 * hrr_1011x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + g3y = ak2 * hrr_0011y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + 
dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_10z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1100x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double hrr_0001x = trr_01x - xlxk * fac; + g1x = al2 * hrr_0001x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_01z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + g3x = ak2 * hrr_0011x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + g3y = ak2 * hrr_1011y; 
+ g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1100y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + g3z = ai2 * hrr_2100z; + g3z -= 1 * 
hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + g1z = al2 * hrr_1001z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_11z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0011y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + g3z = ak2 * hrr_1011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1002z 
= hrr_1011z - zlzk * hrr_1001z; + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, v2xx); + atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy); + atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz); + atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx); + atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy); + atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz); + atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx); + atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy); + atomicAdd(ejk + (ka*natm+la)*9 + 8, v2zz); + atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy); + atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz); + atomicAdd(ejk + (ia*natm+ia)*9 + 7, v_iyz); + atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy); + atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 6, v_jxz); + atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz); + atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy); + atomicAdd(ejk + 
(ka*natm+ka)*9 + 4, v_kyy*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy);
+        atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz);
+        atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz);
+        atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5);
+    }
+}
+/*
+ * Kernel entry point that feeds the _rys_ejk_ip2_type12_1000 worker.
+ *
+ * Each thread block repeatedly claims a batch index from the global counter
+ * batch_head[0], fills its own slice of `pool` with the shell quartets of
+ * that batch and hands them to the worker.  The loop ends once the claimed
+ * index reaches nbatches.
+ *
+ * envs       - integral environment; nbas and env are read here
+ * jk         - forwarded unchanged to the task-fill routine and the worker
+ * bounds     - supplies ntile_ij_pairs, ntile_kl_pairs and tile_ij_mapping
+ * pool       - task queue storage; block b uses the entries starting at
+ *              b*QUEUE_DEPTH
+ * batch_head - [0] next batch index to hand out, [1] running sum of the
+ *              ntasks values returned by the fill routines
+ */ __global__
+void rys_ejk_ip2_type12_1000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *pool, uint32_t *batch_head)
+{
+    int b_id = blockIdx.x;
+    int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index inside the block */
+    ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's task queue */
+    __shared__ int batch_id; /* current batch; written by thread 0 only */
+    if (t_id == 0) {
+        batch_id = atomicAdd(batch_head, 1); /* claim the first batch */
+    }
+    __syncthreads(); /* publish batch_id to every thread of the block */
+    int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceiling division */
+    int nbatches = bounds.ntile_ij_pairs * nbatches_kl;
+    while (batch_id < nbatches) {
+        int batch_ij = batch_id / nbatches_kl; /* batch_id == batch_ij * nbatches_kl + batch_kl */
+        int batch_kl = batch_id % nbatches_kl;
+        int nbas = envs.nbas;
+        double *env = envs.env;
+        double omega = env[PTR_RANGE_OMEGA];
+        int ntasks;
+        if (omega >= 0) { /* NOTE(review): negative omega selects the "sr" filter, presumably short-range; confirm */
+            ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                     batch_ij, batch_kl);
+        } else {
+            ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                        batch_ij, batch_kl);
+        }
+        if (ntasks > 0) {
+            int tile_ij = bounds.tile_ij_mapping[batch_ij];
+            int nbas_tiles = nbas / TILE;
+            int tile_i = tile_ij / nbas_tiles; /* tile_ij == tile_i * nbas_tiles + tile_j */
+            int tile_j = tile_ij % nbas_tiles;
+            int ish0 = tile_i * TILE; /* first i-shell of the tile */
+            int jsh0 = tile_j * TILE; /* first j-shell of the tile */
+            _rys_ejk_ip2_type12_1000(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0);
+        }
+        if (t_id == 0) {
+            batch_id = atomicAdd(batch_head, 1); /* claim the next batch */
+            atomicAdd(batch_head+1, ntasks); /* add this batch's task count to batch_head[1] */
+        }
+        __syncthreads(); /* all threads must see the new batch_id before re-testing the loop */
+    }
+}
+
+__device__ static
+void _rys_ejk_ip2_type12_1010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *shl_quartet_idx, int
ntasks, int ish0, int jsh0) +{ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 
0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double g1x, g1y, g1z; + double g2x, g2y, g2z; + double g3x, g3y, g3z; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; + double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx = 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double 
al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + 
rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double hrr_1110x = trr_21x - xjxi * trr_11x; + g1x = aj2 * hrr_1110x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + double hrr_0100y = trr_10y - yjyi * 1; + g1y = aj2 * hrr_0100y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + double hrr_0100z = trr_10z - zjzi * wt; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_21x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_10z; + double trr_01x = cpx * fac; + g2x -= 1 * trr_01x; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double hrr_2110x = trr_31x - xjxi * trr_21x; + g3x = ai2 * 
hrr_2110x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + g3y = ai2 * hrr_1100y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + g3z = ai2 * hrr_1100z; + double hrr_0110x = trr_11x - xjxi * trr_01x; + g3x -= 1 * hrr_0110x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_31x - 3 * trr_11x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; + g3x = aj2 * (aj2 * hrr_1210x - 1 * trr_11x); + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 
trr_10y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_11x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_10z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1110x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + 
prod_yz = 1 * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * trr_11x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_1100y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = 
trr_10x * Iz; + prod_yz = trr_01y * Iz; + double hrr_1100x = trr_20x - xjxi * trr_10x; + g1x = aj2 * hrr_1100x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_0110y = trr_11y - yjyi * trr_01y; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_11y; + g2z = ai2 * trr_10z; + g2x -= 1 * fac; + double hrr_2100x = trr_30x - xjxi * trr_20x; + g3x = ai2 * hrr_2100x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_1110y = trr_21y - yjyi * trr_11y; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_1100z; + double hrr_0100x = trr_10x - xjxi * fac; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } 
else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1110y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_21y; + g2z = ai2 * trr_10z; + g2y -= 1 * trr_01y; + g3x = ai2 * hrr_1100x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_2110y = trr_31y - yjyi * trr_21y; + g3y = ai2 * hrr_2110y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0110y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_31y - 3 * trr_11y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; + g3y = aj2 * (aj2 * hrr_1210y - 1 * trr_11y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } 
else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_11y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + 
dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0100y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_0110z = trr_11z - zjzi * trr_01z; + g1z = aj2 * hrr_0110z; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_11z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1100y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_1110z = trr_21z - zjzi * trr_11z; + g3z = ai2 * hrr_1110z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += 
jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0110z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_11z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1110z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; 
+ g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1110z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_21z; + g2z -= 1 * trr_01z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1100y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_2110z = trr_31z - zjzi * trr_21z; + g3z = ai2 * hrr_2110z; + g3z -= 1 * hrr_0110z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_31z - 3 * trr_11z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; + g3z = aj2 * (aj2 * hrr_1210z - 1 * trr_11z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + } + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd 
+= jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + double b01 = .5/akl * (1 - rt_akl); + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + g1x = al2 * hrr_1011x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + g1y = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_12x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_01z; + g2x -= 1 * trr_10x; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; + double hrr_1021x = trr_13x - xlxk * trr_12x; + g3x = ak2 * hrr_1021x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + g3y = ak2 * hrr_0011y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + g3z = ak2 * hrr_0011z; + double hrr_1001x = trr_11x - xlxk * trr_10x; + g3x -= 1 * hrr_1001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_13x - 3 * trr_11x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; + 
g3x = al2 * (al2 * hrr_1012x - 1 * trr_11x); + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + double hrr_0011x = trr_02x - xlxk * trr_01x; + g1x = al2 * hrr_0011x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_02x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_01z; + g2x -= 1 * fac; + double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; + double hrr_0021x = trr_03x - xlxk * trr_02x; + g3x = ak2 * hrr_0021x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0011z; + double hrr_0001x = trr_01x - xlxk * fac; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += 
g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_0001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + g1z = al2 * hrr_1001z; + g2x = ak2 * trr_02x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_11z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_0011y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + g3z = ak2 * hrr_1011z; + g3x 
-= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_02y; + g2z = ak2 * trr_01z; + g2y -= 1 * 1; + g3x = ak2 * hrr_1011x; + double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; + double hrr_0021y = trr_03y - ylyk * trr_02y; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= 
al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1011y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_12y; + g2z = ak2 * trr_01z; + g2y -= 1 * trr_10y; + g3x = ak2 * hrr_0011x; + double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; + double hrr_1021y = trr_13y - ylyk * trr_12y; + g3y = ak2 * hrr_1021y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_1001y; 
+ g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_13y - 3 * trr_11y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; + g3y = al2 * (al2 * hrr_1012y - 1 * trr_11y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_1001z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_02y; + g2z = ak2 * trr_11z; + g2y -= 1 * 1; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_1011z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += 
g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_0011z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0011y; + double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; + double hrr_0021z = trr_03z - zlzk * trr_02z; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + 
v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0011z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_12y 
- 1 * trr_10y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_1011z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_12z; + g2z -= 1 * trr_10z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0011y; + double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; + double hrr_1021z = trr_13z - zlzk * trr_12z; + g3z = ak2 * hrr_1021z; + g3z -= 1 * hrr_1001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_13z - 3 * trr_11z); + v_kxx += g3x * prod_yz; + 
v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; + g3z = al2 * (al2 * hrr_1012z - 1 * trr_11z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, v2xx); + atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy); + atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz); + atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx); + atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy); + atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz); + atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx); + atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy); + atomicAdd(ejk + (ka*natm+la)*9 + 8, v2zz); + atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy); + atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz); + atomicAdd(ejk + (ia*natm+ia)*9 + 7, v_iyz); + atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy); + atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5); + atomicAdd(ejk + 
(ja*natm+ja)*9 + 6, v_jxz); + atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz); + atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy); + atomicAdd(ejk + (ka*natm+ka)*9 + 4, v_kyy*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz); + atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz); + atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5); + atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5); + atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy); + atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5); + atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz); + atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz); + atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5); + } +} +/* Kernel entry point: every thread block repeatedly claims a batch index from the counter batch_head[0], fills its own slice of the shell-quartet pool for that batch, and hands the tasks to _rys_ejk_ip2_type12_1010. The loop ends once the claimed index reaches nbatches. */ __global__ +void rys_ejk_ip2_type12_1010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ + int b_id = blockIdx.x; + int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index inside the 2D block */ + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's slice of the pool, QUEUE_DEPTH entries per block */ + __shared__ int batch_id; /* batch index shared by the block; written only by thread 0 */ + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); /* claim the first batch for this block */ + } + __syncthreads(); /* all threads must see batch_id before reading it */ + int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceiling division of kl tile pairs by TILES_IN_BATCH */ + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; + while (batch_id < nbatches) { + int batch_ij = batch_id / nbatches_kl; /* batch_id is a flattened (batch_ij, batch_kl) pair */ + int batch_kl = batch_id % nbatches_kl; + int nbas = envs.nbas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } else { /* negative omega selects _fill_sr_ejk_tasks; presumably the short-range variant, confirm against its definition */ + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { + int tile_ij = bounds.tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; + int tile_i = tile_ij / nbas_tiles; /* tile_ij is a flattened (tile_i, tile_j) pair */ + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; /* first i shell of the tile */ + int jsh0 = tile_j * TILE; /* first j shell of the tile */ + _rys_ejk_ip2_type12_1010(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); + } + if (t_id == 0) { + batch_id
= atomicAdd(batch_head, 1); /* claim the next batch for this block */ + atomicAdd(batch_head+1, ntasks); /* batch_head[1] accumulates the task counts returned by the fill step */ + } + __syncthreads(); /* publish the new batch_id before the loop condition is re-evaluated */ + } +} + +__device__ static +void _rys_ejk_ip2_type12_1011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC;
+ ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double g1x, g1y, g1z; + double g2x, g2y, g2z; + double g3x, g3y, g3z; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; + double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx 
= 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / 
(omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double b01 = .5/akl * (1 - rt_akl); + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1011x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1011x * Iy; + prod_xz = hrr_1011x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + g1x = aj2 * hrr_1111x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y 
= c0y * 1; + double hrr_0100y = trr_10y - yjyi * 1; + g1y = aj2 * hrr_0100y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + double hrr_0100z = trr_10z - zjzi * wt; + g1z = aj2 * hrr_0100z; + g2x = ai2 * hrr_2011x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_10z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + g2x -= 1 * hrr_0011x; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; + double hrr_3011x = trr_32x - xlxk * trr_31x; + double hrr_2111x = hrr_3011x - xjxi * hrr_2011x; + g3x = ai2 * hrr_2111x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + g3y = ai2 * hrr_1100y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + g3z = ai2 * hrr_1100z; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + g3x -= 1 * hrr_0111x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3011x - 3 * hrr_1011x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1211x = hrr_2111x - xjxi * hrr_1111x; + g3x = aj2 * (aj2 * hrr_1211x - 1 * hrr_1011x); + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + 
v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0111x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * hrr_1011x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_10z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1111x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2011x - 1 * hrr_0011x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0211x = hrr_1111x - xjxi * hrr_0111x; + g3x = aj2 * (aj2 * hrr_0211x - 1 * hrr_0011x); + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + 
v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0111x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * hrr_1011x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1111x; + g3y = ai2 * hrr_1100y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2011x - 1 * hrr_0011x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0211x - 1 * hrr_0011x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz 
+= g1y * g1z * Ix; + double hrr_1001x = trr_11x - xlxk * trr_10x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = trr_01y * Iz; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + g1x = aj2 * hrr_1101x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_0110y = trr_11y - yjyi * trr_01y; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * hrr_2001x; + g2y = ai2 * trr_11y; + g2z = ai2 * trr_10z; + double hrr_0001x = trr_01x - xlxk * fac; + g2x -= 1 * hrr_0001x; + double hrr_3001x = trr_31x - xlxk * trr_30x; + double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; + g3x = ai2 * hrr_2101x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_1110y = trr_21y - yjyi * trr_11y; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_1100z; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + g3x -= 1 * hrr_0101x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3001x - 3 * hrr_1001x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x 
* prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1201x = hrr_2101x - xjxi * hrr_1101x; + g3x = aj2 * (aj2 * hrr_1201x - 1 * hrr_1001x); + double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_11y * Iz; + g1x = aj2 * hrr_0101x; + g1y = aj2 * hrr_1110y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * hrr_1001x; + g2y = ai2 * trr_21y; + g2z = ai2 * trr_10z; + g2y -= 1 * trr_01y; + g3x = ai2 * hrr_1101x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_2110y = trr_31y - yjyi * trr_21y; + g3y = ai2 * hrr_2110y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0110y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2001x - 1 * hrr_0001x); + g3y = ai2 * (ai2 * trr_31y - 3 * trr_11y); + g3z = ai2 * (ai2 * 
trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0201x = hrr_1101x - xjxi * hrr_0101x; + g3x = aj2 * (aj2 * hrr_0201x - 1 * hrr_0001x); + double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; + g3y = aj2 * (aj2 * hrr_1210y - 1 * trr_11y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_0101x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * hrr_1001x; + g2y = ai2 * trr_11y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1101x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2001x - 1 * hrr_0001x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * 
prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0201x - 1 * hrr_0001x); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1101x; + g1y = aj2 * hrr_0100y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_0110z = trr_11z - zjzi * trr_01z; + g1z = aj2 * hrr_0110z; + g2x = ai2 * hrr_2001x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_11z; + g2x -= 1 * hrr_0001x; + g3x = ai2 * hrr_2101x; + g3y = ai2 * hrr_1100y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_1110z = trr_21z - zjzi * trr_11z; + g3z = ai2 * hrr_1110z; + g3x -= 1 * hrr_0101x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3001x - 3 * hrr_1001x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + 
g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1201x - 1 * hrr_1001x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0101x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0110z; + g2x = ai2 * hrr_1001x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_11z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1101x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1110z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2001x - 1 * hrr_0001x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * 
prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0201x - 1 * hrr_0001x); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0101x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1110z; + g2x = ai2 * hrr_1001x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_21z; + g2z -= 1 * trr_01z; + g3x = ai2 * hrr_1101x; + g3y = ai2 * hrr_1100y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_2110z = trr_31z - zjzi * trr_21z; + g3z = ai2 * hrr_2110z; + g3z -= 1 * hrr_0110z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2001x - 1 * hrr_0001x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_31z - 3 * trr_11z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += 
g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0201x - 1 * hrr_0001x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; + g3z = aj2 * (aj2 * hrr_1210z - 1 * trr_11z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double hrr_0001y = trr_01y - ylyk * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0001y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0001y * Iz; + double hrr_1110x = trr_21x - xjxi * trr_11x; + g1x = aj2 * hrr_1110x; + double hrr_1001y = trr_11y - ylyk * trr_10y; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + g1y = aj2 * hrr_0101y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_21x; + g2y = ai2 * hrr_1001y; + g2z = ai2 * trr_10z; + g2x -= 1 * trr_01x; + double hrr_2110x = trr_31x - xjxi * trr_21x; + g3x = ai2 * hrr_2110x; + double hrr_2001y = trr_21y - ylyk * trr_20y; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + g3y = ai2 * hrr_1101y; + g3z = ai2 * hrr_1100z; + double hrr_0110x = trr_11x - xjxi * trr_01x; + g3x -= 1 * hrr_0110x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * 
trr_31x - 3 * trr_11x); + g3y = ai2 * (ai2 * hrr_2001y - 1 * hrr_0001y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; + g3x = aj2 * (aj2 * hrr_1210x - 1 * trr_11x); + double hrr_0201y = hrr_1101y - yjyi * hrr_0101y; + g3y = aj2 * (aj2 * hrr_0201y - 1 * hrr_0001y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1001y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1001y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_1101y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_11x; + g2y = ai2 * hrr_2001y; + g2z = ai2 * trr_10z; + g2y -= 1 * hrr_0001y; + g3x = ai2 * hrr_1110x; + double hrr_3001y = trr_31y - ylyk * trr_30y; + double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; + g3y = ai2 * hrr_2101y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0101y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * 
g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * hrr_3001y - 3 * hrr_1001y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + double hrr_1201y = hrr_2101y - yjyi * hrr_1101y; + g3y = aj2 * (aj2 * hrr_1201y - 1 * hrr_1001y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0001y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0001y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_0101y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * trr_11x; + g2y = ai2 * hrr_1001y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_1101y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * 
(ai2 * hrr_2001y - 1 * hrr_0001y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_0201y - 1 * hrr_0001y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0011y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0011y * Iz; + double hrr_1100x = trr_20x - xjxi * trr_10x; + g1x = aj2 * hrr_1100x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; + g1y = aj2 * hrr_0111y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_20x; + g2y = ai2 * hrr_1011y; + g2z = ai2 * trr_10z; + g2x -= 1 * fac; + double hrr_2100x = trr_30x - xjxi * trr_20x; + g3x = ai2 * hrr_2100x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + double hrr_2011y = trr_22y - ylyk * trr_21y; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + g3y = ai2 * hrr_1111y; + g3z = ai2 * 
hrr_1100z; + double hrr_0100x = trr_10x - xjxi * fac; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * hrr_2011y - 1 * hrr_0011y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + double hrr_0211y = hrr_1111y - yjyi * hrr_0111y; + g3y = aj2 * (aj2 * hrr_0211y - 1 * hrr_0011y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1011y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1011y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1111y; + g1z = aj2 * hrr_0100z; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_2011y; + g2z = ai2 * trr_10z; + g2y -= 1 * hrr_0011y; + g3x = ai2 * hrr_1100x; + double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; + 
double hrr_3011y = trr_32y - ylyk * trr_31y; + double hrr_2111y = hrr_3011y - yjyi * hrr_2011y; + g3y = ai2 * hrr_2111y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0111y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_3011y - 3 * hrr_1011y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + double hrr_1211y = hrr_2111y - yjyi * hrr_1111y; + g3y = aj2 * (aj2 * hrr_1211y - 1 * hrr_1011y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0011y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0011y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0111y; + g1z = aj2 * hrr_1100z; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_1011y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * 
hrr_1100x; + g3y = ai2 * hrr_1111y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_2011y - 1 * hrr_0011y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0211y - 1 * hrr_0011y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0001y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0001y * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0101y; + g1z = aj2 * hrr_0110z; + g2x = ai2 * trr_20x; + g2y = ai2 * hrr_1001y; + g2z = ai2 * trr_11z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1101y; + g3z = ai2 * hrr_1110z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * 
prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * hrr_2001y - 1 * hrr_0001y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0201y - 1 * hrr_0001y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1001y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1001y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1101y; + g1z = aj2 * hrr_0110z; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_2001y; + g2z = ai2 * trr_11z; + g2y -= 1 * hrr_0001y; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2101y; + g3z = ai2 * hrr_1110z; + g3y -= 1 * hrr_0101y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + 
v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_3001y - 3 * hrr_1001y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1201y - 1 * hrr_1001y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0001y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0001y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0101y; + g1z = aj2 * hrr_1110z; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_1001y; + g2z = ai2 * trr_21z; + g2z -= 1 * trr_01z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1101y; + g3z = ai2 * hrr_2110z; + g3z -= 1 * hrr_0110z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_2001y - 1 * hrr_0001y); + g3z = ai2 * (ai2 * trr_31z - 3 * trr_11z); + v_ixx += g3x * 
prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0201y - 1 * hrr_0001y); + g3z = aj2 * (aj2 * hrr_1210z - 1 * trr_11z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double hrr_0001z = trr_01z - zlzk * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1110x; + g1y = aj2 * hrr_0100y; + double hrr_1001z = trr_11z - zlzk * trr_10z; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + g1z = aj2 * hrr_0101z; + g2x = ai2 * trr_21x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_1001z; + g2x -= 1 * trr_01x; + g3x = ai2 * hrr_2110x; + g3y = ai2 * hrr_1100y; + double hrr_2001z = trr_21z - zlzk * trr_20z; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + g3z = ai2 * hrr_1101z; + g3x -= 1 * hrr_0110x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_31x - 3 * trr_11x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = 
ai2 * (ai2 * hrr_2001z - 1 * hrr_0001z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1210x - 1 * trr_11x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0201z = hrr_1101z - zjzi * hrr_0101z; + g3z = aj2 * (aj2 * hrr_0201z - 1 * hrr_0001z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0101z; + g2x = ai2 * trr_11x; + g2y = ai2 * trr_20y; + g2z = ai2 * hrr_1001z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1101z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * hrr_2001z - 1 * hrr_0001z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; 
+ v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0201z - 1 * hrr_0001z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1101z; + g2x = ai2 * trr_11x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_2001z; + g2z -= 1 * hrr_0001z; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_1100y; + double hrr_3001z = trr_31z - zlzk * trr_30z; + double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; + g3z = ai2 * hrr_2101z; + g3z -= 1 * hrr_0101z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_3001z - 3 * hrr_1001z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * 
Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1201z = hrr_2101z - zjzi * hrr_1101z; + g3z = aj2 * (aj2 * hrr_1201z - 1 * hrr_1001z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_0101z; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_11y; + g2z = ai2 * hrr_1001z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_1101z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * hrr_2001z - 1 * hrr_0001z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 
* hrr_0201z - 1 * hrr_0001z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1110y; + g1z = aj2 * hrr_0101z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_21y; + g2z = ai2 * hrr_1001z; + g2y -= 1 * trr_01y; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2110y; + g3z = ai2 * hrr_1101z; + g3y -= 1 * hrr_0110y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_31y - 3 * trr_11y); + g3z = ai2 * (ai2 * hrr_2001z - 1 * hrr_0001z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1210y - 1 * trr_11y); + g3z = aj2 * (aj2 * hrr_0201z - 1 * hrr_0001z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * 
g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_1101z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_11y; + g2z = ai2 * hrr_2001z; + g2z -= 1 * hrr_0001z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_2101z; + g3z -= 1 * hrr_0101z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * hrr_3001z - 3 * hrr_1001z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_1201z - 1 * hrr_1001z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+2]; + dd += 
dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0011z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0100y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + g1z = aj2 * hrr_0111z; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_1011z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1100y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + double hrr_2011z = trr_22z - zlzk * trr_21z; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + g3z = ai2 * hrr_1111z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_2011z - 1 * hrr_0011z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0211z = hrr_1111z - zjzi * hrr_0111z; + g3z = aj2 * (aj2 * hrr_0211z - 1 * hrr_0011z); + v_jxx += g3x * prod_yz; + 
v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0111z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_20y; + g2z = ai2 * hrr_1011z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1111z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * hrr_2011z - 1 * hrr_0011z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0211z - 1 * hrr_0011z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * 
dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1111z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_2011z; + g2z -= 1 * hrr_0011z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1100y; + double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; + double hrr_3011z = trr_32z - zlzk * trr_31z; + double hrr_2111z = hrr_3011z - zjzi * hrr_2011z; + g3z = ai2 * hrr_2111z; + g3z -= 1 * hrr_0111z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_3011z - 3 * hrr_1011z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1211z = hrr_2111z - zjzi * hrr_1111z; + g3z = aj2 * (aj2 * hrr_1211z - 1 * hrr_1011z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + } + { + double wt = 
rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double b01 = .5/akl * (1 - rt_akl); + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1011x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1011x * Iy; + prod_xz = hrr_1011x * Iz; + prod_yz = 1 * Iz; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; + double hrr_1021x = trr_13x - xlxk * trr_12x; + double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; + g1x = al2 * hrr_1012x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + g1y = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + g1z = al2 * hrr_0001z; + g1x -= 1 * trr_11x; + g2x = ak2 * hrr_1021x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_01z; + double hrr_1001x = trr_11x - xlxk * trr_10x; + g2x -= 1 * hrr_1001x; + double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; + double trr_14x = cpx * trr_13x + 3*b01 * 
trr_12x + 1*b00 * trr_03x; + double hrr_1031x = trr_14x - xlxk * trr_13x; + double hrr_1022x = hrr_1031x - xlxk * hrr_1021x; + g3x = ak2 * hrr_1022x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + g3y = ak2 * hrr_0011y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + g3z = ak2 * hrr_0011z; + double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; + g3x -= 1 * hrr_1002x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_12x - 1 * trr_10x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_1031x - 3 * hrr_1011x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_1013x = hrr_1022x - xlxk * hrr_1012x; + g3x = al2 * (al2 * hrr_1013x - 3 * hrr_1011x); + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0011x = trr_02x - xlxk * trr_01x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * 
dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = trr_10y * Iz; + double hrr_0021x = trr_03x - xlxk * trr_02x; + double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; + g1x = al2 * hrr_0012x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0001z; + g1x -= 1 * trr_01x; + g2x = ak2 * hrr_0021x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_01z; + double hrr_0001x = trr_01x - xlxk * fac; + g2x -= 1 * hrr_0001x; + double trr_04x = cpx * trr_03x + 3*b01 * trr_02x; + double hrr_0031x = trr_04x - xlxk * trr_03x; + double hrr_0022x = hrr_0031x - xlxk * hrr_0021x; + g3x = ak2 * hrr_0022x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0011z; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + g3x -= 1 * hrr_0002x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_02x - 1 * fac); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0031x - 3 * hrr_0011x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0013x = hrr_0022x - xlxk * hrr_0012x; + g3x = al2 * (al2 * hrr_0013x - 3 * hrr_0011x); + double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * 
(al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0012x; + g1y = al2 * hrr_0001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + g1z = al2 * hrr_1001z; + g1x -= 1 * trr_01x; + g2x = ak2 * hrr_0021x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_11z; + g2x -= 1 * hrr_0001x; + g3x = ak2 * hrr_0022x; + g3y = ak2 * hrr_0011y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + g3z = ak2 * hrr_1011z; + g3x -= 1 * hrr_0002x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_02x - 1 * fac); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0031x - 3 * hrr_0011x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * 
g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0013x - 3 * hrr_0011x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_1002x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_0001z; + g1x -= 1 * trr_10x; + g2x = ak2 * hrr_1011x; + g2y = ak2 * trr_02y; + g2z = ak2 * trr_01z; + g2y -= 1 * 1; + g3x = ak2 * hrr_1012x; + double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; + double hrr_0021y = trr_03y - ylyk * trr_02y; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_11x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_1021x - 1 * hrr_1001x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * 
prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_1003x = hrr_1012x - xlxk * hrr_1002x; + g3x = al2 * (al2 * hrr_1003x - 3 * hrr_1001x); + double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_11y * Iz; + g1x = al2 * hrr_0002x; + g1y = al2 * hrr_1011y; + g1z = al2 * hrr_0001z; + g1x -= 1 * fac; + g2x = ak2 * hrr_0011x; + g2y = ak2 * trr_12y; + g2z = ak2 * trr_01z; + g2y -= 1 * trr_10y; + g3x = ak2 * hrr_0012x; + double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; + double hrr_1021y = trr_13y - ylyk * trr_12y; + g3y = ak2 * hrr_1021y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_1001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_01x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0021x - 1 * hrr_0001x); + g3y = ak2 * (ak2 * trr_13y - 3 * trr_11y); + g3z = ak2 
* (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0003x = hrr_0012x - xlxk * hrr_0002x; + g3x = al2 * (al2 * hrr_0003x - 3 * hrr_0001x); + double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; + g3y = al2 * (al2 * hrr_1012y - 1 * trr_11y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_0002x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_1001z; + g1x -= 1 * fac; + g2x = ak2 * hrr_0011x; + g2y = ak2 * trr_02y; + g2z = ak2 * trr_11z; + g2y -= 1 * 1; + g3x = ak2 * hrr_0012x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_1011z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_01x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0021x - 1 * hrr_0001x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * trr_12z - 1 
* trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0003x - 3 * hrr_0001x); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1002x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_0011z; + g1x -= 1 * trr_10x; + g2x = ak2 * hrr_1011x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_1012x; + g3y = ak2 * hrr_0011y; + double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; + double hrr_0021z = trr_03z - zlzk * trr_02z; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_11x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_1021x - 1 * hrr_1001x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x 
* prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1003x - 3 * hrr_1001x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0002x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0011z; + g1x -= 1 * fac; + g2x = ak2 * hrr_0011x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_0012x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_01x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0021x - 1 * hrr_0001x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + 
v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0003x - 3 * hrr_0001x); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0002x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_1011z; + g1x -= 1 * fac; + g2x = ak2 * hrr_0011x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_12z; + g2z -= 1 * trr_10z; + g3x = ak2 * hrr_0012x; + g3y = ak2 * hrr_0011y; + double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; + double hrr_1021z = trr_13z - zlzk * trr_12z; + g3z = ak2 * hrr_1021z; + g3z -= 1 * hrr_1001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3x -= 1 * (ak2 * trr_01x); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0021x - 1 * hrr_0001x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_13z - 3 * trr_11z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy 
+= g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0003x - 3 * hrr_0001x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; + g3z = al2 * (al2 * hrr_1012z - 1 * trr_11z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0001y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0001y * Iz; + g1x = al2 * hrr_1011x; + g1y = al2 * hrr_0002y; + g1z = al2 * hrr_0001z; + g1y -= 1 * 1; + g2x = ak2 * trr_12x; + g2y = ak2 * hrr_0011y; + g2z = ak2 * trr_01z; + g2x -= 1 * trr_10x; + g3x = ak2 * hrr_1021x; + g3y = ak2 * hrr_0012y; + g3z = ak2 * hrr_0011z; + g3x -= 1 * hrr_1001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_01y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_13x - 3 * trr_11x); + g3y = ak2 * (ak2 * hrr_0021y - 1 * hrr_0001y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + 
g3x = al2 * (al2 * hrr_1012x - 1 * trr_11x); + double hrr_0003y = hrr_0012y - ylyk * hrr_0002y; + g3y = al2 * (al2 * hrr_0003y - 3 * hrr_0001y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1001y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1001y * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_1002y; + g1z = al2 * hrr_0001z; + g1y -= 1 * trr_10y; + g2x = ak2 * trr_02x; + g2y = ak2 * hrr_1011y; + g2z = ak2 * trr_01z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_1012y; + g3z = ak2 * hrr_0011z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_11y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * hrr_1021y - 1 * hrr_1001y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + double hrr_1003y = 
hrr_1012y - ylyk * hrr_1002y; + g3y = al2 * (al2 * hrr_1003y - 3 * hrr_1001y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0001y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0001y * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_0002y; + g1z = al2 * hrr_1001z; + g1y -= 1 * 1; + g2x = ak2 * trr_02x; + g2y = ak2 * hrr_0011y; + g2z = ak2 * trr_11z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_0012y; + g3z = ak2 * hrr_1011z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_01y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * hrr_0021y - 1 * hrr_0001y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_0003y - 3 * hrr_0001y); + g3z = al2 * (al2 * hrr_1002z - 1 * 
trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0011y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0011y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0012y; + g1z = al2 * hrr_0001z; + g1y -= 1 * trr_01y; + g2x = ak2 * trr_11x; + g2y = ak2 * hrr_0021y; + g2z = ak2 * trr_01z; + g2y -= 1 * hrr_0001y; + g3x = ak2 * hrr_1011x; + double trr_04y = cpy * trr_03y + 3*b01 * trr_02y; + double hrr_0031y = trr_04y - ylyk * trr_03y; + double hrr_0022y = hrr_0031y - ylyk * hrr_0021y; + g3y = ak2 * hrr_0022y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_0002y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_02y - 1 * 1); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * hrr_0031y - 3 * hrr_0011y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + double hrr_0013y = hrr_0022y - ylyk * 
hrr_0012y; + g3y = al2 * (al2 * hrr_0013y - 3 * hrr_0011y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1011y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1011y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1012y; + g1z = al2 * hrr_0001z; + g1y -= 1 * trr_11y; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_1021y; + g2z = ak2 * trr_01z; + g2y -= 1 * hrr_1001y; + g3x = ak2 * hrr_0011x; + double trr_14y = cpy * trr_13y + 3*b01 * trr_12y + 1*b00 * trr_03y; + double hrr_1031y = trr_14y - ylyk * trr_13y; + double hrr_1022y = hrr_1031y - ylyk * hrr_1021y; + g3y = ak2 * hrr_1022y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_1002y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_12y - 1 * trr_10y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_1031y - 3 * hrr_1011y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y 
* g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + double hrr_1013y = hrr_1022y - ylyk * hrr_1012y; + g3y = al2 * (al2 * hrr_1013y - 3 * hrr_1011y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0011y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0011y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0012y; + g1z = al2 * hrr_1001z; + g1y -= 1 * trr_01y; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_0021y; + g2z = ak2 * trr_11z; + g2y -= 1 * hrr_0001y; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0022y; + g3z = ak2 * hrr_1011z; + g3y -= 1 * hrr_0002y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_02y - 1 * 1); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_0031y - 3 * hrr_0011y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * 
(al2 * hrr_0013y - 3 * hrr_0011y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0001y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0001y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0002y; + g1z = al2 * hrr_0011z; + g1y -= 1 * 1; + g2x = ak2 * trr_11x; + g2y = ak2 * hrr_0011y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0012y; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_01y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * hrr_0021y - 1 * hrr_0001y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0003y - 3 * hrr_0001y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy 
+= g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1001y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1001y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1002y; + g1z = al2 * hrr_0011z; + g1y -= 1 * trr_10y; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_1011y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1012y; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_11y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_1021y - 1 * hrr_1001y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1003y - 3 * hrr_1001y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if 
(do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0001y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0001y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0002y; + g1z = al2 * hrr_1011z; + g1y -= 1 * 1; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_0011y; + g2z = ak2 * trr_12z; + g2z -= 1 * trr_10z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0012y; + g3z = ak2 * hrr_1021z; + g3z -= 1 * hrr_1001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3y -= 1 * (ak2 * trr_01y); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_0021y - 1 * hrr_0001y); + g3z = ak2 * (ak2 * trr_13z - 3 * trr_11z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0003y - 3 * hrr_0001y); + g3z = al2 * (al2 * hrr_1012z - 1 * trr_11z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { 
+ dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1011x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_0002z; + g1z -= 1 * wt; + g2x = ak2 * trr_12x; + g2y = ak2 * trr_01y; + g2z = ak2 * hrr_0011z; + g2x -= 1 * trr_10x; + g3x = ak2 * hrr_1021x; + g3y = ak2 * hrr_0011y; + g3z = ak2 * hrr_0012z; + g3x -= 1 * hrr_1001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_01z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_13x - 3 * trr_11x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * hrr_0021z - 1 * hrr_0001z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1012x - 1 * trr_11x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0003z = hrr_0012z - zlzk * hrr_0002z; + g3z = al2 * (al2 * hrr_0003z - 3 * hrr_0001z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += 
dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0002z; + g1z -= 1 * wt; + g2x = ak2 * trr_02x; + g2y = ak2 * trr_11y; + g2z = ak2 * hrr_0011z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0012z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_01z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * hrr_0021z - 1 * hrr_0001z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0003z - 3 * hrr_0001z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { 
+ if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_1002z; + g1z -= 1 * trr_10z; + g2x = ak2 * trr_02x; + g2y = ak2 * trr_01y; + g2z = ak2 * hrr_1011z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_0011y; + g3z = ak2 * hrr_1012z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_11z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * hrr_1021z - 1 * hrr_1001z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1003z = hrr_1012z - zlzk * hrr_1002z; + g3z = al2 * (al2 * hrr_1003z - 3 * hrr_1001z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * 
dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_0002z; + g1z -= 1 * wt; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_02y; + g2z = ak2 * hrr_0011z; + g2y -= 1 * 1; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_0012z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_01z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * hrr_0021z - 1 * hrr_0001z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_0003z - 3 * hrr_0001z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] 
+ dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1011y; + g1z = al2 * hrr_0002z; + g1z -= 1 * wt; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_12y; + g2z = ak2 * hrr_0011z; + g2y -= 1 * trr_10y; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1021y; + g3z = ak2 * hrr_0012z; + g3y -= 1 * hrr_1001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_01z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_13y - 3 * trr_11y); + g3z = ak2 * (ak2 * hrr_0021z - 1 * hrr_0001z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1012y - 1 * trr_11y); + g3z = al2 * (al2 * hrr_0003z - 3 * hrr_0001z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1001z * dd; + prod_xy = fac 
* Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_1002z; + g1z -= 1 * trr_10z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_02y; + g2z = ak2 * hrr_1011z; + g2y -= 1 * 1; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_1012z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_11z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * hrr_1021z - 1 * hrr_1001z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_1003z - 3 * hrr_1001z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0011z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_0012z; + 
g1z -= 1 * trr_01z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_01y; + g2z = ak2 * hrr_0021z; + g2z -= 1 * hrr_0001z; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0011y; + double trr_04z = cpz * trr_03z + 3*b01 * trr_02z; + double hrr_0031z = trr_04z - zlzk * trr_03z; + double hrr_0022z = hrr_0031z - zlzk * hrr_0021z; + g3z = ak2 * hrr_0022z; + g3z -= 1 * hrr_0002z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_02z - 1 * wt); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * hrr_0031z - 3 * hrr_0011z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0013z = hrr_0022z - zlzk * hrr_0012z; + g3z = al2 * (al2 * hrr_0013z - 3 * hrr_0011z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * 
Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0012z; + g1z -= 1 * trr_01z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_11y; + g2z = ak2 * hrr_0021z; + g2z -= 1 * hrr_0001z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0022z; + g3z -= 1 * hrr_0002z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_02z - 1 * wt); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * hrr_0031z - 3 * hrr_0011z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0013z - 3 * hrr_0011z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_1012z; + g1z -= 1 * trr_11z; + g2x = ak2 * trr_01x; + 
g2y = ak2 * trr_01y; + g2z = ak2 * hrr_1021z; + g2z -= 1 * hrr_1001z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0011y; + double trr_14z = cpz * trr_13z + 3*b01 * trr_12z + 1*b00 * trr_03z; + double hrr_1031z = trr_14z - zlzk * trr_13z; + double hrr_1022z = hrr_1031z - zlzk * hrr_1021z; + g3z = ak2 * hrr_1022z; + g3z -= 1 * hrr_1002z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + g3z -= 1 * (ak2 * trr_12z - 1 * trr_10z); + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * hrr_1031z - 3 * hrr_1011z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1013z = hrr_1022z - zlzk * hrr_1012z; + g3z = al2 * (al2 * hrr_1013z - 3 * hrr_1011z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, 
v2xx);
+        atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy);
+        atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz);
+        atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx);
+        atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy);
+        atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz);
+        atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx);
+        atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy);
+        atomicAdd(ejk + (ka*natm+la)*9 + 8, v2zz);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 7, v_iyz);
+        atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 6, v_jxz);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 4, v_kyy*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy);
+        atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz);
+        atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz);
+        atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5);
+    }
+}
+__global__ /* Kernel that drives the device routine _rys_ejk_ip2_type12_1011
+ * over batches of work.
+ *
+ * Each thread block repeatedly claims a batch id from the global counter
+ * batch_head[0] (atomicAdd issued by thread 0 and published to the rest of
+ * the block through the __shared__ variable batch_id), decodes it into an
+ * (ij, kl) batch pair, calls _fill_ejk_tasks or _fill_sr_ejk_tasks with this
+ * block's slice of `pool` (presumably they write the batch's shell quartets
+ * there and return how many were written -- confirm against their
+ * definitions), and hands the resulting `ntasks` entries to the device
+ * routine.  The loop stops once the claimed id reaches
+ * ntile_ij_pairs * ceil(ntile_kl_pairs / TILES_IN_BATCH).
+ *
+ * envs, jk, bounds: forwarded unchanged to the fill and integral routines.
+ * pool:             queue storage; block b uses entries starting at
+ *                   b * QUEUE_DEPTH.
+ * batch_head:       batch_head[0] is the next batch id to hand out;
+ *                   batch_head[1] accumulates the per-batch task counts.
+ *
+ * NOTE(review): the "1011" suffix presumably encodes the angular momenta of
+ * the (i,j,k,l) shells handled here -- confirm against the code generator.
+ */
+void rys_ejk_ip2_type12_1011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *pool, uint32_t *batch_head)
+{
+    int b_id = blockIdx.x; // only the x component of the block index is used
+    int t_id = threadIdx.x + blockDim.x * threadIdx.y; // linear thread index within the 2D block
+    ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; // start of this block's queue slice
+    __shared__ int batch_id; // batch currently being processed by this block
+    if (t_id == 0) {
+        batch_id = atomicAdd(batch_head, 1); // thread 0 claims the first batch for the block
+    }
+    __syncthreads(); // make batch_id visible to every thread before it is read
+    int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; // ceil(ntile_kl_pairs / TILES_IN_BATCH)
+    int nbatches = bounds.ntile_ij_pairs * nbatches_kl; // total number of (ij, kl) batches
+    while (batch_id < nbatches) {
+        int batch_ij = batch_id / nbatches_kl; // batch_id == batch_ij * nbatches_kl + batch_kl
+        int batch_kl = batch_id % nbatches_kl;
+        int nbas = envs.nbas;
+        double *env = envs.env;
+        double omega = env[PTR_RANGE_OMEGA]; // its sign selects the fill routine below
+        int ntasks;
+        if (omega >= 0) {
+            ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                     batch_ij, batch_kl);
+        } else { // omega < 0: "sr" variant (presumably short-range -- confirm)
+            ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                        batch_ij, batch_kl);
+        }
+        if (ntasks > 0) { // skip the integral routine when the batch yields no tasks
+            int tile_ij = bounds.tile_ij_mapping[batch_ij]; // combined (i, j) tile index of this ij batch
+            int nbas_tiles = nbas / TILE; // integer division; assumes nbas is a multiple of TILE -- TODO confirm
+            int tile_i = tile_ij / nbas_tiles; // tile_ij == tile_i * nbas_tiles + tile_j
+            int tile_j = tile_ij % nbas_tiles;
+            int ish0 = tile_i * TILE; // first shell index of tile i
+            int jsh0 = tile_j * TILE; // first shell index of tile j
+            _rys_ejk_ip2_type12_1011(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0);
+        }
+        if (t_id == 0) {
+            batch_id = atomicAdd(batch_head, 1); // claim the batch for the next iteration
+            atomicAdd(batch_head+1, ntasks); // add this batch's task count to batch_head[1]
+        }
+        __syncthreads(); // all threads must see the new batch_id before the loop test
+    }
+}
+
+__device__ static
+void _rys_ejk_ip2_type12_1100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0)
+{
+    int sq_id = threadIdx.x + blockDim.x * threadIdx.y;
+    int nsq_per_block = blockDim.x * blockDim.y;
+    int iprim = bounds.iprim;
+    int jprim = bounds.jprim;
+    int kprim = bounds.kprim;
+    int lprim = bounds.lprim;
+    int *ao_loc = envs.ao_loc;
+    int nbas = envs.nbas;
+    int nao = ao_loc[nbas];
+    int *bas = envs.bas;
+    double *env = envs.env;
+    double omega = env[PTR_RANGE_OMEGA];
+    int do_j = jk.j_factor != 0.;
+    int do_k = jk.k_factor != 0.;
+    double *dm = jk.dm;
+    extern __shared__ double Rpa_cicj[];
+    double *rw = Rpa_cicj + iprim*jprim*TILE2*4;
+    for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) {
+        int ijp = n / TILE2;
+        int sh_ij = n % TILE2;
+        int ish = ish0 + sh_ij / TILE;
+        int jsh = jsh0 + sh_ij % TILE;
+        int ip = ijp / jprim;
+        int jp = ijp % jprim;
+        double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP];
+        double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP];
+        double *ci = env +
bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double 
Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double g1x, g1y, g1z; + double g2x, g2y, g2z; + double g3x, g3y, g3z; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; + double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx = 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = 
Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double hrr_1100x = trr_20x - xjxi * trr_10x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = 
(j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double hrr_2100x = trr_30x - xjxi * trr_20x; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + g1x = aj2 * hrr_1200x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + double hrr_0100y = trr_10y - yjyi * 1; + g1y = aj2 * hrr_0100y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + double hrr_0100z = trr_10z - zjzi * wt; + g1z = aj2 * hrr_0100z; + g1x -= 1 * trr_10x; + g2x = ai2 * hrr_2100x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_10z; + double hrr_0100x = trr_10x - xjxi * fac; + g2x -= 1 * hrr_0100x; + double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; + double hrr_3100x = trr_40x - xjxi * trr_30x; + double hrr_2200x = hrr_3100x - xjxi * hrr_2100x; + g3x = ai2 * hrr_2200x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + g3y = ai2 * hrr_1100y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + g3z = ai2 * hrr_1100z; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + g3x -= 1 * hrr_0200x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_20x - 1 * fac); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3100x - 3 * hrr_1100x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1300x = hrr_2200x 
- xjxi * hrr_1200x; + g3x = aj2 * (aj2 * hrr_1300x - 3 * hrr_1100x); + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0200x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0100z; + g1x -= 1 * fac; + g2x = ai2 * hrr_1100x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_10z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1200x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_10x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2100x - 1 * hrr_0100x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * 
prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0300x = hrr_1200x - xjxi * hrr_0200x; + g3x = aj2 * (aj2 * hrr_0300x - 3 * hrr_0100x); + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0200x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1100z; + g1x -= 1 * fac; + g2x = ai2 * hrr_1100x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1200x; + g3y = ai2 * hrr_1100y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_10x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2100x - 1 * hrr_0100x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + 
v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0300x - 3 * hrr_0100x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0200y; + g1z = aj2 * hrr_0100z; + g1y -= 1 * 1; + g2x = ai2 * trr_20x; + g2y = ai2 * hrr_1100y; + g2z = ai2 * trr_10z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1200y; + g3z = ai2 * hrr_1100z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_10y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * hrr_2100y - 1 * hrr_0100y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + 
v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + double hrr_0300y = hrr_1200y - yjyi * hrr_0200y; + g3y = aj2 * (aj2 * hrr_0300y - 3 * hrr_0100y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1200y; + g1z = aj2 * hrr_0100z; + g1y -= 1 * trr_10y; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_2100y; + g2z = ai2 * trr_10z; + g2y -= 1 * hrr_0100y; + g3x = ai2 * hrr_1100x; + double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; + double hrr_3100y = trr_40y - yjyi * trr_30y; + double hrr_2200y = hrr_3100y - yjyi * hrr_2100y; + g3y = ai2 * hrr_2200y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0200y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_20y - 1 * 1); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_3100y - 3 * hrr_1100y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + 
v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + double hrr_1300y = hrr_2200y - yjyi * hrr_1200y; + g3y = aj2 * (aj2 * hrr_1300y - 3 * hrr_1100y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0200y; + g1z = aj2 * hrr_1100z; + g1y -= 1 * 1; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_1100y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1200y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_10y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_2100y - 1 * hrr_0100y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += 
g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0300y - 3 * hrr_0100y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_0200z; + g1z -= 1 * wt; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_1100z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1100y; + g3z = ai2 * hrr_1200z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_10z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_2100z - 1 * hrr_0100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * 
hrr_0200y - 1 * 1); + double hrr_0300z = hrr_1200z - zjzi * hrr_0200z; + g3z = aj2 * (aj2 * hrr_0300z - 3 * hrr_0100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0200z; + g1z -= 1 * wt; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_20y; + g2z = ai2 * hrr_1100z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1200z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_10z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * hrr_2100z - 1 * hrr_0100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0300z - 3 * hrr_0100z); + v_jxx += g3x * 
prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1200z; + g1z -= 1 * trr_10z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_2100z; + g2z -= 1 * hrr_0100z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1100y; + double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; + double hrr_3100z = trr_40z - zjzi * trr_30z; + double hrr_2200z = hrr_3100z - zjzi * hrr_2100z; + g3z = ai2 * hrr_2200z; + g3z -= 1 * hrr_0200z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_20z - 1 * wt); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_3100z - 3 * hrr_1100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1300z = hrr_2200z - zjzi * hrr_1200z; + 
g3z = aj2 * (aj2 * hrr_1300z - 3 * hrr_1100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + } + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double hrr_1100x = trr_20x - xjxi * trr_10x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1001x = trr_11x - xlxk * trr_10x; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + g1x = al2 * hrr_1101x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + g1y = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + g1z = al2 * hrr_0001z; + double hrr_1110x = trr_21x - xjxi * trr_11x; + g2x = ak2 * hrr_1110x; + g2y = ak2 * trr_01y; + g2z = 
ak2 * trr_01z; + double b01 = .5/akl * (1 - rt_akl); + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + g3x = ak2 * hrr_1111x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + g3y = ak2 * hrr_0011y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + double hrr_1120x = trr_22x - xjxi * trr_12x; + g3x = ak2 * (ak2 * hrr_1120x - 1 * hrr_1100x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_2002x = hrr_2011x - xlxk * hrr_2001x; + double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; + double hrr_1102x = hrr_2002x - xjxi * hrr_1002x; + g3x = al2 * (al2 * hrr_1102x - 1 * hrr_1100x); + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0100x = trr_10x - xjxi * fac; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * 
dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + double hrr_0001x = trr_01x - xlxk * fac; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + g1x = al2 * hrr_0101x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0001z; + double hrr_0110x = trr_11x - xjxi * trr_01x; + g2x = ak2 * hrr_0110x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_01z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + g3x = ak2 * hrr_0111x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + double hrr_0120x = trr_12x - xjxi * trr_02x; + g3x = ak2 * (ak2 * hrr_0120x - 1 * hrr_0100x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + double 
hrr_0102x = hrr_1002x - xjxi * hrr_0002x; + g3x = al2 * (al2 * hrr_0102x - 1 * hrr_0100x); + double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0101x; + g1y = al2 * hrr_0001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + g1z = al2 * hrr_1001z; + g2x = ak2 * hrr_0110x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_11z; + g3x = ak2 * hrr_0111x; + g3y = ak2 * hrr_0011y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + g3z = ak2 * hrr_1011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0120x - 1 * hrr_0100x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + 
v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0102x - 1 * hrr_0100x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0100y = trr_10y - yjyi * 1; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = al2 * hrr_1001x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + g1y = al2 * hrr_0101y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_11x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + g2y = ak2 * hrr_0110y; + g2z = ak2 * trr_01z; + g3x = ak2 * hrr_1011x; + double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; + g3y = ak2 * hrr_0111y; + g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + double hrr_0120y = trr_12y - yjyi * trr_02y; + g3y = ak2 * (ak2 * 
hrr_0120y - 1 * hrr_0100y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + double hrr_0102y = hrr_1002y - yjyi * hrr_0002y; + g3y = al2 * (al2 * hrr_0102y - 1 * hrr_0100y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + g1x = al2 * hrr_0001x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_2001y = trr_21y - ylyk * trr_20y; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + g1y = al2 * hrr_1101y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_01x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + g2y = ak2 * hrr_1110y; + g2z = ak2 * trr_01z; + g3x = ak2 * hrr_0011x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + double hrr_2011y = trr_22y - ylyk * trr_21y; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + g3y = ak2 * hrr_1111y; + g3z = ak2 * hrr_0011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * 
prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + double hrr_1120y = trr_22y - yjyi * trr_12y; + g3y = ak2 * (ak2 * hrr_1120y - 1 * hrr_1100y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + double hrr_2002y = hrr_2011y - ylyk * hrr_2001y; + double hrr_1102y = hrr_2002y - yjyi * hrr_1002y; + g3y = al2 * (al2 * hrr_1102y - 1 * hrr_1100y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0101y; + g1z = al2 * hrr_1001z; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_0110y; + g2z = ak2 * trr_11z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0111y; + g3z = ak2 * hrr_1011z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * 
Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_0120y - 1 * hrr_0100y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0102y - 1 * hrr_0100y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0100z = trr_10z - zjzi * wt; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0001y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + g1z = al2 * hrr_0101z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_01y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + g2z = ak2 * hrr_0110z; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0011y; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + g3z = ak2 * hrr_0111z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + 
v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + double hrr_0120z = trr_12z - zjzi * trr_02z; + g3z = ak2 * (ak2 * hrr_0120z - 1 * hrr_0100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0102z = hrr_1002z - zjzi * hrr_0002z; + g3z = al2 * (al2 * hrr_0102z - 1 * hrr_0100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0101z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_11y; + g2z = ak2 * hrr_0110z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0111z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x 
- 1 * fac); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * hrr_0120z - 1 * hrr_0100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0102z - 1 * hrr_0100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0001y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_2001z = trr_21z - zlzk * trr_20z; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + g1z = al2 * hrr_1101z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_01y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + g2z = ak2 * hrr_1110z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0011y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + double hrr_2011z = trr_22z - zlzk * trr_21z; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + g3z = ak2 * hrr_1111z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += 
g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + double hrr_1120z = trr_22z - zjzi * trr_12z; + g3z = ak2 * (ak2 * hrr_1120z - 1 * hrr_1100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_2002z = hrr_2011z - zlzk * hrr_2001z; + double hrr_1102z = hrr_2002z - zjzi * hrr_1002z; + g3z = al2 * (al2 * hrr_1102z - 1 * hrr_1100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, v2xx); + atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy); + atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz); + atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx); + atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy); + atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz); + atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx); + atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy); + atomicAdd(ejk + (ka*natm+la)*9 + 8, 
v2zz); + atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy); + atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz); + atomicAdd(ejk + (ia*natm+ia)*9 + 7, v_iyz); + atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy); + atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 6, v_jxz); + atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz); + atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy); + atomicAdd(ejk + (ka*natm+ka)*9 + 4, v_kyy*.5); + atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz); + atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz); + atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5); + atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5); + atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy); + atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5); + atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz); + atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz); + atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5); + } +} /* Launch entry that drives _rys_ejk_ip2_type12_1100. Every thread block loops: thread 0 claims a batch index from the counter batch_head[0], the index is decoded into an (ij, kl) batch pair, a task-fill helper returns the number of shell-quartet tasks for that pair, and when that count is positive the device routine is run on them. Parameters: envs, jk and bounds are forwarded unchanged to the helpers (envs.nbas and envs.env are also read here); pool is the task-queue storage, of which this block uses the QUEUE_DEPTH-strided slice selected by blockIdx.x; batch_head[0] is the shared batch counter and batch_head[1] accumulates the task counts returned by the fill helpers. NOTE(review): the fill helpers are defined elsewhere; presumably they populate the queue slice they are handed, confirm against their definitions. */ +__global__ +void rys_ejk_ip2_type12_1100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ + int b_id = blockIdx.x; + int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index inside the block */ + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's slice of pool */ + __shared__ int batch_id; /* one copy per block, written only by thread 0 */ + if (t_id == 0) { /* a single thread claims the first batch for the whole block */ + batch_id = atomicAdd(batch_head, 1); + } + __syncthreads(); /* make batch_id visible to every thread before it is read */ + int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceiling division */ + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; + while (batch_id < nbatches) { + int batch_ij = batch_id / nbatches_kl; /* split the flat batch index into its ij and kl parts */ + int batch_kl = batch_id % nbatches_kl; + int nbas = envs.nbas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { /* negative omega is routed to the _sr_ helper instead; presumably the short-range variant, confirm */ + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, 
batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { /* nothing to compute for this batch otherwise */ + int tile_ij = bounds.tile_ij_mapping[batch_ij]; /* look up the flat tile-pair index for this ij batch */ + int nbas_tiles = nbas / TILE; + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; /* tile indices scaled by TILE give the base shell indices handed to the device routine */ + int jsh0 = tile_j * TILE; + _rys_ejk_ip2_type12_1100(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); + } + if (t_id == 0) { /* thread 0 claims the next batch and records this batch's task count in batch_head[1] */ + batch_id = atomicAdd(batch_head, 1); + atomicAdd(batch_head+1, ntasks); + } + __syncthreads(); /* all threads must see the new batch_id before the loop condition is re-tested */ + } +} + +__device__ static +void _rys_ejk_ip2_type12_1110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi 
= rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double g1x, g1y, g1z; + double g2x, g2y, g2z; + double g3x, g3y, g3z; + double v_ixx = 0; + double v_ixy = 0; + double v_ixz = 0; + double v_iyy = 0; + double v_iyz = 0; + double v_izz = 0; + double v_jxx = 0; + double v_jxy = 0; + double v_jxz = 0; + double v_jyy = 0; + double v_jyz = 0; + double v_jzz = 0; + double v_kxx = 
0; + double v_kxy = 0; + double v_kxz = 0; + double v_kyy = 0; + double v_kyz = 0; + double v_kzz = 0; + double v_lxx = 0; + double v_lxy = 0; + double v_lxz = 0; + double v_lyy = 0; + double v_lyz = 0; + double v_lzz = 0; + double v1xx = 0; + double v1xy = 0; + double v1xz = 0; + double v1yx = 0; + double v1yy = 0; + double v1yz = 0; + double v1zx = 0; + double v1zy = 0; + double v1zz = 0; + double v2xx = 0; + double v2xy = 0; + double v2xz = 0; + double v2yx = 0; + double v2yy = 0; + double v2yz = 0; + double v2zx = 0; + double v2zy = 0; + double v2zz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + 
double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1110x = trr_21x - xjxi * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1110x * dd; + Iy = 1 * dd; + Iz = wt * dd; + 
prod_xy = hrr_1110x * Iy; + prod_xz = hrr_1110x * Iz; + prod_yz = 1 * Iz; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double hrr_2110x = trr_31x - xjxi * trr_21x; + double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; + g1x = aj2 * hrr_1210x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + double hrr_0100y = trr_10y - yjyi * 1; + g1y = aj2 * hrr_0100y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + double hrr_0100z = trr_10z - zjzi * wt; + g1z = aj2 * hrr_0100z; + g1x -= 1 * trr_11x; + g2x = ai2 * hrr_2110x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_10z; + double trr_01x = cpx * fac; + double hrr_0110x = trr_11x - xjxi * trr_01x; + g2x -= 1 * hrr_0110x; + double trr_40x = c0x * trr_30x + 3*b10 * trr_20x; + double trr_41x = cpx * trr_40x + 4*b00 * trr_30x; + double hrr_3110x = trr_41x - xjxi * trr_31x; + double hrr_2210x = hrr_3110x - xjxi * hrr_2110x; + g3x = ai2 * hrr_2210x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + g3y = ai2 * hrr_1100y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + g3z = ai2 * hrr_1100z; + double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; + g3x -= 1 * hrr_0210x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_21x - 1 * trr_01x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3110x - 3 * hrr_1110x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1310x = hrr_2210x - xjxi * hrr_1210x; + g3x = aj2 * (aj2 * 
hrr_1310x - 3 * hrr_1110x); + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0210x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0100z; + g1x -= 1 * trr_01x; + g2x = ai2 * hrr_1110x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_10z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1210x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_11x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2110x - 1 * hrr_0110x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + 
v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0310x = hrr_1210x - xjxi * hrr_0210x; + g3x = aj2 * (aj2 * hrr_0310x - 3 * hrr_0110x); + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0210x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1100z; + g1x -= 1 * trr_01x; + g2x = ai2 * hrr_1110x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1210x; + g3y = ai2 * hrr_1100y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_11x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2110x - 1 * hrr_0110x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += 
g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0310x - 3 * hrr_0110x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = aj2 * hrr_1110x; + g1y = aj2 * hrr_0200y; + g1z = aj2 * hrr_0100z; + g1y -= 1 * 1; + g2x = ai2 * trr_21x; + g2y = ai2 * hrr_1100y; + g2z = ai2 * trr_10z; + g2x -= 1 * trr_01x; + g3x = ai2 * hrr_2110x; + g3y = ai2 * hrr_1200y; + g3z = ai2 * hrr_1100z; + g3x -= 1 * hrr_0110x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_10y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_31x - 3 * trr_11x); + g3y = ai2 * (ai2 * hrr_2100y - 1 * hrr_0100y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz 
+= g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1210x - 1 * trr_11x); + double hrr_0300y = hrr_1200y - yjyi * hrr_0200y; + g3y = aj2 * (aj2 * hrr_0300y - 3 * hrr_0100y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1100y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_1200y; + g1z = aj2 * hrr_0100z; + g1y -= 1 * trr_10y; + g2x = ai2 * trr_11x; + g2y = ai2 * hrr_2100y; + g2z = ai2 * trr_10z; + g2y -= 1 * hrr_0100y; + g3x = ai2 * hrr_1110x; + double trr_40y = c0y * trr_30y + 3*b10 * trr_20y; + double hrr_3100y = trr_40y - yjyi * trr_30y; + double hrr_2200y = hrr_3100y - yjyi * hrr_2100y; + g3y = ai2 * hrr_2200y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0200y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_20y - 1 * 1); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * hrr_3100y - 3 * hrr_1100y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * 
prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + double hrr_1300y = hrr_2200y - yjyi * hrr_1200y; + g3y = aj2 * (aj2 * hrr_1300y - 3 * hrr_1100y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_0200y; + g1z = aj2 * hrr_1100z; + g1y -= 1 * 1; + g2x = ai2 * trr_11x; + g2y = ai2 * hrr_1100y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_1200y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_10y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * hrr_2100y - 1 * hrr_0100y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy 
+= g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_0300y - 3 * hrr_0100y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1110x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_0200z; + g1z -= 1 * wt; + g2x = ai2 * trr_21x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_1100z; + g2x -= 1 * trr_01x; + g3x = ai2 * hrr_2110x; + g3y = ai2 * hrr_1100y; + g3z = ai2 * hrr_1200z; + g3x -= 1 * hrr_0110x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_10z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_31x - 3 * trr_11x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_2100z - 1 * hrr_0100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1210x - 1 * trr_11x); + g3y = 
aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0300z = hrr_1200z - zjzi * hrr_0200z; + g3z = aj2 * (aj2 * hrr_0300z - 3 * hrr_0100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0200z; + g1z -= 1 * wt; + g2x = ai2 * trr_11x; + g2y = ai2 * trr_20y; + g2z = ai2 * hrr_1100z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1200z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_10z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * hrr_2100z - 1 * hrr_0100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0300z - 
3 * hrr_0100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0110x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1200z; + g1z -= 1 * trr_10z; + g2x = ai2 * trr_11x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_2100z; + g2z -= 1 * hrr_0100z; + g3x = ai2 * hrr_1110x; + g3y = ai2 * hrr_1100y; + double trr_40z = c0z * trr_30z + 3*b10 * trr_20z; + double hrr_3100z = trr_40z - zjzi * trr_30z; + double hrr_2200z = hrr_3100z - zjzi * hrr_2100z; + g3z = ai2 * hrr_2200z; + g3z -= 1 * hrr_0200z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_20z - 1 * wt); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_21x - 1 * trr_01x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_3100z - 3 * hrr_1100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0210x - 1 * trr_01x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + 
double hrr_1300z = hrr_2200z - zjzi * hrr_1200z; + g3z = aj2 * (aj2 * hrr_1300z - 3 * hrr_1100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double hrr_1100x = trr_20x - xjxi * trr_10x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = trr_01y * Iz; + double hrr_2100x = trr_30x - xjxi * trr_20x; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + g1x = aj2 * hrr_1200x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_0110y = trr_11y - yjyi * trr_01y; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_0100z; + g1x -= 1 * trr_10x; + g2x = ai2 * hrr_2100x; + g2y = ai2 * trr_11y; + g2z = ai2 * trr_10z; + double hrr_0100x = trr_10x - xjxi * fac; + g2x -= 1 * hrr_0100x; + double hrr_3100x = trr_40x - xjxi * trr_30x; + double hrr_2200x = hrr_3100x - xjxi * hrr_2100x; + g3x = ai2 * hrr_2200x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_1110y = trr_21y - yjyi * trr_11y; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_1100z; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + g3x -= 1 * hrr_0200x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_20x - 1 * fac); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + 
v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3100x - 3 * hrr_1100x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_1300x = hrr_2200x - xjxi * hrr_1200x; + g3x = aj2 * (aj2 * hrr_1300x - 3 * hrr_1100x); + double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_11y * Iz; + g1x = aj2 * hrr_0200x; + g1y = aj2 * hrr_1110y; + g1z = aj2 * hrr_0100z; + g1x -= 1 * fac; + g2x = ai2 * hrr_1100x; + g2y = ai2 * trr_21y; + g2z = ai2 * trr_10z; + g2y -= 1 * trr_01y; + g3x = ai2 * hrr_1200x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_2110y = trr_31y - yjyi * trr_21y; + g3y = ai2 * hrr_2110y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0110y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * 
trr_10x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2100x - 1 * hrr_0100x); + g3y = ai2 * (ai2 * trr_31y - 3 * trr_11y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + double hrr_0300x = hrr_1200x - xjxi * hrr_0200x; + g3x = aj2 * (aj2 * hrr_0300x - 3 * hrr_0100x); + double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; + g3y = aj2 * (aj2 * hrr_1210y - 1 * trr_11y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_0200x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_1100z; + g1x -= 1 * fac; + g2x = ai2 * hrr_1100x; + g2y = ai2 * trr_11y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1200x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_10x); + v1xx += 
g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2100x - 1 * hrr_0100x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0300x - 3 * hrr_0100x); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0110y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0110y * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0210y; + g1z = aj2 * hrr_0100z; + g1y -= 1 * trr_01y; + g2x = ai2 * trr_20x; + g2y = ai2 * hrr_1110y; + g2z = ai2 * trr_10z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1210y; + g3z = ai2 * hrr_1100z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_11y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * 
Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * hrr_2110y - 1 * hrr_0110y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + double hrr_0310y = hrr_1210y - yjyi * hrr_0210y; + g3y = aj2 * (aj2 * hrr_0310y - 3 * hrr_0110y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1110y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1110y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1210y; + g1z = aj2 * hrr_0100z; + g1y -= 1 * trr_11y; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_2110y; + g2z = ai2 * trr_10z; + g2y -= 1 * hrr_0110y; + g3x = ai2 * hrr_1100x; + double trr_41y = cpy * trr_40y + 4*b00 * trr_30y; + double hrr_3110y = trr_41y - yjyi * trr_31y; + double hrr_2210y = hrr_3110y - yjyi * hrr_2110y; + g3y = ai2 * hrr_2210y; + g3z = ai2 * hrr_1100z; + g3y -= 1 * hrr_0210y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_21y - 1 * trr_01y); + v1xx += g3x * prod_yz; + v1yy += 
g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_3110y - 3 * hrr_1110y); + g3z = ai2 * (ai2 * trr_20z - 1 * wt); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + double hrr_1310y = hrr_2210y - yjyi * hrr_1210y; + g3y = aj2 * (aj2 * hrr_1310y - 3 * hrr_1110y); + g3z = aj2 * (aj2 * hrr_0200z - 1 * wt); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0110y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0110y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0210y; + g1z = aj2 * hrr_1100z; + g1y -= 1 * trr_01y; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_1110y; + g2z = ai2 * trr_20z; + g2z -= 1 * wt; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1210y; + g3z = ai2 * hrr_2100z; + g3z -= 1 * hrr_0100z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_11y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * 
Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_2110y - 1 * hrr_0110y); + g3z = ai2 * (ai2 * trr_30z - 3 * trr_10z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0310y - 3 * hrr_0110y); + g3z = aj2 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_0200z; + g1z -= 1 * wt; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_11y; + g2z = ai2 * hrr_1100z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_1200z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_10z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * 
(ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * hrr_2100z - 1 * hrr_0100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_0300z - 3 * hrr_0100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1110y; + g1z = aj2 * hrr_0200z; + g1z -= 1 * wt; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_21y; + g2z = ai2 * hrr_1100z; + g2y -= 1 * trr_01y; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2110y; + g3z = ai2 * hrr_1200z; + g3y -= 1 * hrr_0110y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_10z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_31y - 3 * trr_11y); + g3z = ai2 * (ai2 * hrr_2100z - 1 * hrr_0100z); + 
v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1210y - 1 * trr_11y); + g3z = aj2 * (aj2 * hrr_0300z - 3 * hrr_0100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0110y; + g1z = aj2 * hrr_1200z; + g1z -= 1 * trr_10z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_11y; + g2z = ai2 * hrr_2100z; + g2z -= 1 * hrr_0100z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1110y; + g3z = ai2 * hrr_2200z; + g3z -= 1 * hrr_0200z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_20z - 1 * wt); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_21y - 1 * trr_01y); + g3z = ai2 * (ai2 * hrr_3100z - 3 * hrr_1100z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += 
g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0210y - 1 * trr_01y); + g3z = aj2 * (aj2 * hrr_1300z - 3 * hrr_1100z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1200x; + g1y = aj2 * hrr_0100y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_0110z = trr_11z - zjzi * trr_01z; + g1z = aj2 * hrr_0110z; + g1x -= 1 * trr_10x; + g2x = ai2 * hrr_2100x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_11z; + g2x -= 1 * hrr_0100x; + g3x = ai2 * hrr_2200x; + g3y = ai2 * hrr_1100y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_1110z = trr_21z - zjzi * trr_11z; + g3z = ai2 * hrr_1110z; + g3x -= 1 * hrr_0200x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_20x - 1 * fac); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_3100x - 3 * hrr_1100x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 
* trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1300x - 3 * hrr_1100x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0200x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0110z; + g1x -= 1 * fac; + g2x = ai2 * hrr_1100x; + g2y = ai2 * trr_20y; + g2z = ai2 * trr_11z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1200x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1110z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_10x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2100x - 1 * hrr_0100x); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y 
* prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0300x - 3 * hrr_0100x); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0200x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1110z; + g1x -= 1 * fac; + g2x = ai2 * hrr_1100x; + g2y = ai2 * trr_10y; + g2z = ai2 * trr_21z; + g2z -= 1 * trr_01z; + g3x = ai2 * hrr_1200x; + g3y = ai2 * hrr_1100y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_2110z = trr_31z - zjzi * trr_21z; + g3z = ai2 * hrr_2110z; + g3z -= 1 * hrr_0110z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3x -= 1 * (ai2 * trr_10x); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * hrr_2100x - 1 * hrr_0100x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * trr_31z - 3 * trr_11z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z 
* prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0300x - 3 * hrr_0100x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; + g3z = aj2 * (aj2 * hrr_1210z - 1 * trr_11z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0200y; + g1z = aj2 * hrr_0110z; + g1y -= 1 * 1; + g2x = ai2 * trr_20x; + g2y = ai2 * hrr_1100y; + g2z = ai2 * trr_11z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1200y; + g3z = ai2 * hrr_1110z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_10y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * hrr_2100y - 1 * hrr_0100y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + 
v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0300y - 3 * hrr_0100y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1200y; + g1z = aj2 * hrr_0110z; + g1y -= 1 * trr_10y; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_2100y; + g2z = ai2 * trr_11z; + g2y -= 1 * hrr_0100y; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2200y; + g3z = ai2 * hrr_1110z; + g3y -= 1 * hrr_0200y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_20y - 1 * 1); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_3100y - 3 * hrr_1100y); + g3z = ai2 * (ai2 * trr_21z - 1 * trr_01z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1300y - 3 * 
hrr_1100y); + g3z = aj2 * (aj2 * hrr_0210z - 1 * trr_01z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0200y; + g1z = aj2 * hrr_1110z; + g1y -= 1 * 1; + g2x = ai2 * trr_10x; + g2y = ai2 * hrr_1100y; + g2z = ai2 * trr_21z; + g2z -= 1 * trr_01z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1200y; + g3z = ai2 * hrr_2110z; + g3z -= 1 * hrr_0110z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3y -= 1 * (ai2 * trr_10y); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * hrr_2100y - 1 * hrr_0100y); + g3z = ai2 * (ai2 * trr_31z - 3 * trr_11z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0300y - 3 * hrr_0100y); + g3z = aj2 * (aj2 * hrr_1210z - 1 * trr_11z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * 
prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0110z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_1100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_0210z; + g1z -= 1 * trr_01z; + g2x = ai2 * trr_20x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_1110z; + g2x -= 1 * fac; + g3x = ai2 * hrr_2100x; + g3y = ai2 * hrr_1100y; + g3z = ai2 * hrr_1210z; + g3x -= 1 * hrr_0100x; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_11z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_30x - 3 * trr_10x); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_2110z - 1 * hrr_0110z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_1200x - 1 * trr_10x); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_0310z = hrr_1210z - zjzi * hrr_0210z; + g3z = aj2 * (aj2 * hrr_0310z - 3 * hrr_0110z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z 
* Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_1100y; + g1z = aj2 * hrr_0210z; + g1z -= 1 * trr_01z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_20y; + g2z = ai2 * hrr_1110z; + g2y -= 1 * 1; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_2100y; + g3z = ai2 * hrr_1210z; + g3y -= 1 * hrr_0100y; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_11z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_30y - 3 * trr_10y); + g3z = ai2 * (ai2 * hrr_2110z - 1 * hrr_0110z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_1200y - 1 * trr_10y); + g3z = aj2 * (aj2 * hrr_0310z - 3 * hrr_0110z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if 
(jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = aj2 * hrr_0100x; + g1y = aj2 * hrr_0100y; + g1z = aj2 * hrr_1210z; + g1z -= 1 * trr_11z; + g2x = ai2 * trr_10x; + g2y = ai2 * trr_10y; + g2z = ai2 * hrr_2110z; + g2z -= 1 * hrr_0110z; + g3x = ai2 * hrr_1100x; + g3y = ai2 * hrr_1100y; + double trr_41z = cpz * trr_40z + 4*b00 * trr_30z; + double hrr_3110z = trr_41z - zjzi * trr_31z; + double hrr_2210z = hrr_3110z - zjzi * hrr_2110z; + g3z = ai2 * hrr_2210z; + g3z -= 1 * hrr_0210z; + g3x *= aj2; + g3y *= aj2; + g3z *= aj2; + g3z -= 1 * (ai2 * trr_21z - 1 * trr_01z); + v1xx += g3x * prod_yz; + v1yy += g3y * prod_xz; + v1zz += g3z * prod_xy; + v1xy += g2x * g1y * Iz; + v1xz += g2x * g1z * Iy; + v1yx += g2y * g1x * Iz; + v1yz += g2y * g1z * Ix; + v1zx += g2z * g1x * Iy; + v1zy += g2z * g1y * Ix; + g3x = ai2 * (ai2 * trr_20x - 1 * fac); + g3y = ai2 * (ai2 * trr_20y - 1 * 1); + g3z = ai2 * (ai2 * hrr_3110z - 3 * hrr_1110z); + v_ixx += g3x * prod_yz; + v_iyy += g3y * prod_xz; + v_izz += g3z * prod_xy; + v_ixy += g2x * g2y * Iz; + v_ixz += g2x * g2z * Iy; + v_iyz += g2y * g2z * Ix; + g3x = aj2 * (aj2 * hrr_0200x - 1 * fac); + g3y = aj2 * (aj2 * hrr_0200y - 1 * 1); + double hrr_1310z = hrr_2210z - zjzi * hrr_1210z; + g3z = aj2 * (aj2 * hrr_1310z - 3 * hrr_1110z); + v_jxx += g3x * prod_yz; + v_jyy += g3y * prod_xz; + v_jzz += g3z * prod_xy; + v_jxy += g1x * g1y * Iz; + v_jxz += g1x * g1z * Iy; + v_jyz += g1y * g1z * Ix; + } + { + double wt = rw[sq_id + 
(2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1110x = trr_21x - xjxi * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1110x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1110x * Iy; + prod_xz = hrr_1110x * Iz; + prod_yz = 1 * Iz; + double b01 = .5/akl * (1 - rt_akl); + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + g1x = al2 * hrr_1111x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + g1y = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + g1z = al2 * hrr_0001z; + double hrr_1120x = trr_22x - xjxi * trr_12x; + g2x = ak2 * hrr_1120x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_01z; + double 
hrr_1100x = trr_20x - xjxi * trr_10x; + g2x -= 1 * hrr_1100x; + double trr_23x = cpx * trr_22x + 2*b01 * trr_21x + 2*b00 * trr_12x; + double hrr_2021x = trr_23x - xlxk * trr_22x; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; + double hrr_1021x = trr_13x - xlxk * trr_12x; + double hrr_1121x = hrr_2021x - xjxi * hrr_1021x; + g3x = ak2 * hrr_1121x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + g3y = ak2 * hrr_0011y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + g3z = ak2 * hrr_0011z; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double hrr_1001x = trr_11x - xlxk * trr_10x; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + g3x -= 1 * hrr_1101x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + double hrr_1130x = trr_23x - xjxi * trr_13x; + g3x = ak2 * (ak2 * hrr_1130x - 3 * hrr_1110x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_2012x = hrr_2021x - xlxk * hrr_2011x; + double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; + double hrr_1112x = hrr_2012x - xjxi * hrr_1012x; + g3x = al2 * (al2 * hrr_1112x - 1 * hrr_1110x); + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0110x = 
trr_11x - xjxi * trr_01x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = trr_10y * Iz; + double hrr_0011x = trr_02x - xlxk * trr_01x; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + g1x = al2 * hrr_0111x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0001z; + double hrr_0120x = trr_12x - xjxi * trr_02x; + g2x = ak2 * hrr_0120x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_01z; + double hrr_0100x = trr_10x - xjxi * fac; + g2x -= 1 * hrr_0100x; + double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; + double hrr_0021x = trr_03x - xlxk * trr_02x; + double hrr_0121x = hrr_1021x - xjxi * hrr_0021x; + g3x = ak2 * hrr_0121x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0011z; + double hrr_0001x = trr_01x - xlxk * fac; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + g3x -= 1 * hrr_0101x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + double 
hrr_0130x = trr_13x - xjxi * trr_03x; + g3x = ak2 * (ak2 * hrr_0130x - 3 * hrr_0110x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; + double hrr_0112x = hrr_1012x - xjxi * hrr_0012x; + g3x = al2 * (al2 * hrr_0112x - 1 * hrr_0110x); + double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0111x; + g1y = al2 * hrr_0001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + g1z = al2 * hrr_1001z; + g2x = ak2 * hrr_0120x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_11z; + g2x -= 1 * hrr_0100x; + g3x = ak2 * hrr_0121x; + g3y = ak2 * hrr_0011y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + g3z = ak2 * 
hrr_1011z; + g3x -= 1 * hrr_0101x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0130x - 3 * hrr_0110x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0112x - 1 * hrr_0110x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0100y = trr_10y - yjyi * 1; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = al2 * hrr_1011x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + g1y = al2 * hrr_0101y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_12x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + g2y = ak2 * hrr_0110y; + g2z = ak2 * trr_01z; + g2x -= 1 * trr_10x; + g3x = ak2 * hrr_1021x; + double hrr_0111y = 
hrr_1011y - yjyi * hrr_0011y; + g3y = ak2 * hrr_0111y; + g3z = ak2 * hrr_0011z; + g3x -= 1 * hrr_1001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_13x - 3 * trr_11x); + double hrr_0120y = trr_12y - yjyi * trr_02y; + g3y = ak2 * (ak2 * hrr_0120y - 1 * hrr_0100y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1012x - 1 * trr_11x); + double hrr_0102y = hrr_1002y - yjyi * hrr_0002y; + g3y = al2 * (al2 * hrr_0102y - 1 * hrr_0100y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + double hrr_1100y = trr_20y - yjyi * trr_10y; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1100y * Iz; + g1x = al2 * hrr_0011x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_2001y = trr_21y - ylyk * trr_20y; + double hrr_1101y 
= hrr_2001y - yjyi * hrr_1001y; + g1y = al2 * hrr_1101y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_02x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + g2y = ak2 * hrr_1110y; + g2z = ak2 * trr_01z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + double hrr_2011y = trr_22y - ylyk * trr_21y; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + g3y = ak2 * hrr_1111y; + g3z = ak2 * hrr_0011z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + double hrr_1120y = trr_22y - yjyi * trr_12y; + g3y = ak2 * (ak2 * hrr_1120y - 1 * hrr_1100y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + double hrr_2002y = hrr_2011y - ylyk * hrr_2001y; + double hrr_1102y = hrr_2002y - yjyi * hrr_1002y; + g3y = al2 * (al2 * hrr_1102y - 1 * hrr_1100y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] 
+ dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_0101y; + g1z = al2 * hrr_1001z; + g2x = ak2 * trr_02x; + g2y = ak2 * hrr_0110y; + g2z = ak2 * trr_11z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_0111y; + g3z = ak2 * hrr_1011z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * hrr_0120y - 1 * hrr_0100y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_0102y - 1 * hrr_0100y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double hrr_0100z = trr_10z - zjzi * wt; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + 
prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1011x; + g1y = al2 * hrr_0001y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + g1z = al2 * hrr_0101z; + g2x = ak2 * trr_12x; + g2y = ak2 * trr_01y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + g2z = ak2 * hrr_0110z; + g2x -= 1 * trr_10x; + g3x = ak2 * hrr_1021x; + g3y = ak2 * hrr_0011y; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + g3z = ak2 * hrr_0111z; + g3x -= 1 * hrr_1001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_13x - 3 * trr_11x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + double hrr_0120z = trr_12z - zjzi * trr_02z; + g3z = ak2 * (ak2 * hrr_0120z - 1 * hrr_0100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1012x - 1 * trr_11x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0102z = hrr_1002z - zjzi * hrr_0002z; + g3z = al2 * (al2 * hrr_0102z - 1 * hrr_0100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } 
+ } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0101z; + g2x = ak2 * trr_02x; + g2y = ak2 * trr_11y; + g2z = ak2 * hrr_0110z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0111z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * hrr_0120z - 1 * hrr_0100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0102z - 1 * hrr_0100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + double hrr_1100z = trr_20z - zjzi * trr_10z; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + 
prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0011x; + g1y = al2 * hrr_0001y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_2001z = trr_21z - zlzk * trr_20z; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + g1z = al2 * hrr_1101z; + g2x = ak2 * trr_02x; + g2y = ak2 * trr_01y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + g2z = ak2 * hrr_1110z; + g2x -= 1 * fac; + g3x = ak2 * hrr_0021x; + g3y = ak2 * hrr_0011y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + double hrr_2011z = trr_22z - zlzk * trr_21z; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + g3z = ak2 * hrr_1111z; + g3x -= 1 * hrr_0001x; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_03x - 3 * trr_01x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + double hrr_1120z = trr_22z - zjzi * trr_12z; + g3z = ak2 * (ak2 * hrr_1120z - 1 * hrr_1100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0012x - 1 * trr_01x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_2002z = hrr_2011z - zlzk * hrr_2001z; + double hrr_1102z = hrr_2002z - zjzi * hrr_1002z; + g3z = al2 * (al2 * hrr_1102z - 1 * hrr_1100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + 
dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_1101x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_0001z; + g2x = ak2 * hrr_1110x; + g2y = ak2 * trr_02y; + g2z = ak2 * trr_01z; + g2y -= 1 * 1; + g3x = ak2 * hrr_1111x; + double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; + double hrr_0021y = trr_03y - ylyk * trr_02y; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_1120x - 1 * hrr_1100x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_2002x = hrr_2011x - xlxk * hrr_2001x; + double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; + double hrr_1102x = hrr_2002x - xjxi * hrr_1002x; + g3x = al2 * (al2 * hrr_1102x - 1 * hrr_1100x); + double hrr_0012y = hrr_0021y - ylyk * hrr_0011y; + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_11y * Iz; + g1x = al2 * hrr_0101x; + g1y = al2 * hrr_1011y; + g1z = al2 * hrr_0001z; + g2x = ak2 * hrr_0110x; + g2y = ak2 * trr_12y; + g2z = ak2 * trr_01z; + g2y -= 1 * trr_10y; + g3x = ak2 * hrr_0111x; + double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; + double hrr_1021y = trr_13y - ylyk * trr_12y; + g3y = ak2 * hrr_1021y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_1001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0120x - 1 * hrr_0100x); + g3y = ak2 * (ak2 * trr_13y - 3 * trr_11y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + double hrr_0102x = hrr_1002x - xjxi * hrr_0002x; + g3x = al2 * (al2 * hrr_0102x - 1 * hrr_0100x); + double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; + g3y = al2 * (al2 * hrr_1012y - 1 * trr_11y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * 
dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_0101x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_1001z; + g2x = ak2 * hrr_0110x; + g2y = ak2 * trr_02y; + g2z = ak2 * trr_11z; + g2y -= 1 * 1; + g3x = ak2 * hrr_0111x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_1011z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0120x - 1 * hrr_0100x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0102x - 1 * hrr_0100x); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += 
dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0110y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0110y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0111y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_11x; + g2y = ak2 * hrr_0120y; + g2z = ak2 * trr_01z; + g2y -= 1 * hrr_0100y; + g3x = ak2 * hrr_1011x; + double hrr_0121y = hrr_1021y - yjyi * hrr_0021y; + g3y = ak2 * hrr_0121y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_0101y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + double hrr_0130y = trr_13y - yjyi * trr_03y; + g3y = ak2 * (ak2 * hrr_0130y - 3 * hrr_0110y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + double hrr_0112y = hrr_1012y - yjyi * hrr_0012y; + g3y = al2 * (al2 * hrr_0112y - 1 * hrr_0110y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += 
dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1110y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1110y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1111y; + g1z = al2 * hrr_0001z; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_1120y; + g2z = ak2 * trr_01z; + g2y -= 1 * hrr_1100y; + g3x = ak2 * hrr_0011x; + double trr_23y = cpy * trr_22y + 2*b01 * trr_21y + 2*b00 * trr_12y; + double hrr_2021y = trr_23y - ylyk * trr_22y; + double hrr_1121y = hrr_2021y - yjyi * hrr_1021y; + g3y = ak2 * hrr_1121y; + g3z = ak2 * hrr_0011z; + g3y -= 1 * hrr_1101y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + double hrr_1130y = trr_23y - yjyi * trr_13y; + g3y = ak2 * (ak2 * hrr_1130y - 3 * hrr_1110y); + g3z = ak2 * (ak2 * trr_02z - 1 * wt); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + double hrr_2012y = hrr_2021y - ylyk * hrr_2011y; + double hrr_1112y = hrr_2012y - yjyi * hrr_1012y; + g3y = al2 * (al2 * hrr_1112y - 1 * hrr_1110y); + g3z = al2 * (al2 * hrr_0002z - 1 * wt); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += 
dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0110y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0110y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0111y; + g1z = al2 * hrr_1001z; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_0120y; + g2z = ak2 * trr_11z; + g2y -= 1 * hrr_0100y; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0121y; + g3z = ak2 * hrr_1011z; + g3y -= 1 * hrr_0101y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_0130y - 3 * hrr_0110y); + g3z = ak2 * (ak2 * trr_12z - 1 * trr_10z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0112y - 1 * hrr_0110y); + g3z = al2 * (al2 * hrr_1002z - 1 * trr_10z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * 
dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_0101z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_02y; + g2z = ak2 * hrr_0110z; + g2y -= 1 * 1; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_0111z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * hrr_0120z - 1 * hrr_0100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_0102z - 1 * hrr_0100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * 
dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1011y; + g1z = al2 * hrr_0101z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_12y; + g2z = ak2 * hrr_0110z; + g2y -= 1 * trr_10y; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1021y; + g3z = ak2 * hrr_0111z; + g3y -= 1 * hrr_1001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_13y - 3 * trr_11y); + g3z = ak2 * (ak2 * hrr_0120z - 1 * hrr_0100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1012y - 1 * trr_11y); + g3z = al2 * (al2 * hrr_0102z - 1 * hrr_0100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix 
= fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0011y; + g1z = al2 * hrr_1101z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_02y; + g2z = ak2 * hrr_1110z; + g2y -= 1 * 1; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0021y; + g3z = ak2 * hrr_1111z; + g3y -= 1 * hrr_0001y; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_03y - 3 * trr_01y); + g3z = ak2 * (ak2 * hrr_1120z - 1 * hrr_1100z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0012y - 1 * trr_01y); + g3z = al2 * (al2 * hrr_1102z - 1 * hrr_1100z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1101x; + g1y = al2 * hrr_0001y; + 
g1z = al2 * hrr_0011z; + g2x = ak2 * hrr_1110x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_1111x; + g3y = ak2 * hrr_0011y; + double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; + double hrr_0021z = trr_03z - zlzk * trr_02z; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_1120x - 1 * hrr_1100x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1102x - 1 * hrr_1100x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0101x; + g1y = al2 * hrr_1001y; + g1z = al2 * 
hrr_0011z; + g2x = ak2 * hrr_0110x; + g2y = ak2 * trr_11y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_0111x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0120x - 1 * hrr_0100x); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0102x - 1 * hrr_0100x); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0101x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_1011z; + g2x = ak2 * hrr_0110x; + g2y = ak2 * trr_01y; + g2z = ak2 * trr_12z; + g2z -= 1 * trr_10z; + g3x = ak2 * hrr_0111x; + g3y = ak2 * hrr_0011y; + double trr_13z 
= cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; + double hrr_1021z = trr_13z - zlzk * trr_12z; + g3z = ak2 * hrr_1021z; + g3z -= 1 * hrr_1001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * hrr_0120x - 1 * hrr_0100x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + g3z = ak2 * (ak2 * trr_13z - 3 * trr_11z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0102x - 1 * hrr_0100x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_1012z = hrr_1021z - zlzk * hrr_1011z; + g3z = al2 * (al2 * hrr_1012z - 1 * trr_11z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0101y; + g1z = al2 * hrr_0011z; + g2x = ak2 * trr_11x; + g2y = ak2 * hrr_0110y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0111y; + g3z = ak2 * 
hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * hrr_0120y - 1 * hrr_0100y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0102y - 1 * hrr_0100y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1101y; + g1z = al2 * hrr_0011z; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_1110y; + g2z = ak2 * trr_02z; + g2z -= 1 * wt; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1111y; + g3z = ak2 * hrr_0021z; + g3z -= 1 * hrr_0001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * 
Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * hrr_1120y - 1 * hrr_1100y); + g3z = ak2 * (ak2 * trr_03z - 3 * trr_01z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1102y - 1 * hrr_1100y); + g3z = al2 * (al2 * hrr_0012z - 1 * trr_01z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0101y; + g1z = al2 * hrr_1011z; + g2x = ak2 * trr_01x; + g2y = ak2 * hrr_0110y; + g2z = ak2 * trr_12z; + g2z -= 1 * trr_10z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0111y; + g3z = ak2 * hrr_1021z; + g3z -= 1 * hrr_1001z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + 
g3y = ak2 * (ak2 * hrr_0120y - 1 * hrr_0100y); + g3z = ak2 * (ak2 * trr_13z - 3 * trr_11z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0102y - 1 * hrr_0100y); + g3z = al2 * (al2 * hrr_1012z - 1 * trr_11z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0110z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_1001x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_0111z; + g2x = ak2 * trr_11x; + g2y = ak2 * trr_01y; + g2z = ak2 * hrr_0120z; + g2z -= 1 * hrr_0100z; + g3x = ak2 * hrr_1011x; + g3y = ak2 * hrr_0011y; + double hrr_0121z = hrr_1021z - zjzi * hrr_0021z; + g3z = ak2 * hrr_0121z; + g3z -= 1 * hrr_0101z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_12x - 1 * trr_10x); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + double hrr_0130z = trr_13z - zjzi * trr_03z; + g3z = ak2 * (ak2 * hrr_0130z - 3 * 
hrr_0110z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_1002x - 1 * trr_10x); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_0112z = hrr_1012z - zjzi * hrr_0012z; + g3z = al2 * (al2 * hrr_0112z - 1 * hrr_0110z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_1001y; + g1z = al2 * hrr_0111z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_11y; + g2z = ak2 * hrr_0120z; + g2z -= 1 * hrr_0100z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_1011y; + g3z = ak2 * hrr_0121z; + g3z -= 1 * hrr_0101z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_12y - 1 * trr_10y); + g3z = ak2 * (ak2 * hrr_0130z - 3 * hrr_0110z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * g2y * Iz; + v_kxz += 
g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_1002y - 1 * trr_10y); + g3z = al2 * (al2 * hrr_0112z - 1 * hrr_0110z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + g1x = al2 * hrr_0001x; + g1y = al2 * hrr_0001y; + g1z = al2 * hrr_1111z; + g2x = ak2 * trr_01x; + g2y = ak2 * trr_01y; + g2z = ak2 * hrr_1120z; + g2z -= 1 * hrr_1100z; + g3x = ak2 * hrr_0011x; + g3y = ak2 * hrr_0011y; + double trr_23z = cpz * trr_22z + 2*b01 * trr_21z + 2*b00 * trr_12z; + double hrr_2021z = trr_23z - zlzk * trr_22z; + double hrr_1121z = hrr_2021z - zjzi * hrr_1021z; + g3z = ak2 * hrr_1121z; + g3z -= 1 * hrr_1101z; + g3x *= al2; + g3y *= al2; + g3z *= al2; + v2xx += g3x * prod_yz; + v2yy += g3y * prod_xz; + v2zz += g3z * prod_xy; + v2xy += g2x * g1y * Iz; + v2xz += g2x * g1z * Iy; + v2yx += g2y * g1x * Iz; + v2yz += g2y * g1z * Ix; + v2zx += g2z * g1x * Iy; + v2zy += g2z * g1y * Ix; + g3x = ak2 * (ak2 * trr_02x - 1 * fac); + g3y = ak2 * (ak2 * trr_02y - 1 * 1); + double hrr_1130z = trr_23z - zjzi * trr_13z; + g3z = ak2 * (ak2 * hrr_1130z - 3 * hrr_1110z); + v_kxx += g3x * prod_yz; + v_kyy += g3y * prod_xz; + v_kzz += g3z * prod_xy; + v_kxy += g2x * 
g2y * Iz; + v_kxz += g2x * g2z * Iy; + v_kyz += g2y * g2z * Ix; + g3x = al2 * (al2 * hrr_0002x - 1 * fac); + g3y = al2 * (al2 * hrr_0002y - 1 * 1); + double hrr_2012z = hrr_2021z - zlzk * hrr_2011z; + double hrr_1112z = hrr_2012z - zjzi * hrr_1012z; + g3z = al2 * (al2 * hrr_1112z - 1 * hrr_1110z); + v_lxx += g3x * prod_yz; + v_lyy += g3y * prod_xz; + v_lzz += g3z * prod_xy; + v_lxy += g1x * g1y * Iz; + v_lxz += g1x * g1z * Iy; + v_lyz += g1y * g1z * Ix; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ja)*9 + 0, v1xx); + atomicAdd(ejk + (ia*natm+ja)*9 + 1, v1xy); + atomicAdd(ejk + (ia*natm+ja)*9 + 2, v1xz); + atomicAdd(ejk + (ia*natm+ja)*9 + 3, v1yx); + atomicAdd(ejk + (ia*natm+ja)*9 + 4, v1yy); + atomicAdd(ejk + (ia*natm+ja)*9 + 5, v1yz); + atomicAdd(ejk + (ia*natm+ja)*9 + 6, v1zx); + atomicAdd(ejk + (ia*natm+ja)*9 + 7, v1zy); + atomicAdd(ejk + (ia*natm+ja)*9 + 8, v1zz); + atomicAdd(ejk + (ka*natm+la)*9 + 0, v2xx); + atomicAdd(ejk + (ka*natm+la)*9 + 1, v2xy); + atomicAdd(ejk + (ka*natm+la)*9 + 2, v2xz); + atomicAdd(ejk + (ka*natm+la)*9 + 3, v2yx); + atomicAdd(ejk + (ka*natm+la)*9 + 4, v2yy); + atomicAdd(ejk + (ka*natm+la)*9 + 5, v2yz); + atomicAdd(ejk + (ka*natm+la)*9 + 6, v2zx); + atomicAdd(ejk + (ka*natm+la)*9 + 7, v2zy); + atomicAdd(ejk + (ka*natm+la)*9 + 8, v2zz); + atomicAdd(ejk + (ia*natm+ia)*9 + 0, v_ixx*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 3, v_ixy); + atomicAdd(ejk + (ia*natm+ia)*9 + 4, v_iyy*.5); + atomicAdd(ejk + (ia*natm+ia)*9 + 6, v_ixz); + atomicAdd(ejk + (ia*natm+ia)*9 + 7, v_iyz); + atomicAdd(ejk + (ia*natm+ia)*9 + 8, v_izz*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 0, v_jxx*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 3, v_jxy); + atomicAdd(ejk + (ja*natm+ja)*9 + 4, v_jyy*.5); + atomicAdd(ejk + (ja*natm+ja)*9 + 
6, v_jxz);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 7, v_jyz);
+        atomicAdd(ejk + (ja*natm+ja)*9 + 8, v_jzz*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 0, v_kxx*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 3, v_kxy);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 4, v_kyy*.5);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 6, v_kxz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 7, v_kyz);
+        atomicAdd(ejk + (ka*natm+ka)*9 + 8, v_kzz*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 0, v_lxx*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 3, v_lxy);
+        atomicAdd(ejk + (la*natm+la)*9 + 4, v_lyy*.5);
+        atomicAdd(ejk + (la*natm+la)*9 + 6, v_lxz);
+        atomicAdd(ejk + (la*natm+la)*9 + 7, v_lyz);
+        atomicAdd(ejk + (la*natm+la)*9 + 8, v_lzz*.5);
+    }
+}
+__global__ /* Driver kernel: each block repeatedly claims a batch id, fills its task queue, and runs _rys_ejk_ip2_type12_1110 on it until no batches remain. */
+void rys_ejk_ip2_type12_1110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, /* envs, jk, bounds are forwarded unchanged to the task-fill helper and the device routine */
+                ShellQuartet *pool, uint32_t *batch_head) /* pool: QUEUE_DEPTH entries per block; batch_head[0]: next-batch counter, batch_head[1]: running sum of ntasks */
+{
+    int b_id = blockIdx.x;
+    int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index within the block */
+    ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's slice of the queue pool */
+    __shared__ int batch_id; /* batch currently owned by the block; written by thread 0 only */
+    if (t_id == 0) {
+        batch_id = atomicAdd(batch_head, 1); /* atomicAdd returns the previous counter value, so every block gets a distinct id */
+    }
+    __syncthreads(); /* publish batch_id to all threads of the block before it is read */
+    int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceil(ntile_kl_pairs / TILES_IN_BATCH) */
+    int nbatches = bounds.ntile_ij_pairs * nbatches_kl;
+    while (batch_id < nbatches) {
+        int batch_ij = batch_id / nbatches_kl; /* batch id decomposes as batch_ij * nbatches_kl + batch_kl */
+        int batch_kl = batch_id % nbatches_kl;
+        int nbas = envs.nbas;
+        double *env = envs.env;
+        double omega = env[PTR_RANGE_OMEGA];
+        int ntasks;
+        if (omega >= 0) { /* sign of omega selects the task-fill routine */
+            ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                     batch_ij, batch_kl);
+        } else { /* NOTE(review): "sr" presumably means short-range; confirm against _fill_sr_ejk_tasks */
+            ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                        batch_ij, batch_kl);
+        }
+        if (ntasks > 0) { /* skip the integral routine when the batch produced no tasks */
+            int tile_ij = bounds.tile_ij_mapping[batch_ij];
+            int nbas_tiles = nbas / TILE;
+            int tile_i = tile_ij / nbas_tiles; /* tile_ij encodes tile_i * nbas_tiles + tile_j */
+            int tile_j = tile_ij % nbas_tiles;
+            int ish0 = tile_i * TILE; /* first shell index of the i tile */
+            int jsh0 = tile_j * TILE; /* first shell index of the j tile */
+            _rys_ejk_ip2_type12_1110(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0);
+        }
+        if (t_id == 0) { /* thread 0 claims the next batch for the whole block */
+            batch_id =
atomicAdd(batch_head, 1);
+            atomicAdd(batch_head+1, ntasks);
+        }
+        __syncthreads();
+    }
+}
+/* Host dispatcher: launch the unrolled type12 kernel matching the angular momenta (li,lj,lk,ll) in bounds. Returns 1 if a kernel was launched, 0 if no unrolled kernel exists for that combination. */
+int rys_ejk_ip2_type12_unrolled(RysIntEnvVars *envs, JKEnergy *jk, BoundsInfo *bounds, /* structs are dereferenced and passed to the kernel by value */
+                    ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers) /* scheme[0]*scheme[1]: threads per block; workers: number of blocks launched */
+{
+    int li = bounds->li;
+    int lj = bounds->lj;
+    int lk = bounds->lk;
+    int ll = bounds->ll;
+    int threads = scheme[0] * scheme[1];
+    int nroots = bounds->nroots;
+    int iprim = bounds->iprim;
+    int jprim = bounds->jprim;
+    int ij_prims = iprim * jprim;
+    int buflen = nroots*2 * threads + ij_prims*TILE2*4; /* dynamic shared memory in doubles: 2*nroots per thread plus 4*TILE2 per (i,j) primitive pair */
+    int ijkl = li*125 + lj*25 + lk*5 + ll; /* base-5 key of the four angular momenta, e.g. 155 <-> (1,1,1,0) */
+    switch (ijkl) { /* NOTE(review): launch configuration (grid, block, shared-memory bytes) restored from the otherwise unused workers/threads/buflen after it had been stripped to an empty "<<>>"; confirm against the generator output */
+    case 0: rys_ejk_ip2_type12_0000<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 125: rys_ejk_ip2_type12_1000<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 130: rys_ejk_ip2_type12_1010<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 131: rys_ejk_ip2_type12_1011<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 150: rys_ejk_ip2_type12_1100<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 155: rys_ejk_ip2_type12_1110<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    default: return 0; /* no unrolled kernel for this combination; nothing is launched */
+    }
+    return 1;
+}
diff --git a/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2_type3.cu b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2_type3.cu
new file mode 100644
index 00000000..0f31115e
--- /dev/null
+++ b/gpu4pyscf/lib/gvhf-rys/unrolled_ejk_ip2_type3.cu
@@ -0,0 +1,13692 @@
+#include "vhf.cuh"
+#include "rys_roots_unrolled.cu"
+#include "create_tasks_ip1.cu"
+int rys_ejk_ip2_type3_unrolled_lmax = 1;
+int rys_ejk_ip2_type3_unrolled_max_order = 3;
+
+
+__device__ static
+void _rys_ejk_ip2_type3_0000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0)
+{
+    int sq_id = threadIdx.x + blockDim.x * threadIdx.y;
+    int nsq_per_block = blockDim.x * blockDim.y;
+    int iprim = bounds.iprim;
+    int jprim = bounds.jprim;
+    int kprim = bounds.kprim;
+    int lprim = bounds.lprim;
+    int
*ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if 
(ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double gix, giy, giz; + double gjx, gjy, gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx = 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + 
double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(2, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double 
rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + gix = ai2 * trr_10x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double trr_01x = cpx * fac; + gkx = ak2 * trr_01x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + gky = ak2 * trr_01y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + gikx = ai2 * trr_11x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + giky = ai2 * trr_11y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + gikz = ai2 * trr_11z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0100x = trr_10x - xjxi * fac; + gjx = aj2 * hrr_0100x; + double hrr_0100y = trr_10y - 
yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gjkx = aj2 * hrr_0110x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjky = aj2 * hrr_0110y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * trr_10z; + double hrr_0001x = trr_01x - xlxk * fac; + glx = al2 * hrr_0001x; + double hrr_0001y = trr_01y - ylyk * 1; + gly = al2 * hrr_0001y; + double hrr_0001z = trr_01z - zlzk * wt; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double hrr_1001x = trr_11x - xlxk * trr_10x; + gilx = ai2 * hrr_1001x; + double hrr_1001y = trr_11y - ylyk * trr_10y; + gily = ai2 * hrr_1001y; + double hrr_1001z = trr_11z - zlzk * trr_10z; + gilz = ai2 * hrr_1001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gjlx = aj2 * hrr_0101x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjly = aj2 * hrr_0101y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + 
gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, v_ixkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky); + atomicAdd(ejk + (ia*natm+ka)*9 + 2, v_ixkz); + atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx); + atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky); + atomicAdd(ejk + (ia*natm+ka)*9 + 5, v_iykz); + atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky); + atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky); + atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx); + atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky); + atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz); + atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky); + atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz); + atomicAdd(ejk + (ia*natm+la)*9 + 0, v_ixlx); + atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly); + atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz); + atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx); + atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly); + atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz); + atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx); + atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly); + atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz); + atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx); + atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly); + atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz); + atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx); + atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly); + atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz); + atomicAdd(ejk + 
(ja*natm+la)*9 + 6, v_jzlx); + atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly); + atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz); + } +} +__global__ +void rys_ejk_ip2_type3_0000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ /* Work-queue driver: each thread block repeatedly takes the next batch id from the counter at batch_head[0], fills its own task queue and runs _rys_ejk_ip2_type3_0000 on it, until the batch id reaches nbatches. */ + int b_id = blockIdx.x; + int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* flattened thread index within the block */ + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's task queue: the part of pool starting at b_id * QUEUE_DEPTH */ + __shared__ int batch_id; /* written by thread 0 only; the barrier below makes it visible to the whole block */ + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + } + __syncthreads(); + int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceil(ntile_kl_pairs / TILES_IN_BATCH) */ + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; /* batch id decodes as batch_ij * nbatches_kl + batch_kl */ + while (batch_id < nbatches) { + int batch_ij = batch_id / nbatches_kl; + int batch_kl = batch_id % nbatches_kl; + int nbas = envs.nbas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { /* negative omega selects the _fill_sr_ejk_tasks variant (presumably the short-range attenuated case - confirm) */ + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { + int tile_ij = bounds.tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; /* integer division; presumably nbas is a multiple of TILE - TODO confirm */ + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; /* first shell index of the i tile */ + int jsh0 = tile_j * TILE; /* first shell index of the j tile */ + _rys_ejk_ip2_type3_0000(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); + } + if (t_id == 0) { /* NOTE(review): batch_id is overwritten here with no barrier after the loop-top reads of batch_id; this is only safe if _fill_ejk_tasks / _fill_sr_ejk_tasks synchronize the block - confirm */ + batch_id = atomicAdd(batch_head, 1); + atomicAdd(batch_head+1, ntasks); /* batch_head[1] accumulates the number of tasks produced */ + } + __syncthreads(); + } +} + +__device__ static +void _rys_ejk_ip2_type3_1000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ /* For the ntasks shell quartets queued in shl_quartet_idx, accumulates 3x3 blocks into jk.ejk for the atom pairs (i,k), (j,k), (i,l) and (j,l); ish0 / jsh0 are the first shells of the i and j tiles. */ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; /* flattened thread index; each thread handles one quartet per pass */ + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas =
envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; /* a zero j_factor / k_factor skips the corresponding density-matrix product below */ + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; /* dynamic shared memory; its first iprim*jprim*TILE2*4 doubles are the pair table filled by the loop below */ + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; /* remainder: scratch handed to rys_roots, later read as roots (even slots) and weights (odd slots) with stride nsq_per_block */ + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { /* threads share the work of filling one table entry per (primitive pair ijp, shell pair sh_ij) of the tile */ + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; /* slots 0..2: aj/(ai+aj) * (rj - ri), one slot per Cartesian component */ + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; /* slot 3: ci*cj * exp(-ai*aj/(ai+aj) * |rj - ri|^2) */ + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); /* block barrier at the top of every pass; on the first pass it also separates the table fill above from the reads below */ + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); /* position of this shell pair inside the tile, matching the table layout above */ + if (ish == jsh) fac_sym *= .5; /* weight is halved once for each coinciding index pair */ + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh];
+ int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double gix, giy, giz; + double gjx, gjy, gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx = 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * 
(xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(2, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(2, theta_rr, rw+4*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(2, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 2; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = 
rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + gix = ai2 * trr_20x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + gix -= 1 * fac; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + gkx = ak2 * trr_11x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + gky = ak2 * trr_01y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + gikx = ai2 * trr_21x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + giky = ai2 * trr_11y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + gikz = ai2 * trr_11z; + double trr_01x = cpx * fac; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + 
double hrr_1100x = trr_20x - xjxi * trr_10x; + gjx = aj2 * hrr_1100x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_1110x = trr_21x - xjxi * trr_11x; + gjkx = aj2 * hrr_1110x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjky = aj2 * hrr_0110y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + double hrr_1001x = trr_11x - xlxk * trr_10x; + glx = al2 * hrr_1001x; + double hrr_0001y = trr_01y - ylyk * 1; + gly = al2 * hrr_0001y; + double hrr_0001z = trr_01z - zlzk * wt; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double hrr_2001x = trr_21x - xlxk * trr_20x; + gilx = ai2 * hrr_2001x; + double hrr_1001y = trr_11y - ylyk * trr_10y; + gily = ai2 * hrr_1001y; + double hrr_1001z = trr_11z - zlzk * trr_10z; + gilz = ai2 * hrr_1001z; + double hrr_0001x = trr_01x - xlxk * fac; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double 
hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + gjlx = aj2 * hrr_1101x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjly = aj2 * hrr_0101y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + giky = ai2 * trr_21y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0100x = trr_10x - xjxi * fac; + gjx = aj2 * hrr_0100x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + 
v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gjkx = aj2 * hrr_0110x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + double hrr_2001y = trr_21y - ylyk * trr_20y; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gjlx = aj2 * hrr_0101x; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] 
* dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_11y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0110y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1001y; + double hrr_2001z = trr_21z - zlzk * trr_20z; + gilz = ai2 * 
hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0101y; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, v_ixkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky); + atomicAdd(ejk + (ia*natm+ka)*9 + 2, v_ixkz); + atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx); + atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky); + atomicAdd(ejk + (ia*natm+ka)*9 + 5, v_iykz); + atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky); + atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky); + atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx); + atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky); + atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz); + atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky); + atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz); + atomicAdd(ejk + (ia*natm+la)*9 + 0, v_ixlx); + atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly); + atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz); + 
atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx); + atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly); + atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz); + atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx); + atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly); + atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz); + atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx); + atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly); + atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz); + atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx); + atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly); + atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz); + atomicAdd(ejk + (ja*natm+la)*9 + 6, v_jzlx); + atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly); + atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz); + } +} +__global__ +void rys_ejk_ip2_type3_1000(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ /* Work-queue driver: each thread block repeatedly takes the next batch id from the counter at batch_head[0], fills its own task queue and runs _rys_ejk_ip2_type3_1000 on it, until the batch id reaches nbatches. */ + int b_id = blockIdx.x; + int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* flattened thread index within the block */ + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's task queue: the part of pool starting at b_id * QUEUE_DEPTH */ + __shared__ int batch_id; /* written by thread 0 only; the barrier below makes it visible to the whole block */ + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + } + __syncthreads(); + int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceil(ntile_kl_pairs / TILES_IN_BATCH) */ + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; /* batch id decodes as batch_ij * nbatches_kl + batch_kl */ + while (batch_id < nbatches) { + int batch_ij = batch_id / nbatches_kl; + int batch_kl = batch_id % nbatches_kl; + int nbas = envs.nbas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + if (omega >= 0) { /* negative omega selects the _fill_sr_ejk_tasks variant (presumably the short-range attenuated case - confirm) */ + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { + int tile_ij = bounds.tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; /* integer division; presumably nbas is a multiple of TILE - TODO confirm */ + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; /* first shell index of the i tile */ + int jsh0 = tile_j * TILE; /* first shell index of the j tile */ + _rys_ejk_ip2_type3_1000(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); + } + if (t_id == 0) { /* NOTE(review): batch_id is overwritten in this branch with no barrier after the loop-top reads of batch_id; this is only safe if _fill_ejk_tasks / _fill_sr_ejk_tasks synchronize the block - confirm */ +
batch_id = atomicAdd(batch_head, 1); + atomicAdd(batch_head+1, ntasks); /* batch_head[1] accumulates the number of tasks produced */ + } + __syncthreads(); + } +} + +__device__ static +void _rys_ejk_ip2_type3_1010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym =
PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double gix, giy, giz; + double gjx, gjy, gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx 
= 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + 
rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + gix = ai2 * trr_21x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double trr_01x = cpx * fac; + gix -= 1 * trr_01x; + double b01 = .5/akl * (1 - rt_akl); + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 
1*b00 * trr_01x; + gkx = ak2 * trr_12x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + gky = ak2 * trr_01y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + gkz = ak2 * trr_01z; + gkx -= 1 * trr_10x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + gikx = ai2 * trr_22x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + giky = ai2 * trr_11y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + gikz = ai2 * trr_11z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + gikx -= 1 * trr_02x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_1110x = trr_21x - xjxi * trr_11x; + gjx = aj2 * hrr_1110x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_12x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_01z; + gkx -= 1 * trr_10x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_1120x = trr_22x - xjxi * trr_12x; + gjkx = aj2 * hrr_1120x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjky = aj2 * hrr_0110y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_1100x = trr_20x - xjxi * trr_10x; + gjkx -= 1 * (aj2 * hrr_1100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_11x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + gkx = ak2 * trr_02x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + giky = ai2 * trr_21y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gjx = aj2 * hrr_0110x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0120x = trr_12x - xjxi * trr_02x; + gjkx = aj2 * hrr_0120x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_0100x = trr_10x - xjxi * fac; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] 
* dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_02x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * trr_11y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_0110y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * 
prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_11y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + gky = ak2 * trr_02y; + gkz = ak2 * trr_01z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + giky = ai2 * trr_12y; + gikz = ai2 * trr_11z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_11x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_01z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + double hrr_0120y = trr_12y - yjyi * trr_02y; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * 
prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_21y; + giz = ai2 * trr_10z; + giy -= 1 * trr_01y; + gkx = ak2 * trr_01x; + gky = ak2 * trr_12y; + gkz = ak2 * trr_01z; + gky -= 1 * trr_10y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + giky = ai2 * trr_22y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_02y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_12y; + gkz = ak2 * trr_01z; + gky -= 1 * trr_10y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + double hrr_1120y = trr_22y - yjyi * trr_12y; + gjky = aj2 * hrr_1120y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + 
v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_11y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_11z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_12y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_11z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if 
(jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * trr_11z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * trr_11y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + gikz = ai2 * trr_12z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0110z; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0110y; + double hrr_0120z = trr_12z - zjzi * trr_02z; + gjkz = aj2 * hrr_0120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * 
dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * trr_11z; + giy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_21y; + gikz = ai2 * trr_12z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0110z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_11y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + gikz = ai2 * trr_22z; + gikz -= 1 * trr_02z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1110z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0110y; + double hrr_1120z = trr_22z - zjzi * trr_12z; + gjkz = aj2 * hrr_1120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + } + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double 
c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + gix = ai2 * trr_21x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double trr_01x = cpx * fac; + gix -= 1 * trr_01x; + double b01 = .5/akl * (1 - rt_akl); + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + glx = al2 * hrr_1011x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + gly = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + gilx = ai2 * hrr_2011x; + double 
trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + gily = ai2 * hrr_1001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + gilz = ai2 * hrr_1001z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + gilx -= 1 * hrr_0011x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1110x = trr_21x - xjxi * trr_11x; + gjx = aj2 * hrr_1110x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + gjlx = aj2 * hrr_1111x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjly = aj2 * hrr_0101y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * 
trr_11x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_2001y = trr_21y - ylyk * trr_20y; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gjx = aj2 * hrr_0110x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + gjlx = aj2 * hrr_0111x; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 
* dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + gily = ai2 * hrr_1001y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_2001z = trr_21z - zlzk * trr_20z; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + gjly = aj2 * hrr_0101y; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = 
trr_10x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_11y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + double hrr_1001x = trr_11x - xlxk * trr_10x; + glx = al2 * hrr_1001x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double hrr_2001x = trr_21x - xlxk * trr_20x; + gilx = ai2 * hrr_2001x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_1001z; + double hrr_0001x = trr_01x - xlxk * fac; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1100x = trr_20x - xjxi * trr_10x; + gjx = aj2 * hrr_1100x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + gjlx = aj2 * hrr_1101x; + double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * 
dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_21y; + giz = ai2 * trr_10z; + giy -= 1 * trr_01y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + double hrr_2011y = trr_22y - ylyk * trr_21y; + gily = ai2 * hrr_2011y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0011y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0100x = trr_10x - xjxi * fac; + gjx = aj2 * hrr_0100x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gjlx = aj2 * hrr_0101x; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + gjly = aj2 * hrr_1111y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_11y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * 
dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * trr_11z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + glz = al2 * hrr_0011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1001y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + gilz = ai2 * hrr_1011z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjz = aj2 * hrr_0110z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0101y; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + gjlz = aj2 * hrr_0111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; 
+ } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * trr_11z; + giy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1011z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0110z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + 
dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1001y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + double hrr_2011z = trr_22z - zlzk * trr_21z; + gilz = ai2 * hrr_2011z; + gilz -= 1 * hrr_0011z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjz = aj2 * hrr_1110z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0101y; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + gjlz = aj2 * hrr_1111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, v_ixkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky); + atomicAdd(ejk + (ia*natm+ka)*9 + 2, v_ixkz); + atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx); + atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky); + atomicAdd(ejk + 
(ia*natm+ka)*9 + 5, v_iykz); + atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky); + atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky); + atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx); + atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky); + atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz); + atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky); + atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz); + atomicAdd(ejk + (ia*natm+la)*9 + 0, v_ixlx); + atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly); + atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz); + atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx); + atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly); + atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz); + atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx); + atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly); + atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz); + atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx); + atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly); + atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz); + atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx); + atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly); + atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz); + atomicAdd(ejk + (ja*natm+la)*9 + 6, v_jzlx); + atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly); + atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz); + } +} +/* Driver kernel paired with the device routine _rys_ejk_ip2_type3_1010. Each thread block works on its own slice of `pool` (offset blockIdx.x * QUEUE_DEPTH) and repeatedly claims a batch index by atomically incrementing batch_head[0]. A claimed index is split into (batch_ij, batch_kl), the slice is filled with shell-quartet tasks by one of the _fill_*_ejk_tasks helpers, and if any tasks were produced they are handed to _rys_ejk_ip2_type3_1010. The task count of every processed batch is accumulated into batch_head[1]. The loop ends once the claimed index reaches ntile_ij_pairs * nbatches_kl. */__global__ +void rys_ejk_ip2_type3_1010(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ + int b_id = blockIdx.x; + int t_id = threadIdx.x + blockDim.x * threadIdx.y; + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; + /* batch_id is shared by the whole block but written only by thread 0; the __syncthreads() following each write makes the new value visible to the other threads before they read it. */__shared__ int batch_id; + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + } + __syncthreads(); + /* Number of kl batches: ntile_kl_pairs divided by TILES_IN_BATCH, rounded up. */int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; + while (batch_id < nbatches) 
{ + int batch_ij = batch_id / nbatches_kl; + int batch_kl = batch_id % nbatches_kl; + int nbas = envs.nbas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int ntasks; + /* The sign of omega picks the task-filling helper. NOTE(review): omega < 0 presumably denotes the short-range attenuated Coulomb operator handled by the "sr" variant - confirm against the helper definitions. */if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { + /* Map the ij batch to its tile id, split that id into (tile_i, tile_j) on a grid of nbas / TILE tiles per side, and convert each to the starting shell index of the tile. */int tile_ij = bounds.tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; + int jsh0 = tile_j * TILE; + _rys_ejk_ip2_type3_1010(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); + } + /* Thread 0 claims the next batch for this block and adds the finished batch's task count to batch_head[1]; the barrier below publishes the new batch_id before the loop condition is re-evaluated. */if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + atomicAdd(batch_head+1, ntasks); + } + __syncthreads(); + } +} + +__device__ static +void _rys_ejk_ip2_type3_1011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + 
double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double gix, giy, giz; + double gjx, gjy, 
gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx = 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double 
zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double b01 = .5/akl * (1 - rt_akl); + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= 
jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1011x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1011x * Iy; + prod_xz = hrr_1011x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + gix = ai2 * hrr_2011x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + gix -= 1 * hrr_0011x; + double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; + double hrr_1021x = trr_13x - xlxk * trr_12x; + gkx = ak2 * hrr_1021x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + gky = ak2 * trr_01y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + gkz = ak2 * trr_01z; + double hrr_1001x = trr_11x - xlxk * trr_10x; + gkx -= 1 * hrr_1001x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + double trr_23x = cpx * trr_22x + 2*b01 * trr_21x + 2*b00 * trr_12x; + double hrr_2021x = trr_23x - xlxk * trr_22x; + gikx = ai2 * hrr_2021x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + giky = ai2 * trr_11y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + gikz = ai2 * trr_11z; + double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; + double hrr_0021x = trr_03x - xlxk * trr_02x; + gikx -= 1 * hrr_0021x; + gikx *= ak2; + giky 
*= ak2; + gikz *= ak2; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double hrr_0001x = trr_01x - xlxk * fac; + gikx -= 1 * (ai2 * hrr_2001x - 1 * hrr_0001x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + gjx = aj2 * hrr_1111x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gkx = ak2 * hrr_1021x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_01z; + gkx -= 1 * hrr_1001x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_1121x = hrr_2021x - xjxi * hrr_1021x; + gjkx = aj2 * hrr_1121x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjky = aj2 * hrr_0110y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + gjkx -= 1 * (aj2 * hrr_1101x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1011x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + gkx = ak2 
* hrr_0021x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + gkx -= 1 * hrr_0001x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1021x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + giky = ai2 * trr_21y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * hrr_1001x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + gjx = aj2 * hrr_0111x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * hrr_0021x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + gkx -= 1 * hrr_0001x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0121x = hrr_1021x - xjxi * hrr_0021x; + gjkx = aj2 * hrr_0121x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gjkx -= 1 * (aj2 * hrr_0101x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = 1 * dd; + Iz = trr_10z * 
dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1011x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * hrr_0021x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + gkx -= 1 * hrr_0001x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1021x; + giky = ai2 * trr_11y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * hrr_1001x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0111x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gkx = ak2 * hrr_0021x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + gkx -= 1 * hrr_0001x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0121x; + gjky = aj2 * hrr_0110y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0101x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + 
dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * hrr_2001x; + giy = ai2 * trr_11y; + giz = ai2 * trr_10z; + gix -= 1 * hrr_0001x; + gkx = ak2 * hrr_1011x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + gky = ak2 * trr_02y; + gkz = ak2 * trr_01z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_2011x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + giky = ai2 * trr_12y; + gikz = ai2 * trr_11z; + gikx -= 1 * hrr_0011x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1101x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * hrr_1011x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_01z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1111x; + double hrr_0120y = trr_12y - yjyi * trr_02y; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += 
jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_21y; + giz = ai2 * trr_10z; + giy -= 1 * trr_01y; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_12y; + gkz = ak2 * trr_01z; + gky -= 1 * trr_10y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1011x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + giky = ai2 * trr_22y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_02y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_12y; + gkz = ak2 * trr_01z; + gky -= 1 * trr_10y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0111x; + double hrr_1120y = trr_22y - yjyi * trr_12y; + gjky = aj2 * hrr_1120y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = 
(l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_11y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_11z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1011x; + giky = ai2 * trr_12y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1100z; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_11z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0111x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; 
+ Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_2001x; + giy = ai2 * trr_10y; + giz = ai2 * trr_11z; + gix -= 1 * hrr_0001x; + gkx = ak2 * hrr_1011x; + gky = ak2 * trr_01y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_2011x; + giky = ai2 * trr_11y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + gikz = ai2 * trr_12z; + gikx -= 1 * hrr_0011x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1101x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0110z; + gkx = ak2 * hrr_1011x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1111x; + gjky = aj2 * hrr_0110y; + double hrr_0120z = trr_12z - zjzi * trr_02z; + gjkz = aj2 * hrr_0120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + 
dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_20y; + giz = ai2 * trr_11z; + giy -= 1 * 1; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1011x; + giky = ai2 * trr_21y; + gikz = ai2 * trr_12z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0110z; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0111x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = 
hrr_0001x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_10y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1011x; + giky = ai2 * trr_11y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + gikz = ai2 * trr_22z; + gikz -= 1 * trr_02z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1110z; + gkx = ak2 * hrr_0011x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0111x; + gjky = aj2 * hrr_0110y; + double hrr_1120z = trr_22z - zjzi * trr_12z; + gjkz = aj2 * hrr_1120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + double hrr_0001y = trr_01y - ylyk * 1; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0001y * dd; + Iz = 
wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_21x; + double hrr_1001y = trr_11y - ylyk * trr_10y; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_10z; + gix -= 1 * trr_01x; + gkx = ak2 * trr_12x; + double hrr_0011y = trr_02y - ylyk * trr_01y; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_01z; + gkx -= 1 * trr_10x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_22x; + double hrr_1011y = trr_12y - ylyk * trr_11y; + giky = ai2 * hrr_1011y; + gikz = ai2 * trr_11z; + gikx -= 1 * trr_02x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_1110x = trr_21x - xjxi * trr_11x; + gjx = aj2 * hrr_1110x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_12x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_01z; + gkx -= 1 * trr_10x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_1120x = trr_22x - xjxi * trr_12x; + gjkx = aj2 * hrr_1120x; + double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; + gjky = aj2 * hrr_0111y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_1100x = trr_20x - xjxi * trr_10x; + gjkx -= 1 * (aj2 * hrr_1100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if 
(jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1001y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1001y * Iz; + gix = ai2 * trr_11x; + double hrr_2001y = trr_21y - ylyk * trr_20y; + giy = ai2 * hrr_2001y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0001y; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_1011y; + gkz = ak2 * trr_01z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + double hrr_2011y = trr_22y - ylyk * trr_21y; + giky = ai2 * hrr_2011y; + gikz = ai2 * trr_11z; + giky -= 1 * hrr_0011y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gjx = aj2 * hrr_0110x; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + gjy = aj2 * hrr_1101y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_1011y; + gkz = ak2 * trr_01z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0120x = trr_12x - xjxi * trr_02x; + gjkx = aj2 * hrr_0120x; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + gjky = aj2 * hrr_1111y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_0100x = trr_10x - xjxi * fac; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * 
dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0001y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_11z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * hrr_1011y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_1100z; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_11z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_0111y; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += 
dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0011y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0011y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1011y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; + double hrr_0021y = trr_03y - ylyk * trr_02y; + gky = ak2 * hrr_0021y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_0001y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; + double hrr_1021y = trr_13y - ylyk * trr_12y; + giky = ai2 * hrr_1021y; + gikz = ai2 * trr_11z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * hrr_1001y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0111y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0021y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_0001y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + double hrr_0121y = hrr_1021y - yjyi * hrr_0021y; + gjky = aj2 * hrr_0121y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0101y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * 
dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1011y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1011y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_2011y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0011y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1021y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_1001y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + double trr_23y = cpy * trr_22y + 2*b01 * trr_21y + 2*b00 * trr_12y; + double hrr_2021y = trr_23y - ylyk * trr_22y; + giky = ai2 * hrr_2021y; + gikz = ai2 * trr_11z; + giky -= 1 * hrr_0021y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * hrr_2001y - 1 * hrr_0001y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1111y; + gjz = aj2 * hrr_0100z; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1021y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_1001y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + double hrr_1121y = hrr_2021y - yjyi * hrr_1021y; + gjky = aj2 * hrr_1121y; + gjkz = aj2 * hrr_0110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1101y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + 
v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0011y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0011y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1011y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0021y; + gkz = ak2 * trr_11z; + gky -= 1 * hrr_0001y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_1021y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * hrr_1001y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0111y; + gjz = aj2 * hrr_1100z; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0021y; + gkz = ak2 * trr_11z; + gky -= 1 * hrr_0001y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0121y; + gjkz = aj2 * hrr_1110z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0101y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * 
dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0001y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_11z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * hrr_1011y; + gikz = ai2 * trr_12z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_0110z; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0111y; + gjkz = aj2 * hrr_0120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += 
dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1001y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1001y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_2001y; + giz = ai2 * trr_11z; + giy -= 1 * hrr_0001y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1011y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_2011y; + gikz = ai2 * trr_12z; + giky -= 1 * hrr_0011y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1101y; + gjz = aj2 * hrr_0110z; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1011y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1111y; + gjkz = aj2 * hrr_0120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if 
(jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0001y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_1011y; + gikz = ai2 * trr_22z; + gikz -= 1 * trr_02z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_1110z; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0011y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0111y; + gjkz = aj2 * hrr_1120z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + double hrr_0001z = trr_01z - zlzk * wt; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * 
dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_21x; + giy = ai2 * trr_10y; + double hrr_1001z = trr_11z - zlzk * trr_10z; + giz = ai2 * hrr_1001z; + gix -= 1 * trr_01x; + gkx = ak2 * trr_12x; + gky = ak2 * trr_01y; + double hrr_0011z = trr_02z - zlzk * trr_01z; + gkz = ak2 * hrr_0011z; + gkx -= 1 * trr_10x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_22x; + giky = ai2 * trr_11y; + double hrr_1011z = trr_12z - zlzk * trr_11z; + gikz = ai2 * hrr_1011z; + gikx -= 1 * trr_02x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1110x; + gjy = aj2 * hrr_0100y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + gjz = aj2 * hrr_0101z; + gkx = ak2 * trr_12x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0011z; + gkx -= 1 * trr_10x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1120x; + gjky = aj2 * hrr_0110y; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + gjkz = aj2 * hrr_0111z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_1100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + 
} + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1001z; + giy -= 1 * 1; + gkx = ak2 * trr_02x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0011z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * trr_21y; + gikz = ai2 * hrr_1011z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0101z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0011z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0111z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * 
dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_10y; + double hrr_2001z = trr_21z - zlzk * trr_20z; + giz = ai2 * hrr_2001z; + giz -= 1 * hrr_0001z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1011z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * trr_11y; + double hrr_2011z = trr_22z - zlzk * trr_21z; + gikz = ai2 * hrr_2011z; + gikz -= 1 * hrr_0011z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0100y; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + gjz = aj2 * hrr_1101z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1011z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_0110y; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + gjkz = aj2 * hrr_1111z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd 
= 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_1001z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_0011z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * trr_12y; + gikz = ai2 * hrr_1011z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0101z; + gkx = ak2 * trr_11x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_0011z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_0111z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+1]; + } else { + int 
ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_21y; + giz = ai2 * hrr_1001z; + giy -= 1 * trr_01y; + gkx = ak2 * trr_01x; + gky = ak2 * trr_12y; + gkz = ak2 * hrr_0011z; + gky -= 1 * trr_10y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_22y; + gikz = ai2 * hrr_1011z; + giky -= 1 * trr_02y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0101z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_12y; + gkz = ak2 * hrr_0011z; + gky -= 1 * trr_10y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1120y; + gjkz = aj2 * hrr_0111z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + 
dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_2001z; + giz -= 1 * hrr_0001z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_1011z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_12y; + gikz = ai2 * hrr_2011z; + gikz -= 1 * hrr_0011z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1101z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_1011z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_1111z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0011z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * 
Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1011z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; + double hrr_0021z = trr_03z - zlzk * trr_02z; + gkz = ak2 * hrr_0021z; + gkz -= 1 * hrr_0001z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * trr_11y; + double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; + double hrr_1021z = trr_13z - zlzk * trr_12z; + gikz = ai2 * hrr_1021z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * hrr_1001z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0111z; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0021z; + gkz -= 1 * hrr_0001z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0110y; + double hrr_0121z = hrr_1021z - zjzi * hrr_0021z; + gjkz = aj2 * hrr_0121z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0101z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + 
dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1011z; + giy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0021z; + gkz -= 1 * hrr_0001z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_21y; + gikz = ai2 * hrr_1021z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * hrr_1001z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0111z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0021z; + gkz -= 1 * hrr_0001z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0121z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0101z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1011z * dd; + prod_xy = fac 
* Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_2011z; + giz -= 1 * hrr_0011z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1021z; + gkz -= 1 * hrr_1001z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_11y; + double trr_23z = cpz * trr_22z + 2*b01 * trr_21z + 2*b00 * trr_12z; + double hrr_2021z = trr_23z - zlzk * trr_22z; + gikz = ai2 * hrr_2021z; + gikz -= 1 * hrr_0021z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * hrr_2001z - 1 * hrr_0001z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1111z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1021z; + gkz -= 1 * hrr_1001z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0110y; + double hrr_1121z = hrr_2021z - zjzi * hrr_1021z; + gjkz = aj2 * hrr_1121z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1101z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + } + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b00 = .5 * rt_aa; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double b01 = .5/akl * (1 - rt_akl); + double trr_01x = cpx * fac; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * 
trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1011x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1011x * Iy; + prod_xz = hrr_1011x * Iz; + prod_yz = 1 * Iz; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + gix = ai2 * hrr_2011x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + gix -= 1 * hrr_0011x; + double trr_13x = cpx * trr_12x + 2*b01 * trr_11x + 1*b00 * trr_02x; + double hrr_1021x = trr_13x - xlxk * trr_12x; + double hrr_1012x = hrr_1021x - xlxk * hrr_1011x; + glx = al2 * hrr_1012x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + gly = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + glz = al2 * hrr_0001z; + glx -= 1 * trr_11x; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double trr_23x = cpx * trr_22x + 2*b01 * trr_21x + 2*b00 * 
trr_12x; + double hrr_2021x = trr_23x - xlxk * trr_22x; + double hrr_2012x = hrr_2021x - xlxk * hrr_2011x; + gilx = ai2 * hrr_2012x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + gily = ai2 * hrr_1001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + gilz = ai2 * hrr_1001z; + double trr_03x = cpx * trr_02x + 2*b01 * trr_01x; + double hrr_0021x = trr_03x - xlxk * trr_02x; + double hrr_0012x = hrr_0021x - xlxk * hrr_0011x; + gilx -= 1 * hrr_0012x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_21x - 1 * trr_01x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + gjx = aj2 * hrr_1111x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1012x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0001z; + glx -= 1 * trr_11x; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1112x = hrr_2012x - xjxi * hrr_1012x; + gjlx = aj2 * hrr_1112x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjly = aj2 * hrr_0101y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + double hrr_1110x = trr_21x - xjxi * trr_11x; + gjlx -= 1 * (aj2 * hrr_1110x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * 
dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1011x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + glx = al2 * hrr_0012x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + glx -= 1 * trr_01x; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1012x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_2001y = trr_21y - ylyk * trr_20y; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_11x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + gjx = aj2 * hrr_0111x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0012x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + glx -= 1 * trr_01x; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0112x = hrr_1012x - xjxi * hrr_0012x; + gjlx = aj2 * hrr_0112x; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gjlx -= 1 * (aj2 * hrr_0110x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += 
dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0011x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0011x * Iy; + prod_xz = hrr_0011x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1011x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0012x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + glx -= 1 * trr_01x; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1012x; + gily = ai2 * hrr_1001y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_2001z = trr_21z - zlzk * trr_20z; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_11x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0111x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0012x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + glx -= 1 * trr_01x; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0112x; + gjly = aj2 * hrr_0101y; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlx -= 1 * (aj2 * hrr_0110x); + v_jxlx += gjlx * 
prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + double hrr_1001x = trr_11x - xlxk * trr_10x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = trr_01y * Iz; + double hrr_2001x = trr_21x - xlxk * trr_20x; + gix = ai2 * hrr_2001x; + giy = ai2 * trr_11y; + giz = ai2 * trr_10z; + double hrr_0001x = trr_01x - xlxk * fac; + gix -= 1 * hrr_0001x; + double hrr_1002x = hrr_1011x - xlxk * hrr_1001x; + glx = al2 * hrr_1002x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0001z; + glx -= 1 * trr_10x; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double hrr_2002x = hrr_2011x - xlxk * hrr_2001x; + gilx = ai2 * hrr_2002x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_1001z; + double hrr_0002x = hrr_0011x - xlxk * hrr_0001x; + gilx -= 1 * hrr_0002x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + gjx = aj2 * hrr_1101x; + double hrr_0110y = 
trr_11y - yjyi * trr_01y; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1002x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0001z; + glx -= 1 * trr_10x; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1102x = hrr_2002x - xjxi * hrr_1002x; + gjlx = aj2 * hrr_1102x; + double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + double hrr_1100x = trr_20x - xjxi * trr_10x; + gjlx -= 1 * (aj2 * hrr_1100x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_21y; + giz = ai2 * trr_10z; + giy -= 1 * trr_01y; + glx = al2 * hrr_0002x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0001z; + glx -= 1 * fac; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1002x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + double hrr_2011y = trr_22y - ylyk * trr_21y; + gily = ai2 * hrr_2011y; + gilz = ai2 * hrr_1001z; + gily -= 1 * 
hrr_0011y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_10x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gjx = aj2 * hrr_0101x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0002x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0001z; + glx -= 1 * fac; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0102x = hrr_1002x - xjxi * hrr_0002x; + gjlx = aj2 * hrr_0102x; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + gjly = aj2 * hrr_1111y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + double hrr_0100x = trr_10x - xjxi * fac; + gjlx -= 1 * (aj2 * hrr_0100x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_11y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0002x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1001z; + glx -= 1 * fac; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += 
giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1002x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_10x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0002x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1001z; + glx -= 1 * fac; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0102x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlx -= 1 * (aj2 * hrr_0100x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1001x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1001x * Iy; + prod_xz = hrr_1001x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_2001x; + giy = ai2 * trr_10y; + giz = ai2 * trr_11z; + gix -= 1 * hrr_0001x; + glx = al2 * hrr_1002x; + gly = al2 * hrr_0001y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + glz = al2 * hrr_0011z; + glx -= 1 * trr_10x; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * 
Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2002x; + gily = ai2 * hrr_1001y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + gilz = ai2 * hrr_1011z; + gilx -= 1 * hrr_0002x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1101x; + gjy = aj2 * hrr_0100y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjz = aj2 * hrr_0110z; + glx = al2 * hrr_1002x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0011z; + glx -= 1 * trr_10x; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1102x; + gjly = aj2 * hrr_0101y; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + gjlz = aj2 * hrr_0111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlx -= 1 * (aj2 * hrr_1100x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_20y; + giz = ai2 * trr_11z; + giy -= 1 * 1; + glx = al2 * hrr_0002x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0011z; + glx -= 1 * fac; + v_ixly += 
gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1002x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1011z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_10x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0110z; + glx = al2 * hrr_0002x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0011z; + glx -= 1 * fac; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0102x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlx -= 1 * (aj2 * hrr_0100x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0001x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0001x * Iy; + prod_xz = hrr_0001x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1001x; + giy = ai2 * trr_10y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + glx = al2 * hrr_0002x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1011z; + glx -= 1 * fac; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += 
giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1002x; + gily = ai2 * hrr_1001y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + double hrr_2011z = trr_22z - zlzk * trr_21z; + gilz = ai2 * hrr_2011z; + gilz -= 1 * hrr_0011z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilx -= 1 * (ai2 * trr_10x); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0101x; + gjy = aj2 * hrr_0100y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjz = aj2 * hrr_1110z; + glx = al2 * hrr_0002x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1011z; + glx -= 1 * fac; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0102x; + gjly = aj2 * hrr_0101y; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + gjlz = aj2 * hrr_1111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlx -= 1 * (aj2 * hrr_0100x); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0001y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_21x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_10z; + gix -= 1 * trr_01x; + glx = al2 * hrr_1011x; + double hrr_0002y = hrr_0011y - ylyk * hrr_0001y; + gly = al2 * hrr_0002y; + glz = 
al2 * hrr_0001z; + gly -= 1 * 1; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2011x; + double hrr_1002y = hrr_1011y - ylyk * hrr_1001y; + gily = ai2 * hrr_1002y; + gilz = ai2 * hrr_1001z; + gilx -= 1 * hrr_0011x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_10y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1110x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0002y; + glz = al2 * hrr_0001z; + gly -= 1 * 1; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1111x; + double hrr_0102y = hrr_1002y - yjyi * hrr_0002y; + gjly = aj2 * hrr_0102y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_0100y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1001y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1001y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * hrr_2001y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0001y; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1002y; + glz = al2 * 
hrr_0001z; + gly -= 1 * trr_10y; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + double hrr_2002y = hrr_2011y - ylyk * hrr_2001y; + gily = ai2 * hrr_2002y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0002y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_1101y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1002y; + glz = al2 * hrr_0001z; + gly -= 1 * trr_10y; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + double hrr_1102y = hrr_2002y - yjyi * hrr_1002y; + gjly = aj2 * hrr_1102y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_1100y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0001y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0002y; + 
glz = al2 * hrr_1001z; + gly -= 1 * 1; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + gily = ai2 * hrr_1002y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_10y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0002y; + glz = al2 * hrr_1001z; + gly -= 1 * 1; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + gjly = aj2 * hrr_0102y; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_0100y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0011y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0011y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1011y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + double trr_03y = cpy * trr_02y + 2*b01 * trr_01y; + double hrr_0021y = trr_03y - ylyk * trr_02y; + double hrr_0012y = hrr_0021y - ylyk * 
hrr_0011y; + gly = al2 * hrr_0012y; + glz = al2 * hrr_0001z; + gly -= 1 * trr_01y; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + double trr_13y = cpy * trr_12y + 2*b01 * trr_11y + 1*b00 * trr_02y; + double hrr_1021y = trr_13y - ylyk * trr_12y; + double hrr_1012y = hrr_1021y - ylyk * hrr_1011y; + gily = ai2 * hrr_1012y; + gilz = ai2 * hrr_1001z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_11y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0111y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0012y; + glz = al2 * hrr_0001z; + gly -= 1 * trr_01y; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + double hrr_0112y = hrr_1012y - yjyi * hrr_0012y; + gjly = aj2 * hrr_0112y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_0110y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1011y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1011y * Iz; 
+ gix = ai2 * trr_10x; + giy = ai2 * hrr_2011y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0011y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1012y; + glz = al2 * hrr_0001z; + gly -= 1 * trr_11y; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + double trr_23y = cpy * trr_22y + 2*b01 * trr_21y + 2*b00 * trr_12y; + double hrr_2021y = trr_23y - ylyk * trr_22y; + double hrr_2012y = hrr_2021y - ylyk * hrr_2011y; + gily = ai2 * hrr_2012y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0012y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_21y - 1 * trr_01y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1111y; + gjz = aj2 * hrr_0100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1012y; + glz = al2 * hrr_0001z; + gly -= 1 * trr_11y; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + double hrr_1112y = hrr_2012y - yjyi * hrr_1012y; + gjly = aj2 * hrr_1112y; + gjlz = aj2 * hrr_0101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_1110y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix 
= fac * dd; + Iy = hrr_0011y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0011y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1011y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0012y; + glz = al2 * hrr_1001z; + gly -= 1 * trr_01y; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1012y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_11y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0111y; + gjz = aj2 * hrr_1100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0012y; + glz = al2 * hrr_1001z; + gly -= 1 * trr_01y; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0112y; + gjlz = aj2 * hrr_1101z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_0110y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0001y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; 
+ prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_11z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0002y; + glz = al2 * hrr_0011z; + gly -= 1 * 1; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1002y; + gilz = ai2 * hrr_1011z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_10y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_0110z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0002y; + glz = al2 * hrr_0011z; + gly -= 1 * 1; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0102y; + gjlz = aj2 * hrr_0111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_0100y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1001y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1001y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_2001y; + giz = ai2 * trr_11z; + giy -= 1 * hrr_0001y; 
+ glx = al2 * hrr_0001x; + gly = al2 * hrr_1002y; + glz = al2 * hrr_0011z; + gly -= 1 * trr_10y; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2002y; + gilz = ai2 * hrr_1011z; + gily -= 1 * hrr_0002y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1101y; + gjz = aj2 * hrr_0110z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1002y; + glz = al2 * hrr_0011z; + gly -= 1 * trr_10y; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1102y; + gjlz = aj2 * hrr_0111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_1100y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+1]; + dd += dm[(j0+0)*nao+l0+1] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+1]; + dd += dm[(nao+j0+0)*nao+l0+1] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+1)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+1)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0001y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0001y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1001y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0002y; + glz = al2 * hrr_1011z; + gly -= 1 * 1; + v_ixly 
+= gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1002y; + gilz = ai2 * hrr_2011z; + gilz -= 1 * hrr_0011z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gily -= 1 * (ai2 * trr_10y); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0101y; + gjz = aj2 * hrr_1110z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0002y; + glz = al2 * hrr_1011z; + gly -= 1 * 1; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0102y; + gjlz = aj2 * hrr_1111z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjly -= 1 * (aj2 * hrr_0100y); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_21x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1001z; + gix -= 1 * trr_01x; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0001y; + double hrr_0002z = hrr_0011z - zlzk * hrr_0001z; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx 
* Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2011x; + gily = ai2 * hrr_1001y; + double hrr_1002z = hrr_1011z - zlzk * hrr_1001z; + gilz = ai2 * hrr_1002z; + gilx -= 1 * hrr_0011x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_10z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1110x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0101z; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1111x; + gjly = aj2 * hrr_0101y; + double hrr_0102z = hrr_1002z - zjzi * hrr_0002z; + gjlz = aj2 * hrr_0102z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_0100z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1001z; + giy -= 1 * 1; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + 
v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1002z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_10z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0101z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0102z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_0100z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1001z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_2001z; + giz -= 1 * hrr_0001z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1002z; + glz -= 1 * trr_10z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + 
gily = ai2 * hrr_1001y; + double hrr_2002z = hrr_2011z - zlzk * hrr_2001z; + gilz = ai2 * hrr_2002z; + gilz -= 1 * hrr_0002z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1101z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1002z; + glz -= 1 * trr_10z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + gjly = aj2 * hrr_0101y; + double hrr_1102z = hrr_2002z - zjzi * hrr_1002z; + gjlz = aj2 * hrr_1102z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_1100z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0001z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_1001z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * 
hrr_2001x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_1002z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_10z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0101z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_0102z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_0100z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_21y; + giz = ai2 * hrr_1001z; + giy -= 1 * trr_01y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2011y; + gilz = ai2 * hrr_1002z; + gily -= 1 * hrr_0011y; + gilx *= al2; + gily *= al2; + 
gilz *= al2; + gilz -= 1 * (ai2 * trr_10z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0101z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0002z; + glz -= 1 * wt; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1111y; + gjlz = aj2 * hrr_0102z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_0100z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1001z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_2001z; + giz -= 1 * hrr_0001z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1002z; + glz -= 1 * trr_10z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_2002z; + gilz -= 1 * hrr_0002z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + 
v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1101z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1002z; + glz -= 1 * trr_10z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_1102z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_1100z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0011z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1011z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + double trr_03z = cpz * trr_02z + 2*b01 * trr_01z; + double hrr_0021z = trr_03z - zlzk * trr_02z; + double hrr_0012z = hrr_0021z - zlzk * hrr_0011z; + glz = al2 * hrr_0012z; + glz -= 1 * trr_01z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1001y; + double trr_13z = cpz * trr_12z + 2*b01 * trr_11z + 1*b00 * trr_02z; + double hrr_1021z = trr_13z - zlzk * trr_12z; + double hrr_1012z = hrr_1021z 
- zlzk * hrr_1011z; + gilz = ai2 * hrr_1012z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_11z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0111z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0012z; + glz -= 1 * trr_01z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0101y; + double hrr_0112z = hrr_1012z - zjzi * hrr_0012z; + gjlz = aj2 * hrr_0112z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_0110z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1011z; + giy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0012z; + glz -= 1 * trr_01z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1012z; + gily -= 1 * 
hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_11z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0111z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0012z; + glz -= 1 * trr_01z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0112z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_0110z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+2]; + dd += dm[(j0+0)*nao+l0+2] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+2]; + dd += dm[(nao+j0+0)*nao+l0+2] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+2)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+2)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1011z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_2011z; + giz -= 1 * hrr_0011z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1012z; + glz -= 1 * trr_11z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1001y; + double trr_23z = cpz * trr_22z + 2*b01 * trr_21z + 2*b00 * trr_12z; + double hrr_2021z = trr_23z - zlzk * trr_22z; + double hrr_2012z = hrr_2021z - zlzk * 
hrr_2011z; + gilz = ai2 * hrr_2012z; + gilz -= 1 * hrr_0012z; + gilx *= al2; + gily *= al2; + gilz *= al2; + gilz -= 1 * (ai2 * trr_21z - 1 * trr_01z); + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1111z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1012z; + glz -= 1 * trr_11z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0101y; + double hrr_1112z = hrr_2012z - zjzi * hrr_1012z; + gjlz = aj2 * hrr_1112z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + gjlz -= 1 * (aj2 * hrr_1110z); + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, v_ixkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky); + atomicAdd(ejk + (ia*natm+ka)*9 + 2, v_ixkz); + atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx); + atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky); + atomicAdd(ejk + (ia*natm+ka)*9 + 5, v_iykz); + atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky); + atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky); + atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx); + atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky); + atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz); + atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky); + atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz); + atomicAdd(ejk + 
(ia*natm+la)*9 + 0, v_ixlx); + atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly); + atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz); + atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx); + atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly); + atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz); + atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx); + atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly); + atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz); + atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx); + atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly); + atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz); + atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx); + atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly); + atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz); + atomicAdd(ejk + (ja*natm+la)*9 + 6, v_jzlx); + atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly); + atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz); + } +} +__global__ +void rys_ejk_ip2_type3_1011(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *pool, uint32_t *batch_head) +{ /* Kernel driver: each thread block repeatedly claims a batch id from the counter batch_head[0], fills its own slice of the quartet pool for that (ij, kl) batch, and runs _rys_ejk_ip2_type3_1011 on the resulting tasks, until the claimed id reaches nbatches. */ + int b_id = blockIdx.x; + int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* flattened thread index within the block */ + ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's slice of pool: QUEUE_DEPTH entries per block */ + __shared__ int batch_id; /* batch currently claimed by this block; written only by thread 0 */ + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); + } + __syncthreads(); /* make the batch_id written by thread 0 visible to the whole block */ + int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceiling division of ntile_kl_pairs by TILES_IN_BATCH */ + int nbatches = bounds.ntile_ij_pairs * nbatches_kl; + while (batch_id < nbatches) { + int batch_ij = batch_id / nbatches_kl; /* batch_id decomposes into (batch_ij, batch_kl), with batch_kl the fast index */ + int batch_kl = batch_id % nbatches_kl; + int nbas = envs.nbas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; /* its sign selects the task-fill routine below; NOTE(review): negative omega presumably flags the short-range ("sr") case; confirm against the fill helpers */ + int ntasks; + if (omega >= 0) { + ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } else { + ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds, + batch_ij, batch_kl); + } + if (ntasks > 0) { + int tile_ij = bounds.tile_ij_mapping[batch_ij]; + int nbas_tiles = nbas / TILE; /* integer division; assumes nbas is a multiple of TILE (TODO confirm) */ + int tile_i = tile_ij / nbas_tiles; + int tile_j = tile_ij % nbas_tiles; + int ish0 = tile_i * TILE; /* first shell index of the i tile */ + int jsh0
= tile_j * TILE; /* first shell index of the j tile */ + _rys_ejk_ip2_type3_1011(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0); + } + if (t_id == 0) { + batch_id = atomicAdd(batch_head, 1); /* claim the next batch for this block */ + atomicAdd(batch_head+1, ntasks); /* accumulate this batch's task count in batch_head[1] */ + } + __syncthreads(); /* every thread sees the new batch_id before the loop condition is re-tested */ + } +} + +__device__ static +void _rys_ejk_ip2_type3_1100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds, + ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0) +{ + int sq_id = threadIdx.x + blockDim.x * threadIdx.y; + int nsq_per_block = blockDim.x * blockDim.y; + int iprim = bounds.iprim; + int jprim = bounds.jprim; + int kprim = bounds.kprim; + int lprim = bounds.lprim; + int *ao_loc = envs.ao_loc; + int nbas = envs.nbas; + int nao = ao_loc[nbas]; + int *bas = envs.bas; + double *env = envs.env; + double omega = env[PTR_RANGE_OMEGA]; + int do_j = jk.j_factor != 0.; + int do_k = jk.k_factor != 0.; + double *dm = jk.dm; + extern __shared__ double Rpa_cicj[]; + double *rw = Rpa_cicj + iprim*jprim*TILE2*4; + for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) { + int ijp = n / TILE2; + int sh_ij = n % TILE2; + int ish = ish0 + sh_ij / TILE; + int jsh = jsh0 + sh_ij % TILE; + int ip = ijp / jprim; + int jp = ijp % jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for
(int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double gix, giy, giz; + double gjx, gjy, gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double 
v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx = 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac 
*= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double hrr_1100x = trr_20x - xjxi * trr_10x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double hrr_2100x = trr_30x - xjxi * trr_20x; + gix = ai2 * hrr_2100x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double hrr_0100x = trr_10x - xjxi * fac; + gix -= 1 * 
hrr_0100x; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1110x = trr_21x - xjxi * trr_11x; + gkx = ak2 * hrr_1110x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + gky = ak2 * trr_01y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double hrr_2110x = trr_31x - xjxi * trr_21x; + gikx = ai2 * hrr_2110x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + giky = ai2 * trr_11y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + gikz = ai2 * trr_11z; + double trr_01x = cpx * fac; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gikx -= 1 * hrr_0110x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + gjx = aj2 * hrr_1200x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_10x; + gkx = ak2 * hrr_1110x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; + gjkx = aj2 * hrr_1210x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjky = aj2 * hrr_0110y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjkz = aj2 * hrr_0110z; + gjkx -= 1 * trr_11x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * 
dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1100x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1110x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + giky = ai2 * trr_21y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + gjx = aj2 * hrr_0200x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * fac; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; + gjkx = aj2 * hrr_0210x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0110z; + gjkx -= 1 * trr_01x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * 
prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1110x; + giky = ai2 * trr_11y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gjx -= 1 * fac; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0210x; + gjky = aj2 * hrr_0110y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjkz = aj2 * hrr_1110z; + gjkx -= 1 * trr_01x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx 
* prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * hrr_1110y; + gikz = ai2 * trr_11z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * 1; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; + gjky = aj2 * hrr_0210y; + gjkz = aj2 * hrr_0110z; + gjky -= 1 * trr_01y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd 
= dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + gix = ai2 * trr_10x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + giy = ai2 * hrr_2100y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0100y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1110y; + gkz = ak2 * trr_01z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_2110y = trr_31y - yjyi * trr_21y; + giky = ai2 * hrr_2110y; + gikz = ai2 * trr_11z; + giky -= 1 * hrr_0110y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + gjy = aj2 * hrr_1200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_10y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1110y; + gkz = ak2 * trr_01z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; + gjky = aj2 * hrr_1210y; + gjkz = aj2 * hrr_0110z; + gjky -= 1 * trr_11y; + gjkx *= ak2; + gjky *= ak2; + 
gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_11z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_1110y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_1100z; + gjy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_11z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0210y; + gjkz = aj2 * hrr_1110z; + gjky -= 1 * trr_01y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * 
dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1100z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0110z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * trr_11y; + gikz = ai2 * hrr_1110z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0110z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0110y; + double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; + gjkz = aj2 * hrr_0210z; + gjkz -= 1 * trr_01z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * 
dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1100z; + giy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0110z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_21y; + gikz = ai2 * hrr_1110z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0110z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0210z; + gjkz -= 1 * trr_01z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * 
dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + giz = ai2 * hrr_2100z; + giz -= 1 * hrr_0100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1110z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_11y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_2110z = trr_31z - zjzi * trr_21z; + gikz = ai2 * hrr_2110z; + gikz -= 1 * hrr_0110z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + gjz = aj2 * hrr_1200z; + gjz -= 1 * trr_10z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1110z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0110y; + double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; + gjkz = aj2 * hrr_1210z; + gjkz -= 1 * trr_11z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + } + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + 
double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double hrr_1100x = trr_20x - xjxi * trr_10x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double hrr_2100x = trr_30x - xjxi * trr_20x; + gix = ai2 * hrr_2100x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double hrr_0100x = trr_10x - xjxi * fac; + gix -= 1 * hrr_0100x; + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1001x = trr_11x - xlxk * trr_10x; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + glx = al2 * hrr_1101x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + gly = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + 
double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double hrr_3001x = trr_31x - xlxk * trr_30x; + double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; + gilx = ai2 * hrr_2101x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + gily = ai2 * hrr_1001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + gilz = ai2 * hrr_1001z; + double trr_01x = cpx * fac; + double hrr_0001x = trr_01x - xlxk * fac; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gilx -= 1 * hrr_0101x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + gjx = aj2 * hrr_1200x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_10x; + glx = al2 * hrr_1101x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1201x = hrr_2101x - xjxi * hrr_1101x; + gjlx = aj2 * hrr_1201x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjly = aj2 * hrr_0101y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + gjlz = aj2 * hrr_0101z; + gjlx -= 1 * hrr_1001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + 
int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1100x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + glx = al2 * hrr_0101x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1101x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_2001y = trr_21y - ylyk * trr_20y; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + gjx = aj2 * hrr_0200x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * fac; + glx = al2 * hrr_0101x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0201x = hrr_1101x - xjxi * hrr_0101x; + gjlx = aj2 * hrr_0201x; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0101z; + gjlx -= 1 * hrr_0001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= 
jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0101x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1101x; + gily = ai2 * hrr_1001y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_2001z = trr_21z - zlzk * trr_20z; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gjx -= 1 * fac; + glx = al2 * hrr_0101x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0201x; + gjly = aj2 * hrr_0101y; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + gjlz = aj2 * hrr_1101z; + gjlx -= 1 * hrr_0001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + 
dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1101y; + gilz = ai2 * hrr_1001z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * 1; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + double hrr_0201y = hrr_1101y - yjyi * hrr_0101y; + gjly = aj2 * hrr_0201y; + gjlz = aj2 * hrr_0101z; + gjly -= 1 * hrr_0001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= 
jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + gix = ai2 * trr_10x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + giy = ai2 * hrr_2100y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0100y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1101y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_3001y = trr_31y - ylyk * trr_30y; + double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; + gily = ai2 * hrr_2101y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0101y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + gjy = aj2 * hrr_1200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_10y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1101y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + double hrr_1201y = hrr_2101y - yjyi * hrr_1101y; + gjly = aj2 * hrr_1201y; + gjlz = aj2 * hrr_0101z; + gjly -= 1 * hrr_1001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += 
dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1101y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_1100z; + gjy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0201y; + gjlz = aj2 * hrr_1101z; + gjly -= 1 * hrr_0001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * 
dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1100z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1001y; + gilz = ai2 * hrr_1101z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0101y; + double hrr_0201z = hrr_1101z - zjzi * hrr_0101z; + gjlz = aj2 * hrr_0201z; + gjlz -= 1 * hrr_0001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + 
if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1100z; + giy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1101z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0201z; + gjlz -= 1 * hrr_0001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += 
jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + giz = ai2 * hrr_2100z; + giz -= 1 * hrr_0100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1001y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_3001z = trr_31z - zlzk * trr_30z; + double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; + gilz = ai2 * hrr_2101z; + gilz -= 1 * hrr_0101z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + gjz = aj2 * hrr_1200z; + gjz -= 1 * trr_10z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0101y; + double hrr_1201z = hrr_2101z - zjzi * hrr_1101z; + gjlz = aj2 * hrr_1201z; + gjlz -= 1 * hrr_1001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, 
v_ixkx);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 2, v_ixkz);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 5, v_iykz);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky);
+        atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky);
+        atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz);
+        atomicAdd(ejk + (ia*natm+la)*9 + 0, v_ixlx);
+        atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly);
+        atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz);
+        atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx);
+        atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly);
+        atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz);
+        atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx);
+        atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly);
+        atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz);
+        atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx);
+        atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly);
+        atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz);
+        atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx);
+        atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly);
+        atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz);
+        atomicAdd(ejk + (ja*natm+la)*9 + 6, v_jzlx);
+        atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly);
+        atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz);
+    }
+}
+__global__ /*
+ * Kernel entry point that drives _rys_ejk_ip2_type3_1100 over batches of
+ * (ij-tile, kl-batch) work claimed at run time.
+ *
+ * Every thread block loops: thread 0 claims the next batch index from the
+ * global counter batch_head[0] with atomicAdd, publishes it to the block
+ * through the __shared__ variable batch_id, and the block then fills its
+ * task queue and evaluates it. The loop stops once the claimed index
+ * reaches nbatches = ntile_ij_pairs * ceil(ntile_kl_pairs / TILES_IN_BATCH).
+ *
+ * envs, jk, bounds : forwarded unchanged to the task-fill routine and to
+ *                    the device function.
+ * pool             : shell-quartet queue storage; this block uses the slice
+ *                    starting at pool + blockIdx.x * QUEUE_DEPTH.
+ * batch_head       : batch_head[0] is the shared batch counter;
+ *                    batch_head[1] accumulates the ntasks value of every
+ *                    processed batch.
+ *
+ * NOTE(review): thread 0 overwrites batch_id at the bottom of the loop while
+ * the other threads read it only at the top; this presumably relies on a
+ * block-wide barrier inside the fill routines (not visible here) - confirm.
+ */
+void rys_ejk_ip2_type3_1100(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                ShellQuartet *pool, uint32_t *batch_head)
+{
+    int b_id = blockIdx.x;
+    int t_id = threadIdx.x + blockDim.x * threadIdx.y; /* linear thread index within the block */
+    ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH; /* this block's task queue */
+    __shared__ int batch_id;
+    if (t_id == 0) {
+        batch_id = atomicAdd(batch_head, 1); /* claim the first batch for this block */
+    }
+    __syncthreads(); /* make batch_id visible to every thread of the block */
+    int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH; /* ceiling division */
+    int nbatches = bounds.ntile_ij_pairs * nbatches_kl;
+    while (batch_id < nbatches) {
+        int batch_ij = batch_id / nbatches_kl; /* batch_id = batch_ij * nbatches_kl + batch_kl */
+        int batch_kl = batch_id % nbatches_kl;
+        int nbas = envs.nbas;
+        double *env = envs.env;
+        double omega = env[PTR_RANGE_OMEGA];
+        int ntasks;
+        if (omega >= 0) { /* negative omega takes the _sr_ fill routine; presumably the short-range case - confirm */
+            ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                     batch_ij, batch_kl);
+        } else {
+            ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                        batch_ij, batch_kl);
+        }
+        if (ntasks > 0) {
+            int tile_ij = bounds.tile_ij_mapping[batch_ij];
+            int nbas_tiles = nbas / TILE;
+            int tile_i = tile_ij / nbas_tiles; /* tile_ij = tile_i * nbas_tiles + tile_j */
+            int tile_j = tile_ij % nbas_tiles;
+            int ish0 = tile_i * TILE; /* first shell index of tile i */
+            int jsh0 = tile_j * TILE; /* first shell index of tile j */
+            _rys_ejk_ip2_type3_1100(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0);
+        }
+        if (t_id == 0) {
+            batch_id = atomicAdd(batch_head, 1); /* claim the next batch */
+            atomicAdd(batch_head+1, ntasks); /* running total of tasks in batch_head[1] */
+        }
+        __syncthreads(); /* publish the new batch_id before the loop condition is re-tested */
+    }
+}
+
+__device__ static
+void _rys_ejk_ip2_type3_1110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                             ShellQuartet *shl_quartet_idx, int ntasks, int ish0, int jsh0)
+{
+    int sq_id = threadIdx.x + blockDim.x * threadIdx.y;
+    int nsq_per_block = blockDim.x * blockDim.y;
+    int iprim = bounds.iprim;
+    int jprim = bounds.jprim;
+    int kprim = bounds.kprim;
+    int lprim = bounds.lprim;
+    int *ao_loc = envs.ao_loc;
+    int nbas = envs.nbas;
+    int nao = ao_loc[nbas];
+    int *bas = envs.bas;
+    double *env = envs.env;
+    double omega = env[PTR_RANGE_OMEGA];
+    int do_j = jk.j_factor != 0.;
+    int do_k = jk.k_factor != 0.;
+    double *dm = jk.dm;
+    extern __shared__ double Rpa_cicj[];
+    double *rw = Rpa_cicj + iprim*jprim*TILE2*4;
+    for (int n = sq_id; n < iprim*jprim*TILE2; n += nsq_per_block) {
+        int ijp = n / TILE2;
+        int sh_ij = n % TILE2;
+        int ish = ish0 + sh_ij / TILE;
+        int jsh = jsh0 + sh_ij % TILE;
+        int ip = ijp / jprim;
+        int jp = ijp %
jprim; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *ci = env + bas[ish*BAS_SLOTS+PTR_COEFF]; + double *cj = env + bas[jsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + double ai = expi[ip]; + double aj = expj[jp]; + double aij = ai + aj; + double aj_aij = aj / aij; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + Rpa[sh_ij+0*TILE2] = xjxi * aj_aij; + Rpa[sh_ij+1*TILE2] = yjyi * aj_aij; + Rpa[sh_ij+2*TILE2] = zjzi * aj_aij; + double theta_ij = ai * aj_aij; + double Kab = exp(-theta_ij * (xjxi*xjxi+yjyi*yjyi+zjzi*zjzi)); + Rpa[sh_ij+3*TILE2] = ci[ip] * cj[jp] * Kab; + } + + for (int task0 = 0; task0 < ntasks; task0 += nsq_per_block) { + __syncthreads(); + int task_id = task0 + sq_id; + double fac_sym = PI_FAC; + ShellQuartet sq; + if (task_id >= ntasks) { + // To avoid __syncthreads blocking blocking idle warps, all remaining + // threads compute a valid shell quartet with zero normalization factor + sq = shl_quartet_idx[0]; + fac_sym = 0.; + } else { + sq = shl_quartet_idx[task_id]; + } + int ish = sq.i; + int jsh = sq.j; + int ksh = sq.k; + int lsh = sq.l; + int sh_ij = (ish % TILE) * TILE + (jsh % TILE); + if (ish == jsh) fac_sym *= .5; + if (ksh == lsh) fac_sym *= .5; + if (ish*nbas+jsh == ksh*nbas+lsh) fac_sym *= .5; + int i0 = ao_loc[ish]; + int j0 = ao_loc[jsh]; + int k0 = ao_loc[ksh]; + int l0 = ao_loc[lsh]; + double *expi = env + bas[ish*BAS_SLOTS+PTR_EXP]; + double *expj = env + bas[jsh*BAS_SLOTS+PTR_EXP]; + double *expk = env + bas[ksh*BAS_SLOTS+PTR_EXP]; + double *expl = env + bas[lsh*BAS_SLOTS+PTR_EXP]; + double *ck = env + bas[ksh*BAS_SLOTS+PTR_COEFF]; + double *cl = env + bas[lsh*BAS_SLOTS+PTR_COEFF]; + double *ri = env + bas[ish*BAS_SLOTS+PTR_BAS_COORD]; + double *rj = env + bas[jsh*BAS_SLOTS+PTR_BAS_COORD]; + 
double *rk = env + bas[ksh*BAS_SLOTS+PTR_BAS_COORD]; + double *rl = env + bas[lsh*BAS_SLOTS+PTR_BAS_COORD]; + double dd; + double Ix, Iy, Iz, prod_xy, prod_xz, prod_yz; + double gix, giy, giz; + double gjx, gjy, gjz; + double gkx, gky, gkz; + double glx, gly, glz; + double gikx, giky, gikz; + double gjkx, gjky, gjkz; + double gilx, gily, gilz; + double gjlx, gjly, gjlz; + double v_ixkx = 0; + double v_ixky = 0; + double v_ixkz = 0; + double v_iykx = 0; + double v_iyky = 0; + double v_iykz = 0; + double v_izkx = 0; + double v_izky = 0; + double v_izkz = 0; + double v_jxkx = 0; + double v_jxky = 0; + double v_jxkz = 0; + double v_jykx = 0; + double v_jyky = 0; + double v_jykz = 0; + double v_jzkx = 0; + double v_jzky = 0; + double v_jzkz = 0; + double v_ixlx = 0; + double v_ixly = 0; + double v_ixlz = 0; + double v_iylx = 0; + double v_iyly = 0; + double v_iylz = 0; + double v_izlx = 0; + double v_izly = 0; + double v_izlz = 0; + double v_jxlx = 0; + double v_jxly = 0; + double v_jxlz = 0; + double v_jylx = 0; + double v_jyly = 0; + double v_jylz = 0; + double v_jzlx = 0; + double v_jzly = 0; + double v_jzlz = 0; + + for (int klp = 0; klp < kprim*lprim; ++klp) { + int kp = klp / lprim; + int lp = klp % lprim; + double ak = expk[kp]; + double al = expl[lp]; + double ak2 = ak * 2; + double al2 = al * 2; + double akl = ak + al; + double al_akl = al / akl; + double xlxk = rl[0] - rk[0]; + double ylyk = rl[1] - rk[1]; + double zlzk = rl[2] - rk[2]; + double theta_kl = ak * al_akl; + double Kcd = exp(-theta_kl * (xlxk*xlxk+ylyk*ylyk+zlzk*zlzk)); + double ckcl = fac_sym * ck[kp] * cl[lp] * Kcd; + double xqc = xlxk * al_akl; + double yqc = ylyk * al_akl; + double zqc = zlzk * al_akl; + double xkl = rk[0] + xqc; + double ykl = rk[1] + yqc; + double zkl = rk[2] + zqc; + for (int ijp = 0; ijp < iprim*jprim; ++ijp) { + int ip = ijp / jprim; + int jp = ijp % jprim; + double ai = expi[ip]; + double aj = expj[jp]; + double ai2 = ai * 2; + double aj2 = aj * 2; + double aij = ai + 
aj; + double *Rpa = Rpa_cicj + ijp * TILE2*4; + double cicj = Rpa[sh_ij+3*TILE2]; + double fac = cicj * ckcl / (aij*akl*sqrt(aij+akl)); + double xpa = Rpa[sh_ij+0*TILE2]; + double ypa = Rpa[sh_ij+1*TILE2]; + double zpa = Rpa[sh_ij+2*TILE2]; + double xij = ri[0] + xpa; // (ai*xi+aj*xj)/aij + double yij = ri[1] + ypa; + double zij = ri[2] + zpa; + double xjxi = rj[0] - ri[0]; + double yjyi = rj[1] - ri[1]; + double zjzi = rj[2] - ri[2]; + double xpq = xij - xkl; + double ypq = yij - ykl; + double zpq = zij - zkl; + double theta = aij * akl / (aij + akl); + double rr = xpq * xpq + ypq * ypq + zpq * zpq; + double theta_rr = theta * rr; + if (omega == 0) { + rys_roots(3, theta_rr, rw); + } else if (omega > 0) { + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + fac *= sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + } + } else { + rys_roots(3, theta_rr, rw+6*nsq_per_block); + double theta_fac = omega * omega / (omega * omega + theta); + rys_roots(3, theta_fac*theta_rr, rw); + double sqrt_theta_fac = -sqrt(theta_fac); + for (int irys = 0; irys < 3; ++irys) { + rw[sq_id+ irys*2 *nsq_per_block] *= theta_fac; + rw[sq_id+(irys*2+1)*nsq_per_block] *= sqrt_theta_fac; + } + } + if (task_id < ntasks) { + for (int irys = 0; irys < bounds.nroots; ++irys) { + { + double wt = rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1110x = trr_21x - xjxi * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * 
dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1110x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1110x * Iy; + prod_xz = hrr_1110x * Iz; + prod_yz = 1 * Iz; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double hrr_2110x = trr_31x - xjxi * trr_21x; + gix = ai2 * hrr_2110x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double trr_01x = cpx * fac; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gix -= 1 * hrr_0110x; + double b01 = .5/akl * (1 - rt_akl); + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1120x = trr_22x - xjxi * trr_12x; + gkx = ak2 * hrr_1120x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + gky = ak2 * trr_01y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + gkz = ak2 * trr_01z; + double hrr_1100x = trr_20x - xjxi * trr_10x; + gkx -= 1 * hrr_1100x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; + double hrr_2120x = trr_32x - xjxi * trr_22x; + gikx = ai2 * hrr_2120x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + giky = ai2 * trr_11y; + 
double trr_11z = cpz * trr_10z + 1*b00 * wt; + gikz = ai2 * trr_11z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0120x = trr_12x - xjxi * trr_02x; + gikx -= 1 * hrr_0120x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + double hrr_2100x = trr_30x - xjxi * trr_20x; + double hrr_0100x = trr_10x - xjxi * fac; + gikx -= 1 * (ai2 * hrr_2100x - 1 * hrr_0100x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; + gjx = aj2 * hrr_1210x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_11x; + gkx = ak2 * hrr_1120x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_01z; + gkx -= 1 * hrr_1100x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_1220x = hrr_2120x - xjxi * hrr_1120x; + gjkx = aj2 * hrr_1220x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjky = aj2 * hrr_0110y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjkz = aj2 * hrr_0110z; + gjkx -= 1 * trr_12x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + gjkx -= 1 * (aj2 * hrr_1200x - 1 * trr_10x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x 
* dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1110x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + gkx = ak2 * hrr_0120x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + gkx -= 1 * hrr_0100x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1120x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + giky = ai2 * trr_21y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * hrr_1100x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; + gjx = aj2 * hrr_0210x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_01x; + gkx = ak2 * hrr_0120x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_01z; + gkx -= 1 * hrr_0100x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + double hrr_0220x = hrr_1120x - xjxi * hrr_0120x; + gjkx = aj2 * hrr_0220x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0110z; + gjkx -= 1 * trr_02x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + gjkx -= 1 * (aj2 * hrr_0200x - 1 * fac); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else 
{ + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1110x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * hrr_0120x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + gkx -= 1 * hrr_0100x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1120x; + giky = ai2 * trr_11y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * hrr_1100x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0210x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gjx -= 1 * trr_01x; + gkx = ak2 * hrr_0120x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_11z; + gkx -= 1 * hrr_0100x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0220x; + gjky = aj2 * hrr_0110y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjkz = aj2 * hrr_1110z; + gjkx -= 1 * trr_02x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0200x - 1 * fac); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_21x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_10z; + gix -= 1 * trr_01x; + gkx = ak2 * trr_12x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_01z; + gkx -= 1 * trr_10x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_22x; + giky = ai2 * hrr_1110y; + gikz = ai2 * trr_11z; + gikx -= 1 * trr_02x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1110x; + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * 1; + gkx = ak2 * trr_12x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_01z; + gkx -= 1 * trr_10x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1120x; + double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; + gjky = aj2 * hrr_0210y; + gjkz = aj2 * hrr_0110z; + gjky -= 1 * trr_01y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_1100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * 
dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1100y * Iz; + gix = ai2 * trr_11x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + giy = ai2 * hrr_2100y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0100y; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_1110y; + gkz = ak2 * trr_01z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_2110y = trr_31y - yjyi * trr_21y; + giky = ai2 * hrr_2110y; + gikz = ai2 * trr_11z; + giky -= 1 * hrr_0110y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + gjy = aj2 * hrr_1200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_10y; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_1110y; + gkz = ak2 * trr_01z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; + gjky = aj2 * hrr_1210y; + gjkz = aj2 * hrr_0110z; + gjky -= 1 * trr_11y; + gjkx *= ak2; + gjky *= ak2; 
+ gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_11z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * hrr_1110y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_1100z; + gjy -= 1 * 1; + gkx = ak2 * trr_02x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_11z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_0210y; + gjkz = aj2 * hrr_1110z; + gjky -= 1 * trr_01y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + 
v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_21x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1100z; + gix -= 1 * trr_01x; + gkx = ak2 * trr_12x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0110z; + gkx -= 1 * trr_10x; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_22x; + giky = ai2 * trr_11y; + gikz = ai2 * hrr_1110z; + gikx -= 1 * trr_02x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_20x - 1 * fac); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1110x; + gjy = aj2 * hrr_0100y; + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + gkx = ak2 * trr_12x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0110z; + gkx -= 1 * trr_10x; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1120x; + gjky = aj2 * hrr_0110y; + double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; + gjkz = aj2 * hrr_0210z; + gjkz -= 1 * trr_01z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_1100x); + 
v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1100z; + giy -= 1 * 1; + gkx = ak2 * trr_02x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0110z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * trr_21y; + gikz = ai2 * hrr_1110z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + gkx = ak2 * trr_02x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0110z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0210z; + gjkz -= 1 * trr_01z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + 
if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_10y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + giz = ai2 * hrr_2100z; + giz -= 1 * hrr_0100z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1110z; + gkx -= 1 * fac; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_12x; + giky = ai2 * trr_11y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_2110z = trr_31z - zjzi * trr_21z; + gikz = ai2 * hrr_2110z; + gikz -= 1 * hrr_0110z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikx -= 1 * (ai2 * trr_10x); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0100y; + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + gjz = aj2 * hrr_1200z; + gjz -= 1 * trr_10z; + gkx = ak2 * trr_02x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1110z; + gkx -= 1 * fac; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0120x; + gjky = aj2 * hrr_0110y; + double hrr_1210z = hrr_2110z - zjzi * 
hrr_1110z; + gjkz = aj2 * hrr_1210z; + gjkz -= 1 * trr_11z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkx -= 1 * (aj2 * hrr_0100x); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * hrr_2100x; + giy = ai2 * trr_11y; + giz = ai2 * trr_10z; + gix -= 1 * hrr_0100x; + gkx = ak2 * hrr_1110x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + gky = ak2 * trr_02y; + gkz = ak2 * trr_01z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_2110x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + giky = ai2 * trr_12y; + gikz = ai2 * trr_11z; + gikx -= 1 * hrr_0110x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1200x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_10x; + gkx = ak2 * hrr_1110x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_01z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; 
+ gjkx = aj2 * hrr_1210x; + double hrr_0120y = trr_12y - yjyi * trr_02y; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_0110z; + gjkx -= 1 * trr_11x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_21y; + giz = ai2 * trr_10z; + giy -= 1 * trr_01y; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_12y; + gkz = ak2 * trr_01z; + gky -= 1 * trr_10y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1110x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + giky = ai2 * trr_22y; + gikz = ai2 * trr_11z; + giky -= 1 * trr_02y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * fac; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_12y; + gkz = ak2 * trr_01z; + gky -= 1 * trr_10y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + 
v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0210x; + double hrr_1120y = trr_22y - yjyi * trr_12y; + gjky = aj2 * hrr_1120y; + gjkz = aj2 * hrr_0110z; + gjkx -= 1 * trr_01x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_11y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_11z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1110x; + giky = ai2 * trr_12y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1100z; + gjx -= 1 * fac; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_02y; + gkz = ak2 * trr_11z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky 
+= gjz * gky * Ix; + gjkx = aj2 * hrr_0210x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_1110z; + gjkx -= 1 * trr_01x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0110y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0110y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1110y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0120y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_0100y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * hrr_1120y; + gikz = ai2 * trr_11z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * hrr_1100y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0210y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_01y; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0120y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_0100y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + double hrr_0220y = 
hrr_1120y - yjyi * hrr_0120y; + gjky = aj2 * hrr_0220y; + gjkz = aj2 * hrr_0110z; + gjky -= 1 * trr_02y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0200y - 1 * 1); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1110y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1110y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_2110y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0110y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1120y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_1100y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; + double hrr_2120y = trr_32y - yjyi * trr_22y; + giky = ai2 * hrr_2120y; + gikz = ai2 * trr_11z; + giky -= 1 * hrr_0120y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * hrr_2100y - 1 * hrr_0100y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1210y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_11y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1120y; + gkz = ak2 * trr_01z; + gky -= 1 * hrr_1100y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz 
+= gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + double hrr_1220y = hrr_2120y - yjyi * hrr_1120y; + gjky = aj2 * hrr_1220y; + gjkz = aj2 * hrr_0110z; + gjky -= 1 * trr_12y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1200y - 1 * trr_10y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0110y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0110y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1110y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0120y; + gkz = ak2 * trr_11z; + gky -= 1 * hrr_0100y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_1120y; + gikz = ai2 * trr_21z; + gikz -= 1 * trr_01z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * hrr_1100y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0210y; + gjz = aj2 * hrr_1100z; + gjy -= 1 * trr_01y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0120y; + gkz = ak2 * trr_11z; + gky -= 1 * hrr_0100y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * 
gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0220y; + gjkz = aj2 * hrr_1110z; + gjky -= 1 * trr_02y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0200y - 1 * 1); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_1100z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_0110z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * trr_12y; + gikz = ai2 * hrr_1110z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + gkx = ak2 * trr_11x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_0110z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 
* hrr_1110x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_0210z; + gjkz -= 1 * trr_01z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_21y; + giz = ai2 * hrr_1100z; + giy -= 1 * trr_01y; + gkx = ak2 * trr_01x; + gky = ak2 * trr_12y; + gkz = ak2 * hrr_0110z; + gky -= 1 * trr_10y; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_22y; + gikz = ai2 * hrr_1110z; + giky -= 1 * trr_02y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_20y - 1 * 1); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + gkx = ak2 * trr_01x; + gky = ak2 * trr_12y; + gkz = ak2 * hrr_0110z; + gky -= 1 * trr_10y; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1120y; + gjkz = aj2 * hrr_0210z; + gjkz -= 
1 * trr_01z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * hrr_1100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_2100z; + giz -= 1 * hrr_0100z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_1110z; + gky -= 1 * 1; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_12y; + gikz = ai2 * hrr_2110z; + gikz -= 1 * hrr_0110z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + giky -= 1 * (ai2 * trr_10y); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1200z; + gjz -= 1 * trr_10z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_02y; + gkz = ak2 * hrr_1110z; + gky -= 1 * 1; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0120y; + gjkz = aj2 * hrr_1210z; + gjkz -= 1 * trr_11z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjky -= 1 * (aj2 * 
hrr_0100y); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_2100x; + giy = ai2 * trr_10y; + giz = ai2 * trr_11z; + gix -= 1 * hrr_0100x; + gkx = ak2 * hrr_1110x; + gky = ak2 * trr_01y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_2110x; + giky = ai2 * trr_11y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + gikz = ai2 * trr_12z; + gikx -= 1 * hrr_0110x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1200x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0110z; + gjx -= 1 * trr_10x; + gkx = ak2 * hrr_1110x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1210x; + gjky = aj2 * hrr_0110y; + double hrr_0120z = trr_12z - zjzi * trr_02z; + gjkz = aj2 * hrr_0120z; + 
gjkx -= 1 * trr_11x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_20y; + giz = ai2 * trr_11z; + giy -= 1 * 1; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1110x; + giky = ai2 * trr_21y; + gikz = ai2 * trr_12z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0110z; + gjx -= 1 * fac; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_11y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0210x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0120z; + gjkx -= 1 * trr_01x; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * 
(aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_10y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * hrr_1110x; + giky = ai2 * trr_11y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + gikz = ai2 * trr_22z; + gikz -= 1 * trr_02z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1110z; + gjx -= 1 * fac; + gkx = ak2 * hrr_0110x; + gky = ak2 * trr_01y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0210x; + gjky = aj2 * hrr_0110y; + double hrr_1120z = trr_22z - zjzi * trr_12z; + gjkz = aj2 * hrr_1120z; + gjkx -= 1 * trr_01x; + gjkx *= 
ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_11z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * hrr_1110y; + gikz = ai2 * trr_12z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_0110z; + gjy -= 1 * 1; + gkx = ak2 * trr_11x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0210y; + gjkz = aj2 * hrr_0120z; + gjky -= 1 * trr_01y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx 
* prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_2100y; + giz = ai2 * trr_11z; + giy -= 1 * hrr_0100y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1110y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_2110y; + gikz = ai2 * trr_12z; + giky -= 1 * hrr_0110y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_10z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1200y; + gjz = aj2 * hrr_0110z; + gjy -= 1 * trr_10y; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_1110y; + gkz = ak2 * trr_02z; + gkz -= 1 * wt; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1210y; + gjkz = aj2 * hrr_0120z; + gjky -= 1 * trr_11y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + 
dd = dm[(j0+1)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * hrr_1110y; + gikz = ai2 * trr_22z; + gikz -= 1 * trr_02z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * trr_20z - 1 * wt); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_1110z; + gjy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * hrr_0110y; + gkz = ak2 * trr_12z; + gkz -= 1 * trr_10z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0210y; + gjkz = aj2 * hrr_1120z; + gjky -= 1 * trr_01y; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1100z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += 
dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0110z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1110z; + gix -= 1 * fac; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0120z; + gkz -= 1 * hrr_0100z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_21x; + giky = ai2 * trr_11y; + gikz = ai2 * hrr_1120z; + gikx -= 1 * trr_01x; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * hrr_1100z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_0210z; + gjz -= 1 * trr_01z; + gkx = ak2 * trr_11x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_0120z; + gkz -= 1 * hrr_0100z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_1110x; + gjky = aj2 * hrr_0110y; + double hrr_0220z = hrr_1120z - zjzi * hrr_0120z; + gjkz = aj2 * hrr_0220z; + gjkz -= 1 * trr_02z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0200z - 1 * wt); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += 
dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1110z; + giy -= 1 * 1; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0120z; + gkz -= 1 * hrr_0100z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_21y; + gikz = ai2 * hrr_1120z; + giky -= 1 * trr_01y; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * hrr_1100z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0210z; + gjz -= 1 * trr_01z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_11y; + gkz = ak2 * hrr_0120z; + gkz -= 1 * hrr_0100z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_1110y; + gjkz = aj2 * hrr_0220z; + gjkz -= 1 * trr_02z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_0200z - 1 * wt); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd 
+= dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_2110z; + giz -= 1 * hrr_0110z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1120z; + gkz -= 1 * hrr_1100z; + v_ixky += gix * gky * Iz; + v_ixkz += gix * gkz * Iy; + v_iykx += giy * gkx * Iz; + v_iykz += giy * gkz * Ix; + v_izkx += giz * gkx * Iy; + v_izky += giz * gky * Ix; + gikx = ai2 * trr_11x; + giky = ai2 * trr_11y; + double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; + double hrr_2120z = trr_32z - zjzi * trr_22z; + gikz = ai2 * hrr_2120z; + gikz -= 1 * hrr_0120z; + gikx *= ak2; + giky *= ak2; + gikz *= ak2; + gikz -= 1 * (ai2 * hrr_2100z - 1 * hrr_0100z); + v_ixkx += gikx * prod_yz; + v_iyky += giky * prod_xz; + v_izkz += gikz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + gjz = aj2 * hrr_1210z; + gjz -= 1 * trr_11z; + gkx = ak2 * trr_01x; + gky = ak2 * trr_01y; + gkz = ak2 * hrr_1120z; + gkz -= 1 * hrr_1100z; + v_jxky += gjx * gky * Iz; + v_jxkz += gjx * gkz * Iy; + v_jykx += gjy * gkx * Iz; + v_jykz += gjy * gkz * Ix; + v_jzkx += gjz * gkx * Iy; + v_jzky += gjz * gky * Ix; + gjkx = aj2 * hrr_0110x; + gjky = aj2 * hrr_0110y; + double hrr_1220z = hrr_2120z - zjzi * hrr_1120z; + gjkz = aj2 * hrr_1220z; + gjkz -= 1 * trr_12z; + gjkx *= ak2; + gjky *= ak2; + gjkz *= ak2; + gjkz -= 1 * (aj2 * hrr_1200z - 1 * trr_10z); + v_jxkx += gjkx * prod_yz; + v_jyky += gjky * prod_xz; + v_jzkz += gjkz * prod_xy; + } + { + double wt = 
rw[sq_id + (2*irys+1)*nsq_per_block]; + double rt = rw[sq_id + 2*irys *nsq_per_block]; + double rt_aa = rt / (aij + akl); + double rt_akl = rt_aa * aij; + double cpx = xqc + xpq*rt_akl; + double rt_aij = rt_aa * akl; + double c0x = Rpa[0*TILE2+sh_ij] - xpq*rt_aij; + double trr_10x = c0x * fac; + double b10 = .5/aij * (1 - rt_aij); + double trr_20x = c0x * trr_10x + 1*b10 * fac; + double b00 = .5 * rt_aa; + double trr_21x = cpx * trr_20x + 2*b00 * trr_10x; + double trr_11x = cpx * trr_10x + 1*b00 * fac; + double hrr_1110x = trr_21x - xjxi * trr_11x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1110x * dd; + Iy = 1 * dd; + Iz = wt * dd; + prod_xy = hrr_1110x * Iy; + prod_xz = hrr_1110x * Iz; + prod_yz = 1 * Iz; + double trr_30x = c0x * trr_20x + 2*b10 * trr_10x; + double trr_31x = cpx * trr_30x + 3*b00 * trr_20x; + double hrr_2110x = trr_31x - xjxi * trr_21x; + gix = ai2 * hrr_2110x; + double c0y = Rpa[1*TILE2+sh_ij] - ypq*rt_aij; + double trr_10y = c0y * 1; + giy = ai2 * trr_10y; + double c0z = Rpa[2*TILE2+sh_ij] - zpq*rt_aij; + double trr_10z = c0z * wt; + giz = ai2 * trr_10z; + double trr_01x = cpx * fac; + double hrr_0110x = trr_11x - xjxi * trr_01x; + gix -= 1 * hrr_0110x; + double b01 = .5/akl * (1 - rt_akl); + double trr_22x = cpx * trr_21x + 1*b01 * trr_20x + 2*b00 * trr_11x; + double hrr_2011x = trr_22x - xlxk * trr_21x; + double trr_12x = cpx * trr_11x + 1*b01 * trr_10x + 1*b00 * trr_01x; + double hrr_1011x = trr_12x - xlxk * trr_11x; + double 
hrr_1111x = hrr_2011x - xjxi * hrr_1011x; + glx = al2 * hrr_1111x; + double cpy = yqc + ypq*rt_akl; + double trr_01y = cpy * 1; + double hrr_0001y = trr_01y - ylyk * 1; + gly = al2 * hrr_0001y; + double cpz = zqc + zpq*rt_akl; + double trr_01z = cpz * wt; + double hrr_0001z = trr_01z - zlzk * wt; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double trr_32x = cpx * trr_31x + 1*b01 * trr_30x + 3*b00 * trr_21x; + double hrr_3011x = trr_32x - xlxk * trr_31x; + double hrr_2111x = hrr_3011x - xjxi * hrr_2011x; + gilx = ai2 * hrr_2111x; + double trr_11y = cpy * trr_10y + 1*b00 * 1; + double hrr_1001y = trr_11y - ylyk * trr_10y; + gily = ai2 * hrr_1001y; + double trr_11z = cpz * trr_10z + 1*b00 * wt; + double hrr_1001z = trr_11z - zlzk * trr_10z; + gilz = ai2 * hrr_1001z; + double trr_02x = cpx * trr_01x + 1*b01 * fac; + double hrr_0011x = trr_02x - xlxk * trr_01x; + double hrr_0111x = hrr_1011x - xjxi * hrr_0011x; + gilx -= 1 * hrr_0111x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1210x = hrr_2110x - xjxi * hrr_1110x; + gjx = aj2 * hrr_1210x; + double hrr_0100y = trr_10y - yjyi * 1; + gjy = aj2 * hrr_0100y; + double hrr_0100z = trr_10z - zjzi * wt; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_11x; + glx = al2 * hrr_1111x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1211x = hrr_2111x - xjxi * hrr_1111x; + gjlx = aj2 * hrr_1211x; + double hrr_0101y = hrr_1001y - yjyi * hrr_0001y; + gjly = aj2 * hrr_0101y; + double hrr_0101z = hrr_1001z - zjzi * hrr_0001z; + gjlz = aj2 * hrr_0101z; + gjlx -= 1 * hrr_1011x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; 
+ v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = trr_10y * dd; + Iz = wt * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1110x; + double trr_20y = c0y * trr_10y + 1*b10 * 1; + giy = ai2 * trr_20y; + giz = ai2 * trr_10z; + giy -= 1 * 1; + glx = al2 * hrr_0111x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1111x; + double trr_21y = cpy * trr_20y + 2*b00 * trr_10y; + double hrr_2001y = trr_21y - ylyk * trr_20y; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0210x = hrr_1110x - xjxi * hrr_0110x; + gjx = aj2 * hrr_0210x; + double hrr_1100y = trr_20y - yjyi * trr_10y; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_01x; + glx = al2 * hrr_0111x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0211x = hrr_1111x - xjxi * hrr_0111x; + gjlx = aj2 * 
hrr_0211x; + double hrr_1101y = hrr_2001y - yjyi * hrr_1001y; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0101z; + gjlx -= 1 * hrr_0011x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0110x * dd; + Iy = 1 * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0110x * Iy; + prod_xz = hrr_0110x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1110x; + giy = ai2 * trr_10y; + double trr_20z = c0z * trr_10z + 1*b10 * wt; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0111x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1111x; + gily = ai2 * hrr_1001y; + double trr_21z = cpz * trr_20z + 2*b00 * trr_10z; + double hrr_2001z = trr_21z - zlzk * trr_20z; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0210x; + gjy = aj2 * hrr_0100y; + double hrr_1100z = trr_20z - zjzi * trr_10z; + gjz = aj2 * hrr_1100z; + gjx -= 1 * trr_01x; + glx = al2 * hrr_0111x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * 
Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0211x; + gjly = aj2 * hrr_0101y; + double hrr_1101z = hrr_2001z - zjzi * hrr_1001z; + gjlz = aj2 * hrr_1101z; + gjlx -= 1 * hrr_0011x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = hrr_0100y * dd; + Iz = wt * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_21x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_10z; + gix -= 1 * trr_01x; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2011x; + gily = ai2 * hrr_1101y; + gilz = ai2 * hrr_1001z; + gilx -= 1 * hrr_0011x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1110x; + double hrr_0200y = hrr_1100y - yjyi * hrr_0100y; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * 1; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + 
gjlx = aj2 * hrr_1111x; + double hrr_0201y = hrr_1101y - yjyi * hrr_0101y; + gjly = aj2 * hrr_0201y; + gjlz = aj2 * hrr_0101z; + gjly -= 1 * hrr_0001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_1100y * dd; + Iz = wt * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_1100y * Iz; + gix = ai2 * trr_11x; + double trr_30y = c0y * trr_20y + 2*b10 * trr_10y; + double hrr_2100y = trr_30y - yjyi * trr_20y; + giy = ai2 * hrr_2100y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0100y; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1101y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + double trr_31y = cpy * trr_30y + 3*b00 * trr_20y; + double hrr_3001y = trr_31y - ylyk * trr_30y; + double hrr_2101y = hrr_3001y - yjyi * hrr_2001y; + gily = ai2 * hrr_2101y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0101y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + double hrr_1200y = hrr_2100y - yjyi * hrr_1100y; + gjy = aj2 * hrr_1200y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_10y; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1101y; + 
glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + double hrr_1201y = hrr_2101y - yjyi * hrr_1101y; + gjly = aj2 * hrr_1201y; + gjlz = aj2 * hrr_0101z; + gjly -= 1 * hrr_1001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = hrr_0100y * dd; + Iz = trr_10z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + gily = ai2 * hrr_1101y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_1100z; + gjy -= 1 * 1; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz 
+= gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + gjly = aj2 * hrr_0201y; + gjlz = aj2 * hrr_1101z; + gjly -= 1 * hrr_0001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_11x * dd; + Iy = 1 * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_11x * Iy; + prod_xz = trr_11x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_21x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1100z; + gix -= 1 * trr_01x; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2011x; + gily = ai2 * hrr_1001y; + gilz = ai2 * hrr_1101z; + gilx -= 1 * hrr_0011x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1110x; + gjy = aj2 * hrr_0100y; + double hrr_0200z = hrr_1100z - zjzi * hrr_0100z; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + glx = al2 * hrr_1011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1111x; + gjly = aj2 * 
hrr_0101y; + double hrr_0201z = hrr_1101z - zjzi * hrr_0101z; + gjlz = aj2 * hrr_0201z; + gjlz -= 1 * hrr_0001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = trr_10y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1100z; + giy -= 1 * 1; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1101z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + glx = al2 * hrr_0011x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0201z; + gjlz -= 1 * hrr_0001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += 
gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+0] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+0]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+0] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+0]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+0]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+0; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_01x * dd; + Iy = 1 * dd; + Iz = hrr_1100z * dd; + prod_xy = trr_01x * Iy; + prod_xz = trr_01x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_11x; + giy = ai2 * trr_10y; + double trr_30z = c0z * trr_20z + 2*b10 * trr_10z; + double hrr_2100z = trr_30z - zjzi * trr_20z; + giz = ai2 * hrr_2100z; + giz -= 1 * hrr_0100z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1011x; + gily = ai2 * hrr_1001y; + double trr_31z = cpz * trr_30z + 3*b00 * trr_20z; + double hrr_3001z = trr_31z - zlzk * trr_30z; + double hrr_2101z = hrr_3001z - zjzi * hrr_2001z; + gilz = ai2 * hrr_2101z; + gilz -= 1 * hrr_0101z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0110x; + gjy = aj2 * hrr_0100y; + double hrr_1200z = hrr_2100z - zjzi * hrr_1100z; + gjz = aj2 * hrr_1200z; + gjz -= 1 * trr_10z; + glx = al2 * hrr_0011x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0111x; + 
gjly = aj2 * hrr_0101y; + double hrr_1201z = hrr_2101z - zjzi * hrr_1101z; + gjlz = aj2 * hrr_1201z; + gjlz -= 1 * hrr_1001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + double hrr_1100x = trr_20x - xjxi * trr_10x; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = trr_01y * dd; + Iz = wt * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = trr_01y * Iz; + double hrr_2100x = trr_30x - xjxi * trr_20x; + gix = ai2 * hrr_2100x; + giy = ai2 * trr_11y; + giz = ai2 * trr_10z; + double hrr_0100x = trr_10x - xjxi * fac; + gix -= 1 * hrr_0100x; + double hrr_2001x = trr_21x - xlxk * trr_20x; + double hrr_1001x = trr_11x - xlxk * trr_10x; + double hrr_1101x = hrr_2001x - xjxi * hrr_1001x; + glx = al2 * hrr_1101x; + double trr_02y = cpy * trr_01y + 1*b01 * 1; + double hrr_0011y = trr_02y - ylyk * trr_01y; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + double hrr_3001x = trr_31x - xlxk * trr_30x; + double hrr_2101x = hrr_3001x - xjxi * hrr_2001x; + gilx = ai2 * hrr_2101x; + double trr_12y = cpy * trr_11y + 1*b01 * trr_10y + 1*b00 * trr_01y; + double hrr_1011y = trr_12y - ylyk * trr_11y; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_1001z; + double hrr_0001x = trr_01x - xlxk * 
fac; + double hrr_0101x = hrr_1001x - xjxi * hrr_0001x; + gilx -= 1 * hrr_0101x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_1200x = hrr_2100x - xjxi * hrr_1100x; + gjx = aj2 * hrr_1200x; + double hrr_0110y = trr_11y - yjyi * trr_01y; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * trr_10x; + glx = al2 * hrr_1101x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_1201x = hrr_2101x - xjxi * hrr_1101x; + gjlx = aj2 * hrr_1201x; + double hrr_0111y = hrr_1011y - yjyi * hrr_0011y; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_0101z; + gjlx -= 1 * hrr_1001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_11y * dd; + Iz = wt * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_21y; + giz = ai2 * trr_10z; + giy -= 1 * trr_01y; + glx = al2 * hrr_0101x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly 
+= giz * gly * Ix; + gilx = ai2 * hrr_1101x; + double trr_22y = cpy * trr_21y + 1*b01 * trr_20y + 2*b00 * trr_11y; + double hrr_2011y = trr_22y - ylyk * trr_21y; + gily = ai2 * hrr_2011y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0011y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + double hrr_0200x = hrr_1100x - xjxi * hrr_0100x; + gjx = aj2 * hrr_0200x; + double hrr_1110y = trr_21y - yjyi * trr_11y; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0100z; + gjx -= 1 * fac; + glx = al2 * hrr_0101x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + double hrr_0201x = hrr_1101x - xjxi * hrr_0101x; + gjlx = aj2 * hrr_0201x; + double hrr_1111y = hrr_2011y - yjyi * hrr_1011y; + gjly = aj2 * hrr_1111y; + gjlz = aj2 * hrr_0101z; + gjlx -= 1 * hrr_0001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_01y * dd; + Iz = trr_10z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_11y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0101x; + gly = al2 * hrr_0011y; + glz = al2 * 
hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1101x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1100z; + gjx -= 1 * fac; + glx = al2 * hrr_0101x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0201x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_1101z; + gjlx -= 1 * hrr_0001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0110y * dd; + Iz = wt * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0110y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1110y; + giz = ai2 * trr_10z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0111y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * 
gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1111y; + gilz = ai2 * hrr_1001z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + double hrr_0210y = hrr_1110y - yjyi * hrr_0110y; + gjy = aj2 * hrr_0210y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_01y; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0111y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + double hrr_0211y = hrr_1111y - yjyi * hrr_0111y; + gjly = aj2 * hrr_0211y; + gjlz = aj2 * hrr_0101z; + gjly -= 1 * hrr_0011y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1110y * dd; + Iz = wt * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1110y * Iz; + gix = ai2 * trr_10x; + double hrr_2110y = trr_31y - yjyi * trr_21y; + giy = ai2 * hrr_2110y; + giz = ai2 * trr_10z; + giy -= 1 * hrr_0110y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1111y; + glz = al2 * hrr_0001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 
* hrr_1001x; + double trr_32y = cpy * trr_31y + 1*b01 * trr_30y + 3*b00 * trr_21y; + double hrr_3011y = trr_32y - ylyk * trr_31y; + double hrr_2111y = hrr_3011y - yjyi * hrr_2011y; + gily = ai2 * hrr_2111y; + gilz = ai2 * hrr_1001z; + gily -= 1 * hrr_0111y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + double hrr_1210y = hrr_2110y - yjyi * hrr_1110y; + gjy = aj2 * hrr_1210y; + gjz = aj2 * hrr_0100z; + gjy -= 1 * trr_11y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1111y; + glz = al2 * hrr_0001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + double hrr_1211y = hrr_2111y - yjyi * hrr_1111y; + gjly = aj2 * hrr_1211y; + gjlz = aj2 * hrr_0101z; + gjly -= 1 * hrr_1011y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0110y * dd; + Iz = trr_10z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0110y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1110y; + giz = ai2 * trr_20z; + giz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0111y; + glz = al2 * hrr_1001z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; 
+ v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1111y; + gilz = ai2 * hrr_2001z; + gilz -= 1 * hrr_0001z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0210y; + gjz = aj2 * hrr_1100z; + gjy -= 1 * trr_01y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0111y; + glz = al2 * hrr_1001z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0211y; + gjlz = aj2 * hrr_1101z; + gjly -= 1 * hrr_0011y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+0)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = trr_01y * dd; + Iz = hrr_0100z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_1100z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_1101z; 
+ gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_0101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_0201z; + gjlz -= 1 * hrr_0001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_11y * dd; + Iz = hrr_0100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_11y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_21y; + giz = ai2 * hrr_1100z; + giy -= 1 * trr_01y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2011y; + gilz = ai2 * hrr_1101z; + gily -= 1 * hrr_0011y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * 
hrr_0100x; + gjy = aj2 * hrr_1110y; + gjz = aj2 * hrr_0200z; + gjz -= 1 * wt; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1011y; + glz = al2 * hrr_0101z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1111y; + gjlz = aj2 * hrr_0201z; + gjlz -= 1 * hrr_0001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+1] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+1]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+1] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+1]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+1]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+1; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_01y * dd; + Iz = hrr_1100z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_01y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_11y; + giz = ai2 * hrr_2100z; + giz -= 1 * hrr_0100z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1101z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1011y; + gilz = ai2 * hrr_2101z; + gilz -= 1 * hrr_0101z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0110y; + gjz = aj2 * hrr_1200z; + gjz -= 1 * trr_10z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0011y; + glz = al2 * hrr_1101z; + v_jxly += gjx 
* gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0111y; + gjlz = aj2 * hrr_1201z; + gjlz -= 1 * hrr_1001z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_1100x * dd; + Iy = 1 * dd; + Iz = trr_01z * dd; + prod_xy = hrr_1100x * Iy; + prod_xz = hrr_1100x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_2100x; + giy = ai2 * trr_10y; + giz = ai2 * trr_11z; + gix -= 1 * hrr_0100x; + glx = al2 * hrr_1101x; + gly = al2 * hrr_0001y; + double trr_02z = cpz * trr_01z + 1*b01 * wt; + double hrr_0011z = trr_02z - zlzk * trr_01z; + glz = al2 * hrr_0011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2101x; + gily = ai2 * hrr_1001y; + double trr_12z = cpz * trr_11z + 1*b01 * trr_10z + 1*b00 * trr_01z; + double hrr_1011z = trr_12z - zlzk * trr_11z; + gilz = ai2 * hrr_1011z; + gilx -= 1 * hrr_0101x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1200x; + gjy = aj2 * hrr_0100y; + double hrr_0110z = trr_11z - zjzi * trr_01z; + gjz = aj2 * hrr_0110z; + gjx -= 1 * 
trr_10x; + glx = al2 * hrr_1101x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1201x; + gjly = aj2 * hrr_0101y; + double hrr_0111z = hrr_1011z - zjzi * hrr_0011z; + gjlz = aj2 * hrr_0111z; + gjlx -= 1 * hrr_1001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = trr_10y * dd; + Iz = trr_01z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_20y; + giz = ai2 * trr_11z; + giy -= 1 * 1; + glx = al2 * hrr_0101x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1101x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1011z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0110z; + gjx -= 1 * fac; + glx = al2 * hrr_0101x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0011z; + v_jxly += gjx * gly * Iz; + 
v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0201x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0111z; + gjlx -= 1 * hrr_0001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+0)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+0)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+0)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+0)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+0)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+0)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = hrr_0100x * dd; + Iy = 1 * dd; + Iz = trr_11z * dd; + prod_xy = hrr_0100x * Iy; + prod_xz = hrr_0100x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * hrr_1100x; + giy = ai2 * trr_10y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + glx = al2 * hrr_0101x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1101x; + gily = ai2 * hrr_1001y; + double trr_22z = cpz * trr_21z + 1*b01 * trr_20z + 2*b00 * trr_11z; + double hrr_2011z = trr_22z - zlzk * trr_21z; + gilz = ai2 * hrr_2011z; + gilz -= 1 * hrr_0011z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0200x; + gjy = aj2 * hrr_0100y; + double hrr_1110z = trr_21z - zjzi * trr_11z; + gjz = aj2 * hrr_1110z; + gjx -= 1 * fac; + glx = al2 * hrr_0101x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1011z; + v_jxly += gjx * gly * Iz; + v_jxlz 
+= gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0201x; + gjly = aj2 * hrr_0101y; + double hrr_1111z = hrr_2011z - zjzi * hrr_1011z; + gjlz = aj2 * hrr_1111z; + gjlx -= 1 * hrr_0001x; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = hrr_0100y * dd; + Iz = trr_01z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_20x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_11z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_0011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1101y; + gilz = ai2 * hrr_1011z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_0110z; + gjy -= 1 * 1; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_0011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += 
gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0201y; + gjlz = aj2 * hrr_0111z; + gjly -= 1 * hrr_0001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_1100y * dd; + Iz = trr_01z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_1100y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_2100y; + giz = ai2 * trr_11z; + giy -= 1 * hrr_0100y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1101y; + glz = al2 * hrr_0011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2101y; + gilz = ai2 * hrr_1011z; + gily -= 1 * hrr_0101y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1200y; + gjz = aj2 * hrr_0110z; + gjy -= 1 * trr_10y; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1101y; + glz = al2 * hrr_0011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1201y; + gjlz = aj2 * hrr_0111z; + gjly -= 1 * hrr_1001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + 
v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+1)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+1)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+1)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+1)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+1)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+1)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = hrr_0100y * dd; + Iz = trr_11z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = hrr_0100y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * hrr_1100y; + giz = ai2 * trr_21z; + giz -= 1 * trr_01z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_1011z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1101y; + gilz = ai2 * hrr_2011z; + gilz -= 1 * hrr_0011z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0200y; + gjz = aj2 * hrr_1110z; + gjy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0101y; + glz = al2 * hrr_1011z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0201y; + gjlz = aj2 * hrr_1111z; + gjly -= 1 * hrr_0001y; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+0)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] 
* dm[(i0+0)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+0)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+0)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+0] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+0; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = trr_10x * dd; + Iy = 1 * dd; + Iz = hrr_0110z * dd; + prod_xy = trr_10x * Iy; + prod_xz = trr_10x * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_20x; + giy = ai2 * trr_10y; + giz = ai2 * hrr_1110z; + gix -= 1 * fac; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0111z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_2001x; + gily = ai2 * hrr_1001y; + gilz = ai2 * hrr_1111z; + gilx -= 1 * hrr_0001x; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_1100x; + gjy = aj2 * hrr_0100y; + double hrr_0210z = hrr_1110z - zjzi * hrr_0110z; + gjz = aj2 * hrr_0210z; + gjz -= 1 * trr_01z; + glx = al2 * hrr_1001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_0111z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_1101x; + gjly = aj2 * hrr_0101y; + double hrr_0211z = hrr_1111z - zjzi * hrr_0111z; + gjlz = aj2 * hrr_0211z; + gjlz -= 1 * hrr_0011z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+1)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+1)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += 
dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+1)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+1)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+1] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+1; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = trr_10y * dd; + Iz = hrr_0110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = trr_10y * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_20y; + giz = ai2 * hrr_1110z; + giy -= 1 * 1; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0111z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_2001y; + gilz = ai2 * hrr_1111z; + gily -= 1 * hrr_0001y; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_1100y; + gjz = aj2 * hrr_0210z; + gjz -= 1 * trr_01z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_1001y; + glz = al2 * hrr_0111z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_1101y; + gjlz = aj2 * hrr_0211z; + gjlz -= 1 * hrr_0011z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + if (do_k) { + dd = dm[(j0+2)*nao+k0+2] * dm[(i0+2)*nao+l0+0]; + dd += dm[(j0+2)*nao+l0+0] * dm[(i0+2)*nao+k0+2]; + if (jk.n_dm > 1) { + dd += dm[(nao+j0+2)*nao+k0+2] * dm[(nao+i0+2)*nao+l0+0]; + dd += dm[(nao+j0+2)*nao+l0+0] * dm[(nao+i0+2)*nao+k0+2]; + } + dd *= jk.k_factor; + } else { + dd = 0.; + } + if (do_j) { + 
if (jk.n_dm == 1) { + dd += jk.j_factor * dm[(j0+2)*nao+i0+2] * dm[(l0+0)*nao+k0+2]; + } else { + int ji = (j0+2)*nao+i0+2; + int lk = (l0+0)*nao+k0+2; + dd += jk.j_factor * (dm[ji] + dm[nao*nao+ji]) * (dm[lk] + dm[nao*nao+lk]); + } + } + Ix = fac * dd; + Iy = 1 * dd; + Iz = hrr_1110z * dd; + prod_xy = fac * Iy; + prod_xz = fac * Iz; + prod_yz = 1 * Iz; + gix = ai2 * trr_10x; + giy = ai2 * trr_10y; + double hrr_2110z = trr_31z - zjzi * trr_21z; + giz = ai2 * hrr_2110z; + giz -= 1 * hrr_0110z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1111z; + v_ixly += gix * gly * Iz; + v_ixlz += gix * glz * Iy; + v_iylx += giy * glx * Iz; + v_iylz += giy * glz * Ix; + v_izlx += giz * glx * Iy; + v_izly += giz * gly * Ix; + gilx = ai2 * hrr_1001x; + gily = ai2 * hrr_1001y; + double trr_32z = cpz * trr_31z + 1*b01 * trr_30z + 3*b00 * trr_21z; + double hrr_3011z = trr_32z - zlzk * trr_31z; + double hrr_2111z = hrr_3011z - zjzi * hrr_2011z; + gilz = ai2 * hrr_2111z; + gilz -= 1 * hrr_0111z; + gilx *= al2; + gily *= al2; + gilz *= al2; + v_ixlx += gilx * prod_yz; + v_iyly += gily * prod_xz; + v_izlz += gilz * prod_xy; + gjx = aj2 * hrr_0100x; + gjy = aj2 * hrr_0100y; + double hrr_1210z = hrr_2110z - zjzi * hrr_1110z; + gjz = aj2 * hrr_1210z; + gjz -= 1 * trr_11z; + glx = al2 * hrr_0001x; + gly = al2 * hrr_0001y; + glz = al2 * hrr_1111z; + v_jxly += gjx * gly * Iz; + v_jxlz += gjx * glz * Iy; + v_jylx += gjy * glx * Iz; + v_jylz += gjy * glz * Ix; + v_jzlx += gjz * glx * Iy; + v_jzly += gjz * gly * Ix; + gjlx = aj2 * hrr_0101x; + gjly = aj2 * hrr_0101y; + double hrr_1211z = hrr_2111z - zjzi * hrr_1111z; + gjlz = aj2 * hrr_1211z; + gjlz -= 1 * hrr_1011z; + gjlx *= al2; + gjly *= al2; + gjlz *= al2; + v_jxlx += gjlx * prod_yz; + v_jyly += gjly * prod_xz; + v_jzlz += gjlz * prod_xy; + } + } + } + } + } + if (task_id >= ntasks) { + continue; + } + int ia = bas[ish*BAS_SLOTS+ATOM_OF]; + int ja = bas[jsh*BAS_SLOTS+ATOM_OF]; + int ka = bas[ksh*BAS_SLOTS+ATOM_OF]; + 
int la = bas[lsh*BAS_SLOTS+ATOM_OF]; + int natm = envs.natm; + double *ejk = jk.ejk; + atomicAdd(ejk + (ia*natm+ka)*9 + 0, v_ixkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 1, v_ixky); + atomicAdd(ejk + (ia*natm+ka)*9 + 2, v_ixkz); + atomicAdd(ejk + (ia*natm+ka)*9 + 3, v_iykx); + atomicAdd(ejk + (ia*natm+ka)*9 + 4, v_iyky); + atomicAdd(ejk + (ia*natm+ka)*9 + 5, v_iykz); + atomicAdd(ejk + (ia*natm+ka)*9 + 6, v_izkx); + atomicAdd(ejk + (ia*natm+ka)*9 + 7, v_izky); + atomicAdd(ejk + (ia*natm+ka)*9 + 8, v_izkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 0, v_jxkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 1, v_jxky); + atomicAdd(ejk + (ja*natm+ka)*9 + 2, v_jxkz); + atomicAdd(ejk + (ja*natm+ka)*9 + 3, v_jykx); + atomicAdd(ejk + (ja*natm+ka)*9 + 4, v_jyky); + atomicAdd(ejk + (ja*natm+ka)*9 + 5, v_jykz); + atomicAdd(ejk + (ja*natm+ka)*9 + 6, v_jzkx); + atomicAdd(ejk + (ja*natm+ka)*9 + 7, v_jzky); + atomicAdd(ejk + (ja*natm+ka)*9 + 8, v_jzkz); + atomicAdd(ejk + (ia*natm+la)*9 + 0, v_ixlx); + atomicAdd(ejk + (ia*natm+la)*9 + 1, v_ixly); + atomicAdd(ejk + (ia*natm+la)*9 + 2, v_ixlz); + atomicAdd(ejk + (ia*natm+la)*9 + 3, v_iylx); + atomicAdd(ejk + (ia*natm+la)*9 + 4, v_iyly); + atomicAdd(ejk + (ia*natm+la)*9 + 5, v_iylz); + atomicAdd(ejk + (ia*natm+la)*9 + 6, v_izlx); + atomicAdd(ejk + (ia*natm+la)*9 + 7, v_izly); + atomicAdd(ejk + (ia*natm+la)*9 + 8, v_izlz); + atomicAdd(ejk + (ja*natm+la)*9 + 0, v_jxlx); + atomicAdd(ejk + (ja*natm+la)*9 + 1, v_jxly); + atomicAdd(ejk + (ja*natm+la)*9 + 2, v_jxlz); + atomicAdd(ejk + (ja*natm+la)*9 + 3, v_jylx); + atomicAdd(ejk + (ja*natm+la)*9 + 4, v_jyly); + atomicAdd(ejk + (ja*natm+la)*9 + 5, v_jylz); + atomicAdd(ejk + (ja*natm+la)*9 + 6, v_jzlx); + atomicAdd(ejk + (ja*natm+la)*9 + 7, v_jzly); + atomicAdd(ejk + (ja*natm+la)*9 + 8, v_jzlz); + } +}
+/*
+ * Kernel entry point for the (li,lj,lk,ll) = (1,1,1,0) specialisation.
+ *
+ * Every thread block repeatedly claims a batch index from the global counter
+ * batch_head[0], fills its private slice of `pool` with the shell quartets of
+ * that batch and hands them to _rys_ejk_ip2_type3_1110.  The loop ends once
+ * the claimed index reaches the total number of (ij, kl) batches.
+ * The number of quartets processed is accumulated into batch_head[1].
+ */
+__global__
+void rys_ejk_ip2_type3_1110(RysIntEnvVars envs, JKEnergy jk, BoundsInfo bounds,
+                            ShellQuartet *pool, uint32_t *batch_head)
+{
+    int b_id = blockIdx.x;
+    int t_id = threadIdx.x + blockDim.x * threadIdx.y;
+    // Each block owns QUEUE_DEPTH consecutive entries of the quartet pool.
+    ShellQuartet *shl_quartet_idx = pool + b_id * QUEUE_DEPTH;
+    // Only thread 0 touches the global counter; the claimed batch index is
+    // published to the rest of the block through shared memory.
+    __shared__ int batch_id;
+    if (t_id == 0) {
+        batch_id = atomicAdd(batch_head, 1);
+    }
+    __syncthreads();
+    // kl tile pairs are grouped TILES_IN_BATCH at a time (rounded up).
+    int nbatches_kl = (bounds.ntile_kl_pairs + TILES_IN_BATCH - 1) / TILES_IN_BATCH;
+    int nbatches = bounds.ntile_ij_pairs * nbatches_kl;
+    while (batch_id < nbatches) {
+        // Split the flat batch index into its ij and kl components.
+        int batch_ij = batch_id / nbatches_kl;
+        int batch_kl = batch_id % nbatches_kl;
+        int nbas = envs.nbas;
+        double *env = envs.env;
+        double omega = env[PTR_RANGE_OMEGA];
+        int ntasks;
+        // A negative omega selects the "sr" task builder.
+        // NOTE(review): presumably the short-range attenuated Coulomb case - confirm.
+        if (omega >= 0) {
+            ntasks = _fill_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                     batch_ij, batch_kl);
+        } else {
+            ntasks = _fill_sr_ejk_tasks(shl_quartet_idx, envs, jk, bounds,
+                                        batch_ij, batch_kl);
+        }
+        if (ntasks > 0) {
+            // Decode the ij tile pair into the first shell index of each tile.
+            int tile_ij = bounds.tile_ij_mapping[batch_ij];
+            int nbas_tiles = nbas / TILE;
+            int tile_i = tile_ij / nbas_tiles;
+            int tile_j = tile_ij % nbas_tiles;
+            int ish0 = tile_i * TILE;
+            int jsh0 = tile_j * TILE;
+            _rys_ejk_ip2_type3_1110(envs, jk, bounds, shl_quartet_idx, ntasks, ish0, jsh0);
+        }
+        if (t_id == 0) {
+            // Claim the next batch and record how many quartets this one held.
+            batch_id = atomicAdd(batch_head, 1);
+            atomicAdd(batch_head+1, ntasks);
+        }
+        __syncthreads();
+    }
+}
+
+/*
+ * Host-side dispatcher for the hand-unrolled rys_ejk_ip2_type3 kernels.
+ *
+ * The angular momenta (li, lj, lk, ll) read from `bounds` are packed into the
+ * base-5 key li*125 + lj*25 + lk*5 + ll, and the kernel specialised for that
+ * pattern is launched with
+ *   - `workers` thread blocks,
+ *   - scheme[0] * scheme[1] threads per block,
+ *   - buflen doubles of dynamic shared memory: 2*nroots per thread plus
+ *     4*TILE2 per primitive pair (iprim * jprim).
+ *     NOTE(review): buflen is assumed to count doubles, hence the
+ *     sizeof(double) factor - confirm against the kernels' shared-memory use.
+ *
+ * Returns 1 when a specialised kernel was launched, 0 when no unrolled kernel
+ * exists for this pattern (nothing is launched in that case).
+ * The launch status is not checked here.
+ */
+int rys_ejk_ip2_type3_unrolled(RysIntEnvVars *envs, JKEnergy *jk, BoundsInfo *bounds,
+                    ShellQuartet *pool, uint32_t *batch_head, int *scheme, int workers)
+{
+    int li = bounds->li;
+    int lj = bounds->lj;
+    int lk = bounds->lk;
+    int ll = bounds->ll;
+    int threads = scheme[0] * scheme[1];
+    int nroots = bounds->nroots;
+    int iprim = bounds->iprim;
+    int jprim = bounds->jprim;
+    int ij_prims = iprim * jprim;
+    int buflen = nroots*2 * threads + ij_prims*TILE2*4;
+    int ijkl = li*125 + lj*25 + lk*5 + ll;
+    // Every specialisation uses the same execution configuration.
+    switch (ijkl) {
+    case 0: rys_ejk_ip2_type3_0000<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 125: rys_ejk_ip2_type3_1000<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 130: rys_ejk_ip2_type3_1010<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 131: rys_ejk_ip2_type3_1011<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 150: rys_ejk_ip2_type3_1100<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    case 155: rys_ejk_ip2_type3_1110<<<workers, threads, buflen*sizeof(double)>>>(*envs, *jk, *bounds, pool, batch_head); break;
+    default: return 0;
+    }
+    return 1;
+}
diff --git a/gpu4pyscf/lib/gvhf-rys/vhf.cuh b/gpu4pyscf/lib/gvhf-rys/vhf.cuh index 13ca09e6..00b86ef6 100644 --- a/gpu4pyscf/lib/gvhf-rys/vhf.cuh +++ b/gpu4pyscf/lib/gvhf-rys/vhf.cuh @@ -64,6 +64,14 @@ typedef struct { uint16_t atom_offset; } JKMatrix; +typedef struct { + double *ejk; + double *dm; + double j_factor; + double k_factor; + uint16_t n_dm; +} JKEnergy; + typedef struct { uint8_t li; uint8_t lj; diff --git a/gpu4pyscf/properties/ir.py b/gpu4pyscf/properties/ir.py index 9f813df9..33a8fd3a 100644 --- a/gpu4pyscf/properties/ir.py +++ b/gpu4pyscf/properties/ir.py @@ -21,6 +21,7 @@ from scipy.constants import physical_constants from gpu4pyscf.lib import logger from gpu4pyscf.lib.cupy_helper import contract +from gpu4pyscf.scf.hf import RHF LINDEP_THRESHOLD = 1e-7 @@ -36,6 +37,7 @@ def eval_ir_freq_intensity(mf, hessian_obj): polarizability (numpy.array): polarizability """ log = logger.new_logger(hessian_obj, mf.mol.verbose) + assert isinstance(mf, RHF) hessian = hessian_obj.kernel() hartree_kj = nist.HARTREE2J*1e3 unit2cm = ((hartree_kj * nist.AVOGADRO)**.5 / (nist.BOHR*1e-10) @@ -93,7 +95,8 @@ def eval_ir_freq_intensity(mf, hessian_obj): # ! Different from PySCF, mo1 is all in mo!
mo1, mo_e1 = hessian_obj.solve_mo1(mo_energy, mo_coeff, mo_occ, h1ao, None, atmlst, hessian_obj.max_memory, log) - + mo1 = cupy.asarray(mo1) + mo_e1 = cupy.asarray(mo_e1) tmp = cupy.empty((3, 3, natm)) # dipole moment, x,y,z aoslices = mf.mol.aoslice_by_atom() diff --git a/gpu4pyscf/scf/cphf.py b/gpu4pyscf/scf/cphf.py index 3807b732..d746f5ce 100644 --- a/gpu4pyscf/scf/cphf.py +++ b/gpu4pyscf/scf/cphf.py @@ -53,7 +53,7 @@ def solve_nos1(fvind, mo_energy, mo_occ, h1, level_shift=0): '''For field independent basis. First order overlap matrix is zero''' log = logger.new_logger(verbose=verbose) - t0 = (logger.process_clock(), logger.perf_counter()) + t0 = log.init_timer() e_a = mo_energy[mo_occ==0] e_i = mo_energy[mo_occ>0] @@ -88,7 +88,7 @@ def solve_withs1(fvind, mo_energy, mo_occ, h1, s1, energy matrix ''' log = logger.new_logger(verbose=verbose) - t0 = (logger.process_clock(), logger.perf_counter()) + t0 = log.init_timer() occidx = mo_occ > 0 viridx = mo_occ == 0 diff --git a/gpu4pyscf/scf/jk.py b/gpu4pyscf/scf/jk.py index 939ba956..bc0a4996 100644 --- a/gpu4pyscf/scf/jk.py +++ b/gpu4pyscf/scf/jk.py @@ -715,11 +715,9 @@ def quartets_scheme(mol, l_ctr_pattern, shm_size=SHM_SIZE): nps = l_ctr_pattern[:,1] ij_prims = nps[0] * nps[1] nroots = order // 2 + 1 - - if mol.omega >= 0: - unit = nroots*2 + g_size*3 + ij_prims*4 - else: # SR - unit = nroots*4 + g_size*3 + ij_prims*4 + unit = nroots*2 + g_size*3 + ij_prims*4 + if mol.omega < 0: # SR + unit += nroots * 2 counts = shm_size // (unit*8) n = min(THREADS, _nearest_power2(counts)) gout_stride = THREADS // n diff --git a/gpu4pyscf/solvent/pcm.py b/gpu4pyscf/solvent/pcm.py index c3345470..ef8c0749 100644 --- a/gpu4pyscf/solvent/pcm.py +++ b/gpu4pyscf/solvent/pcm.py @@ -364,6 +364,8 @@ def _get_vind(self, dms): dms = dms.reshape(-1,nao,nao) if dms.shape[0] == 2: dms = (dms[0] + dms[1]).reshape(-1,nao,nao) + if not isinstance(dms, cupy.ndarray): + dms = cupy.asarray(dms) K = self._intermediates['K'] R = 
self._intermediates['R'] v_grids_e = self._get_v(dms)