From cd29ecc466f49d50e33e8e79e936e5978a67b476 Mon Sep 17 00:00:00 2001 From: "Jean M. Sexton" Date: Wed, 4 Sep 2024 16:06:48 -0700 Subject: [PATCH] Make push_back option for cpu-only build --- .../DarkMatterParticleContainer.cpp | 123 ++++++++++++++---- 1 file changed, 99 insertions(+), 24 deletions(-) diff --git a/Exec/ParticleFilterTest/DarkMatterParticleContainer.cpp b/Exec/ParticleFilterTest/DarkMatterParticleContainer.cpp index 4e655eb8..f920e7f8 100644 --- a/Exec/ParticleFilterTest/DarkMatterParticleContainer.cpp +++ b/Exec/ParticleFilterTest/DarkMatterParticleContainer.cpp @@ -1,6 +1,5 @@ #include -#include #include #ifdef AMREX_USE_HDF5 #include @@ -8,6 +7,8 @@ using namespace amrex; #include + + /// These are helper functions used when initializing from a morton-ordered /// binary particle file. namespace { @@ -91,7 +92,7 @@ struct ShellFilter const Box& domain) : m_plo(plo), m_phi(phi), m_center(center), m_radius_inner(radius_inner), m_radius_outer(radius_outer), m_z(z), m_t(t), m_dt(dt), m_domain(domain) {} - +#ifdef AMREX_USE_GPU //c_light is the constant for speed of light [cm/s] template AMREX_GPU_HOST_DEVICE @@ -129,6 +130,45 @@ struct ShellFilter } }; +#else + //c_light is the constant for speed of light [cm/s] + template + AMREX_GPU_HOST_DEVICE + bool operator() (const SrcData& src, int i) const noexcept + { + bool result=false; + if(m_radius_inner<=0 || m_radius_outer<=0) + return false; + if(src.m_aos[i].id()>0) { + + Real xlen, ylen, zlen; + + Real lenx = m_phi[0]-m_plo[0]; + Real leny = m_phi[1]-m_plo[1]; + Real lenz = m_phi[2]-m_plo[2]; + int maxind[3]; + maxind[0] = floor((m_radius_outer+lenx*0.5)/lenx); + maxind[1] = floor((m_radius_outer+leny*0.5)/leny); + maxind[2] = floor((m_radius_outer+lenz*0.5)/lenz); + + //printf("Value is %d\n", maxind); + + for(int idir=-maxind[0];idir<=maxind[0];idir++) + for(int jdir=-maxind[1];jdir<=maxind[1];jdir++) + for(int kdir=-maxind[2];kdir<=maxind[2];kdir++) + { + xlen = src.m_aos[i].rdata(0+1+3)+(idir)*(m_phi[0]-m_plo[0]) - m_center[0]; + ylen = src.m_aos[i].rdata(1+1+3)+(jdir)*(m_phi[1]-m_plo[1]) - m_center[1]; + zlen = src.m_aos[i].rdata(2+1+3)+(kdir)*(m_phi[2]-m_plo[2]) - m_center[2]; + Real mag = sqrt(xlen*xlen+ylen*ylen+zlen*zlen); + result=result? true : (mag>m_radius_inner && mag m_plo, m_phi, m_center; @@ -198,6 +238,7 @@ struct ShellStoreFilter } } }; + // the steps would be count number of output particles // prefix sum the counts // resize the destination tile @@ -334,8 +375,7 @@ DarkMatterParticleContainer::moveKickDrift (amrex::MultiFab& acceleration, ShellPC->resizeData(); ParticleContainer<10,0>::ParticleInitData pdata = {{}, {}, {}, {}}; ShellPC->InitNRandomPerCell(1,pdata); - - // ShellPC->resize(pc->TotalNumberOfParticles()); + // ShellPC->resize(pc.TotalNumberOfParticles()); auto geom_test=pc->Geom(lev); const GpuArray phi=geom_test.ProbHiArray(); const GpuArray center({AMREX_D_DECL((phi[0]-plo[0])*0.5,(phi[1]-plo[1])*0.5,(phi[2]-plo[2])*0.5)}); @@ -349,7 +389,6 @@ DarkMatterParticleContainer::moveKickDrift (amrex::MultiFab& acceleration, amrex::Real dt_a_cur_inv = dt * a_cur_inv; ShellFilter shell_filter_test(plo, phi, center, radius_inner, radius_outer, z, t, dt, domain); ShellStoreFilter shell_store_filter_test(plo, phi, center, radius_inner, radius_outer, z, t, dt, domain, dt_a_cur_inv); - #ifdef _OPENMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -387,7 +426,7 @@ DarkMatterParticleContainer::moveKickDrift (amrex::MultiFab& acceleration, ParticleTile ptile_tmp; Gpu::Device::streamSynchronize(); - }*/ + }*/ #ifdef _OPENMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -400,7 +439,7 @@ DarkMatterParticleContainer::moveKickDrift (amrex::MultiFab& acceleration, auto& particles2 = (ShellPC->ParticlesAt(lev,pti)).GetArrayOfStructs(); auto* pstruct2 = particles2().data(); - +#ifdef AMREX_USE_GPU const long np = pti.numParticles(); int grid = pti.index(); ParticleContainer<10,0>::ParticleTileType ptile_tmp; @@ -409,6 +448,38 @@ DarkMatterParticleContainer::moveKickDrift (amrex::MultiFab& acceleration, ptile.swap(ptile_tmp); ptile.resize(num_output); } +#else + auto ptile_tmp = ptile; + // ptile_tmp.resize((ShellPC->ParticlesAt(lev,pti)).size()); + ptile_tmp.resize((this->ParticlesAt(lev,pti)).size()); + + const long np = pti.numParticles(); + int grid = pti.index(); + + const FArrayBox& accel_fab= ((*ac_ptr)[0]); + Array4 accel= accel_fab.array(); + + int nc=AMREX_SPACEDIM; + int num_output=0; + amrex::ParallelFor(np, + [=] AMREX_GPU_HOST_DEVICE ( long i) + { + store_dm_particle_single(pstruct[i],pstruct2[i],nc, + accel,plo,phi,dxi,dt,a_old, + a_half,do_move, radius_inner, radius_outer); + }); + for(int i=0;i::SuperParticleType p = pstruct2[i]; + if(shell_filter_test(ptile.getConstParticleTileData(),i)) { + ptile_tmp.push_back(pstruct2[i]); + num_output++; + } + } + // auto num_output = amrex::filterParticles(ptile_tmp, ptile, shell_filter_test); + ptile.swap(ptile_tmp); + ptile.resize(num_output); +} +#endif #ifdef _OPENMP #pragma omp parallel if (Gpu::notInLaunchRegion()) @@ -645,17 +716,17 @@ void store_dm_particle_single (amrex::ParticleContainer<1+AMREX_SPACEDIM, 0>::Su if (do_move == 1) { p2.rdata(0)=p.rdata(0); - bool result=false; + bool result=false; Real lenx = phi[0]-plo[0]; - Real leny = phi[1]-plo[1]; - Real lenz = phi[2]-plo[2]; - int maxind[3]; - maxind[0] = floor((radius_outer+lenx*0.5)/lenx); - maxind[1] = floor((radius_outer+leny*0.5)/leny); - maxind[2] = floor((radius_outer+lenz*0.5)/lenz); + Real leny = phi[1]-plo[1]; + Real lenz = phi[2]-plo[2]; + int maxind[3]; + maxind[0] = floor((radius_outer+lenx*0.5)/lenx); + maxind[1] = floor((radius_outer+leny*0.5)/leny); + maxind[2] = floor((radius_outer+lenz*0.5)/lenz); - Real xlen, ylen, zlen; - //printf("Value is %d\n", maxind); + Real xlen, ylen, zlen; + //printf("Value is %d\n", maxind); for(int idir=-maxind[0];idir<=maxind[0];idir++) for(int jdir=-maxind[1];jdir<=maxind[1];jdir++) @@ -669,19 +740,23 @@ void store_dm_particle_single (amrex::ParticleContainer<1+AMREX_SPACEDIM, 0>::Su if((mag>radius_inner && mag