From 628a3686e6c719c752245056b4938b616796873a Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Sat, 2 Mar 2024 14:07:21 -0800
Subject: [PATCH 01/25] add pml parameters for ref patch : abcinpml,
 cubicsigma, dampingstrength

---
 Source/BoundaryConditions/PML.H               |  22 ++-
 Source/BoundaryConditions/PML.cpp             |  86 +++++++----
 Source/BoundaryConditions/WarpXEvolvePML.cpp  |  14 +-
 Source/BoundaryConditions/WarpX_PML_kernels.H | 146 ++++++++++++++++--
 Source/Initialization/WarpXInitData.cpp       |   2 +
 Source/WarpX.H                                |   3 +
 Source/WarpX.cpp                              |   4 +
 7 files changed, 220 insertions(+), 57 deletions(-)

diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H
index 5f41ecbd706..2cba95f779b 100644
--- a/Source/BoundaryConditions/PML.H
+++ b/Source/BoundaryConditions/PML.H
@@ -44,15 +44,18 @@ struct SigmaBox
 {
     SigmaBox (const amrex::Box& box, const amrex::BoxArray& grids,
               const amrex::Real* dx, const amrex::IntVect& ncell, const amrex::IntVect& delta,
-              const amrex::Box& regdomain, amrex::Real v_sigma);
+              const amrex::Box& regdomain, amrex::Real v_sigma,
+              const int do_cubic_sigma_pml, const amrex::Real pml_damping_strength);
 
     void define_single (const amrex::Box& regdomain, const amrex::IntVect& ncell,
                         const amrex::Array<amrex::Real,AMREX_SPACEDIM>& fac,
-                        amrex::Real v_sigma);
+                        amrex::Real v_sigma,
+                        const int do_cubic_sigma_pml);
     void define_multiple (const amrex::Box& box, const amrex::BoxArray& grids,
                           const amrex::IntVect& ncell,
                           const amrex::Array<amrex::Real,AMREX_SPACEDIM>& fac,
-                          amrex::Real v_sigma);
+                          amrex::Real v_sigma,
+                          const int do_cubic_sigma_pml);
 
     void ComputePMLFactorsB (const amrex::Real* dx, amrex::Real dt);
     void ComputePMLFactorsE (const amrex::Real* dx, amrex::Real dt);
@@ -79,8 +82,9 @@ class SigmaBoxFactory
 public:
     SigmaBoxFactory (const amrex::BoxArray& grid_ba, const amrex::Real* dx,
                      const amrex::IntVect& ncell, const amrex::IntVect& delta,
-                     const amrex::Box& regular_domain, const amrex::Real v_sigma_sb)
-        : m_grids(grid_ba), m_dx(dx), m_ncell(ncell), m_delta(delta), m_regdomain(regular_domain), m_v_sigma_sb(v_sigma_sb) {}
+                     const amrex::Box& regular_domain, const amrex::Real v_sigma_sb,
+                     const int do_cubic_sigma_pml, const amrex::Real pml_damping_strength)
+        : m_grids(grid_ba), m_dx(dx), m_ncell(ncell), m_delta(delta), m_regdomain(regular_domain), m_v_sigma_sb(v_sigma_sb), m_do_cubic_sigma_pml(do_cubic_sigma_pml), m_pml_damping_strength(pml_damping_strength) {}
     ~SigmaBoxFactory () override = default;
 
     SigmaBoxFactory (const SigmaBoxFactory&) = default;
@@ -93,7 +97,7 @@ public:
     [[nodiscard]] SigmaBox* create (const amrex::Box& box, int /*ncomps*/,
         const amrex::FabInfo& /*info*/, int /*box_index*/) const final
     {
-        return new SigmaBox(box, m_grids, m_dx, m_ncell, m_delta, m_regdomain, m_v_sigma_sb);
+        return new SigmaBox(box, m_grids, m_dx, m_ncell, m_delta, m_regdomain, m_v_sigma_sb, m_do_cubic_sigma_pml, m_pml_damping_strength);
     }
 
     void destroy (SigmaBox* fab) const final
@@ -114,6 +118,8 @@ private:
     amrex::IntVect m_delta;
     amrex::Box m_regdomain;
     amrex::Real m_v_sigma_sb;
+    int m_do_cubic_sigma_pml;
+    amrex::Real m_pml_damping_strength;
 };
 
 class MultiSigmaBox
@@ -123,7 +129,8 @@ public:
     MultiSigmaBox(const amrex::BoxArray& ba, const amrex::DistributionMapping& dm,
                   const amrex::BoxArray& grid_ba, const amrex::Real* dx,
                   const amrex::IntVect& ncell, const amrex::IntVect& delta,
-                  const amrex::Box& regular_domain, amrex::Real v_sigma_sb);
+                  const amrex::Box& regular_domain, amrex::Real v_sigma_sb,
+                  const int do_cubic_sigma_pml, const amrex::Real pml_damping_strength);
     void ComputePMLFactorsB (const amrex::Real* dx, amrex::Real dt);
     void ComputePMLFactorsE (const amrex::Real* dx, amrex::Real dt);
 private:
@@ -147,6 +154,7 @@ public:
          const amrex::IntVect& fill_guards_fields,
          const amrex::IntVect& fill_guards_current,
          int max_guard_EB, amrex::Real v_sigma_sb,
+         const int do_cubic_sigma_pml, const amrex::Real pml_damping_strength,
          amrex::IntVect do_pml_Lo = amrex::IntVect::TheUnitVector(),
          amrex::IntVect do_pml_Hi = amrex::IntVect::TheUnitVector());
 
diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp
index 805b4fec181..611f67485dc 100644
--- a/Source/BoundaryConditions/PML.cpp
+++ b/Source/BoundaryConditions/PML.cpp
@@ -61,11 +61,11 @@ namespace
     void FillLo (Sigma& sigma, Sigma& sigma_cumsum,
                         Sigma& sigma_star, Sigma& sigma_star_cumsum,
                         const int olo, const int ohi, const int glo, Real fac,
-                        const amrex::Real v_sigma)
+                        const amrex::Real v_sigma, const int do_cubic_sigma_pml)
     {
         const int slo = sigma.m_lo;
         const int sslo = sigma_star.m_lo;
-
+        int sigma_exp = (do_cubic_sigma_pml==1) ? 3 : 2;
         const int N = ohi+1-olo+1;
         Real* p_sigma = sigma.data();
         Real* p_sigma_cumsum = sigma_cumsum.data();
@@ -75,14 +75,22 @@ namespace
         {
             i += olo;
             Real offset = static_cast<Real>(glo-i);
-            p_sigma[i-slo] = fac*(offset*offset);
+            amrex::Real offset_exp = 1._rt;
+            for (int iexp = 0; iexp < sigma_exp; ++iexp){
+                offset_exp *= offset;
+            }
+            p_sigma[i-slo] = fac*(offset_exp);
             // sigma_cumsum is the analytical integral of sigma function at same points than sigma
-            p_sigma_cumsum[i-slo] = (fac*(offset*offset*offset)/3._rt)/v_sigma;
+            p_sigma_cumsum[i-slo] = (fac*(offset*offset_exp)/(sigma_exp+1._rt))/v_sigma;
             if (i <= ohi+1) {
                 offset = static_cast<Real>(glo-i) - 0.5_rt;
-                p_sigma_star[i-sslo] = fac*(offset*offset);
+                offset_exp = 1._rt;
+                for (int iexp = 0; iexp < sigma_exp; ++iexp){
+                    offset_exp *= offset;
+                }
+                p_sigma_star[i-sslo] = fac*(offset_exp);
                 // sigma_star_cumsum is the analytical integral of sigma function at same points than sigma_star
-                p_sigma_star_cumsum[i-sslo] = (fac*(offset*offset*offset)/3._rt)/v_sigma;
+                p_sigma_star_cumsum[i-sslo] = (fac*(offset*offset_exp)/(sigma_exp + 1._rt))/v_sigma;
             }
         });
     }
@@ -90,10 +98,11 @@ namespace
     void FillHi (Sigma& sigma, Sigma& sigma_cumsum,
                         Sigma& sigma_star, Sigma& sigma_star_cumsum,
                         const int olo, const int ohi, const int ghi, Real fac,
-                        const amrex::Real v_sigma)
+                        const amrex::Real v_sigma, const int do_cubic_sigma_pml)
     {
         const int slo = sigma.m_lo;
         const int sslo = sigma_star.m_lo;
+        int sigma_exp = (do_cubic_sigma_pml==1) ? 3 : 2;
 
         const int N = ohi+1-olo+1;
         Real* p_sigma = sigma.data();
@@ -104,12 +113,20 @@ namespace
         {
             i += olo;
             Real offset = static_cast<Real>(i-ghi-1);
-            p_sigma[i-slo] = fac*(offset*offset);
-            p_sigma_cumsum[i-slo] = (fac*(offset*offset*offset)/3._rt)/v_sigma;
+            amrex::Real offset_exp = 1._rt;
+            for (int iexp = 0; iexp < sigma_exp; ++iexp){
+                offset_exp *= offset;
+            }
+            p_sigma[i-slo] = fac*(offset_exp);
+            p_sigma_cumsum[i-slo] = (fac*(offset*offset_exp)/(sigma_exp+1._rt))/v_sigma;
             if (i <= ohi+1) {
                 offset = static_cast<Real>(i-ghi) - 0.5_rt;
-                p_sigma_star[i-sslo] = fac*(offset*offset);
-                p_sigma_star_cumsum[i-sslo] = (fac*(offset*offset*offset)/3._rt)/v_sigma;
+                offset_exp = 1._rt;
+                for (int iexp = 0; iexp < sigma_exp; ++iexp){
+                    offset_exp *= offset;
+                }
+                p_sigma_star[i-sslo] = fac*(offset_exp);
+                p_sigma_star_cumsum[i-sslo] = (fac*(offset*offset_exp)/(sigma_exp + 1._rt))/v_sigma;
             }
         });
     }
@@ -143,7 +160,8 @@ namespace
 
 
 SigmaBox::SigmaBox (const Box& box, const BoxArray& grids, const Real* dx, const IntVect& ncell,
-                    const IntVect& delta, const amrex::Box& regdomain, const amrex::Real v_sigma_sb)
+                    const IntVect& delta, const amrex::Box& regdomain, const amrex::Real v_sigma_sb,
+                    const int do_cubic_sigma_pml, const amrex::Real pml_damping_strength)
 {
     BL_ASSERT(box.cellCentered());
 
@@ -179,22 +197,27 @@ SigmaBox::SigmaBox (const Box& box, const BoxArray& grids, const Real* dx, const
         sigma_star_cumsum_fac[idim].m_lo = lo[idim];
         sigma_star_cumsum_fac[idim].m_hi = hi[idim]+1;
     }
-
+    int sigma_exp = (do_cubic_sigma_pml==1) ? 3 : 2;
     Array<Real,AMREX_SPACEDIM> fac;
     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-        fac[idim] = 4.0_rt*PhysConst::c/(dx[idim]*static_cast<Real>(delta[idim]*delta[idim]));
+        amrex::Real delta_exp = 1._rt;
+        for (int iexp = 0; iexp < sigma_exp; ++iexp) {
+            delta_exp *= static_cast<amrex::Real>(delta[idim]);
+        }
+        fac[idim] = pml_damping_strength*PhysConst::c/(dx[idim]*delta_exp);
     }
 
     if (regdomain.ok()) { // The union of the regular grids is a single box
-        define_single(regdomain, ncell, fac, v_sigma_sb);
+        define_single(regdomain, ncell, fac, v_sigma_sb, do_cubic_sigma_pml);
     } else {
-        define_multiple(box, grids, ncell, fac, v_sigma_sb);
+        define_multiple(box, grids, ncell, fac, v_sigma_sb, do_cubic_sigma_pml);
     }
 }
 
 void SigmaBox::define_single (const Box& regdomain, const IntVect& ncell,
                               const Array<Real,AMREX_SPACEDIM>& fac,
-                              const amrex::Real v_sigma_sb)
+                              const amrex::Real v_sigma_sb,
+                              const int do_cubic_sigma_pml)
 {
     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
         const int slo = sigma[idim].lo();
@@ -208,7 +231,8 @@ void SigmaBox::define_single (const Box& regdomain, const IntVect& ncell,
         if (ohi >= olo) {
             FillLo(sigma[idim], sigma_cumsum[idim],
                    sigma_star[idim], sigma_star_cumsum[idim],
-                   olo, ohi, dlo, fac[idim], v_sigma_sb);
+                   olo, ohi, dlo, fac[idim], v_sigma_sb,
+                   do_cubic_sigma_pml);
         }
 
 #if (AMREX_SPACEDIM != 1)
@@ -228,7 +252,7 @@ void SigmaBox::define_single (const Box& regdomain, const IntVect& ncell,
         if (ohi >= olo) {
             FillHi(sigma[idim], sigma_cumsum[idim],
                    sigma_star[idim], sigma_star_cumsum[idim],
-                   olo, ohi, dhi, fac[idim], v_sigma_sb);
+                   olo, ohi, dhi, fac[idim], v_sigma_sb, do_cubic_sigma_pml);
         }
     }
 
@@ -236,7 +260,8 @@ void SigmaBox::define_single (const Box& regdomain, const IntVect& ncell,
 }
 
 void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const IntVect& ncell,
-                                const Array<Real,AMREX_SPACEDIM>& fac, const amrex::Real v_sigma_sb)
+                                const Array<Real,AMREX_SPACEDIM>& fac, const amrex::Real v_sigma_sb,
+                                const int do_cubic_sigma_pml)
 {
     const std::vector<std::pair<int,Box> >& isects = grids.intersections(box, false, ncell);
 
@@ -306,7 +331,7 @@ void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const Int
                 FillLo(sigma[idim], sigma_cumsum[idim],
                        sigma_star[idim], sigma_star_cumsum[idim],
                        looverlap.smallEnd(idim), looverlap.bigEnd(idim),
-                       grid_box.smallEnd(idim), fac[idim], v_sigma_sb);
+                       grid_box.smallEnd(idim), fac[idim], v_sigma_sb, do_cubic_sigma_pml);
             }
 
             Box hibox = amrex::adjCellHi(grid_box, idim, ncell[idim]);
@@ -319,7 +344,7 @@ void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const Int
                 FillHi(sigma[idim], sigma_cumsum[idim],
                        sigma_star[idim],  sigma_star_cumsum[idim],
                        hioverlap.smallEnd(idim), hioverlap.bigEnd(idim),
-                       grid_box.bigEnd(idim), fac[idim], v_sigma_sb);
+                       grid_box.bigEnd(idim), fac[idim], v_sigma_sb, do_cubic_sigma_pml);
             }
 
             WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
@@ -355,7 +380,7 @@ void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const Int
                 FillLo(sigma[idim], sigma_cumsum[idim],
                        sigma_star[idim],  sigma_star_cumsum[idim],
                        looverlap.smallEnd(idim), looverlap.bigEnd(idim),
-                       grid_box.smallEnd(idim), fac[idim], v_sigma_sb);
+                       grid_box.smallEnd(idim), fac[idim], v_sigma_sb, do_cubic_sigma_pml);
             }
 
             Box hibox = amrex::adjCellHi(grid_box, idim, ncell[idim]);
@@ -364,7 +389,7 @@ void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const Int
                 FillHi(sigma[idim], sigma_cumsum[idim],
                        sigma_star[idim], sigma_star_cumsum[idim],
                        hioverlap.smallEnd(idim), hioverlap.bigEnd(idim),
-                       grid_box.bigEnd(idim), fac[idim], v_sigma_sb);
+                       grid_box.bigEnd(idim), fac[idim], v_sigma_sb, do_cubic_sigma_pml);
             }
 
             WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
@@ -405,7 +430,7 @@ void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const Int
                 FillLo(sigma[idim], sigma_cumsum[idim],
                        sigma_star[idim], sigma_star_cumsum[idim],
                        looverlap.smallEnd(idim), looverlap.bigEnd(idim),
-                       grid_box.smallEnd(idim), fac[idim], v_sigma_sb);
+                       grid_box.smallEnd(idim), fac[idim], v_sigma_sb, do_cubic_sigma_pml);
             }
 
             const Box& hibox = amrex::adjCellHi(grid_box, idim, ncell[idim]);
@@ -414,7 +439,7 @@ void SigmaBox::define_multiple (const Box& box, const BoxArray& grids, const Int
                 FillHi(sigma[idim], sigma_cumsum[idim],
                        sigma_star[idim], sigma_star_cumsum[idim],
                        hioverlap.smallEnd(idim), hioverlap.bigEnd(idim),
-                       grid_box.bigEnd(idim), fac[idim], v_sigma_sb);
+                       grid_box.bigEnd(idim), fac[idim], v_sigma_sb, do_cubic_sigma_pml);
             }
 
             WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
@@ -505,9 +530,9 @@ SigmaBox::ComputePMLFactorsE (const Real* a_dx, Real dt)
 MultiSigmaBox::MultiSigmaBox (const BoxArray& ba, const DistributionMapping& dm,
                               const BoxArray& grid_ba, const Real* dx,
                               const IntVect& ncell, const IntVect& delta,
-                              const amrex::Box& regular_domain, const amrex::Real v_sigma_sb)
+                              const amrex::Box& regular_domain, const amrex::Real v_sigma_sb, const int do_cubic_sigma_pml, const amrex::Real pml_damping_strength)
     : FabArray<SigmaBox>(ba,dm,1,0,MFInfo(),
-                         SigmaBoxFactory(grid_ba,dx,ncell,delta, regular_domain, v_sigma_sb))
+                         SigmaBoxFactory(grid_ba,dx,ncell,delta, regular_domain, v_sigma_sb,do_cubic_sigma_pml,pml_damping_strength))
 {}
 
 void
@@ -552,6 +577,7 @@ PML::PML (const int lev, const BoxArray& grid_ba, const DistributionMapping& gri
           const amrex::IntVect& fill_guards_fields,
           const amrex::IntVect& fill_guards_current,
           int max_guard_EB, const amrex::Real v_sigma_sb,
+          const int do_cubic_sigma_pml, amrex::Real pml_damping_strength,
           const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi)
     : m_dive_cleaning(do_pml_dive_cleaning),
       m_divb_cleaning(do_pml_divb_cleaning),
@@ -737,7 +763,7 @@ PML::PML (const int lev, const BoxArray& grid_ba, const DistributionMapping& gri
     Box single_domain_box = is_single_box_domain ? domain0 : Box();
     // Empty box (i.e., Box()) means it's not a single box domain.
     sigba_fp = std::make_unique<MultiSigmaBox>(ba, dm, grid_ba_reduced, geom->CellSize(),
-                                               IntVect(ncell), IntVect(delta), single_domain_box, v_sigma_sb);
+                                               IntVect(ncell), IntVect(delta), single_domain_box, v_sigma_sb, do_cubic_sigma_pml, pml_damping_strength);
 
     if (WarpX::electromagnetic_solver_id == ElectromagneticSolverAlgo::PSATD) {
 #ifndef WARPX_USE_PSATD
@@ -851,7 +877,7 @@ PML::PML (const int lev, const BoxArray& grid_ba, const DistributionMapping& gri
 
         single_domain_box = is_single_box_domain ? cdomain : Box();
         sigba_cp = std::make_unique<MultiSigmaBox>(cba, cdm, grid_cba_reduced, cgeom->CellSize(),
-                                                   cncells, cdelta, single_domain_box, v_sigma_sb);
+                                                   cncells, cdelta, single_domain_box, v_sigma_sb, do_cubic_sigma_pml, pml_damping_strength);
 
         if (WarpX::electromagnetic_solver_id == ElectromagneticSolverAlgo::PSATD) {
 #ifndef WARPX_USE_PSATD
diff --git a/Source/BoundaryConditions/WarpXEvolvePML.cpp b/Source/BoundaryConditions/WarpXEvolvePML.cpp
index af721d70b6d..5952d59e8df 100644
--- a/Source/BoundaryConditions/WarpXEvolvePML.cpp
+++ b/Source/BoundaryConditions/WarpXEvolvePML.cpp
@@ -77,7 +77,7 @@ WarpX::DampPML_Cartesian (const int lev, PatchType patch_type)
 {
     const bool dive_cleaning = WarpX::do_pml_dive_cleaning;
     const bool divb_cleaning = WarpX::do_pml_divb_cleaning;
-
+    const bool abc_in_pml = WarpX::do_abc_in_pml;
     if (pml[lev]->ok())
     {
         const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp();
@@ -154,19 +154,19 @@ WarpX::DampPML_Cartesian (const int lev, PatchType patch_type)
 
                 warpx_damp_pml_ex(i, j, k, pml_Exfab, Ex_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
                                   sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, x_lo, y_lo, z_lo,
-                                  dive_cleaning);
+                                  dive_cleaning, abc_in_pml);
             },
             [=] AMREX_GPU_DEVICE (int i, int j, int k) {
 
                 warpx_damp_pml_ey(i, j, k, pml_Eyfab, Ey_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
                                   sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, x_lo, y_lo, z_lo,
-                                  dive_cleaning);
+                                  dive_cleaning, abc_in_pml);
             },
             [=] AMREX_GPU_DEVICE (int i, int j, int k) {
 
                 warpx_damp_pml_ez(i, j, k, pml_Ezfab, Ez_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
                                   sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, x_lo, y_lo, z_lo,
-                                  dive_cleaning);
+                                  dive_cleaning, abc_in_pml);
             });
 
             amrex::ParallelFor(tbx, tby, tbz,
@@ -174,19 +174,19 @@ WarpX::DampPML_Cartesian (const int lev, PatchType patch_type)
 
                 warpx_damp_pml_bx(i, j, k, pml_Bxfab, Bx_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
                                   sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, x_lo, y_lo, z_lo,
-                                  divb_cleaning);
+                                  divb_cleaning, abc_in_pml);
             },
             [=] AMREX_GPU_DEVICE (int i, int j, int k) {
 
                 warpx_damp_pml_by(i, j, k, pml_Byfab, By_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
                                   sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, x_lo, y_lo, z_lo,
-                                  divb_cleaning);
+                                  divb_cleaning, abc_in_pml);
             },
             [=] AMREX_GPU_DEVICE (int i, int j, int k) {
 
                 warpx_damp_pml_bz(i, j, k, pml_Bzfab, Bz_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
                                   sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, x_lo, y_lo, z_lo,
-                                  divb_cleaning);
+                                  divb_cleaning, abc_in_pml);
             });
 
             // For warpx_damp_pml_F(), mfi.nodaltilebox is used in the ParallelFor loop and here we
diff --git a/Source/BoundaryConditions/WarpX_PML_kernels.H b/Source/BoundaryConditions/WarpX_PML_kernels.H
index 7aaf5a57369..97003d8d2b6 100644
--- a/Source/BoundaryConditions/WarpX_PML_kernels.H
+++ b/Source/BoundaryConditions/WarpX_PML_kernels.H
@@ -24,11 +24,11 @@ void warpx_damp_pml_ex (int i, int j, int k, amrex::Array4<amrex::Real> const& E
                         const amrex::Real* const sigma_star_fac_y,
                         const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
-                        const bool dive_cleaning)
+                        const bool dive_cleaning, const bool abc_in_pml)
 {
 #if defined(WARPX_DIM_1D_Z)
     amrex::ignore_unused(i, j, k, Ex, Ex_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
-           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, dive_cleaning);
+           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, dive_cleaning, abc_in_pml);
     amrex::Abort("PML not implemented in Cartesian 1D geometry");
 #endif
 
@@ -53,8 +53,14 @@ void warpx_damp_pml_ex (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Exz
     if (sz == 0) {
         Ex(i,j,k,PMLComp::xz) *= sigma_star_fac_z[j-zlo];
+        if (abc_in_pml) {
+            Ex(i,j,k,PMLComp::xy) *= sigma_star_fac_z[j-zlo];
+        }
     } else {
         Ex(i,j,k,PMLComp::xz) *= sigma_fac_z[j-zlo];
+        if (abc_in_pml) {
+            Ex(i,j,k,PMLComp::xy) *= sigma_fac_z[j-zlo];
+        }
     }
 
 #elif defined(WARPX_DIM_3D)
@@ -77,15 +83,27 @@ void warpx_damp_pml_ex (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Exy
     if (sy == 0) {
         Ex(i,j,k,PMLComp::xy) *= sigma_star_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Ex(i,j,k,PMLComp::xz) *= sigma_star_fac_y[j-ylo];
+        }
     } else {
         Ex(i,j,k,PMLComp::xy) *= sigma_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Ex(i,j,k,PMLComp::xz) *= sigma_fac_y[j-ylo];
+        }
     }
 
     // Exz
     if (sz == 0) {
         Ex(i,j,k,PMLComp::xz) *= sigma_star_fac_z[k-zlo];
+        if (abc_in_pml) {
+            Ex(i,j,k,PMLComp::xy) *= sigma_star_fac_z[k-zlo];
+        }
     } else {
         Ex(i,j,k,PMLComp::xz) *= sigma_fac_z[k-zlo];
+        if (abc_in_pml) {
+            Ex(i,j,k,PMLComp::xy) *= sigma_fac_z[k-zlo];
+        }
     }
 
 #endif
@@ -101,17 +119,17 @@ void warpx_damp_pml_ey (int i, int j, int k, amrex::Array4<amrex::Real> const& E
                         const amrex::Real* const sigma_star_fac_y,
                         const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
-                        const bool dive_cleaning)
+                        const bool dive_cleaning, const bool abc_in_pml)
 {
 #if defined(WARPX_DIM_1D_Z)
     amrex::ignore_unused(i, j, k, Ey, Ey_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
-           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, dive_cleaning);
+           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, dive_cleaning, abc_in_pml);
     amrex::Abort("PML not implemented in Cartesian 1D geometry");
 #endif
 
 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
 
-    amrex::ignore_unused(sigma_fac_y, sigma_star_fac_y, ylo, dive_cleaning);
+    amrex::ignore_unused(sigma_fac_y, sigma_star_fac_y, ylo, dive_cleaning, abc_in_pml);
 
     // sx = 0 means that Ey is staggered in x, while sx = 1 means that Ey is nodal in x (same for z)
     const int sx = Ey_stag[0];
@@ -120,15 +138,27 @@ void warpx_damp_pml_ey (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Eyx
     if (sx == 0) {
         Ey(i,j,k,PMLComp::yx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yz) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         Ey(i,j,k,PMLComp::yx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yz) *= sigma_fac_x[i-xlo];
+        }
     }
 
     // Eyz
     if (sz == 0) {
         Ey(i,j,k,PMLComp::yz) *= sigma_star_fac_z[j-zlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yx) *= sigma_star_fac_z[j-zlo];
+        }
     } else {
         Ey(i,j,k,PMLComp::yz) *= sigma_fac_z[j-zlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yx) *= sigma_fac_z[j-zlo];
+        }
     }
 
 #elif defined(WARPX_DIM_3D)
@@ -141,8 +171,14 @@ void warpx_damp_pml_ey (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Eyx
     if (sx == 0) {
         Ey(i,j,k,PMLComp::yx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yz) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         Ey(i,j,k,PMLComp::yx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yz) *= sigma_fac_x[i-xlo];
+        }
     }
 
     if (dive_cleaning)
@@ -158,8 +194,14 @@ void warpx_damp_pml_ey (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Eyz
     if (sz == 0) {
         Ey(i,j,k,PMLComp::yz) *= sigma_star_fac_z[k-zlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yx) *= sigma_star_fac_z[k-zlo];
+        }
     } else {
         Ey(i,j,k,PMLComp::yz) *= sigma_fac_z[k-zlo];
+        if (abc_in_pml) {
+            Ey(i,j,k,PMLComp::yx) *= sigma_fac_z[k-zlo];
+        }
     }
 
 #endif
@@ -175,11 +217,11 @@ void warpx_damp_pml_ez (int i, int j, int k, amrex::Array4<amrex::Real> const& E
                         const amrex::Real* const sigma_star_fac_y,
                         const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
-                        const bool dive_cleaning)
+                        const bool dive_cleaning, const bool abc_in_pml)
 {
 #if defined(WARPX_DIM_1D_Z)
     amrex::ignore_unused(i, j, k, Ez, Ez_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
-           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, dive_cleaning);
+           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, dive_cleaning, abc_in_pml);
     amrex::Abort("PML not implemented in Cartesian 1D geometry");
 #endif
 
@@ -194,8 +236,14 @@ void warpx_damp_pml_ez (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Ezx
     if (sx == 0) {
         Ez(i,j,k,PMLComp::zx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ez(i,j,k,PMLComp::zy) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         Ez(i,j,k,PMLComp::zx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ez(i,j,k,PMLComp::zy) *= sigma_fac_x[i-xlo];
+        }
     }
 
     if (dive_cleaning)
@@ -218,15 +266,27 @@ void warpx_damp_pml_ez (int i, int j, int k, amrex::Array4<amrex::Real> const& E
     // Ezx
     if (sx == 0) {
         Ez(i,j,k,PMLComp::zx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ez(i,j,k,PMLComp::zy) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         Ez(i,j,k,PMLComp::zx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Ez(i,j,k,PMLComp::zy) *= sigma_fac_x[i-xlo];
+        }
     }
 
     // Ezy
     if (sy == 0) {
         Ez(i,j,k,PMLComp::zy) *= sigma_star_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Ez(i,j,k,PMLComp::zx) *= sigma_star_fac_y[j-ylo];
+        }
     } else {
         Ez(i,j,k,PMLComp::zy) *= sigma_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Ez(i,j,k,PMLComp::zx) *= sigma_fac_y[j-ylo];
+        }
     }
 
     if (dive_cleaning)
@@ -252,11 +312,11 @@ void warpx_damp_pml_bx (int i, int j, int k, amrex::Array4<amrex::Real> const& B
                         const amrex::Real* const sigma_star_fac_y,
                         const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
-                        const bool divb_cleaning)
+                        const bool divb_cleaning, const bool abc_in_pml)
 {
 #if defined(WARPX_DIM_1D_Z)
     amrex::ignore_unused(i, j, k, Bx, Bx_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
-           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, divb_cleaning);
+           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, divb_cleaning, abc_in_pml);
     amrex::Abort("PML not implemented in Cartesian 1D geometry");
 #endif
 
@@ -281,8 +341,14 @@ void warpx_damp_pml_bx (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Bxz
     if (sz == 0) {
         Bx(i,j,k,PMLComp::xz) *= sigma_star_fac_z[j-zlo];
+        if (abc_in_pml) {
+            Bx(i,j,k,PMLComp::xy) *= sigma_star_fac_z[j-zlo];
+        }
     } else {
         Bx(i,j,k,PMLComp::xz) *= sigma_fac_z[j-zlo];
+        if (abc_in_pml) {
+            Bx(i,j,k,PMLComp::xy) *= sigma_fac_z[j-zlo];
+        }
     }
 
 #elif defined(WARPX_DIM_3D)
@@ -305,15 +371,27 @@ void warpx_damp_pml_bx (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Bxy
     if (sy == 0) {
         Bx(i,j,k,PMLComp::xy) *= sigma_star_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Bx(i,j,k,PMLComp::xz) *= sigma_star_fac_y[j-ylo];
+        }
     } else {
         Bx(i,j,k,PMLComp::xy) *= sigma_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Bx(i,j,k,PMLComp::xz) *= sigma_fac_y[j-ylo];
+        }
     }
 
     // Bxz
     if (sz == 0) {
         Bx(i,j,k,PMLComp::xz) *= sigma_star_fac_z[k-zlo];
+        if (abc_in_pml) {
+            Bx(i,j,k,PMLComp::xy) *= sigma_star_fac_z[k-zlo];
+        }
     } else {
         Bx(i,j,k,PMLComp::xz) *= sigma_fac_z[k-zlo];
+        if (abc_in_pml) {
+            Bx(i,j,k,PMLComp::xy) *= sigma_fac_z[k-zlo];
+        }
     }
 
 #endif
@@ -329,11 +407,11 @@ void warpx_damp_pml_by (int i, int j, int k, amrex::Array4<amrex::Real> const& B
                         const amrex::Real* const sigma_star_fac_y,
                         const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
-                        const bool divb_cleaning)
+                        const bool divb_cleaning, const bool abc_in_pml)
 {
 #if defined(WARPX_DIM_1D_Z)
     amrex::ignore_unused(i, j, k, By, By_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
-           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, divb_cleaning);
+           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, divb_cleaning, abc_in_pml);
     amrex::Abort("PML not implemented in Cartesian 1D geometry");
 #endif
 
@@ -348,15 +426,27 @@ void warpx_damp_pml_by (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Byx
     if (sx == 0) {
         By(i,j,k,PMLComp::yx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yz) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         By(i,j,k,PMLComp::yx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yz) *= sigma_fac_x[i-xlo];
+        }
     }
 
     // Byz
     if (sz == 0) {
         By(i,j,k,PMLComp::yz) *= sigma_star_fac_z[j-zlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yx) *= sigma_star_fac_z[j-zlo];
+        }
     } else {
         By(i,j,k,PMLComp::yz) *= sigma_fac_z[j-zlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yx) *= sigma_fac_z[j-zlo];
+        }
     }
 
 #elif defined(WARPX_DIM_3D)
@@ -369,8 +459,14 @@ void warpx_damp_pml_by (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Byx
     if (sx == 0) {
         By(i,j,k,PMLComp::yx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yz) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         By(i,j,k,PMLComp::yx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yz) *= sigma_fac_x[i-xlo];
+        }
     }
 
     if (divb_cleaning)
@@ -386,8 +482,14 @@ void warpx_damp_pml_by (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Byz
     if (sz == 0) {
         By(i,j,k,PMLComp::yz) *= sigma_star_fac_z[k-zlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yx) *= sigma_star_fac_z[k-zlo];
+        }
     } else {
         By(i,j,k,PMLComp::yz) *= sigma_fac_z[k-zlo];
+        if (abc_in_pml) {
+            By(i,j,k,PMLComp::yx) *= sigma_fac_z[k-zlo];
+        }
     }
 
 #endif
@@ -403,11 +505,11 @@ void warpx_damp_pml_bz (int i, int j, int k, amrex::Array4<amrex::Real> const& B
                         const amrex::Real* const sigma_star_fac_y,
                         const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
-                        const bool divb_cleaning)
+                        const bool divb_cleaning, const bool abc_in_pml)
 {
 #if defined(WARPX_DIM_1D_Z)
     amrex::ignore_unused(i, j, k, Bz, Bz_stag, sigma_fac_x, sigma_fac_y, sigma_fac_z,
-           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, divb_cleaning);
+           sigma_star_fac_x, sigma_star_fac_y, sigma_star_fac_z, xlo, ylo, zlo, divb_cleaning, abc_in_pml);
     amrex::Abort("PML not implemented in Cartesian 1D geometry");
 #endif
 
@@ -422,8 +524,14 @@ void warpx_damp_pml_bz (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Bzx
     if (sx == 0) {
         Bz(i,j,k,PMLComp::zx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Bz(i,j,k,PMLComp::zy) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         Bz(i,j,k,PMLComp::zx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Bz(i,j,k,PMLComp::zy) *= sigma_fac_x[i-xlo];
+        }
     }
 
     if (divb_cleaning)
@@ -446,15 +554,27 @@ void warpx_damp_pml_bz (int i, int j, int k, amrex::Array4<amrex::Real> const& B
     // Bzx
     if (sx == 0) {
         Bz(i,j,k,PMLComp::zx) *= sigma_star_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Bz(i,j,k,PMLComp::zy) *= sigma_star_fac_x[i-xlo];
+        }
     } else {
         Bz(i,j,k,PMLComp::zx) *= sigma_fac_x[i-xlo];
+        if (abc_in_pml) {
+            Bz(i,j,k,PMLComp::zy) *= sigma_fac_x[i-xlo];
+        }
     }
 
     // Bzy
     if (sy == 0) {
         Bz(i,j,k,PMLComp::zy) *= sigma_star_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Bz(i,j,k,PMLComp::zx) *= sigma_star_fac_y[j-ylo];
+        }
     } else {
         Bz(i,j,k,PMLComp::zy) *= sigma_fac_y[j-ylo];
+        if (abc_in_pml) {
+            Bz(i,j,k,PMLComp::zx) *= sigma_fac_y[j-ylo];
+        }
     }
 
     if (divb_cleaning)
diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 4963c66077b..dda3e637aa5 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -579,6 +579,7 @@ WarpX::InitPML ()
                              amrex::IntVect(0), amrex::IntVect(0),
                              guard_cells.ng_FieldSolver.max(),
                              v_particle_pml,
+                             do_cubic_sigma_pml, pml_damping_strength,
                              do_pml_Lo[0], do_pml_Hi[0]);
 #endif
 
@@ -617,6 +618,7 @@ WarpX::InitPML ()
                                    amrex::IntVect(0), amrex::IntVect(0),
                                    guard_cells.ng_FieldSolver.max(),
                                    v_particle_pml,
+                                   do_cubic_sigma_pml, pml_damping_strength,
                                    do_pml_Lo[lev], do_pml_Hi[lev]);
         }
     }
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 79352274068..63f22742e14 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -1587,8 +1587,11 @@ private:
     int do_pml_j_damping = 0;
     int do_pml_in_domain = 0;
     static int do_similar_dm_pml;
+    int do_cubic_sigma_pml = 0;
+    amrex::Real pml_damping_strength = 4.;
     bool do_pml_dive_cleaning; // default set in WarpX.cpp
     bool do_pml_divb_cleaning; // default set in WarpX.cpp
+    static bool do_abc_in_pml;
     amrex::Vector<amrex::IntVect> do_pml_Lo;
     amrex::Vector<amrex::IntVect> do_pml_Hi;
     amrex::Vector<std::unique_ptr<PML> > pml;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index daed6a52fb4..6e66d05a9cb 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -126,6 +126,7 @@ int WarpX::macroscopic_solver_algo;
 bool WarpX::do_single_precision_comms = false;
 bool WarpX::do_subcycle_current = false;
 int WarpX::n_subcycle_current = 1;
+bool WarpX::do_abc_in_pml = false;
 
 bool WarpX::do_shared_mem_charge_deposition = false;
 bool WarpX::do_shared_mem_current_deposition = false;
@@ -909,6 +910,9 @@ WarpX::ReadParameters ()
         pp_warpx.query("do_pml_j_damping", do_pml_j_damping);
         pp_warpx.query("do_pml_in_domain", do_pml_in_domain);
         pp_warpx.query("do_similar_dm_pml", do_similar_dm_pml);
+        pp_warpx.query("do_cubic_sigma_pml",do_cubic_sigma_pml);
+        pp_warpx.query("pml_damping_strength",pml_damping_strength);
+        pp_warpx.query("do_abc_in_pml",do_abc_in_pml);
         // Read `v_particle_pml` in units of the speed of light
         v_particle_pml = 1._rt;
         utils::parser::queryWithParser(pp_warpx, "v_particle_pml", v_particle_pml);

From c6675cddc2d698c37ac4af235f8e5a9dbbf5527e Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Mon, 18 Mar 2024 13:20:12 -0700
Subject: [PATCH 02/25] smooth tanh weighting for fp and cp interpolation in
 buffer region

---
 Source/Parallelization/WarpXRegrid.cpp        |   1 +
 Source/Particles/PhysicalParticleContainer.H  |  22 +++
 .../Particles/PhysicalParticleContainer.cpp   | 142 ++++++++++++++++--
 Source/WarpX.H                                |  36 +++++
 Source/WarpX.cpp                              |  88 ++++++++++-
 5 files changed, 275 insertions(+), 14 deletions(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index dcea3cab58a..1bd05a2d115 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -334,6 +334,7 @@ WarpX::RemakeLevel (int lev, Real /*time*/, const BoxArray& ba, const Distributi
             // we can avoid redistributing these since we immediately re-build the values via BuildBufferMasks()
             RemakeMultiFab(current_buffer_masks[lev], dm, false ,lev);
             RemakeMultiFab(gather_buffer_masks[lev], dm, false ,lev);
+            RemakeMultiFab(interp_weight_gbuffer[lev], dm, false, lev);
 
             if (current_buffer_masks[lev] || gather_buffer_masks[lev]) {
                 BuildBufferMasks();
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index 286d0675da6..73e68fa9a9b 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -320,6 +320,28 @@ public:
         amrex::FArrayBox const * & ez_ptr, amrex::FArrayBox const * & bx_ptr,
         amrex::FArrayBox const * & by_ptr, amrex::FArrayBox const * & bz_ptr);
 
+    void InterpolateFieldsInGatherBuffer (const amrex::Box& box,
+        amrex::FArrayBox& bufferEx, amrex::FArrayBox& bufferEy, amrex::FArrayBox& bufferEz,
+        amrex::FArrayBox& bufferBx, amrex::FArrayBox& bufferBy, amrex::FArrayBox& bufferBz,
+        amrex::Elixir& buf_exeli, amrex::Elixir& buf_eyeli, amrex::Elixir& buf_ezeli,
+        amrex::Elixir& buf_bxeli, amrex::Elixir& buf_byeli, amrex::Elixir& buf_bzeli,
+        amrex::FArrayBox const *& ex_ptr, amrex::FArrayBox const *& ey_ptr, amrex::FArrayBox const* & ez_ptr,
+        amrex::FArrayBox const *& bx_ptr, amrex::FArrayBox const *& by_ptr, amrex::FArrayBox const* & bz_ptr,
+        amrex::FArrayBox const *& cex_ptr, amrex::FArrayBox const *& cey_ptr, amrex::FArrayBox const* & cez_ptr,
+        amrex::FArrayBox const *& cbx_ptr, amrex::FArrayBox const *& cby_ptr, amrex::FArrayBox const* & cbz_ptr,
+        const amrex::IArrayBox& gather_mask, const amrex::FArrayBox& weight_gbuffer,
+        const amrex::IntVect ref_ratio);
+
+    void WeightedSumInGatherBuffer (const amrex::Box& tmp_xbox, const amrex::Box& tmp_ybox, const amrex::Box& tmp_zbox,
+                      amrex::Array4<amrex::Real> const & xbuf_field_arr, amrex::Array4<amrex::Real> const& ybuf_field_arr,
+                      amrex::Array4<amrex::Real> const & zbuf_field_arr,
+                      amrex::Array4<const amrex::Real> cfx_arr, amrex::Array4<const amrex::Real> cfy_arr,
+                      amrex::Array4<const amrex::Real> cfz_arr,
+                      amrex::Array4<const amrex::Real> fx_arr, amrex::Array4<const amrex::Real> fy_arr,
+                      amrex::Array4<const amrex::Real> fz_arr,
+                      amrex::Array4<const int > gm_arr, amrex::Array4<const amrex::Real> wt_arr,
+                      const amrex::IntVect ref_ratio);
+
     /**
     * \brief This function determines if resampling should be done for the current species, and
     * if so, performs the resampling.
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 160eac0d19c..f55ced70d13 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -2010,12 +2010,11 @@ PhysicalParticleContainer::Evolve (int lev,
     WARPX_PROFILE_VAR_NS("PhysicalParticleContainer::Evolve::GatherAndPush", blp_fg);
 
     BL_ASSERT(OnSameGrids(lev,jx));
-
     amrex::LayoutData<amrex::Real>* cost = WarpX::getCosts(lev);
 
     const iMultiFab* current_masks = WarpX::CurrentBufferMasks(lev);
     const iMultiFab* gather_masks = WarpX::GatherBufferMasks(lev);
-
+    const amrex::MultiFab* weight_gbuffer = WarpX::GetInstance().getInterpWeightGBuffer(lev);
     const bool has_buffer = cEx || cjx;
 
     if (m_do_back_transformed_particles)
@@ -2044,7 +2043,8 @@ PhysicalParticleContainer::Evolve (int lev,
 
         FArrayBox filtered_Ex, filtered_Ey, filtered_Ez;
         FArrayBox filtered_Bx, filtered_By, filtered_Bz;
-
+        FArrayBox bufferEx, bufferEy, bufferEz;
+        FArrayBox bufferBx, bufferBy, bufferBz;
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
             if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers)
@@ -2073,6 +2073,7 @@ PhysicalParticleContainer::Evolve (int lev,
             FArrayBox const* bzfab = &Bz[pti];
 
             Elixir exeli, eyeli, ezeli, bxeli, byeli, bzeli;
+            Elixir buf_exeli, buf_eyeli, buf_ezeli, buf_bxeli, buf_byeli, buf_bzeli;
 
             if (WarpX::use_fdtd_nci_corr)
             {
@@ -2114,7 +2115,7 @@ PhysicalParticleContainer::Evolve (int lev,
 
                 DepositCharge(pti, wp, ion_lev, rho, 0, 0,
                               np_current, thread_num, lev, lev);
-                if (has_buffer){
+                if ((np-np_current)> 0 ){
                     DepositCharge(pti, wp, ion_lev, crho, 0, np_current,
                                   np-np_current, thread_num, lev, lev-1);
                 }
@@ -2157,8 +2158,19 @@ PhysicalParticleContainer::Evolve (int lev,
                     FArrayBox const* cbyfab = &(*cBy)[pti];
                     FArrayBox const* cbzfab = &(*cBz)[pti];
 
+                    //// buffer box (Both 2D and 3D)
+                    InterpolateFieldsInGatherBuffer(box, bufferEx, bufferEy, bufferEz,
+                                                         bufferBx, bufferBy, bufferBz,
+                                                         buf_exeli, buf_eyeli, buf_ezeli,
+                                                         buf_bxeli, buf_byeli, buf_bzeli,
+                                                         exfab, eyfab, ezfab, bxfab, byfab, bzfab,
+                                                         cexfab, ceyfab, cezfab, cbxfab, cbyfab, cbzfab,
+                                                         (*gather_masks)[pti], (*weight_gbuffer)[pti],
+                                                         ref_ratio);
+
                     if (WarpX::use_fdtd_nci_corr)
                     {
+                        // should this be bufferEFields*
                         // Filter arrays (*cEx)[pti], store the result in
                         // filtered_Ex and update pointer cexfab so that it
                         // points to filtered_Ex (and do the same for all
@@ -2170,15 +2182,17 @@ PhysicalParticleContainer::Evolve (int lev,
                                        (*cBx)[pti], (*cBy)[pti], (*cBz)[pti],
                                        cexfab, ceyfab, cezfab, cbxfab, cbyfab, cbzfab);
                     }
-
                     // Field gather and push for particles in gather buffers
                     e_is_nodal = cEx->is_nodal() and cEy->is_nodal() and cEz->is_nodal();
                     if (push_type == PushType::Explicit) {
-                        PushPX(pti, cexfab, ceyfab, cezfab,
-                               cbxfab, cbyfab, cbzfab,
+                        //PushPX(pti, cexfab, ceyfab, cezfab,
+                        //       cbxfab, cbyfab, cbzfab,
+                        PushPX(pti, &bufferEx, &bufferEy, &bufferEz,
+                               &bufferBx, &bufferBy, &bufferBz,
                                cEx->nGrowVect(), e_is_nodal,
                                nfine_gather, np-nfine_gather,
-                               lev, lev-1, dt, ScaleFields(false), a_dt_type);
+                               lev, gather_lev, dt, ScaleFields(false), a_dt_type);
+                               //lev, lev-1, dt, ScaleFields(false), a_dt_type);
                     } else if (push_type == PushType::Implicit) {
                         ImplicitPushXP(pti, cexfab, ceyfab, cezfab,
                                        cbxfab, cbyfab, cbzfab,
@@ -2187,7 +2201,6 @@ PhysicalParticleContainer::Evolve (int lev,
                                        lev, lev-1, dt, ScaleFields(false), a_dt_type);
                     }
                 }
-
                 WARPX_PROFILE_VAR_STOP(blp_fg);
 
                 // Current Deposition
@@ -2204,7 +2217,7 @@ PhysicalParticleContainer::Evolve (int lev,
                                    0, np_current, thread_num,
                                    lev, lev, dt, relative_time, push_type);
 
-                    if (has_buffer)
+                    if ((np-np_current)>0)
                     {
                         // Deposit in buffers
                         DepositCurrent(pti, wp, uxp, uyp, uzp, ion_lev, cjx, cjy, cjz,
@@ -2224,7 +2237,7 @@ PhysicalParticleContainer::Evolve (int lev,
 
                     DepositCharge(pti, wp, ion_lev, rho, 1, 0,
                                   np_current, thread_num, lev, lev);
-                    if (has_buffer){
+                    if ((np-np_current)>0 ){
                         DepositCharge(pti, wp, ion_lev, crho, 1, np_current,
                                       np-np_current, thread_num, lev, lev-1);
                     }
@@ -2327,6 +2340,113 @@ PhysicalParticleContainer::applyNCIFilter (
 #endif
 }
 
+void
+PhysicalParticleContainer::InterpolateFieldsInGatherBuffer (const Box& box,
+    amrex::FArrayBox& bufferEx, amrex::FArrayBox& bufferEy, amrex::FArrayBox& bufferEz,
+    amrex::FArrayBox& bufferBx, amrex::FArrayBox& bufferBy, amrex::FArrayBox& bufferBz,
+    Elixir& buf_exeli, Elixir& buf_eyeli, Elixir& buf_ezeli,
+    Elixir& buf_bxeli, Elixir& buf_byeli, Elixir& buf_bzeli,
+    FArrayBox const *& ex_ptr, FArrayBox const *& ey_ptr, FArrayBox const* & ez_ptr,
+    FArrayBox const *& bx_ptr, FArrayBox const *& by_ptr, FArrayBox const* & bz_ptr,
+    FArrayBox const *& cex_ptr, FArrayBox const *& cey_ptr, FArrayBox const* & cez_ptr,
+    FArrayBox const *& cbx_ptr, FArrayBox const *& cby_ptr, FArrayBox const* & cbz_ptr,
+    const IArrayBox& gather_mask, const FArrayBox& weight_gbuffer,
+    const amrex::IntVect ref_ratio)
+{
+    amrex::Box tmp_exbox = amrex::convert(box,ex_ptr->box().ixType());
+    amrex::Box tmp_eybox = amrex::convert(box,ey_ptr->box().ixType());
+    amrex::Box tmp_ezbox = amrex::convert(box,ez_ptr->box().ixType());
+
+    bufferEx.resize(tmp_exbox);
+    buf_exeli = bufferEx.elixir();
+    bufferEy.resize(tmp_eybox);
+    buf_eyeli = bufferEy.elixir();
+    bufferEz.resize(tmp_ezbox);
+    buf_ezeli = bufferEz.elixir();
+    WeightedSumInGatherBuffer(tmp_exbox, tmp_eybox, tmp_ezbox,
+                    bufferEx.array(), bufferEy.array(), bufferEz.array(),
+                    cex_ptr->array(), cey_ptr->array(), cez_ptr->array(),
+                    ex_ptr->array(), ey_ptr->array(), ez_ptr->array(),
+                    gather_mask.array(), weight_gbuffer.array(), ref_ratio);
+
+    amrex::Box tmp_bxbox = amrex::convert(box,bx_ptr->box().ixType());
+    amrex::Box tmp_bybox = amrex::convert(box,by_ptr->box().ixType());
+    amrex::Box tmp_bzbox = amrex::convert(box,bz_ptr->box().ixType());
+    bufferBx.resize(tmp_bxbox);
+    buf_bxeli = bufferBx.elixir();
+    bufferBy.resize(tmp_bybox);
+    buf_byeli = bufferBy.elixir();
+    bufferBz.resize(tmp_bzbox);
+    buf_bzeli = bufferBz.elixir();
+
+    WeightedSumInGatherBuffer(tmp_bxbox, tmp_bybox, tmp_bzbox,
+                    bufferBx.array(), bufferBy.array(), bufferBz.array(),
+                    cbx_ptr->array(), cby_ptr->array(), cbz_ptr->array(),
+                    bx_ptr->array(), by_ptr->array(), bz_ptr->array(),
+                    gather_mask.array(), weight_gbuffer.array(), ref_ratio);
+}
+
+void
+PhysicalParticleContainer::WeightedSumInGatherBuffer (
+                      const amrex::Box& tmp_xbox, const amrex::Box& tmp_ybox, const amrex::Box& tmp_zbox,
+                      amrex::Array4<amrex::Real> const & xbuf_field_arr, amrex::Array4<amrex::Real> const& ybuf_field_arr,
+                      amrex::Array4<amrex::Real> const & zbuf_field_arr,
+                      amrex::Array4<const amrex::Real> cfx_arr, amrex::Array4<const amrex::Real> cfy_arr,
+                      amrex::Array4<const amrex::Real> cfz_arr,
+                      amrex::Array4<const amrex::Real> fx_arr, amrex::Array4<const amrex::Real> fy_arr,
+                      amrex::Array4<const amrex::Real> fz_arr,
+                      amrex::Array4<const int > gm_arr, amrex::Array4<const amrex::Real> wt_arr,
+                      const amrex::IntVect ref_ratio)
+{
+    amrex::ParallelFor( tmp_xbox, tmp_ybox, tmp_zbox,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) {
+            int ii = amrex::coarsen(i,ref_ratio[0]);
+            int jj = j;
+            int kk = k;
+#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+            jj = amrex::coarsen(j,ref_ratio[1]);
+#elif defined(WARPX_DIM_3D)
+            kk = amrex::coarsen(k,ref_ratio[2]);
+#endif
+            if (gm_arr(i,j,k) == 0) {
+                xbuf_field_arr(i,j,k) = wt_arr(i,j,k)*fx_arr(i,j,k) + (1._rt-wt_arr(i,j,k))*cfx_arr(ii,jj,kk);
+            } else {
+                xbuf_field_arr(i,j,k) = fx_arr(i,j,k);
+            }
+        },
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) {
+            int ii = amrex::coarsen(i,ref_ratio[0]);
+            int jj = j;
+            int kk = k;
+#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+            jj = amrex::coarsen(j,ref_ratio[1]);
+#elif defined(WARPX_DIM_3D)
+            kk = amrex::coarsen(k,ref_ratio[2]);
+#endif
+            if (gm_arr(i,j,k) == 0) {
+                ybuf_field_arr(i,j,k) = wt_arr(i,j,k)*fy_arr(i,j,k) + (1._rt-wt_arr(i,j,k))*cfy_arr(ii,jj,kk);
+            } else {
+                ybuf_field_arr(i,j,k) = fy_arr(i,j,k);
+            }
+        },
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) {
+            int ii = amrex::coarsen(i,ref_ratio[0]);
+            int jj = j;
+            int kk = k;
+#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+            jj = amrex::coarsen(j,ref_ratio[1]);
+#elif defined(WARPX_DIM_3D)
+            kk = amrex::coarsen(k,ref_ratio[2]);
+#endif
+            if (gm_arr(i,j,k) == 0) {
+               zbuf_field_arr(i,j,k) = wt_arr(i,j,k)*fz_arr(i,j,k) + (1._rt-wt_arr(i,j,k))*cfz_arr(ii,jj,kk);
+            } else {
+               zbuf_field_arr(i,j,k) = fz_arr(i,j,k);
+            }
+        }
+    );
+}
+
 // Loop over all particles in the particle container and
 // split particles tagged with p.id()=DoSplitParticleID
 void
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 6526171ff67..faea121f5ee 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -355,6 +355,32 @@ public:
     //! #n_field_gather_buffer cells of the edge of the patch, will gather the fields
     //! from the lower refinement level instead of the refinement patch itself
     static int n_field_gather_buffer;
+
+    /**
+     * If true, particles located inside refinement patch but within
+     *  #n_field_gather_buffer cells of the patch edge will gather fields
+     * that are a weighted sum of the fields from lower refinement
+     * level and the refinement patch. By default, no interpolation will be done,
+     * and the fields will be gathered only from the lower refinement level.
+     * The option can be used to allow for a smooth transition from the fine
+     * and coarse solutions with a tanh profile, such that, the value
+     * of the gather fields is close to the solution on the refinement patch
+     * near the gather buffer edge, and close to the solution on the lower
+     * refinement level close to the edge of the patch.
+     * #tanh_midpoint_gather_buffer can be used to set the tanh profile
+     */
+    static bool do_fieldinterp_gather_buffer;
+
+    /**
+     * With mesh-refinement, finite number of gather buffer cells, and
+     * do_fieldinterp_gather_buffer, the particles on the fine patch
+     * gather a weighted sum of fields from fine patch and coarse patch.
+     * A tanh profile is used to set the smoothing kernel
+     * #tanh_midpoint_gather_buffer must be a fraction of the buffer width
+     * measured from coarsefine interface, where the tanh profile is 0.5
+     * The default value is 0.5
+     */
+    static amrex::Real tanh_midpoint_gather_buffer;
     //! With mesh refinement, particles located inside a refinement patch, but within
     //! #n_current_deposition_buffer cells of the edge of the patch, will deposit their charge
     //! and current onto the lower refinement level instead of the refinement patch itself
@@ -878,6 +904,10 @@ public:
     static std::array<amrex::Real,3> CellSize (int lev);
     static amrex::RealBox getRealBox(const amrex::Box& bx, int lev);
 
+    [[nodiscard]] const amrex::MultiFab* getInterpWeightGBuffer (int lev) const
+    {
+        return interp_weight_gbuffer[lev].get();
+    }
     /**
      * \brief Return the lower corner of the box in real units.
      * \param bx The box
@@ -1392,6 +1422,7 @@ private:
         return gather_buffer_masks[lev].get();
     }
 
+
     /**
      * \brief Re-orders the Fornberg coefficients so that they can be used more conveniently for
      * finite-order centering operations. For example, for finite-order centering of order 6,
@@ -1565,6 +1596,11 @@ private:
     amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3 > > Bfield_cax;
     amrex::Vector<std::unique_ptr<amrex::iMultiFab> > current_buffer_masks;
     amrex::Vector<std::unique_ptr<amrex::iMultiFab> > gather_buffer_masks;
+    /** Multifab that stores weights for interpolating fine and coarse solutions
+     *  in the buffer gather region, such that, the weight for fine solution is
+     *  0 near the coarse-fine interface, and 1 as the buffer gather region terminates in the fine patch
+     */
+    amrex::Vector<std::unique_ptr<amrex::MultiFab> > interp_weight_gbuffer;
 
     // If charge/current deposition buffers are used
     amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > current_buf;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index c929718b043..57d287e234d 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -207,6 +207,8 @@ IntVect WarpX::filter_npass_each_dir(1);
 
 int WarpX::n_field_gather_buffer = -1;
 int WarpX::n_current_deposition_buffer = -1;
+bool WarpX::do_fieldinterp_gather_buffer = false;
+amrex::Real WarpX::tanh_midpoint_gather_buffer = 0.5;
 
 short WarpX::grid_type;
 amrex::IntVect m_rho_nodal_flag;
@@ -399,6 +401,7 @@ WarpX::WarpX ()
     gather_buffer_masks.resize(nlevs_max);
     current_buf.resize(nlevs_max);
     charge_buf.resize(nlevs_max);
+    interp_weight_gbuffer.resize(nlevs_max);
 
     pml.resize(nlevs_max);
 #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD)
@@ -888,6 +891,10 @@ WarpX::ReadParameters ()
             pp_warpx, "n_field_gather_buffer", n_field_gather_buffer);
         utils::parser::queryWithParser(
             pp_warpx, "n_current_deposition_buffer", n_current_deposition_buffer);
+        utils::parser::queryWithParser(
+            pp_warpx, "do_fieldinterp_gather_buffer", do_fieldinterp_gather_buffer);
+        utils::parser::queryWithParser(
+            pp_warpx, "tanh_midpoint_gather_buffer", tanh_midpoint_gather_buffer);
 
         amrex::Real quantum_xi_tmp;
         const auto quantum_xi_is_specified =
@@ -2067,6 +2074,7 @@ WarpX::ClearLevel (int lev)
 
     current_buffer_masks[lev].reset();
     gather_buffer_masks[lev].reset();
+    interp_weight_gbuffer[lev].reset();
 
     F_fp  [lev].reset();
     G_fp  [lev].reset();
@@ -2688,20 +2696,23 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
                 AllocInitMultiFab(Efield_cax[lev][1], amrex::convert(cba,Ey_nodal_flag),dm,ncomps,ngEB,lev, "Efield_cax[y]");
                 AllocInitMultiFab(Efield_cax[lev][2], amrex::convert(cba,Ez_nodal_flag),dm,ncomps,ngEB,lev, "Efield_cax[z]");
             }
-
-            AllocInitMultiFab(gather_buffer_masks[lev], ba, dm, ncomps, amrex::IntVect(1), lev, "gather_buffer_masks");
+            amrex::Print() << " ba for allocating gahter buffer mask " << "\n";
             // Gather buffer masks have 1 ghost cell, because of the fact
             // that particles may move by more than one cell when using subcycling.
+            AllocInitMultiFab(gather_buffer_masks[lev], ba, dm, ncomps, amrex::IntVect(1), lev, "gather_buffer_masks");
+            AllocInitMultiFab(interp_weight_gbuffer[lev], amrex::convert(ba, IntVect::TheNodeVector()), dm, ncomps, ngEB, lev, "interp_weight_gbuffer", 0.0_rt);
         }
 
         if (n_current_deposition_buffer > 0) {
+            amrex::Print() << " current dep buffer : " << n_current_deposition_buffer << "\n";
             AllocInitMultiFab(current_buf[lev][0], amrex::convert(cba,jx_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[x]");
             AllocInitMultiFab(current_buf[lev][1], amrex::convert(cba,jy_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[y]");
             AllocInitMultiFab(current_buf[lev][2], amrex::convert(cba,jz_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[z]");
             if (rho_cp[lev]) {
                 AllocInitMultiFab(charge_buf[lev], amrex::convert(cba,rho_nodal_flag),dm,2*ncomps,ngRho,lev, "charge_buf");
             }
-            AllocInitMultiFab(current_buffer_masks[lev], ba, dm, ncomps, amrex::IntVect(1), lev, "current_buffer_masks");
+            amrex::Print() << " allocate current buffer mask \n";
+            AllocInitMultiFab(current_buffer_masks[lev], ba, dm, ncomps, ngJ, lev, "current_buffer_masks");
             // Current buffer masks have 1 ghost cell, because of the fact
             // that particles may move by more than one cell when using subcycling.
         }
@@ -3063,6 +3074,8 @@ WarpX::getLoadBalanceEfficiency (const int lev)
 void
 WarpX::BuildBufferMasks ()
 {
+    bool do_interpolate = WarpX::do_fieldinterp_gather_buffer;
+    amrex::Real tanh_midpoint =  WarpX::tanh_midpoint_gather_buffer;
     for (int lev = 1; lev <= maxLevel(); ++lev)
     {
         for (int ipass = 0; ipass < 2; ++ipass)
@@ -3088,6 +3101,75 @@ WarpX::BuildBufferMasks ()
                     const Box tbx = mfi.growntilebox();
                     BuildBufferMasksInBox( tbx, (*bmasks)[mfi], tmp[mfi], ngbuffer );
                 }
+                if (ipass==0) continue;
+                amrex::MultiFab* weight_gbuffer = interp_weight_gbuffer[lev].get();
+                // Using tmp to also set weights in the interp_weight_gbuffer multifab
+                for (MFIter mfi(*weight_gbuffer, true); mfi.isValid(); ++mfi)
+                {
+                    const Box& tbx = mfi.tilebox(IntVect::TheNodeVector(),weight_gbuffer->nGrowVect());
+                    auto const& gmsk = tmp[mfi].const_array();
+                    auto const& bmsk = (*bmasks)[mfi].array();
+                    auto const& wtmsk = (*weight_gbuffer)[mfi].array();
+                    amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        wtmsk(i,j,k) = 0._rt;
+                        if (bmsk(i,j,k) == 0 && do_interpolate) {
+                            if(gmsk(i,j,k)==0) {
+                                wtmsk(i,j,k) = 0.;
+                                return;
+                            }
+                            //for (int ii = i-1; ii >= i-ngbuffer; --ii) {
+                            //    if (gmsk(ii,j,k)==0) {
+                            //        amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
+                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                            //        amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
+                            //        return;
+                            //    }
+                            //}
+                            //for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
+                            //    if (gmsk(ii,j,k)==0) {
+                            //        amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
+                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                            //        amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
+                            //        return;
+                            //    }
+                            //}
+                            for (int jj = j-1; jj >= j-ngbuffer; --jj) {
+                                if (gmsk(i,jj,k)==0) {
+                                    amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
+                                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                                    wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                                    return;
+                                }
+                            }
+                            for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
+                                if (gmsk(i,jj,k)==0) {
+                                    amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
+                                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                                    wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                                    return;
+                                }
+                            }
+                            //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
+                            //    if (gmsk(i,j,kk)==0) {
+                            //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
+                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                            //        return;
+                            //    }
+                            //}
+                            //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
+                            //    if (gmsk(i,j,kk)==0) {
+                            //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
+                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                            //        return;
+                            //    }
+                            //}
+                        }
+                    });
+                }
             }
         }
     }

From 3feb26ae2d0c598dcce2cea657f521ecc85b9c75 Mon Sep 17 00:00:00 2001
From: Hannah Klion <hannah.klion@gmail.com>
Date: Thu, 29 Feb 2024 14:49:06 -0800
Subject: [PATCH 03/25] add diagnostic for processor number

---
 .../ComputeDiagFunctors/CMakeLists.txt        |  1 +
 .../ComputeDiagFunctors/ProcessorMapFunctor.H | 45 +++++++++++++++++++
 .../ProcessorMapFunctor.cpp                   | 37 +++++++++++++++
 Source/Diagnostics/FullDiagnostics.cpp        |  3 ++
 4 files changed, 86 insertions(+)
 create mode 100644 Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.H
 create mode 100644 Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.cpp

diff --git a/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt b/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt
index 2ef6af16bfe..f0ff1df2d22 100644
--- a/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt
+++ b/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt
@@ -12,5 +12,6 @@ foreach(D IN LISTS WarpX_DIMS)
         BackTransformFunctor.cpp
         BackTransformParticleFunctor.cpp
         ParticleReductionFunctor.cpp
+        ProcessorMapFunctor.cpp
     )
 endforeach()
diff --git a/Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.H b/Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.H
new file mode 100644
index 00000000000..506f2501d43
--- /dev/null
+++ b/Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.H
@@ -0,0 +1,45 @@
+#ifndef WARPX_PROCESSORMAPFUNCTOR_H_
+#define WARPX_PROCESSORMAPFUNCTOR_H_
+
+#include "ComputeDiagFunctor.H"
+
+#include <AMReX_BaseFwd.H>
+
+/**
+ * \brief Functor to cell-center MF and store result in mf_out.
+ */
+class ProcessorMapFunctor final : public ComputeDiagFunctor
+{
+public:
+    /** Constructor.
+     *
+     * \param[in] mf_src source multifab.
+     * \param[in] lev level of multifab. Used for averaging in rz.
+     * \param[in] crse_ratio for interpolating field values from the simulation MultiFab, src_mf,
+                  to the output diagnostic MultiFab, mf_dst.
+     * \param[in] convertRZmodes2cartesian (in cylindrical) whether to
+     *            sum all modes in mf_src before cell-centering into dst multifab.
+     * \param[in] ncomp Number of component of mf_src to cell-center in dst multifab.
+     */
+    ProcessorMapFunctor(const amrex::MultiFab * mf_src, int lev,
+                      amrex::IntVect crse_ratio,
+                      bool convertRZmodes2cartesian=true, int ncomp=1);
+    /** \brief Cell-center m_mf_src and write the result in mf_dst.
+     *
+     * In cylindrical geometry, by default this functor average all components
+     * of a MultiFab and writes into one single component.
+     *
+     * \param[out] mf_dst output MultiFab where the result is written
+     * \param[in] dcomp first component of mf_dst in which cell-centered
+     *            data is stored
+     */
+    void operator()(amrex::MultiFab& mf_dst, int dcomp, int /*i_buffer=0*/) const override;
+private:
+    /** pointer to source multifab (can be multi-component) */
+    amrex::MultiFab const * const m_mf_src = nullptr;
+    int m_lev; /**< level on which mf_src is defined (used in cylindrical) */
+    /**< (for cylindrical) whether to average all modes into 1 comp */
+    bool m_convertRZmodes2cartesian;
+};
+
+#endif // WARPX_CELLCENTERFUNCTOR_H_
diff --git a/Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.cpp b/Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.cpp
new file mode 100644
index 00000000000..33cb8cb9233
--- /dev/null
+++ b/Source/Diagnostics/ComputeDiagFunctors/ProcessorMapFunctor.cpp
@@ -0,0 +1,37 @@
+#include "ProcessorMapFunctor.H"
+
+#include "WarpX.H"
+
+#include <AMReX.H>
+#include <AMReX_IntVect.H>
+#include <AMReX_MultiFab.H>
+
+ProcessorMapFunctor::ProcessorMapFunctor(amrex::MultiFab const * mf_src, int lev,
+                                     amrex::IntVect crse_ratio,
+                                     bool convertRZmodes2cartesian, int ncomp)
+    : ComputeDiagFunctor(ncomp, crse_ratio), m_mf_src(mf_src), m_lev(lev),
+      m_convertRZmodes2cartesian(convertRZmodes2cartesian)
+{}
+
+void
+ProcessorMapFunctor::operator()(amrex::MultiFab& mf_dst, int dcomp, const int /*i_buffer*/) const
+{
+    std::unique_ptr<amrex::MultiFab> tmp;
+    tmp = std::make_unique<amrex::MultiFab>(m_mf_src->boxArray(), m_mf_src->DistributionMap(), 1, m_mf_src->nGrowVect());
+
+    auto& warpx = WarpX::GetInstance();
+    const amrex::DistributionMapping& dm = warpx.DistributionMap(m_lev);
+    for (amrex::MFIter mfi(*tmp, false); mfi.isValid(); ++mfi)
+    {
+        auto bx = mfi.growntilebox(tmp->nGrowVect());
+        auto arr = tmp->array(mfi);
+        int iproc = dm[mfi.index()];
+        amrex::ParallelFor(bx,
+            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            {
+                arr(i,j,k) = iproc;
+            });
+    }
+    InterpolateMFForDiag(mf_dst, *tmp, dcomp, warpx.DistributionMap(m_lev),
+                         m_convertRZmodes2cartesian);
+}
diff --git a/Source/Diagnostics/FullDiagnostics.cpp b/Source/Diagnostics/FullDiagnostics.cpp
index d93c67fe893..838d4789e32 100644
--- a/Source/Diagnostics/FullDiagnostics.cpp
+++ b/Source/Diagnostics/FullDiagnostics.cpp
@@ -7,6 +7,7 @@
 #include "ComputeDiagFunctors/PartPerGridFunctor.H"
 #include "ComputeDiagFunctors/ParticleReductionFunctor.H"
 #include "ComputeDiagFunctors/RhoFunctor.H"
+#include "ComputeDiagFunctors/ProcessorMapFunctor.H"
 #include "Diagnostics/Diagnostics.H"
 #include "Diagnostics/ParticleDiag/ParticleDiag.H"
 #include "FlushFormats/FlushFormat.H"
@@ -659,6 +660,8 @@ FullDiagnostics::InitializeFieldFunctors (int lev)
             m_all_field_functors[lev][comp] = std::make_unique<DivBFunctor>(warpx.get_array_Bfield_aux(lev), lev, m_crse_ratio);
         } else if ( m_varnames[comp] == "divE" ){
             m_all_field_functors[lev][comp] = std::make_unique<DivEFunctor>(warpx.get_array_Efield_aux(lev), lev, m_crse_ratio);
+        } else if ( m_varnames[comp] == "proc_number" ){
+            m_all_field_functors[lev][comp] = std::make_unique<ProcessorMapFunctor>(warpx.get_pointer_Efield_aux(lev,0), lev, m_crse_ratio);
         }
         else {
 

From 350b959531a19749cb6d9c82f164b73599c08fed Mon Sep 17 00:00:00 2001
From: Hannah Klion <hannah.klion@gmail.com>
Date: Thu, 29 Feb 2024 14:52:34 -0800
Subject: [PATCH 04/25] use MakeSimilarDM and also attempt to average dowm the
 costs

---
 Source/Parallelization/WarpXRegrid.cpp | 38 +++++++++++++++++++-------
 Source/WarpX.H                         |  2 ++
 Source/WarpX.cpp                       |  1 +
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index dcea3cab58a..17dfd6b93d6 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -19,6 +19,8 @@
 #include "Utils/WarpXAlgorithmSelection.H"
 #include "Utils/WarpXProfilerWrapper.H"
 
+#include <ablastr/coarsen/sample.H>
+
 #include <AMReX.H>
 #include <AMReX_BLassert.H>
 #include <AMReX_Box.H>
@@ -69,6 +71,12 @@ WarpX::LoadBalance ()
     int loadBalancedAnyLevel = false;
 
     const int nLevels = finestLevel();
+    if (do_similar_dm_refpatch) {
+        for (int lev = nLevels; lev > 0; --lev) {
+            ablastr::coarsen::sample::Coarsen(*costs[lev-1], *costs[lev],0,0,1,0,WarpX::RefRatio(lev-1));
+        }
+    }
+
     for (int lev = 0; lev <= nLevels; ++lev)
     {
         int doLoadBalance = false;
@@ -83,16 +91,26 @@ WarpX::LoadBalance ()
         amrex::Real currentEfficiency = 0.0;
         amrex::Real proposedEfficiency = 0.0;
 
-        newdm = (load_balance_with_sfc)
-            ? DistributionMapping::makeSFC(*costs[lev],
-                                           currentEfficiency, proposedEfficiency,
-                                           false,
-                                           ParallelDescriptor::IOProcessorNumber())
-            : DistributionMapping::makeKnapSack(*costs[lev],
-                                                currentEfficiency, proposedEfficiency,
-                                                nmax,
-                                                false,
-                                                ParallelDescriptor::IOProcessorNumber());
+        if (lev == 0 || !do_similar_dm_refpatch) {
+            newdm = (load_balance_with_sfc)
+                ? DistributionMapping::makeSFC(*costs[lev],
+                                               currentEfficiency, proposedEfficiency,
+                                               false,
+                                               ParallelDescriptor::IOProcessorNumber())
+                : DistributionMapping::makeKnapSack(*costs[lev],
+                                                    currentEfficiency, proposedEfficiency,
+                                                    nmax,
+                                                    false,
+                                                    ParallelDescriptor::IOProcessorNumber());
+        } else {
+            amrex::BoxArray coarse_ba = boxArray(lev-1);
+            amrex::DistributionMapping coarse_dm = DistributionMap(lev-1);
+            amrex::BoxArray ba = boxArray(lev);
+            ba.coarsen(WarpX::RefRatio(lev-1));
+            newdm = amrex::MakeSimilarDM(ba, coarse_ba, coarse_dm, getngEB());
+        }
+        Print() << "new dm on lev " << lev << ": \n";
+        Print() << newdm << std::endl;
         // As specified in the above calls to makeSFC and makeKnapSack, the new
         // distribution mapping is NOT communicated to all ranks; the loadbalanced
         // dm is up-to-date only on root, and we can decide whether to broadcast
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 6526171ff67..d24ab99e6aa 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -1641,6 +1641,8 @@ private:
      * uniform plasma on a domain of size 128 by 128 by 128, from which the approximate
      * time per iteration per particle is computed. */
     amrex::Real costs_heuristic_particles_wt = amrex::Real(0);
+    /** Whether to use MakeSimilarDM when load balancing lev > 1 */
+    int do_similar_dm_refpatch = 0;
 
     // Determines timesteps for override sync
     utils::parser::IntervalsParser override_sync_intervals;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index c929718b043..272e40ef0c4 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -1307,6 +1307,7 @@ WarpX::ReadParameters ()
         load_balance_intervals = utils::parser::IntervalsParser(
             load_balance_intervals_string_vec);
         pp_algo.query("load_balance_with_sfc", load_balance_with_sfc);
+        pp_algo.query("do_similar_dm_refpatch", do_similar_dm_refpatch);
         // Knapsack factor only used with non-SFC strategy
         if (!load_balance_with_sfc) {
             pp_algo.query("load_balance_knapsack_factor", load_balance_knapsack_factor);

From 2dd4822e9aff74a12576039711dd8e82f091f5f3 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Thu, 29 Feb 2024 17:55:00 -0800
Subject: [PATCH 05/25] Add Andrews SFC using vectorized levels

---
 Source/Parallelization/WarpXRegrid.cpp | 196 ++++++++++++++++++-------
 Source/WarpX.H                         |   2 +
 Source/WarpX.cpp                       |   1 +
 3 files changed, 147 insertions(+), 52 deletions(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index 17dfd6b93d6..68b9cb88d5f 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -71,82 +71,174 @@ WarpX::LoadBalance ()
     int loadBalancedAnyLevel = false;
 
     const int nLevels = finestLevel();
-    if (do_similar_dm_refpatch) {
-        for (int lev = nLevels; lev > 0; --lev) {
-            ablastr::coarsen::sample::Coarsen(*costs[lev-1], *costs[lev],0,0,1,0,WarpX::RefRatio(lev-1));
+    if (do_SFC_dm_vectorlevel) {
+        amrex::Vector<amrex::Real> rcost;
+        amrex::Vector<int> current_pmap;
+        for (int lev = 0; lev <= nLevels; ++lev)
+        {
+            amrex::Vector<amrex::Real> rcost_lev(costs[lev]->size());
+            amrex::ParallelDescriptor::GatherLayoutDataToVector<amrex::Real>(*costs[lev],rcost_lev,
+                amrex::ParallelDescriptor::IOProcessorNumber());
+            rcost.insert(rcost.end(), rcost_lev.begin(), rcost_lev.end());
+            auto& pmap_lev = costs[lev]->DistributionMap().ProcessorMap();
+            current_pmap.insert(current_pmap.end(), pmap_lev.begin(), pmap_lev.end());
         }
-    }
-
-    for (int lev = 0; lev <= nLevels; ++lev)
-    {
         int doLoadBalance = false;
-
-        // Compute the new distribution mapping
-        DistributionMapping newdm;
-        const amrex::Real nboxes = costs[lev]->size();
+        amrex::Real currentEfficiency = 0.;
+        amrex::Real proposedEfficiency = 0.;
+        const amrex::Real nboxes = rcost.size();
         const amrex::Real nprocs = ParallelContext::NProcsSub();
         const int nmax = static_cast<int>(std::ceil(nboxes/nprocs*load_balance_knapsack_factor));
-        // These store efficiency (meaning, the  average 'cost' over all ranks,
-        // normalized to max cost) for current and proposed distribution mappings
-        amrex::Real currentEfficiency = 0.0;
-        amrex::Real proposedEfficiency = 0.0;
-
-        if (lev == 0 || !do_similar_dm_refpatch) {
-            newdm = (load_balance_with_sfc)
-                ? DistributionMapping::makeSFC(*costs[lev],
-                                               currentEfficiency, proposedEfficiency,
-                                               false,
-                                               ParallelDescriptor::IOProcessorNumber())
-                : DistributionMapping::makeKnapSack(*costs[lev],
-                                                    currentEfficiency, proposedEfficiency,
-                                                    nmax,
-                                                    false,
-                                                    ParallelDescriptor::IOProcessorNumber());
-        } else {
-            amrex::BoxArray coarse_ba = boxArray(lev-1);
-            amrex::DistributionMapping coarse_dm = DistributionMap(lev-1);
-            amrex::BoxArray ba = boxArray(lev);
-            ba.coarsen(WarpX::RefRatio(lev-1));
-            newdm = amrex::MakeSimilarDM(ba, coarse_ba, coarse_dm, getngEB());
-        }
-        Print() << "new dm on lev " << lev << ": \n";
-        Print() << newdm << std::endl;
-        // As specified in the above calls to makeSFC and makeKnapSack, the new
-        // distribution mapping is NOT communicated to all ranks; the loadbalanced
-        // dm is up-to-date only on root, and we can decide whether to broadcast
-        if ((load_balance_efficiency_ratio_threshold > 0.0)
-            && (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber()))
+
+        amrex::BoxArray refined_ba = boxArray(0);
+        for (int lev = 1; lev <= nLevels; ++lev)
         {
-            doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
+            refined_ba.refine(refRatio(lev-1));
+            amrex::BoxList refined_bl = refined_ba.boxList();
+            refined_bl.join(boxArray(lev).boxList());
+            refined_ba = amrex::BoxArray(refined_bl);
         }
 
+        amrex::Vector<amrex::DistributionMapping> newdm(nLevels+1);
+        amrex::DistributionMapping r;
+        if (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber())
+        {
+            std::vector<amrex::Long> cost(rcost.size());
+
+            amrex::Real wmax = *std::max_element(rcost.begin(), rcost.end());
+            amrex::Real scale = (wmax == 0) ? 1.e9_rt : 1.e9_rt/wmax;
+
+            for (int i = 0; i < rcost.size(); ++i) {
+                cost[i] = amrex::Long(rcost[i]*scale) + 1L;
+            }
+
+            // `sort` needs to be false here since there's a parallel reduce function
+            // in the processor map function, but we are executing only on root
+            int nprocs = ParallelDescriptor::NProcs();
+            r.SFCProcessorMap(refined_ba, cost, nprocs, proposedEfficiency, false);
+
+            amrex::DistributionMapping::ComputeDistributionMappingEfficiency(r,
+                                                                             rcost,
+                                                                             &currentEfficiency);
+
+            if ((load_balance_efficiency_ratio_threshold > 0.0))
+            {
+                doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
+            }
+            amrex::Print() << " doloadbalance : " << doLoadBalance << "\n";
+            amrex::Print() << proposedEfficiency << "\n";
+            amrex::Print() << currentEfficiency << "\n";
+        }
         ParallelDescriptor::Bcast(&doLoadBalance, 1,
                                   ParallelDescriptor::IOProcessorNumber());
 
         if (doLoadBalance)
         {
-            Vector<int> pmap;
+            amrex::Vector<int> pmap(rcost.size());
             if (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber())
             {
-                pmap = newdm.ProcessorMap();
+                pmap = r.ProcessorMap();
             } else
             {
                 pmap.resize(static_cast<std::size_t>(nboxes));
             }
-            ParallelDescriptor::Bcast(pmap.data(), pmap.size(), ParallelDescriptor::IOProcessorNumber());
+            // Broadcast vector from which to construct new distribution mapping
+            amrex::ParallelDescriptor::Bcast(&pmap[0], pmap.size(), ParallelDescriptor::IOProcessorNumber());
 
-            if (ParallelDescriptor::MyProc() != ParallelDescriptor::IOProcessorNumber())
+            int lev_start = 0;
+            for (int lev = 0; lev <= nLevels; ++lev)
             {
-                newdm = DistributionMapping(pmap);
+                amrex::Vector<int> pmap_lev(pmap.begin() + lev_start,
+                                            pmap.begin() + lev_start + costs[lev]->size());
+                newdm[lev] = amrex::DistributionMapping(pmap_lev);
+                lev_start += costs[lev]->size();
             }
 
-            RemakeLevel(lev, t_new[lev], boxArray(lev), newdm);
-
-            // Record the load balance efficiency
-            setLoadBalanceEfficiency(lev, proposedEfficiency);
+            for (int lev = 0; lev <= nLevels; ++lev)
+            {
+                RemakeLevel(lev, t_new[lev], boxArray(lev), newdm[lev]);
+                setLoadBalanceEfficiency(lev, proposedEfficiency);
+            }
         }
-
         loadBalancedAnyLevel = loadBalancedAnyLevel || doLoadBalance;
+    } else {
+        //if (do_similar_dm_refpatch) {
+        //    for (int lev = nLevels; lev > 0; --lev) {
+        //        ablastr::coarsen::sample::Coarsen(*costs[lev-1], *costs[lev],0,0,1,0,WarpX::RefRatio(lev-1));
+        //    }
+        //}
+
+        for (int lev = 0; lev <= nLevels; ++lev)
+        {
+            int doLoadBalance = false;
+
+            // Compute the new distribution mapping
+            DistributionMapping newdm;
+            const amrex::Real nboxes = costs[lev]->size();
+            const amrex::Real nprocs = ParallelContext::NProcsSub();
+            const int nmax = static_cast<int>(std::ceil(nboxes/nprocs*load_balance_knapsack_factor));
+            // These store efficiency (meaning, the  average 'cost' over all ranks,
+            // normalized to max cost) for current and proposed distribution mappings
+            amrex::Real currentEfficiency = 0.0;
+            amrex::Real proposedEfficiency = 0.0;
+
+            if (lev == 0 || !do_similar_dm_refpatch) {
+                newdm = (load_balance_with_sfc)
+                    ? DistributionMapping::makeSFC(*costs[lev],
+                                                   currentEfficiency, proposedEfficiency,
+                                                   false,
+                                                   ParallelDescriptor::IOProcessorNumber())
+                    : DistributionMapping::makeKnapSack(*costs[lev],
+                                                        currentEfficiency, proposedEfficiency,
+                                                        nmax,
+                                                        false,
+                                                        ParallelDescriptor::IOProcessorNumber());
+            } else {
+                amrex::BoxArray coarse_ba = boxArray(lev-1);
+                amrex::DistributionMapping coarse_dm = DistributionMap(lev-1);
+                amrex::BoxArray ba = boxArray(lev);
+                ba.coarsen(WarpX::RefRatio(lev-1));
+                newdm = amrex::MakeSimilarDM(ba, coarse_ba, coarse_dm, getngEB());
+            }
+            Print() << "new dm on lev " << lev << ": \n";
+            Print() << newdm << std::endl;
+            // As specified in the above calls to makeSFC and makeKnapSack, the new
+            // distribution mapping is NOT communicated to all ranks; the loadbalanced
+            // dm is up-to-date only on root, and we can decide whether to broadcast
+            if ((load_balance_efficiency_ratio_threshold > 0.0)
+                && (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber()))
+            {
+                doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
+            }
+
+            ParallelDescriptor::Bcast(&doLoadBalance, 1,
+                                      ParallelDescriptor::IOProcessorNumber());
+
+            if (doLoadBalance)
+            {
+                Vector<int> pmap;
+                if (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber())
+                {
+                    pmap = newdm.ProcessorMap();
+                } else
+                {
+                    pmap.resize(static_cast<std::size_t>(nboxes));
+                }
+                ParallelDescriptor::Bcast(pmap.data(), pmap.size(), ParallelDescriptor::IOProcessorNumber());
+
+                if (ParallelDescriptor::MyProc() != ParallelDescriptor::IOProcessorNumber())
+                {
+                    newdm = DistributionMapping(pmap);
+                }
+
+                RemakeLevel(lev, t_new[lev], boxArray(lev), newdm);
+
+                // Record the load balance efficiency
+                setLoadBalanceEfficiency(lev, proposedEfficiency);
+            }
+
+            loadBalancedAnyLevel = loadBalancedAnyLevel || doLoadBalance;
+        }
     }
     if (loadBalancedAnyLevel)
     {
diff --git a/Source/WarpX.H b/Source/WarpX.H
index d24ab99e6aa..3b256fdfb13 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -1643,6 +1643,8 @@ private:
     amrex::Real costs_heuristic_particles_wt = amrex::Real(0);
     /** Whether to use MakeSimilarDM when load balancing lev > 1 */
     int do_similar_dm_refpatch = 0;
+    /** Whether to use SFC on vectorized BoxArrays from all levels */
+    int do_SFC_dm_vectorlevel = 0;
 
     // Determines timesteps for override sync
     utils::parser::IntervalsParser override_sync_intervals;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 272e40ef0c4..d7b613da8bd 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -1308,6 +1308,7 @@ WarpX::ReadParameters ()
             load_balance_intervals_string_vec);
         pp_algo.query("load_balance_with_sfc", load_balance_with_sfc);
         pp_algo.query("do_similar_dm_refpatch", do_similar_dm_refpatch);
+        pp_algo.query("do_SFC_dm_vectorlevel",do_SFC_dm_vectorlevel);
         // Knapsack factor only used with non-SFC strategy
         if (!load_balance_with_sfc) {
             pp_algo.query("load_balance_knapsack_factor", load_balance_knapsack_factor);

From e38355e9444ca073e2a41c359160bbca479c74f0 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Wed, 20 Mar 2024 12:01:33 -0700
Subject: [PATCH 06/25] hack to force LB at first LB time

---
 Source/Parallelization/WarpXRegrid.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index ee34d9a6bd5..dc362751882 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -124,6 +124,9 @@ WarpX::LoadBalance ()
             if ((load_balance_efficiency_ratio_threshold > 0.0))
             {
                 doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
+                if (getistep(0) == get_load_balance_intervals()) {
+                    doLoadBalance = true;
+                }
             }
             amrex::Print() << " doloadbalance : " << doLoadBalance << "\n";
             amrex::Print() << proposedEfficiency << "\n";

From 6e8995e9a07ace0f770d9c811a46fd4c7a3fdef2 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Wed, 20 Mar 2024 14:33:22 -0700
Subject: [PATCH 07/25] fix startstep to force LB

---
 Source/Parallelization/WarpXRegrid.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index dc362751882..e7658d54dd3 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -124,7 +124,7 @@ WarpX::LoadBalance ()
             if ((load_balance_efficiency_ratio_threshold > 0.0))
             {
                 doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
-                if (getistep(0) == get_load_balance_intervals()) {
+                if (getistep(0) == 100) {
                     doLoadBalance = true;
                 }
             }

From 769f409488337997f2182d53ffc8cfcef3708131 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Fri, 29 Mar 2024 16:18:19 -0700
Subject: [PATCH 08/25] assigngrid with locator to refine plasma in regions
 overlapping with ref patch

---
 .../Particles/PhysicalParticleContainer.cpp   | 28 +++++++++++++++----
 Source/WarpX.H                                |  1 +
 Source/WarpX.cpp                              |  2 ++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 160eac0d19c..9ec1009cfcc 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -978,6 +978,23 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
         rrfac = m_gdb->refRatio(0);
         fine_injection_box.coarsen(rrfac);
     }
+    static bool refineplasma = false;
+    amrex::ParticleLocator<amrex::DenseBins<amrex::Box> > refinepatch_locator;
+    if (WarpX::refineAddplasma)
+    {
+        refineplasma = true;
+        rrfac = m_gdb->refRatio(0);
+    }
+    auto fineba = ParticleBoxArray(1);
+    auto coarsened_fineba = fineba;
+    coarsened_fineba.coarsen(rrfac);
+    if (!refinepatch_locator.isValid(coarsened_fineba)) {
+        refinepatch_locator.build(coarsened_fineba, Geom(0));
+    }
+    refinepatch_locator.setGeometry(Geom(0));
+    auto assignpartgrid = refinepatch_locator.getGridAssignor();
+        // if assign_grid(ijk_vec) > 0, then we are in refinement patch. therefore refine plasma particles
+        // else, usual num_part
 
     InjectorPosition* inj_pos = plasma_injector.getInjectorPosition();
     InjectorDensity*  inj_rho = plasma_injector.getInjectorDensity();
@@ -1070,11 +1087,12 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
 
             lo.z = applyBallisticCorrection(lo, inj_mom, gamma_boost, beta_boost, t);
             hi.z = applyBallisticCorrection(hi, inj_mom, gamma_boost, beta_boost, t);
-
             if (inj_pos->overlapsWith(lo, hi))
             {
                 auto index = overlap_box.index(iv);
-                const amrex::Long r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv))?
+                amrex::IntVect glo_iv = iv + tile_box.smallEnd();
+                bool in_refpatch = ( assignpartgrid(glo_iv) < 0 ) ? false : true;
+                const amrex::Long r = ( (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) || (refineplasma && in_refpatch) ) ?
                     (AMREX_D_TERM(lrrfac[0],*lrrfac[1],*lrrfac[2])) : (1);
                 pcounts[index] = num_ppc*r;
                 // update pcount by checking if cell-corners or cell-center
@@ -1113,7 +1131,6 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
         // Max number of new particles. All of them are created,
         // and invalid ones are then discarded
         const amrex::Long max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data());
-
         // Update NextID to include particles created in this function
         amrex::Long pid;
 #ifdef AMREX_USE_OMP
@@ -1238,6 +1255,8 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
         {
             const IntVect iv = IntVect(AMREX_D_DECL(i, j, k));
             const auto index = overlap_box.index(iv);
+            amrex::IntVect glo_iv = iv + tile_box.smallEnd();
+            bool in_refpatch = ( assignpartgrid(glo_iv) < 0 ) ? false : true;
 #ifdef WARPX_DIM_RZ
             Real theta_offset = 0._rt;
             if (rz_random_theta) { theta_offset = amrex::Random(engine) * 2._rt * MathConst::pi; }
@@ -1258,13 +1277,12 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
             {
                 long ip = poffset[index] + i_part;
                 pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid);
-                const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ?
+                const XDim3 r = ( (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) || (refineplasma && in_refpatch))  ?
                   // In the refined injection region: use refinement ratio `lrrfac`
                   inj_pos->getPositionUnitBox(i_part, lrrfac, engine) :
                   // Otherwise: use 1 as the refinement ratio
                   inj_pos->getPositionUnitBox(i_part, amrex::IntVect::TheUnitVector(), engine);
                 auto pos = getCellCoords(overlap_corner, dx, r, iv);
-
 #if defined(WARPX_DIM_3D)
                 if (!tile_realbox.contains(XDim3{pos.x,pos.y,pos.z})) {
                     ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 6526171ff67..38625d9377c 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -335,6 +335,7 @@ public:
 
     static bool do_dynamic_scheduling;
     static bool refine_plasma;
+    static bool refineAddplasma;
 
     static utils::parser::IntervalsParser sort_intervals;
     static amrex::IntVect sort_bin_size;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index c929718b043..f6fceec9fd0 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -173,6 +173,7 @@ bool WarpX::use_filter_compensation = false;
 
 bool WarpX::serialize_initial_conditions = false;
 bool WarpX::refine_plasma     = false;
+bool WarpX::refineAddplasma   = false;
 
 int WarpX::num_mirrors = 0;
 
@@ -882,6 +883,7 @@ WarpX::ReadParameters ()
 
         pp_warpx.query("serialize_initial_conditions", serialize_initial_conditions);
         pp_warpx.query("refine_plasma", refine_plasma);
+        pp_warpx.query("refineAddplasma", refineAddplasma);
         pp_warpx.query("do_dive_cleaning", do_dive_cleaning);
         pp_warpx.query("do_divb_cleaning", do_divb_cleaning);
         utils::parser::queryWithParser(

From bad7b5011e9c9f51ba58fd89f2fe505a62380239 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Fri, 29 Mar 2024 16:32:15 -0700
Subject: [PATCH 09/25] enforce load balance at start step

---
 Source/Parallelization/WarpXRegrid.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index e7658d54dd3..c18dab47e01 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -124,7 +124,7 @@ WarpX::LoadBalance ()
             if ((load_balance_efficiency_ratio_threshold > 0.0))
             {
                 doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
-                if (getistep(0) == 100) {
+                if ((getistep(0)+1) == 100) {
                     doLoadBalance = true;
                 }
             }

From 1bb6369c09143b8995c446e1b9f501f5b9e7f438 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Tue, 9 Apr 2024 14:08:14 -0700
Subject: [PATCH 10/25] flexible ref ratio for add plasma

---
 Source/Particles/PhysicalParticleContainer.cpp | 16 ++++++++--------
 Source/WarpX.H                                 |  1 +
 Source/WarpX.cpp                               | 17 +++++++++++++++++
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index d0fd0acb1c9..268f0b36c93 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -983,15 +983,15 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
     if (WarpX::refineAddplasma)
     {
         refineplasma = true;
-        rrfac = m_gdb->refRatio(0);
-    }
-    auto fineba = ParticleBoxArray(1);
-    auto coarsened_fineba = fineba;
-    coarsened_fineba.coarsen(rrfac);
-    if (!refinepatch_locator.isValid(coarsened_fineba)) {
-        refinepatch_locator.build(coarsened_fineba, Geom(0));
+        rrfac = WarpX::AddplasmaRefRatio;
+        auto fineba = ParticleBoxArray(1);
+        auto coarsened_fineba = fineba;
+        coarsened_fineba.coarsen(m_gdb->refRatio(0));
+        if (!refinepatch_locator.isValid(coarsened_fineba)) {
+            refinepatch_locator.build(coarsened_fineba, Geom(0));
+        }
+        refinepatch_locator.setGeometry(Geom(0));
     }
-    refinepatch_locator.setGeometry(Geom(0));
     auto assignpartgrid = refinepatch_locator.getGridAssignor();
         // if assign_grid(ijk_vec) > 0, then we are in refinement patch. therefore refine plasma particles
         // else, usual num_part
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 5c889c95e28..886b5d829c6 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -340,6 +340,7 @@ public:
     static bool do_dynamic_scheduling;
     static bool refine_plasma;
     static bool refineAddplasma;
+    static amrex::IntVect AddplasmaRefRatio;
 
     static utils::parser::IntervalsParser sort_intervals;
     static amrex::IntVect sort_bin_size;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index dbfb1fa096c..4353cae8ef1 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -177,6 +177,7 @@ bool WarpX::use_filter_compensation = false;
 bool WarpX::serialize_initial_conditions = false;
 bool WarpX::refine_plasma     = false;
 bool WarpX::refineAddplasma   = false;
+amrex::IntVect WarpX::AddplasmaRefRatio(AMREX_D_DECL(1,1,1));
 
 int WarpX::num_mirrors = 0;
 
@@ -887,6 +888,22 @@ WarpX::ReadParameters ()
         pp_warpx.query("serialize_initial_conditions", serialize_initial_conditions);
         pp_warpx.query("refine_plasma", refine_plasma);
         pp_warpx.query("refineAddplasma", refineAddplasma);
+        amrex::Vector<int> addplasma_ref_ratio(AMREX_SPACEDIM,1);
+        const bool addplasma_ref_ratio_specified =
+            utils::parser::queryArrWithParser(
+                pp_warpx, "refineplasma_refratio",
+                addplasma_ref_ratio, 0, AMREX_SPACEDIM);
+        if (addplasma_ref_ratio_specified){
+            amrex::Print() << " refratio for addplasma is : ";
+            for (int i=0; i<AMREX_SPACEDIM; i++) {
+                AddplasmaRefRatio[i] = addplasma_ref_ratio[i];
+                amrex::Print() << " " << AddplasmaRefRatio[i];
+            }
+            amrex::Print() << "\n";
+        } else {
+            AddplasmaRefRatio = RefRatio[0];
+        }
+
         pp_warpx.query("do_dive_cleaning", do_dive_cleaning);
         pp_warpx.query("do_divb_cleaning", do_divb_cleaning);
         utils::parser::queryWithParser(

From 59e9012c74be2fb76a3058da21f8205ed45d911f Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Tue, 9 Apr 2024 14:10:36 -0700
Subject: [PATCH 11/25] refineplasma not a static var

---
 Source/Particles/PhysicalParticleContainer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 268f0b36c93..508455a7fa3 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -978,7 +978,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
         rrfac = m_gdb->refRatio(0);
         fine_injection_box.coarsen(rrfac);
     }
-    static bool refineplasma = false;
+    bool refineplasma = false;
     amrex::ParticleLocator<amrex::DenseBins<amrex::Box> > refinepatch_locator;
     if (WarpX::refineAddplasma)
     {

From 641fdca7bb4088641ac7255a44192d562ad4f932 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Mon, 22 Apr 2024 10:20:50 -0700
Subject: [PATCH 12/25] comment in buffer mask for x-direction

---
 Source/WarpX.cpp | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index e8ac8a679fe..9dca8446a09 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -3135,40 +3135,40 @@ WarpX::BuildBufferMasks ()
                                 wtmsk(i,j,k) = 0.;
                                 return;
                             }
-                            //for (int ii = i-1; ii >= i-ngbuffer; --ii) {
-                            //    if (gmsk(ii,j,k)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
-                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                            //        amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
-                            //        return;
-                            //    }
-                            //}
-                            //for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
-                            //    if (gmsk(ii,j,k)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
-                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
-                            //        return;
-                            //    }
-                            //}
-                            for (int jj = j-1; jj >= j-ngbuffer; --jj) {
-                                if (gmsk(i,jj,k)==0) {
-                                    amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
+                            for (int ii = i-1; ii >= i-ngbuffer; --ii) {
+                                if (gmsk(ii,j,k)==0) {
+                                    amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
                                                       / ((1.-tanh_midpoint)*(ngbuffer/3.));
                                     wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                                    amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
                                     return;
                                 }
                             }
-                            for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
-                                if (gmsk(i,jj,k)==0) {
-                                    amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
+                            for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
+                                if (gmsk(ii,j,k)==0) {
+                                    amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
                                                       / ((1.-tanh_midpoint)*(ngbuffer/3.));
                                     wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                                    amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
                                     return;
                                 }
                             }
+                            //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
+                            //    if (gmsk(i,jj,k)==0) {
+                            //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
+                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                            //        return;
+                            //    }
+                            //}
+                            //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
+                            //    if (gmsk(i,jj,k)==0) {
+                            //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
+                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                            //        return;
+                            //    }
+                            //}
                             //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
                             //    if (gmsk(i,j,kk)==0) {
                             //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)

From 505546e0a022e328e9ed17921a088775db42c338 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Mon, 22 Apr 2024 11:09:23 -0700
Subject: [PATCH 13/25] fix RefRation call

---
 Source/WarpX.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 62186e723df..09d15e48c5d 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -904,7 +904,7 @@ WarpX::ReadParameters ()
             }
             amrex::Print() << "\n";
         } else {
-            AddplasmaRefRatio = RefRatio[0];
+            AddplasmaRefRatio = RefRatio(0);
         }
 
         pp_warpx.query("do_dive_cleaning", do_dive_cleaning);

From 8c137ca81fb90d932ea3dca2bc4ac3c48862b535 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Mon, 22 Apr 2024 12:13:03 -0700
Subject: [PATCH 14/25] parallel for outside for cuda

---
 Source/WarpX.H   |   4 ++
 Source/WarpX.cpp | 150 ++++++++++++++++++++++++++++-------------------
 2 files changed, 95 insertions(+), 59 deletions(-)

diff --git a/Source/WarpX.H b/Source/WarpX.H
index 2ff7ce3fe74..1c99b47562e 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -1127,6 +1127,10 @@ public:
     // for cuda
     void BuildBufferMasksInBox ( amrex::Box tbx, amrex::IArrayBox &buffer_mask,
                                  const amrex::IArrayBox &guard_mask, int ng );
+
+    void SetWeightsInGatherBuffer(const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
+                                  const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk,
+                                  const int ngbuffer, const bool do_interpolate,amrex::Real tanh_midpoint);
 #ifdef AMREX_USE_EB
     amrex::EBFArrayBoxFactory const& fieldEBFactory (int lev) const noexcept {
         return static_cast<amrex::EBFArrayBoxFactory const&>(*m_field_factory[lev]);
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 09d15e48c5d..9f1bfcf36be 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -3149,71 +3149,103 @@ WarpX::BuildBufferMasks ()
                     auto const& gmsk = tmp[mfi].const_array();
                     auto const& bmsk = (*bmasks)[mfi].array();
                     auto const& wtmsk = (*weight_gbuffer)[mfi].array();
-                    amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
-                        wtmsk(i,j,k) = 0._rt;
-                        if (bmsk(i,j,k) == 0 && do_interpolate) {
-                            if(gmsk(i,j,k)==0) {
-                                wtmsk(i,j,k) = 0.;
-                                return;
-                            }
-                            for (int ii = i-1; ii >= i-ngbuffer; --ii) {
-                                if (gmsk(ii,j,k)==0) {
-                                    amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
-                                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                                    wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                                    amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
-                                    return;
-                                }
-                            }
-                            for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
-                                if (gmsk(ii,j,k)==0) {
-                                    amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
-                                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                                    wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                                    amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
-                                    return;
-                                }
-                            }
-                            //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
-                            //    if (gmsk(i,jj,k)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
-                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                            //        return;
-                            //    }
-                            //}
-                            //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
-                            //    if (gmsk(i,jj,k)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
-                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        return;
-                            //    }
-                            //}
-                            //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
-                            //    if (gmsk(i,j,kk)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
-                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        return;
-                            //    }
-                            //}
-                            //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
-                            //    if (gmsk(i,j,kk)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
-                            //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        return;
-                            //    }
-                            //}
-                        }
-                    });
+                    SetWeightsInGatherBuffer(tbx, wtmsk, gmsk, bmsk, ngbuffer, do_interpolate, tanh_midpoint);
+                    //amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                    //    wtmsk(i,j,k) = 0._rt;
+                    //    if (bmsk(i,j,k) == 0 && do_interpolate) {
+                    //        if(gmsk(i,j,k)==0) {
+                    //            wtmsk(i,j,k) = 0.;
+                    //            return;
+                    //        }
+                    //        for (int ii = i-1; ii >= i-ngbuffer; --ii) {
+                    //            if (gmsk(ii,j,k)==0) {
+                    //                amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
+                    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    //                wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                    //                amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
+                    //                return;
+                    //            }
+                    //        }
+                    //        for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
+                    //            if (gmsk(ii,j,k)==0) {
+                    //                amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
+                    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    //                wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                    //                amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
+                    //                return;
+                    //            }
+                    //        }
+                    //        //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
+                    //        //    if (gmsk(i,jj,k)==0) {
+                    //        //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
+                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                    //        //        return;
+                    //        //    }
+                    //        //}
+                    //        //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
+                    //        //    if (gmsk(i,jj,k)==0) {
+                    //        //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
+                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                    //        //        return;
+                    //        //    }
+                    //        //}
+                    //        //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
+                    //        //    if (gmsk(i,j,kk)==0) {
+                    //        //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
+                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                    //        //        return;
+                    //        //    }
+                    //        //}
+                    //        //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
+                    //        //    if (gmsk(i,j,kk)==0) {
+                    //        //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
+                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                    //        //        return;
+                    //        //    }
+                    //        //}
+                    //    }
+                    //});
                 }
             }
         }
     }
 }
 
+void
+WarpX::SetWeightsInGatherBuffer(const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
+                                const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk, const int ngbuffer, const bool do_interpolate, amrex::Real tanh_midpoint)
+{
+    amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+        wtmsk(i,j,k) = 0._rt;
+        if (bmsk(i,j,k) == 0 && do_interpolate) {
+            if(gmsk(i,j,k)==0) {
+                wtmsk(i,j,k) = 0.;
+                return;
+            }
+            for (int ii = i-1; ii >= i-ngbuffer; --ii) {
+                if (gmsk(ii,j,k)==0) {
+                    amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
+                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                    return;
+                }
+            }
+            for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
+                if (gmsk(ii,j,k)==0) {
+                    amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
+                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                    wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                    return;
+                }
+            }
+        }        
+    });
+}
+
 /**
  * \brief Build buffer mask within given FArrayBox
  *

From 5788d747e43d9de9630b84f1ccbff996d685e44c Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Fri, 3 May 2024 13:37:34 -0400
Subject: [PATCH 15/25] start level for LB

---
 Source/Parallelization/WarpXRegrid.cpp | 4 ++--
 Source/WarpX.H                         | 2 +-
 Source/WarpX.cpp                       | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index a8de91faf40..404cf67aea4 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -170,8 +170,8 @@ WarpX::LoadBalance ()
         //        ablastr::coarsen::sample::Coarsen(*costs[lev-1], *costs[lev],0,0,1,0,WarpX::RefRatio(lev-1));
         //    }
         //}
-
-        for (int lev = 0; lev <= nLevels; ++lev)
+        int startlev = WarpX::load_balance_startlevel;
+        for (int lev = startlev; lev <= nLevels; ++lev)
         {
             int doLoadBalance = false;
 
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 1c99b47562e..f8ee36de410 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -708,7 +708,7 @@ public:
     {
         return load_balance_intervals;
     }
-
+    static int load_balance_startlevel;
     /**
      * \brief Private function for spectral solver
      * Applies a damping factor in the guards cells that extend
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 9f1bfcf36be..5d11ea5c209 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -127,6 +127,7 @@ bool WarpX::do_single_precision_comms = false;
 bool WarpX::do_subcycle_current = false;
 int WarpX::n_subcycle_current = 1;
 bool WarpX::do_abc_in_pml = false;
+int WarpX::load_balance_startlevel = 0;
 
 bool WarpX::do_shared_mem_charge_deposition = false;
 bool WarpX::do_shared_mem_current_deposition = false;
@@ -1353,6 +1354,7 @@ WarpX::ReadParameters ()
         pp_algo.query("load_balance_with_sfc", load_balance_with_sfc);
         pp_algo.query("do_similar_dm_refpatch", do_similar_dm_refpatch);
         pp_algo.query("do_SFC_dm_vectorlevel",do_SFC_dm_vectorlevel);
+	pp_algo.query("load_balance_startlevel",load_balance_startlevel);
         // Knapsack factor only used with non-SFC strategy
         if (!load_balance_with_sfc) {
             pp_algo.query("load_balance_knapsack_factor", load_balance_knapsack_factor);

From 19566fa74d8ab8727eea04fad6c4dff61adde558 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Thu, 9 May 2024 17:06:28 -0400
Subject: [PATCH 16/25] remote makeSimilarDM

---
 Source/Parallelization/WarpXRegrid.cpp | 36 +++++++++++++-------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index 404cf67aea4..e2ce251494a 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -185,24 +185,24 @@ WarpX::LoadBalance ()
             amrex::Real currentEfficiency = 0.0;
             amrex::Real proposedEfficiency = 0.0;
 
-            if (lev == 0 || !do_similar_dm_refpatch) {
-                newdm = (load_balance_with_sfc)
-                    ? DistributionMapping::makeSFC(*costs[lev],
-                                                   currentEfficiency, proposedEfficiency,
-                                                   false,
-                                                   ParallelDescriptor::IOProcessorNumber())
-                    : DistributionMapping::makeKnapSack(*costs[lev],
-                                                        currentEfficiency, proposedEfficiency,
-                                                        nmax,
-                                                        false,
-                                                        ParallelDescriptor::IOProcessorNumber());
-            } else {
-                amrex::BoxArray coarse_ba = boxArray(lev-1);
-                amrex::DistributionMapping coarse_dm = DistributionMap(lev-1);
-                amrex::BoxArray ba = boxArray(lev);
-                ba.coarsen(WarpX::RefRatio(lev-1));
-                newdm = amrex::MakeSimilarDM(ba, coarse_ba, coarse_dm, getngEB());
-            }
+            //if (lev == 0 || !do_similar_dm_refpatch) {
+            newdm = (load_balance_with_sfc)
+                ? DistributionMapping::makeSFC(*costs[lev],
+                                               currentEfficiency, proposedEfficiency,
+                                               false,
+                                               ParallelDescriptor::IOProcessorNumber())
+                : DistributionMapping::makeKnapSack(*costs[lev],
+                                                    currentEfficiency, proposedEfficiency,
+                                                    nmax,
+                                                    false,
+                                                    ParallelDescriptor::IOProcessorNumber());
+            //} else {
+            //    amrex::BoxArray coarse_ba = boxArray(lev-1);
+            //    amrex::DistributionMapping coarse_dm = DistributionMap(lev-1);
+            //    amrex::BoxArray ba = boxArray(lev);
+            //    ba.coarsen(WarpX::RefRatio(lev-1));
+            //    newdm = amrex::MakeSimilarDM(ba, coarse_ba, coarse_dm, getngEB());
+            //}
             // As specified in the above calls to makeSFC and makeKnapSack, the new
             // distribution mapping is NOT communicated to all ranks; the loadbalanced
             // dm is up-to-date only on root, and we can decide whether to broadcast

From 0f33db576fcdb245bf98b824b4355d1b54c46fb6 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Thu, 23 May 2024 20:37:57 -0400
Subject: [PATCH 17/25] remove implementation for smoothing

---
 Source/WarpX.cpp | 140 +++++++++++++++++++++++------------------------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 5d11ea5c209..d92d2d53606 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -3142,76 +3142,76 @@ WarpX::BuildBufferMasks ()
                     const Box tbx = mfi.growntilebox();
                     BuildBufferMasksInBox( tbx, (*bmasks)[mfi], tmp[mfi], ngbuffer );
                 }
-                if (ipass==0) continue;
-                amrex::MultiFab* weight_gbuffer = interp_weight_gbuffer[lev].get();
-                // Using tmp to also set weights in the interp_weight_gbuffer multifab
-                for (MFIter mfi(*weight_gbuffer, true); mfi.isValid(); ++mfi)
-                {
-                    const Box& tbx = mfi.tilebox(IntVect::TheNodeVector(),weight_gbuffer->nGrowVect());
-                    auto const& gmsk = tmp[mfi].const_array();
-                    auto const& bmsk = (*bmasks)[mfi].array();
-                    auto const& wtmsk = (*weight_gbuffer)[mfi].array();
-                    SetWeightsInGatherBuffer(tbx, wtmsk, gmsk, bmsk, ngbuffer, do_interpolate, tanh_midpoint);
-                    //amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
-                    //    wtmsk(i,j,k) = 0._rt;
-                    //    if (bmsk(i,j,k) == 0 && do_interpolate) {
-                    //        if(gmsk(i,j,k)==0) {
-                    //            wtmsk(i,j,k) = 0.;
-                    //            return;
-                    //        }
-                    //        for (int ii = i-1; ii >= i-ngbuffer; --ii) {
-                    //            if (gmsk(ii,j,k)==0) {
-                    //                amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
-                    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    //                wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                    //                amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
-                    //                return;
-                    //            }
-                    //        }
-                    //        for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
-                    //            if (gmsk(ii,j,k)==0) {
-                    //                amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
-                    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    //                wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                    //                amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
-                    //                return;
-                    //            }
-                    //        }
-                    //        //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
-                    //        //    if (gmsk(i,jj,k)==0) {
-                    //        //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
-                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                    //        //        return;
-                    //        //    }
-                    //        //}
-                    //        //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
-                    //        //    if (gmsk(i,jj,k)==0) {
-                    //        //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
-                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                    //        //        return;
-                    //        //    }
-                    //        //}
-                    //        //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
-                    //        //    if (gmsk(i,j,kk)==0) {
-                    //        //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
-                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                    //        //        return;
-                    //        //    }
-                    //        //}
-                    //        //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
-                    //        //    if (gmsk(i,j,kk)==0) {
-                    //        //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
-                    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                    //        //        return;
-                    //        //    }
-                    //        //}
-                    //    }
-                    //});
-                }
+                //if (ipass==0) continue;
+                //amrex::MultiFab* weight_gbuffer = interp_weight_gbuffer[lev].get();
+                //// Using tmp to also set weights in the interp_weight_gbuffer multifab
+                //for (MFIter mfi(*weight_gbuffer, true); mfi.isValid(); ++mfi)
+                //{
+                //    const Box& tbx = mfi.tilebox(IntVect::TheNodeVector(),weight_gbuffer->nGrowVect());
+                //    auto const& gmsk = tmp[mfi].const_array();
+                //    auto const& bmsk = (*bmasks)[mfi].array();
+                //    auto const& wtmsk = (*weight_gbuffer)[mfi].array();
+                //    SetWeightsInGatherBuffer(tbx, wtmsk, gmsk, bmsk, ngbuffer, do_interpolate, tanh_midpoint);
+                //    //amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                //    //    wtmsk(i,j,k) = 0._rt;
+                //    //    if (bmsk(i,j,k) == 0 && do_interpolate) {
+                //    //        if(gmsk(i,j,k)==0) {
+                //    //            wtmsk(i,j,k) = 0.;
+                //    //            return;
+                //    //        }
+                //    //        for (int ii = i-1; ii >= i-ngbuffer; --ii) {
+                //    //            if (gmsk(ii,j,k)==0) {
+                //    //                amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
+                //    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                //    //                wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                //    //                amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
+                //    //                return;
+                //    //            }
+                //    //        }
+                //    //        for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
+                //    //            if (gmsk(ii,j,k)==0) {
+                //    //                amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
+                //    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                //    //                wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                //    //                amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
+                //    //                return;
+                //    //            }
+                //    //        }
+                //    //        //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
+                //    //        //    if (gmsk(i,jj,k)==0) {
+                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
+                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
+                //    //        //        return;
+                //    //        //    }
+                //    //        //}
+                //    //        //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
+                //    //        //    if (gmsk(i,jj,k)==0) {
+                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
+                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                //    //        //        return;
+                //    //        //    }
+                //    //        //}
+                //    //        //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
+                //    //        //    if (gmsk(i,j,kk)==0) {
+                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
+                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                //    //        //        return;
+                //    //        //    }
+                //    //        //}
+                //    //        //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
+                //    //        //    if (gmsk(i,j,kk)==0) {
+                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
+                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
+                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
+                //    //        //        return;
+                //    //        //    }
+                //    //        //}
+                //    //    }
+                //    //});
+                //}
             }
         }
     }

From 1ba9dc12039e61cb7e298add58e24e2b9cb284d9 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Thu, 23 May 2024 20:38:28 -0400
Subject: [PATCH 18/25] remove implementation for smoothening

---
 .../Particles/PhysicalParticleContainer.cpp   | 20 +++----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 56d3e6ba718..095a79130dd 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -2091,7 +2091,6 @@ PhysicalParticleContainer::Evolve (int lev,
             FArrayBox const* bzfab = &Bz[pti];
 
             Elixir exeli, eyeli, ezeli, bxeli, byeli, bzeli;
-            Elixir buf_exeli, buf_eyeli, buf_ezeli, buf_bxeli, buf_byeli, buf_bzeli;
 
             if (WarpX::use_fdtd_nci_corr)
             {
@@ -2176,16 +2175,6 @@ PhysicalParticleContainer::Evolve (int lev,
                     FArrayBox const* cbyfab = &(*cBy)[pti];
                     FArrayBox const* cbzfab = &(*cBz)[pti];
 
-                    //// buffer box (Both 2D and 3D)
-                    InterpolateFieldsInGatherBuffer(box, bufferEx, bufferEy, bufferEz,
-                                                         bufferBx, bufferBy, bufferBz,
-                                                         buf_exeli, buf_eyeli, buf_ezeli,
-                                                         buf_bxeli, buf_byeli, buf_bzeli,
-                                                         exfab, eyfab, ezfab, bxfab, byfab, bzfab,
-                                                         cexfab, ceyfab, cezfab, cbxfab, cbyfab, cbzfab,
-                                                         (*gather_masks)[pti], (*weight_gbuffer)[pti],
-                                                         ref_ratio);
-
                     if (WarpX::use_fdtd_nci_corr)
                     {
                         // should this be bufferEFields*
@@ -2203,14 +2192,11 @@ PhysicalParticleContainer::Evolve (int lev,
                     // Field gather and push for particles in gather buffers
                     e_is_nodal = cEx->is_nodal() and cEy->is_nodal() and cEz->is_nodal();
                     if (push_type == PushType::Explicit) {
-                        //PushPX(pti, cexfab, ceyfab, cezfab,
-                        //       cbxfab, cbyfab, cbzfab,
-                        PushPX(pti, &bufferEx, &bufferEy, &bufferEz,
-                               &bufferBx, &bufferBy, &bufferBz,
+                        PushPX(pti, cexfab, ceyfab, cezfab,
+                               cbxfab, cbyfab, cbzfab,
                                cEx->nGrowVect(), e_is_nodal,
                                nfine_gather, np-nfine_gather,
-                               lev, gather_lev, dt, ScaleFields(false), a_dt_type);
-                               //lev, lev-1, dt, ScaleFields(false), a_dt_type);
+                               lev, lev-1, dt, ScaleFields(false), a_dt_type);
                     } else if (push_type == PushType::Implicit) {
                         ImplicitPushXP(pti, cexfab, ceyfab, cezfab,
                                        cbxfab, cbyfab, cbzfab,

From ee8d9bd9b806c034ba67ef56d7f26316a3e58581 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Tue, 9 Jul 2024 11:22:09 -0700
Subject: [PATCH 19/25] option for fgb cdb in each dir

---
 Source/WarpX.H   |  4 +++-
 Source/WarpX.cpp | 31 +++++++++++++++++++++++++------
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/Source/WarpX.H b/Source/WarpX.H
index f8ee36de410..892d5816000 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -361,6 +361,7 @@ public:
     //! #n_field_gather_buffer cells of the edge of the patch, will gather the fields
     //! from the lower refinement level instead of the refinement patch itself
     static int n_field_gather_buffer;
+    static amrex::IntVect n_field_gather_buffer_each_dir;
 
     /**
      * If true, particles located inside refinement patch but within
@@ -391,6 +392,7 @@ public:
     //! #n_current_deposition_buffer cells of the edge of the patch, will deposit their charge
     //! and current onto the lower refinement level instead of the refinement patch itself
     static int n_current_deposition_buffer;
+    static amrex::IntVect n_current_deposition_buffer_each_dir;
 
     //! Integer that corresponds to the type of grid used in the simulation
     //! (collocated, staggered, hybrid)
@@ -1126,7 +1128,7 @@ public:
 
     // for cuda
     void BuildBufferMasksInBox ( amrex::Box tbx, amrex::IArrayBox &buffer_mask,
-                                 const amrex::IArrayBox &guard_mask, int ng );
+                                 const amrex::IArrayBox &guard_mask, const amrex::IntVect ng );
 
     void SetWeightsInGatherBuffer(const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
                                   const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk,
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index d92d2d53606..16965217622 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -211,6 +211,8 @@ std::map<std::string, amrex::iMultiFab *> WarpX::imultifab_map;
 
 IntVect WarpX::filter_npass_each_dir(1);
 
+amrex::IntVect WarpX::n_field_gather_buffer_each_dir(-1);
+amrex::IntVect WarpX::n_current_deposition_buffer_each_dir(-1);
 int WarpX::n_field_gather_buffer = -1;
 int WarpX::n_current_deposition_buffer = -1;
 bool WarpX::do_fieldinterp_gather_buffer = false;
@@ -912,8 +914,22 @@ WarpX::ReadParameters ()
         pp_warpx.query("do_divb_cleaning", do_divb_cleaning);
         utils::parser::queryWithParser(
             pp_warpx, "n_field_gather_buffer", n_field_gather_buffer);
+        amrex::Vector<int> nfieldgatherbuffer_eachdir(AMREX_SPACEDIM,n_field_gather_buffer);
+        utils::parser::queryArrWithParser( 
+            pp_warpx, "n_field_gather_buffer_each_dir", nfieldgatherbuffer_eachdir, 0, AMREX_SPACEDIM);
+        for (int i = 0; i < AMREX_SPACEDIM; ++i) {
+            n_field_gather_buffer_each_dir[i] = nfieldgatherbuffer_eachdir[i];
+        }
         utils::parser::queryWithParser(
             pp_warpx, "n_current_deposition_buffer", n_current_deposition_buffer);
+        amrex::Vector<int> ncurrentdepositionbuffer_eachdir(AMREX_SPACEDIM,n_current_deposition_buffer);
+        utils::parser::queryArrWithParser( 
+            pp_warpx, "n_current_deposition_buffer_each_dir",
+            ncurrentdepositionbuffer_eachdir, 0, AMREX_SPACEDIM);
+        for (int i = 0; i < AMREX_SPACEDIM; ++i) {
+            n_current_deposition_buffer_each_dir[i] = ncurrentdepositionbuffer_eachdir[i];
+        }
+
         utils::parser::queryWithParser(
             pp_warpx, "do_fieldinterp_gather_buffer", do_fieldinterp_gather_buffer);
         utils::parser::queryWithParser(
@@ -2711,13 +2727,14 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
     //
     // Copy of the coarse aux
     //
-    if (lev > 0 && (n_field_gather_buffer > 0 || n_current_deposition_buffer > 0 ||
+    if (lev > 0 && (n_field_gather_buffer_each_dir.max() > 0 ||
+                    n_current_deposition_buffer_each_dir.max() > 0 ||
                     mypc->nSpeciesGatherFromMainGrid() > 0))
     {
         BoxArray cba = ba;
         cba.coarsen(refRatio(lev-1));
 
-        if (n_field_gather_buffer > 0 || mypc->nSpeciesGatherFromMainGrid() > 0) {
+        if (n_field_gather_buffer_each_dir.max() > 0 || mypc->nSpeciesGatherFromMainGrid() > 0) {
             if (aux_is_nodal) {
                 BoxArray const& cnba = amrex::convert(cba,IntVect::TheNodeVector());
                 AllocInitMultiFab(Bfield_cax[lev][0], cnba,dm,ncomps,ngEB,lev, "Bfield_cax[x]");
@@ -2744,7 +2761,7 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
             AllocInitMultiFab(interp_weight_gbuffer[lev], amrex::convert(ba, IntVect::TheNodeVector()), dm, ncomps, ngEB, lev, "interp_weight_gbuffer", 0.0_rt);
         }
 
-        if (n_current_deposition_buffer > 0) {
+        if (n_current_deposition_buffer_each_dir.max() > 0) {
             amrex::Print() << " current dep buffer : " << n_current_deposition_buffer << "\n";
             AllocInitMultiFab(current_buf[lev][0], amrex::convert(cba,jx_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[x]");
             AllocInitMultiFab(current_buf[lev][1], amrex::convert(cba,jy_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[y]");
@@ -3121,7 +3138,9 @@ WarpX::BuildBufferMasks ()
     {
         for (int ipass = 0; ipass < 2; ++ipass)
         {
-            const int ngbuffer = (ipass == 0) ? n_current_deposition_buffer : n_field_gather_buffer;
+            //const int ngbuffer = (ipass == 0) ? n_current_deposition_buffer : n_field_gather_buffer;
+            const amrex::IntVect ngbuffer = (ipass == 0) ? n_current_deposition_buffer_each_dir
+                                                         : n_field_gather_buffer_each_dir;
             iMultiFab* bmasks = (ipass == 0) ? current_buffer_masks[lev].get() : gather_buffer_masks[lev].get();
             if (bmasks)
             {
@@ -3258,11 +3277,11 @@ WarpX::SetWeightsInGatherBuffer(const amrex::Box tbx, amrex::Array4<amrex::Real>
  */
 void
 WarpX::BuildBufferMasksInBox ( const amrex::Box tbx, amrex::IArrayBox &buffer_mask,
-                               const amrex::IArrayBox &guard_mask, const int ng )
+                               const amrex::IArrayBox &guard_mask, const amrex::IntVect ng )
 {
     auto const& msk = buffer_mask.array();
     auto const& gmsk = guard_mask.const_array();
-    const amrex::Dim3 ng3 = amrex::IntVect(ng).dim3();
+    const amrex::Dim3 ng3 = ng.dim3();
     amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE (int i, int j, int k)
     {
         for         (int kk = k-ng3.z; kk <= k+ng3.z; ++kk) {

From 74cbd903b4d9c8e6948c47eef41659de2c750a93 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Sat, 10 Aug 2024 15:01:10 -0700
Subject: [PATCH 20/25] remove interpolation of fields in fgb region

---
 Source/Particles/PhysicalParticleContainer.H  |  22 ----
 .../Particles/PhysicalParticleContainer.cpp   | 107 -----------------
 Source/WarpX.H                                |  28 -----
 Source/WarpX.cpp                              | 110 ------------------
 4 files changed, 267 deletions(-)

diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index 73e68fa9a9b..286d0675da6 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -320,28 +320,6 @@ public:
         amrex::FArrayBox const * & ez_ptr, amrex::FArrayBox const * & bx_ptr,
         amrex::FArrayBox const * & by_ptr, amrex::FArrayBox const * & bz_ptr);
 
-    void InterpolateFieldsInGatherBuffer (const amrex::Box& box,
-        amrex::FArrayBox& bufferEx, amrex::FArrayBox& bufferEy, amrex::FArrayBox& bufferEz,
-        amrex::FArrayBox& bufferBx, amrex::FArrayBox& bufferBy, amrex::FArrayBox& bufferBz,
-        amrex::Elixir& buf_exeli, amrex::Elixir& buf_eyeli, amrex::Elixir& buf_ezeli,
-        amrex::Elixir& buf_bxeli, amrex::Elixir& buf_byeli, amrex::Elixir& buf_bzeli,
-        amrex::FArrayBox const *& ex_ptr, amrex::FArrayBox const *& ey_ptr, amrex::FArrayBox const* & ez_ptr,
-        amrex::FArrayBox const *& bx_ptr, amrex::FArrayBox const *& by_ptr, amrex::FArrayBox const* & bz_ptr,
-        amrex::FArrayBox const *& cex_ptr, amrex::FArrayBox const *& cey_ptr, amrex::FArrayBox const* & cez_ptr,
-        amrex::FArrayBox const *& cbx_ptr, amrex::FArrayBox const *& cby_ptr, amrex::FArrayBox const* & cbz_ptr,
-        const amrex::IArrayBox& gather_mask, const amrex::FArrayBox& weight_gbuffer,
-        const amrex::IntVect ref_ratio);
-
-    void WeightedSumInGatherBuffer (const amrex::Box& tmp_xbox, const amrex::Box& tmp_ybox, const amrex::Box& tmp_zbox,
-                      amrex::Array4<amrex::Real> const & xbuf_field_arr, amrex::Array4<amrex::Real> const& ybuf_field_arr,
-                      amrex::Array4<amrex::Real> const & zbuf_field_arr,
-                      amrex::Array4<const amrex::Real> cfx_arr, amrex::Array4<const amrex::Real> cfy_arr,
-                      amrex::Array4<const amrex::Real> cfz_arr,
-                      amrex::Array4<const amrex::Real> fx_arr, amrex::Array4<const amrex::Real> fy_arr,
-                      amrex::Array4<const amrex::Real> fz_arr,
-                      amrex::Array4<const int > gm_arr, amrex::Array4<const amrex::Real> wt_arr,
-                      const amrex::IntVect ref_ratio);
-
     /**
     * \brief This function determines if resampling should be done for the current species, and
     * if so, performs the resampling.
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 095a79130dd..a67870eb6d8 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -2349,113 +2349,6 @@ PhysicalParticleContainer::applyNCIFilter (
 #endif
 }
 
-void
-PhysicalParticleContainer::InterpolateFieldsInGatherBuffer (const Box& box,
-    amrex::FArrayBox& bufferEx, amrex::FArrayBox& bufferEy, amrex::FArrayBox& bufferEz,
-    amrex::FArrayBox& bufferBx, amrex::FArrayBox& bufferBy, amrex::FArrayBox& bufferBz,
-    Elixir& buf_exeli, Elixir& buf_eyeli, Elixir& buf_ezeli,
-    Elixir& buf_bxeli, Elixir& buf_byeli, Elixir& buf_bzeli,
-    FArrayBox const *& ex_ptr, FArrayBox const *& ey_ptr, FArrayBox const* & ez_ptr,
-    FArrayBox const *& bx_ptr, FArrayBox const *& by_ptr, FArrayBox const* & bz_ptr,
-    FArrayBox const *& cex_ptr, FArrayBox const *& cey_ptr, FArrayBox const* & cez_ptr,
-    FArrayBox const *& cbx_ptr, FArrayBox const *& cby_ptr, FArrayBox const* & cbz_ptr,
-    const IArrayBox& gather_mask, const FArrayBox& weight_gbuffer,
-    const amrex::IntVect ref_ratio)
-{
-    amrex::Box tmp_exbox = amrex::convert(box,ex_ptr->box().ixType());
-    amrex::Box tmp_eybox = amrex::convert(box,ey_ptr->box().ixType());
-    amrex::Box tmp_ezbox = amrex::convert(box,ez_ptr->box().ixType());
-
-    bufferEx.resize(tmp_exbox);
-    buf_exeli = bufferEx.elixir();
-    bufferEy.resize(tmp_eybox);
-    buf_eyeli = bufferEy.elixir();
-    bufferEz.resize(tmp_ezbox);
-    buf_ezeli = bufferEz.elixir();
-    WeightedSumInGatherBuffer(tmp_exbox, tmp_eybox, tmp_ezbox,
-                    bufferEx.array(), bufferEy.array(), bufferEz.array(),
-                    cex_ptr->array(), cey_ptr->array(), cez_ptr->array(),
-                    ex_ptr->array(), ey_ptr->array(), ez_ptr->array(),
-                    gather_mask.array(), weight_gbuffer.array(), ref_ratio);
-
-    amrex::Box tmp_bxbox = amrex::convert(box,bx_ptr->box().ixType());
-    amrex::Box tmp_bybox = amrex::convert(box,by_ptr->box().ixType());
-    amrex::Box tmp_bzbox = amrex::convert(box,bz_ptr->box().ixType());
-    bufferBx.resize(tmp_bxbox);
-    buf_bxeli = bufferBx.elixir();
-    bufferBy.resize(tmp_bybox);
-    buf_byeli = bufferBy.elixir();
-    bufferBz.resize(tmp_bzbox);
-    buf_bzeli = bufferBz.elixir();
-
-    WeightedSumInGatherBuffer(tmp_bxbox, tmp_bybox, tmp_bzbox,
-                    bufferBx.array(), bufferBy.array(), bufferBz.array(),
-                    cbx_ptr->array(), cby_ptr->array(), cbz_ptr->array(),
-                    bx_ptr->array(), by_ptr->array(), bz_ptr->array(),
-                    gather_mask.array(), weight_gbuffer.array(), ref_ratio);
-}
-
-void
-PhysicalParticleContainer::WeightedSumInGatherBuffer (
-                      const amrex::Box& tmp_xbox, const amrex::Box& tmp_ybox, const amrex::Box& tmp_zbox,
-                      amrex::Array4<amrex::Real> const & xbuf_field_arr, amrex::Array4<amrex::Real> const& ybuf_field_arr,
-                      amrex::Array4<amrex::Real> const & zbuf_field_arr,
-                      amrex::Array4<const amrex::Real> cfx_arr, amrex::Array4<const amrex::Real> cfy_arr,
-                      amrex::Array4<const amrex::Real> cfz_arr,
-                      amrex::Array4<const amrex::Real> fx_arr, amrex::Array4<const amrex::Real> fy_arr,
-                      amrex::Array4<const amrex::Real> fz_arr,
-                      amrex::Array4<const int > gm_arr, amrex::Array4<const amrex::Real> wt_arr,
-                      const amrex::IntVect ref_ratio)
-{
-    amrex::ParallelFor( tmp_xbox, tmp_ybox, tmp_zbox,
-        [=] AMREX_GPU_DEVICE (int i, int j, int k) {
-            int ii = amrex::coarsen(i,ref_ratio[0]);
-            int jj = j;
-            int kk = k;
-#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-            jj = amrex::coarsen(j,ref_ratio[1]);
-#elif defined(WARPX_DIM_3D)
-            kk = amrex::coarsen(k,ref_ratio[2]);
-#endif
-            if (gm_arr(i,j,k) == 0) {
-                xbuf_field_arr(i,j,k) = wt_arr(i,j,k)*fx_arr(i,j,k) + (1._rt-wt_arr(i,j,k))*cfx_arr(ii,jj,kk);
-            } else {
-                xbuf_field_arr(i,j,k) = fx_arr(i,j,k);
-            }
-        },
-        [=] AMREX_GPU_DEVICE (int i, int j, int k) {
-            int ii = amrex::coarsen(i,ref_ratio[0]);
-            int jj = j;
-            int kk = k;
-#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-            jj = amrex::coarsen(j,ref_ratio[1]);
-#elif defined(WARPX_DIM_3D)
-            kk = amrex::coarsen(k,ref_ratio[2]);
-#endif
-            if (gm_arr(i,j,k) == 0) {
-                ybuf_field_arr(i,j,k) = wt_arr(i,j,k)*fy_arr(i,j,k) + (1._rt-wt_arr(i,j,k))*cfy_arr(ii,jj,kk);
-            } else {
-                ybuf_field_arr(i,j,k) = fy_arr(i,j,k);
-            }
-        },
-        [=] AMREX_GPU_DEVICE (int i, int j, int k) {
-            int ii = amrex::coarsen(i,ref_ratio[0]);
-            int jj = j;
-            int kk = k;
-#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-            jj = amrex::coarsen(j,ref_ratio[1]);
-#elif defined(WARPX_DIM_3D)
-            kk = amrex::coarsen(k,ref_ratio[2]);
-#endif
-            if (gm_arr(i,j,k) == 0) {
-               zbuf_field_arr(i,j,k) = wt_arr(i,j,k)*fz_arr(i,j,k) + (1._rt-wt_arr(i,j,k))*cfz_arr(ii,jj,kk);
-            } else {
-               zbuf_field_arr(i,j,k) = fz_arr(i,j,k);
-            }
-        }
-    );
-}
-
 // Loop over all particles in the particle container and
 // split particles tagged with p.id()=DoSplitParticleID
 void
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 892d5816000..3772b102f1f 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -363,31 +363,6 @@ public:
     static int n_field_gather_buffer;
     static amrex::IntVect n_field_gather_buffer_each_dir;
 
-    /**
-     * If true, particles located inside refinement patch but within
-     *  #n_field_gather_buffer cells of the patch edge will gather fields
-     * that are a weighted sum of the fields from lower refinement
-     * level and the refinement patch. By default, no interpolation will be done,
-     * and the fields will be gathered only from the lower refinement level.
-     * The option can be used to allow for a smooth transition from the fine
-     * and coarse solutions with a tanh profile, such that, the value
-     * of the gather fields is close to the solution on the refinement patch
-     * near the gather buffer edge, and close to the solution on the lower
-     * refinement level close to the edge of the patch.
-     * #tanh_midpoint_gather_buffer can be used to set the tanh profile
-     */
-    static bool do_fieldinterp_gather_buffer;
-
-    /**
-     * With mesh-refinement, finite number of gather buffer cells, and
-     * do_fieldinterp_gather_buffer, the particles on the fine patch
-     * gather a weighted sum of fields from fine patch and coarse patch.
-     * A tanh profile is used to set the smoothing kernel
-     * #tanh_midpoint_gather_buffer must be a fraction of the buffer width
-     * measured from coarsefine interface, where the tanh profile is 0.5
-     * The default value is 0.5
-     */
-    static amrex::Real tanh_midpoint_gather_buffer;
     //! With mesh refinement, particles located inside a refinement patch, but within
     //! #n_current_deposition_buffer cells of the edge of the patch, will deposit their charge
     //! and current onto the lower refinement level instead of the refinement patch itself
@@ -1130,9 +1105,6 @@ public:
     void BuildBufferMasksInBox ( amrex::Box tbx, amrex::IArrayBox &buffer_mask,
                                  const amrex::IArrayBox &guard_mask, const amrex::IntVect ng );
 
-    void SetWeightsInGatherBuffer(const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
-                                  const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk,
-                                  const int ngbuffer, const bool do_interpolate,amrex::Real tanh_midpoint);
 #ifdef AMREX_USE_EB
     amrex::EBFArrayBoxFactory const& fieldEBFactory (int lev) const noexcept {
         return static_cast<amrex::EBFArrayBoxFactory const&>(*m_field_factory[lev]);
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 16965217622..789432c2a09 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -215,8 +215,6 @@ amrex::IntVect WarpX::n_field_gather_buffer_each_dir(-1);
 amrex::IntVect WarpX::n_current_deposition_buffer_each_dir(-1);
 int WarpX::n_field_gather_buffer = -1;
 int WarpX::n_current_deposition_buffer = -1;
-bool WarpX::do_fieldinterp_gather_buffer = false;
-amrex::Real WarpX::tanh_midpoint_gather_buffer = 0.5;
 
 short WarpX::grid_type;
 amrex::IntVect m_rho_nodal_flag;
@@ -930,11 +928,6 @@ WarpX::ReadParameters ()
             n_current_deposition_buffer_each_dir[i] = ncurrentdepositionbuffer_eachdir[i];
         }
 
-        utils::parser::queryWithParser(
-            pp_warpx, "do_fieldinterp_gather_buffer", do_fieldinterp_gather_buffer);
-        utils::parser::queryWithParser(
-            pp_warpx, "tanh_midpoint_gather_buffer", tanh_midpoint_gather_buffer);
-
         amrex::Real quantum_xi_tmp;
         const auto quantum_xi_is_specified =
             utils::parser::queryWithParser(pp_warpx, "quantum_xi", quantum_xi_tmp);
@@ -3132,8 +3125,6 @@ WarpX::getLoadBalanceEfficiency (const int lev)
 void
 WarpX::BuildBufferMasks ()
 {
-    bool do_interpolate = WarpX::do_fieldinterp_gather_buffer;
-    amrex::Real tanh_midpoint =  WarpX::tanh_midpoint_gather_buffer;
     for (int lev = 1; lev <= maxLevel(); ++lev)
     {
         for (int ipass = 0; ipass < 2; ++ipass)
@@ -3161,112 +3152,11 @@ WarpX::BuildBufferMasks ()
                     const Box tbx = mfi.growntilebox();
                     BuildBufferMasksInBox( tbx, (*bmasks)[mfi], tmp[mfi], ngbuffer );
                 }
-                //if (ipass==0) continue;
-                //amrex::MultiFab* weight_gbuffer = interp_weight_gbuffer[lev].get();
-                //// Using tmp to also set weights in the interp_weight_gbuffer multifab
-                //for (MFIter mfi(*weight_gbuffer, true); mfi.isValid(); ++mfi)
-                //{
-                //    const Box& tbx = mfi.tilebox(IntVect::TheNodeVector(),weight_gbuffer->nGrowVect());
-                //    auto const& gmsk = tmp[mfi].const_array();
-                //    auto const& bmsk = (*bmasks)[mfi].array();
-                //    auto const& wtmsk = (*weight_gbuffer)[mfi].array();
-                //    SetWeightsInGatherBuffer(tbx, wtmsk, gmsk, bmsk, ngbuffer, do_interpolate, tanh_midpoint);
-                //    //amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
-                //    //    wtmsk(i,j,k) = 0._rt;
-                //    //    if (bmsk(i,j,k) == 0 && do_interpolate) {
-                //    //        if(gmsk(i,j,k)==0) {
-                //    //            wtmsk(i,j,k) = 0.;
-                //    //            return;
-                //    //        }
-                //    //        for (int ii = i-1; ii >= i-ngbuffer; --ii) {
-                //    //            if (gmsk(ii,j,k)==0) {
-                //    //                amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
-                //    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                //    //                wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                //    //                amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
-                //    //                return;
-                //    //            }
-                //    //        }
-                //    //        for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
-                //    //            if (gmsk(ii,j,k)==0) {
-                //    //                amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
-                //    //                                  / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                //    //                wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                //    //                amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
-                //    //                return;
-                //    //            }
-                //    //        }
-                //    //        //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
-                //    //        //    if (gmsk(i,jj,k)==0) {
-                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
-                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                //    //        //        return;
-                //    //        //    }
-                //    //        //}
-                //    //        //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
-                //    //        //    if (gmsk(i,jj,k)==0) {
-                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
-                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                //    //        //        return;
-                //    //        //    }
-                //    //        //}
-                //    //        //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
-                //    //        //    if (gmsk(i,j,kk)==0) {
-                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
-                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                //    //        //        return;
-                //    //        //    }
-                //    //        //}
-                //    //        //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
-                //    //        //    if (gmsk(i,j,kk)==0) {
-                //    //        //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
-                //    //        //                          / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                //    //        //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                //    //        //        return;
-                //    //        //    }
-                //    //        //}
-                //    //    }
-                //    //});
-                //}
             }
         }
     }
 }
 
-void
-WarpX::SetWeightsInGatherBuffer(const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
-                                const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk, const int ngbuffer, const bool do_interpolate, amrex::Real tanh_midpoint)
-{
-    amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
-        wtmsk(i,j,k) = 0._rt;
-        if (bmsk(i,j,k) == 0 && do_interpolate) {
-            if(gmsk(i,j,k)==0) {
-                wtmsk(i,j,k) = 0.;
-                return;
-            }
-            for (int ii = i-1; ii >= i-ngbuffer; --ii) {
-                if (gmsk(ii,j,k)==0) {
-                    amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
-                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                    return;
-                }
-            }
-            for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
-                if (gmsk(ii,j,k)==0) {
-                    amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
-                                      / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                    wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                    return;
-                }
-            }
-        }        
-    });
-}
-
 /**
  * \brief Build buffer mask within given FArrayBox
  *

From 97271d0b660559219193e763193e999b2589167c Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Sun, 11 Aug 2024 17:15:54 -0700
Subject: [PATCH 21/25] fix tab

---
 Source/WarpX.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 1582d8c238e..dcce60b115f 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -1404,7 +1404,7 @@ WarpX::ReadParameters ()
         pp_algo.query("load_balance_with_sfc", load_balance_with_sfc);
         pp_algo.query("do_similar_dm_refpatch", do_similar_dm_refpatch);
         pp_algo.query("do_SFC_dm_vectorlevel",do_SFC_dm_vectorlevel);
-	pp_algo.query("load_balance_startlevel",load_balance_startlevel);
+    pp_algo.query("load_balance_startlevel",load_balance_startlevel);
         // Knapsack factor only used with non-SFC strategy
         if (!load_balance_with_sfc) {
             pp_algo.query("load_balance_knapsack_factor", load_balance_knapsack_factor);

From 0418eb1d776be4cff24d4c80ac31868bce815314 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Sun, 11 Aug 2024 17:20:50 -0700
Subject: [PATCH 22/25] EOL

---
 Source/WarpX.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index dcce60b115f..9d812537e76 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -927,7 +927,7 @@ WarpX::ReadParameters ()
         utils::parser::queryWithParser(
             pp_warpx, "n_field_gather_buffer", n_field_gather_buffer);
         amrex::Vector<int> nfieldgatherbuffer_eachdir(AMREX_SPACEDIM,n_field_gather_buffer);
-        utils::parser::queryArrWithParser( 
+        utils::parser::queryArrWithParser(
             pp_warpx, "n_field_gather_buffer_each_dir", nfieldgatherbuffer_eachdir, 0, AMREX_SPACEDIM);
         for (int i = 0; i < AMREX_SPACEDIM; ++i) {
             n_field_gather_buffer_each_dir[i] = nfieldgatherbuffer_eachdir[i];
@@ -935,7 +935,7 @@ WarpX::ReadParameters ()
         utils::parser::queryWithParser(
             pp_warpx, "n_current_deposition_buffer", n_current_deposition_buffer);
         amrex::Vector<int> ncurrentdepositionbuffer_eachdir(AMREX_SPACEDIM,n_current_deposition_buffer);
-        utils::parser::queryArrWithParser( 
+        utils::parser::queryArrWithParser(
             pp_warpx, "n_current_deposition_buffer_each_dir",
             ncurrentdepositionbuffer_eachdir, 0, AMREX_SPACEDIM);
         for (int i = 0; i < AMREX_SPACEDIM; ++i) {

From eea5eb899a5e002666aee847c9c1e9af154bc352 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Mon, 12 Aug 2024 09:59:35 -0700
Subject: [PATCH 23/25]  backward compatibility for buffergathereachdir

---
 Source/Parallelization/WarpXRegrid.cpp |  2 +-
 Source/Particles/Sorting/Partition.cpp |  8 ++++----
 Source/WarpX.cpp                       | 11 ++++++++++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index 594875d55b0..de4337fee84 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -465,7 +465,7 @@ WarpX::RemakeLevel (int lev, Real /*time*/, const BoxArray& ba, const Distributi
 #endif
         }
 
-        if (lev > 0 && (n_field_gather_buffer > 0 || n_current_deposition_buffer > 0)) {
+        if (lev > 0 && (n_field_gather_buffer_each_dir.max() > 0 || n_current_deposition_buffer_each_dir.max() > 0)) {
             for (int idim=0; idim < 3; ++idim)
             {
                 RemakeMultiFab(Bfield_cax[lev][idim], false);
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
index 9f92739a5d5..a6d5fc9cea0 100644
--- a/Source/Particles/Sorting/Partition.cpp
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -67,7 +67,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
 
     // - Select the larger buffer
     iMultiFab const* bmasks =
-        (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
+        (WarpX::n_field_gather_buffer_each_dir.max() >= WarpX::n_current_deposition_buffer_each_dir.max()) ?
         gather_masks : current_masks;
     // - For each particle, find whether it is in the larger buffer,
     //   by looking up the mask. Store the answer in `inexflag`.
@@ -86,7 +86,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
     // Second, among particles that are in the larger buffer, partition
     // particles into the smaller buffer
 
-    if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
+    if (WarpX::n_current_deposition_buffer_each_dir.max() == WarpX::n_field_gather_buffer_each_dir.max()) {
         // No need to do anything if the buffers have the same size
         nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep);
     } else if (sep == pid.end()) {
@@ -97,11 +97,11 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
         if (bmasks == gather_masks) {
             nfine_gather = n_fine;
             bmasks = current_masks;
-            n_buf = WarpX::n_current_deposition_buffer;
+            n_buf = WarpX::n_current_deposition_buffer_each_dir.max();
         } else {
             nfine_current = n_fine;
             bmasks = gather_masks;
-            n_buf = WarpX::n_field_gather_buffer;
+            n_buf = WarpX::n_field_gather_buffer_each_dir.max();
         }
         if (n_buf > 0)
         {
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 9d812537e76..a7c37dd5ba2 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -2246,7 +2246,16 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
         // Field gather buffer should be larger than current deposition buffers
         n_field_gather_buffer = n_current_deposition_buffer + 1;
     }
-
+    for (int i = 0; i < AMREX_SPACEDIM; ++i) {
+        if (n_current_deposition_buffer_each_dir[i] < 0) {
+            n_current_deposition_buffer_each_dir[i] = guard_cells.ng_alloc_J.max();
+        }
+    }
+    for (int i = 0; i < AMREX_SPACEDIM; ++i) {
+        if (n_field_gather_buffer_each_dir[i] < 0) {
+            n_field_gather_buffer_each_dir[i] = n_current_deposition_buffer + 1;
+        }
+    }
     AllocLevelMFs(lev, ba, dm, guard_cells.ng_alloc_EB, guard_cells.ng_alloc_J,
                   guard_cells.ng_alloc_Rho, guard_cells.ng_alloc_F, guard_cells.ng_alloc_G, aux_is_nodal);
 

From 0c42c38d5a2eb55123b0ae762231f212d7792446 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Tue, 13 Aug 2024 11:55:41 -0700
Subject: [PATCH 24/25] remove buffer each dir and initializeexternal field on
 Bfield_fp

---
 Source/Initialization/WarpXInitData.cpp | 120 +++++++++++++++---------
 Source/Parallelization/WarpXRegrid.cpp  |   2 +-
 Source/Particles/Sorting/Partition.cpp  |   8 +-
 Source/WarpX.H                          |  20 ++--
 Source/WarpX.cpp                        |  24 +++--
 5 files changed, 100 insertions(+), 74 deletions(-)

diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 6be5a9c66b9..715c25d2e8b 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -866,12 +866,12 @@ WarpX::InitLevelData (int lev, Real /*time*/)
     // Externally imposed fields are only initialized until the user-defined maxlevel_extEMfield_init.
     // The default maxlevel_extEMfield_init value is the total number of levels in the simulation
     if ((m_p_ext_field_params->B_ext_grid_type == ExternalFieldType::parse_ext_grid_function)
-         && (lev > 0) && (lev <= maxlevel_extEMfield_init)) {
+         && (lev <= maxlevel_extEMfield_init)) {
 
         InitializeExternalFieldsOnGridUsingParser(
-            Bfield_aux[lev][0].get(),
-            Bfield_aux[lev][1].get(),
-            Bfield_aux[lev][2].get(),
+            Bfield_fp[lev][0].get(),
+            Bfield_fp[lev][1].get(),
+            Bfield_fp[lev][2].get(),
             m_p_ext_field_params->Bxfield_parser->compile<3>(),
             m_p_ext_field_params->Byfield_parser->compile<3>(),
             m_p_ext_field_params->Bzfield_parser->compile<3>(),
@@ -880,17 +880,31 @@ WarpX::InitLevelData (int lev, Real /*time*/)
             'B',
             lev, PatchType::fine);
 
-        InitializeExternalFieldsOnGridUsingParser(
-            Bfield_cp[lev][0].get(),
-            Bfield_cp[lev][1].get(),
-            Bfield_cp[lev][2].get(),
-            m_p_ext_field_params->Bxfield_parser->compile<3>(),
-            m_p_ext_field_params->Byfield_parser->compile<3>(),
-            m_p_ext_field_params->Bzfield_parser->compile<3>(),
-            m_edge_lengths[lev],
-            m_face_areas[lev],
-            'B',
-            lev, PatchType::coarse);
+        if (lev > 0) {
+            InitializeExternalFieldsOnGridUsingParser(
+                Bfield_aux[lev][0].get(),
+                Bfield_aux[lev][1].get(),
+                Bfield_aux[lev][2].get(),
+                m_p_ext_field_params->Bxfield_parser->compile<3>(),
+                m_p_ext_field_params->Byfield_parser->compile<3>(),
+                m_p_ext_field_params->Bzfield_parser->compile<3>(),
+                m_edge_lengths[lev],
+                m_face_areas[lev],
+                'B',
+                lev, PatchType::fine);
+
+            InitializeExternalFieldsOnGridUsingParser(
+                Bfield_cp[lev][0].get(),
+                Bfield_cp[lev][1].get(),
+                Bfield_cp[lev][2].get(),
+                m_p_ext_field_params->Bxfield_parser->compile<3>(),
+                m_p_ext_field_params->Byfield_parser->compile<3>(),
+                m_p_ext_field_params->Bzfield_parser->compile<3>(),
+                m_edge_lengths[lev],
+                m_face_areas[lev],
+                'B',
+                lev, PatchType::coarse);
+        }
     }
     ParmParse pp_warpx("warpx");
     int IncludeBfieldPerturbation = 0;
@@ -939,6 +953,18 @@ WarpX::InitLevelData (int lev, Real /*time*/)
     if ((m_p_ext_field_params->E_ext_grid_type == ExternalFieldType::parse_ext_grid_function)
         && (lev <= maxlevel_extEMfield_init)) {
 
+        InitializeExternalFieldsOnGridUsingParser(
+            Efield_fp[lev][0].get(),
+            Efield_fp[lev][1].get(),
+            Efield_fp[lev][2].get(),
+            m_p_ext_field_params->Exfield_parser->compile<3>(),
+            m_p_ext_field_params->Eyfield_parser->compile<3>(),
+            m_p_ext_field_params->Ezfield_parser->compile<3>(),
+            m_edge_lengths[lev],
+            m_face_areas[lev],
+            'E',
+            lev, PatchType::fine);
+
 #ifdef AMREX_USE_EB
         // We initialize ECTRhofield consistently with the Efield
         if (WarpX::electromagnetic_solver_id == ElectromagneticSolverAlgo::ECT) {
@@ -1392,22 +1418,23 @@ WarpX::LoadExternalFields (int const lev)
             "External fields from file are not compatible with the moving window." );
     }
 
-    // External grid fields
-    if (m_p_ext_field_params->B_ext_grid_type == ExternalFieldType::parse_ext_grid_function) {
-        // Initialize Bfield_fp_external with external function
-        InitializeExternalFieldsOnGridUsingParser(
-            Bfield_fp_external[lev][0].get(),
-            Bfield_fp_external[lev][1].get(),
-            Bfield_fp_external[lev][2].get(),
-            m_p_ext_field_params->Bxfield_parser->compile<3>(),
-            m_p_ext_field_params->Byfield_parser->compile<3>(),
-            m_p_ext_field_params->Bzfield_parser->compile<3>(),
-            m_edge_lengths[lev],
-            m_face_areas[lev],
-            'B',
-            lev, PatchType::fine);
-    }
-    else if (m_p_ext_field_params->B_ext_grid_type == ExternalFieldType::read_from_file) {
+    //// External grid fields
+    //if (m_p_ext_field_params->B_ext_grid_type == ExternalFieldType::parse_ext_grid_function) {
+    //    // Initialize Bfield_fp_external with external function
+    //    InitializeExternalFieldsOnGridUsingParser(
+    //        Bfield_fp_external[lev][0].get(),
+    //        Bfield_fp_external[lev][1].get(),
+    //        Bfield_fp_external[lev][2].get(),
+    //        m_p_ext_field_params->Bxfield_parser->compile<3>(),
+    //        m_p_ext_field_params->Byfield_parser->compile<3>(),
+    //        m_p_ext_field_params->Bzfield_parser->compile<3>(),
+    //        m_edge_lengths[lev],
+    //        m_face_areas[lev],
+    //        'B',
+    //        lev, PatchType::fine);
+    //}
+    //else
+    if (m_p_ext_field_params->B_ext_grid_type == ExternalFieldType::read_from_file) {
 #if defined(WARPX_DIM_RZ)
         WARPX_ALWAYS_ASSERT_WITH_MESSAGE(n_rz_azimuthal_modes == 1,
                                          "External field reading is not implemented for more than one RZ mode (see #3829)");
@@ -1421,21 +1448,22 @@ WarpX::LoadExternalFields (int const lev)
 #endif
     }
 
-    if (m_p_ext_field_params->E_ext_grid_type == ExternalFieldType::parse_ext_grid_function) {
-        // Initialize Efield_fp_external with external function
-        InitializeExternalFieldsOnGridUsingParser(
-            Efield_fp_external[lev][0].get(),
-            Efield_fp_external[lev][1].get(),
-            Efield_fp_external[lev][2].get(),
-            m_p_ext_field_params->Exfield_parser->compile<3>(),
-            m_p_ext_field_params->Eyfield_parser->compile<3>(),
-            m_p_ext_field_params->Ezfield_parser->compile<3>(),
-            m_edge_lengths[lev],
-            m_face_areas[lev],
-            'E',
-            lev, PatchType::fine);
-    }
-    else if (m_p_ext_field_params->E_ext_grid_type == ExternalFieldType::read_from_file) {
+    //if (m_p_ext_field_params->E_ext_grid_type == ExternalFieldType::parse_ext_grid_function) {
+    //    // Initialize Efield_fp_external with external function
+    //    InitializeExternalFieldsOnGridUsingParser(
+    //        Efield_fp_external[lev][0].get(),
+    //        Efield_fp_external[lev][1].get(),
+    //        Efield_fp_external[lev][2].get(),
+    //        m_p_ext_field_params->Exfield_parser->compile<3>(),
+    //        m_p_ext_field_params->Eyfield_parser->compile<3>(),
+    //        m_p_ext_field_params->Ezfield_parser->compile<3>(),
+    //        m_edge_lengths[lev],
+    //        m_face_areas[lev],
+    //        'E',
+    //        lev, PatchType::fine);
+    //}
+    //else
+    if (m_p_ext_field_params->E_ext_grid_type == ExternalFieldType::read_from_file) {
 #if defined(WARPX_DIM_RZ)
         WARPX_ALWAYS_ASSERT_WITH_MESSAGE(n_rz_azimuthal_modes == 1,
                                          "External field reading is not implemented for more than one RZ mode (see #3829)");
diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index de4337fee84..594875d55b0 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -465,7 +465,7 @@ WarpX::RemakeLevel (int lev, Real /*time*/, const BoxArray& ba, const Distributi
 #endif
         }
 
-        if (lev > 0 && (n_field_gather_buffer_each_dir.max() > 0 || n_current_deposition_buffer_each_dir.max() > 0)) {
+        if (lev > 0 && (n_field_gather_buffer > 0 || n_current_deposition_buffer > 0)) {
             for (int idim=0; idim < 3; ++idim)
             {
                 RemakeMultiFab(Bfield_cax[lev][idim], false);
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
index a6d5fc9cea0..c94c5e185e9 100644
--- a/Source/Particles/Sorting/Partition.cpp
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -67,7 +67,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
 
     // - Select the larger buffer
     iMultiFab const* bmasks =
-        (WarpX::n_field_gather_buffer_each_dir.max() >= WarpX::n_current_deposition_buffer_each_dir.max()) ?
+        (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
         gather_masks : current_masks;
     // - For each particle, find whether it is in the larger buffer,
     //   by looking up the mask. Store the answer in `inexflag`.
@@ -86,7 +86,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
     // Second, among particles that are in the larger buffer, partition
     // particles into the smaller buffer
 
-    if (WarpX::n_current_deposition_buffer_each_dir.max() == WarpX::n_field_gather_buffer_each_dir.max()) {
+    if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer ) {
         // No need to do anything if the buffers have the same size
         nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep);
     } else if (sep == pid.end()) {
@@ -97,11 +97,11 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
         if (bmasks == gather_masks) {
             nfine_gather = n_fine;
             bmasks = current_masks;
-            n_buf = WarpX::n_current_deposition_buffer_each_dir.max();
+            n_buf = WarpX::n_current_deposition_buffer;
         } else {
             nfine_current = n_fine;
             bmasks = gather_masks;
-            n_buf = WarpX::n_field_gather_buffer_each_dir.max();
+            n_buf = WarpX::n_field_gather_buffer;
         }
         if (n_buf > 0)
         {
diff --git a/Source/WarpX.H b/Source/WarpX.H
index b2aa183f6fc..39dc429698b 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -914,10 +914,10 @@ public:
     static amrex::XDim3 InvCellSize (int lev);
     static amrex::RealBox getRealBox(const amrex::Box& bx, int lev);
 
-    [[nodiscard]] const amrex::MultiFab* getInterpWeightGBuffer (int lev) const
-    {
-        return interp_weight_gbuffer[lev].get();
-    }
+//    [[nodiscard]] const amrex::MultiFab* getInterpWeightGBuffer (int lev) const
+//    {
+//        return interp_weight_gbuffer[lev].get();
+//    }
     /**
      * \brief Return the lower corner of the box in real units.
      * \param bx The box
@@ -1126,7 +1126,7 @@ public:
 
     // for cuda
     void BuildBufferMasksInBox ( amrex::Box tbx, amrex::IArrayBox &buffer_mask,
-                                 const amrex::IArrayBox &guard_mask, const amrex::IntVect ng );
+                                 const amrex::IArrayBox &guard_mask, const int ng );
 
 #ifdef AMREX_USE_EB
     amrex::EBFArrayBoxFactory const& fieldEBFactory (int lev) const noexcept {
@@ -1602,11 +1602,11 @@ private:
     amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3 > > Bfield_cax;
     amrex::Vector<std::unique_ptr<amrex::iMultiFab> > current_buffer_masks;
     amrex::Vector<std::unique_ptr<amrex::iMultiFab> > gather_buffer_masks;
-    /** Multifab that stores weights for interpolating fine and coarse solutions
-     *  in the buffer gather region, such that, the weight for fine solution is
-     *  0 near the coarse-fine interface, and 1 as the buffer gather region terminates in the fine patch
-     */
-    amrex::Vector<std::unique_ptr<amrex::MultiFab> > interp_weight_gbuffer;
+//    /** Multifab that stores weights for interpolating fine and coarse solutions
+//     *  in the buffer gather region, such that, the weight for fine solution is
+//     *  0 near the coarse-fine interface, and 1 as the buffer gather region terminates in the fine patch
+//     */
+//    amrex::Vector<std::unique_ptr<amrex::MultiFab> > interp_weight_gbuffer;
 
     // If charge/current deposition buffers are used
     amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > current_buf;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index a7c37dd5ba2..84f892d354e 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -402,7 +402,7 @@ WarpX::WarpX ()
     gather_buffer_masks.resize(nlevs_max);
     current_buf.resize(nlevs_max);
     charge_buf.resize(nlevs_max);
-    interp_weight_gbuffer.resize(nlevs_max);
+//    interp_weight_gbuffer.resize(nlevs_max);
 
     pml.resize(nlevs_max);
 #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_FFT)
@@ -2160,7 +2160,7 @@ WarpX::ClearLevel (int lev)
 
     current_buffer_masks[lev].reset();
     gather_buffer_masks[lev].reset();
-    interp_weight_gbuffer[lev].reset();
+//    interp_weight_gbuffer[lev].reset();
 
     F_fp  [lev].reset();
     G_fp  [lev].reset();
@@ -2253,7 +2253,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
     }
     for (int i = 0; i < AMREX_SPACEDIM; ++i) {
         if (n_field_gather_buffer_each_dir[i] < 0) {
-            n_field_gather_buffer_each_dir[i] = n_current_deposition_buffer + 1;
+            n_field_gather_buffer_each_dir[i] = n_current_deposition_buffer_each_dir[i] + 1;
         }
     }
     AllocLevelMFs(lev, ba, dm, guard_cells.ng_alloc_EB, guard_cells.ng_alloc_J,
@@ -2799,14 +2799,14 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
     //
     // Copy of the coarse aux
     //
-    if (lev > 0 && (n_field_gather_buffer_each_dir.max() > 0 ||
-                    n_current_deposition_buffer_each_dir.max() > 0 ||
+    if (lev > 0 && (n_field_gather_buffer > 0 ||
+                    n_current_deposition_buffer > 0 ||
                     mypc->nSpeciesGatherFromMainGrid() > 0))
     {
         BoxArray cba = ba;
         cba.coarsen(refRatio(lev-1));
 
-        if (n_field_gather_buffer_each_dir.max() > 0 || mypc->nSpeciesGatherFromMainGrid() > 0) {
+        if (n_field_gather_buffer > 0 || mypc->nSpeciesGatherFromMainGrid() > 0) {
             if (aux_is_nodal) {
                 BoxArray const& cnba = amrex::convert(cba,IntVect::TheNodeVector());
                 AllocInitMultiFab(Bfield_cax[lev][0], cnba,dm,ncomps,ngEB,lev, "Bfield_cax[x]", 0.0_rt);
@@ -2830,10 +2830,10 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
             // Gather buffer masks have 1 ghost cell, because of the fact
             // that particles may move by more than one cell when using subcycling.
             AllocInitMultiFab(gather_buffer_masks[lev], ba, dm, ncomps, amrex::IntVect(1), lev, "gather_buffer_masks");
-            AllocInitMultiFab(interp_weight_gbuffer[lev], amrex::convert(ba, IntVect::TheNodeVector()), dm, ncomps, ngEB, lev, "interp_weight_gbuffer", 0.0_rt);
+//            AllocInitMultiFab(interp_weight_gbuffer[lev], amrex::convert(ba, IntVect::TheNodeVector()), dm, ncomps, ngEB, lev, "interp_weight_gbuffer", 0.0_rt);
         }
 
-        if (n_current_deposition_buffer_each_dir.max() > 0) {
+        if (n_current_deposition_buffer > 0) {
             amrex::Print() << " current dep buffer : " << n_current_deposition_buffer << "\n";
             AllocInitMultiFab(current_buf[lev][0], amrex::convert(cba,jx_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[x]");
             AllocInitMultiFab(current_buf[lev][1], amrex::convert(cba,jy_nodal_flag),dm,ncomps,ngJ,lev, "current_buf[y]");
@@ -3215,9 +3215,7 @@ WarpX::BuildBufferMasks ()
     {
         for (int ipass = 0; ipass < 2; ++ipass)
         {
-            //const int ngbuffer = (ipass == 0) ? n_current_deposition_buffer : n_field_gather_buffer;
-            const amrex::IntVect ngbuffer = (ipass == 0) ? n_current_deposition_buffer_each_dir
-                                                         : n_field_gather_buffer_each_dir;
+            const int ngbuffer = (ipass == 0) ? n_current_deposition_buffer : n_field_gather_buffer;
             iMultiFab* bmasks = (ipass == 0) ? current_buffer_masks[lev].get() : gather_buffer_masks[lev].get();
             if (bmasks)
             {
@@ -3253,11 +3251,11 @@ WarpX::BuildBufferMasks ()
  */
 void
 WarpX::BuildBufferMasksInBox ( const amrex::Box tbx, amrex::IArrayBox &buffer_mask,
-                               const amrex::IArrayBox &guard_mask, const amrex::IntVect ng )
+                               const amrex::IArrayBox &guard_mask, const int ng )
 {
     auto const& msk = buffer_mask.array();
     auto const& gmsk = guard_mask.const_array();
-    const amrex::Dim3 ng3 = ng.dim3();
+    const amrex::Dim3 ng3 = amrex::IntVect(ng).dim3();
     amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE (int i, int j, int k)
     {
         for         (int kk = k-ng3.z; kk <= k+ng3.z; ++kk) {

From 95cfda2d367d11568b42455f1d9f7022af22ec7d Mon Sep 17 00:00:00 2001
From: RevathiJambunathan <revanathan@gmail.com>
Date: Wed, 14 Aug 2024 07:08:24 -0700
Subject: [PATCH 25/25] fix Refineplasma for level0 and particle locator
 default for level0 as parent grid

---
 Source/Particles/PhysicalParticleContainer.cpp | 15 ++++++++++++---
 Source/WarpX.cpp                               |  2 +-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 1043b574859..b7410920edb 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -991,6 +991,8 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
     }
     bool refineplasma = false;
     amrex::ParticleLocator<amrex::DenseBins<amrex::Box> > refinepatch_locator;
+    amrex::ParticleLocator<amrex::DenseBins<amrex::Box> > parent_locator;
+    parent_locator.build(ParticleBoxArray(0),Geom(0));
     if (WarpX::refineAddplasma)
     {
         refineplasma = true;
@@ -1003,7 +1005,8 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
         }
         refinepatch_locator.setGeometry(Geom(0));
     }
-    auto assignpartgrid = refinepatch_locator.getGridAssignor();
+    auto assignpartgrid = (WarpX::refineAddplasma) ? refinepatch_locator.getGridAssignor()
+                                                   : parent_locator.getGridAssignor();
         // if assign_grid(ijk_vec) > 0, then we are in refinement patch. therefore refine plasma particles
         // else, usual num_part
 
@@ -1115,7 +1118,10 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
             {
                 auto index = overlap_box.index(iv);
                 amrex::IntVect glo_iv = iv + tile_box.smallEnd();
-                bool in_refpatch = ( assignpartgrid(glo_iv) < 0 ) ? false : true;
+                bool in_refpatch = false;
+                if ( !(assignpartgrid(glo_iv)<0) && refineplasma) {
+                    in_refpatch = true;
+                }
                 const amrex::Long r = ( (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) || (refineplasma && in_refpatch) ) ?
                     (AMREX_D_TERM(lrrfac[0],*lrrfac[1],*lrrfac[2])) : (1);
                 pcounts[index] = num_ppc*r;
@@ -1274,7 +1280,10 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
             const IntVect iv = IntVect(AMREX_D_DECL(i, j, k));
             const auto index = overlap_box.index(iv);
             amrex::IntVect glo_iv = iv + tile_box.smallEnd();
-            bool in_refpatch = ( assignpartgrid(glo_iv) < 0 ) ? false : true;
+            bool in_refpatch = false;
+            if ( !(assignpartgrid(glo_iv)<0) && refineplasma) {
+                in_refpatch = true;
+            }
 #ifdef WARPX_DIM_RZ
             Real theta_offset = 0._rt;
             if (rz_random_theta) { theta_offset = amrex::Random(engine) * 2._rt * MathConst::pi; }
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 84f892d354e..cf1aa908202 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -919,7 +919,7 @@ WarpX::ReadParameters ()
             }
             amrex::Print() << "\n";
         } else {
-            AddplasmaRefRatio = RefRatio(0);
+            if (maxLevel() > 0) AddplasmaRefRatio = RefRatio(0);
         }
 
         pp_warpx.query("do_dive_cleaning", do_dive_cleaning);