paranumal · tcew · Jan 27, 2021 · Jan 27, 2021
diff --git a/include/mesh.hpp b/include/mesh.hpp
@@ -220,6 +220,7 @@ class mesh_t {
   occa::memory o_sM;  // Surface mass
 
   // volume, surface, and second order geometric factors
+  string gfloatString;
   occa::memory o_vgeo, o_sgeo, o_ggeo;
 
   //face node mappings

diff --git a/libs/mesh/meshOccaSetupHex3D.cpp b/libs/mesh/meshOccaSetupHex3D.cpp
@@ -41,6 +41,7 @@ void meshHex3D::OccaSetup(){
   o_vgeo = platform.malloc((Nelements+totalHaloPairs)*Nvgeo*Np*sizeof(dfloat), vgeo);
   o_sgeo = platform.malloc(Nelements*Nfaces*Nfp*Nsgeo*sizeof(dfloat), sgeo);
   o_ggeo = platform.malloc(Nelements*Np*Nggeo*sizeof(dfloat), ggeo);
+  gfloatString = dfloatString; // define type of ggeo
 
   /* NC: disabling until we re-add treatment of affine elements
 

diff --git a/libs/mesh/meshOccaSetupQuad2D.cpp b/libs/mesh/meshOccaSetupQuad2D.cpp
@@ -41,4 +41,5 @@ void meshQuad2D::OccaSetup(){
   o_vgeo = platform.malloc((Nelements+totalHaloPairs)*Nvgeo*Np*sizeof(dfloat), vgeo);
   o_sgeo = platform.malloc(Nelements*Nfaces*Nfp*Nsgeo*sizeof(dfloat), sgeo);
   o_ggeo = platform.malloc(Nelements*Np*Nggeo*sizeof(dfloat), ggeo);
+  gfloatString = dfloatString; // define type of ggeo
 }
diff --git a/libs/mesh/meshOccaSetupQuad3D.cpp b/libs/mesh/meshOccaSetupQuad3D.cpp
@@ -41,4 +41,5 @@ void meshQuad3D::OccaSetup(){
   o_vgeo = platform.malloc((Nelements+totalHaloPairs)*Nvgeo*Np*sizeof(dfloat), vgeo);
   o_sgeo = platform.malloc(Nelements*Nfaces*Nfp*Nsgeo*sizeof(dfloat), sgeo);
   o_ggeo = platform.malloc(Nelements*Np*Nggeo*sizeof(dfloat), ggeo);
+  gfloatString = dfloatString; // define type of ggeo
 }
diff --git a/libs/mesh/meshOccaSetupTet3D.cpp b/libs/mesh/meshOccaSetupTet3D.cpp
@@ -72,6 +72,7 @@ void meshTet3D::OccaSetup(){
   o_vgeo = platform.malloc((Nelements+totalHaloPairs)*Nvgeo*sizeof(dfloat), vgeo);
   o_sgeo = platform.malloc(Nelements*Nfaces*Nsgeo*sizeof(dfloat), sgeo);
   o_ggeo = platform.malloc(Nelements*Nggeo*sizeof(dfloat), ggeo);
+  gfloatString = dfloatString; // define type of ggeo
 
   free(DT);
   free(LIFTT);

diff --git a/libs/mesh/meshOccaSetupTri2D.cpp b/libs/mesh/meshOccaSetupTri2D.cpp
@@ -64,7 +64,8 @@ void meshTri2D::OccaSetup(){
   o_vgeo = platform.malloc((Nelements+totalHaloPairs)*Nvgeo*sizeof(dfloat), vgeo);
   o_sgeo = platform.malloc(Nelements*Nfaces*Nsgeo*sizeof(dfloat), sgeo);
   o_ggeo = platform.malloc(Nelements*Nggeo*sizeof(dfloat), ggeo);
-
+  gfloatString = dfloatString; // define type of ggeo
+
   free(DT);
   free(LIFTT);
   free(sMT);

diff --git a/libs/mesh/meshOccaSetupTri3D.cpp b/libs/mesh/meshOccaSetupTri3D.cpp
@@ -64,7 +64,8 @@ void meshTri3D::OccaSetup(){
   o_vgeo = platform.malloc((Nelements+totalHaloPairs)*Nvgeo*sizeof(dfloat), vgeo);
   o_sgeo = platform.malloc(Nelements*Nfaces*Nsgeo*sizeof(dfloat), sgeo);
   o_ggeo = platform.malloc(Nelements*Nggeo*sizeof(dfloat), ggeo);
-
+  gfloatString = dfloatString; // define type of ggeo
+
   free(DT);
   free(LIFTT);
   free(sMT);

diff --git a/solvers/elliptic/okl/ellipticAxHex3D.okl b/solvers/elliptic/okl/ellipticAxHex3D.okl
@@ -26,7 +26,7 @@ SOFTWARE.
 
 
 @kernel void ellipticAxHex3D(const dlong Nelements,
-                             @restrict const  dfloat *  ggeo,
+                             @restrict const  gfloat *  ggeo,
                              @restrict const  dfloat *  DT,
                              @restrict const  dfloat *  S,
                              @restrict const  dfloat *  MM,
@@ -163,7 +163,7 @@ SOFTWARE.
 @kernel void ellipticPartialAxHex3D_v0(const dlong Nelements,
                                     @restrict const  dlong  *  elementList,
                                     @restrict const  dlong  *  GlobalToLocal,
-                                    @restrict const  dfloat *  ggeo,
+                                    @restrict const  gfloat *  ggeo,
                                     @restrict const  dfloat *  DT,
                                     @restrict const  dfloat *  S,
                                     @restrict const  dfloat *  MM,
@@ -308,7 +308,7 @@ SOFTWARE.
 
 @kernel void ellipticPartialAxHex3D_v1(const dlong Nelements,
                                    @restrict const  dlong  *  elementList,
-                                   @restrict const  dfloat *  ggeo,
+                                   @restrict const  gfloat *  ggeo,
                                    @restrict const  dfloat *  DT,
                                    @restrict const  dfloat *  S,
                                    @restrict const  dfloat *  MM,
@@ -1066,7 +1066,7 @@ SOFTWARE.
 // SPAM KERNELS
 @kernel void ellipticPartialAxHex3D_v2(const dlong Nelements,
                                        @restrict const  dlong  *  elementList,
-                                       @restrict const  dfloat *  ggeo,
+                                       @restrict const  gfloat *  ggeo,
                                        @restrict const  dfloat *  DT,
                                        @restrict const  dfloat *  S,
                                        @restrict const  dfloat *  MM,
@@ -1144,7 +1144,7 @@ SOFTWARE.
 
 @kernel void ellipticPartialAxHex3D_v3(const dlong Nelements,
                                        @restrict const  dlong  *  elementList,
-                                       @restrict const  dfloat *  ggeo,
+                                       @restrict const  gfloat *  ggeo,
                                        @restrict const  dfloat *  DT,
                                        @restrict const  dfloat *  S,
                                        @restrict const  dfloat *  MM,
@@ -1364,7 +1364,7 @@ SOFTWARE.
 #if 0
 @kernel void ellipticPartialAxHex3D_v4(const dlong Nelements,
                                        @restrict const  dlong  *  elementList,
-                                       @restrict const  dfloat *  ggeo,
+                                       @restrict const  gfloat *  ggeo,
                                        @restrict const  dfloat *  DT,
                                        @restrict const  dfloat *  S,
                                        @restrict const  dfloat *  MM,
@@ -1589,7 +1589,7 @@ SOFTWARE.
 
 @kernel void ellipticPartialAxHex3D_v5(const dlong Nelements,
                                        @restrict const  dlong  *  elementList,
-                                       @restrict const  dfloat *  ggeo,
+                                       @restrict const  gfloat *  ggeo,
                                        @restrict const  dfloat *  DT,
                                        @restrict const  dfloat *  S,
                                        @restrict const  dfloat *  MM,
@@ -1735,7 +1735,7 @@ SOFTWARE.
 
 @kernel void ellipticPartialAxHex3D_v6(const dlong Nelements,
                                        @restrict const  dlong  *  elementList,
-                                       @restrict const  dfloat *  ggeo,
+                                       @restrict const  gfloat *  ggeo,
                                        @restrict const  dfloat *  DT,
                                        @restrict const  dfloat *  S,
                                        @restrict const  dfloat *  MM,

diff --git a/solvers/elliptic/okl/ellipticAxQuad2D.okl b/solvers/elliptic/okl/ellipticAxQuad2D.okl
@@ -33,7 +33,7 @@ SOFTWARE.
 
 // square thread version
 @kernel void ellipticAxQuad2D(const dlong   Nelements,
-                               @restrict const  dfloat *  ggeo,
+                               @restrict const  gfloat *  ggeo,
                                @restrict const  dfloat *  DT,
                                @restrict const  dfloat *  S,
                                @restrict const  dfloat *  MM,
@@ -133,7 +133,7 @@ SOFTWARE.
 @kernel void ellipticPartialAxQuad2D(const dlong Nelements,
                                    @restrict const  dlong   *  elementList,
                                    @restrict const  dlong   *  GlobalToLocal,
-                                   @restrict const  dfloat *  ggeo,
+                                   @restrict const  gfloat *  ggeo,
                                    @restrict const  dfloat *  DT,
                                    @restrict const  dfloat *  S,
                                    @restrict const  dfloat *  MM,

diff --git a/solvers/elliptic/okl/ellipticAxQuad3D.okl b/solvers/elliptic/okl/ellipticAxQuad3D.okl
@@ -33,7 +33,7 @@
 
 // square thread version
 @kernel void ellipticAxQuad3D(const dlong   Nelements,
-			      @restrict const  dfloat *  ggeo,
+			      @restrict const  gfloat *  ggeo,
 			      @restrict const  dfloat *  D,
 			      @restrict const  dfloat *  S,
 			      @restrict const  dfloat *  MM,
@@ -146,7 +146,7 @@
 @kernel void ellipticPartialAxQuad3D(const dlong Nelements,
 				     @restrict const  dlong   *  elementList,
              @restrict const  dlong   *  GlobalToLocal,
-				     @restrict const  dfloat *  ggeo,
+				     @restrict const  gfloat *  ggeo,
 				     @restrict const  dfloat *  D,
 				     @restrict const  dfloat *  S,
 				     @restrict const  dfloat *  MM,

diff --git a/solvers/elliptic/okl/ellipticAxTet3D.okl b/solvers/elliptic/okl/ellipticAxTet3D.okl
@@ -26,7 +26,7 @@ SOFTWARE.
 
 
 @kernel void ellipticAxTet3D(const dlong Nelements,
-                            @restrict const  dfloat *  ggeo,
+                            @restrict const  gfloat *  ggeo,
                             @restrict const  dfloat *  D,
                             @restrict const  dfloat *  S,
                             @restrict const  dfloat *  MM,
@@ -89,7 +89,7 @@ SOFTWARE.
 
 @kernel void ellipticPartialAxTet3D_v0(const dlong Nelements,
                                   @restrict const  dlong   *  elementList,
-                                  @restrict const  dfloat *  ggeo,
+                                  @restrict const  gfloat *  ggeo,
                                   @restrict const  dfloat *  D,
                                   @restrict const  dfloat *  S,
                                   @restrict const  dfloat *  MM,
@@ -187,7 +187,7 @@ SOFTWARE.
 @kernel void ellipticPartialAxTet3D(const dlong Nelements,
                                   @restrict const  dlong   *  elementList,
                                   @restrict const  dlong   *  GlobalToLocal,
-                                  @restrict const  dfloat *  ggeo,
+                                  @restrict const  gfloat *  ggeo,
                                   @restrict const  dfloat *  D,
                                   @restrict const  dfloat *  S,
                                   @restrict const  dfloat *  MM,

diff --git a/solvers/elliptic/okl/ellipticAxTri2D.okl b/solvers/elliptic/okl/ellipticAxTri2D.okl
@@ -26,7 +26,7 @@ SOFTWARE.
 
 
 @kernel void ellipticAxTri2D(const dlong Nelements,
-                            @restrict const  dfloat *  ggeo,
+                            @restrict const  gfloat *  ggeo,
                             @restrict const  dfloat *  D,
                             @restrict const  dfloat *  S,
                             @restrict const  dfloat *  MM,
@@ -97,7 +97,7 @@ SOFTWARE.
 @kernel void ellipticPartialAxTri2D(const dlong Nelements,
                                     @restrict const  dlong   *  elementList,
                                     @restrict const  dlong   *  GlobalToLocal,
-                                    @restrict const  dfloat *  ggeo,
+                                    @restrict const  gfloat *  ggeo,
                                     @restrict const  dfloat *  D,
                                     @restrict const  dfloat *  S,
                                     @restrict const  dfloat *  MM,

diff --git a/solvers/elliptic/okl/ellipticAxTri3D.okl b/solvers/elliptic/okl/ellipticAxTri3D.okl
@@ -26,7 +26,7 @@
 
 
 @kernel void ellipticAxTri3D(const dlong Nelements,
-                             @restrict const  dfloat *  ggeo,
+                             @restrict const  gfloat *  ggeo,
                              @restrict const  dfloat *  Dmatrices,
                              @restrict const  dfloat *  Smatrices,
                              @restrict const  dfloat *  MM,
@@ -97,7 +97,7 @@
 @kernel void ellipticPartialAxTri3D(const dlong Nelements,
                                     @restrict const  dlong   *  elementList,
                                     @restrict const  dlong   *  GlobalToLocal,
-                                    @restrict const  dfloat *  ggeo,
+                                    @restrict const  gfloat *  ggeo,
                                     @restrict const  dfloat *  Dmatrices,
                                     @restrict const  dfloat *  Smatrices,
                                     @restrict const  dfloat *  MM,

diff --git a/solvers/elliptic/setups/setupHex3D.rc b/solvers/elliptic/setups/setupHex3D.rc
@@ -38,7 +38,7 @@ ISOPARAMETRIC
 1
 
 [POLYNOMIAL DEGREE]
-4
+6
 
 [THREAD MODEL]
 CUDA
@@ -68,7 +68,8 @@ MULTIGRID
 
 # can be ALLDEGREES, HALFDEGREES, HALFDOFS
 [MULTIGRID COARSENING]
-HALFDEGREES
+ALLDEGREES
+#HALFDEGREES
 
 # can be LOCALPATCH, or DAMPEDJACOBI
 # LOCALPATCH smoother can include EXACT
@@ -80,6 +81,12 @@ CHEBYSHEV
 [MULTIGRID CHEBYSHEV DEGREE]
 2
 
+# precision of the geometric factors (ggeo) stored on p-coarsened multigrid levels
+# can be float or double
+[MULTIGRID GEOFAC TYPE]
+float
+#double
+
 ###########################################
 
 ########## ParAlmond Options ##############
@@ -104,7 +111,7 @@ CHEBYSHEV
 ###########################################
 
 [OUTPUT TO FILE]
-TRUE
+FALSE
 
 [OUTPUT FILE NAME]
 elliptic

diff --git a/solvers/elliptic/src/ellipticOperator.cpp b/solvers/elliptic/src/ellipticOperator.cpp
@@ -37,6 +37,8 @@ void elliptic_t::Operator(occa::memory &o_q, occa::memory &o_Aq){
 
     ogsMasked->GatheredHaloExchangeStart(o_q, 1, ogs_dfloat);
 
+    //    printf("ellipticOperator using precision gfloatString=%s \n", mesh.gfloatString.c_str());
+
     if(mesh.NlocalGatherElements){
       // if(integrationType==0) { // GLL or non-hex
         // if(mapType==0)

diff --git a/solvers/elliptic/src/ellipticPreconMultiGrid.cpp b/solvers/elliptic/src/ellipticPreconMultiGrid.cpp
@@ -37,6 +37,40 @@ void MultiGridPrecon::Operator(occa::memory& o_r, occa::memory& o_Mr) {
   if(elliptic.allNeumann) elliptic.ZeroMean(o_Mr);
 }
 
+// Converts N host dfloat values to T and uploads them into a newly allocated device buffer.
+template <typename T>
+static occa::memory occaUploadAs(platform_t &platform, dlong N, const dfloat *h_x){
+  T *x = (T*) calloc(N, sizeof(T));
+  for(dlong n=0;n<N;++n) x[n] = (T) h_x[n]; // dlong index matches the type of N
+  occa::memory o_y = platform.device.malloc(N*sizeof(T), x);
+  free(x);
+  return o_y;
+}
+
+// Replaces o_x (N dfloat entries) by a device buffer holding the same values as gfloatString.
+//  platform     - its occa device performs the replacement allocation
+//  N            - number of entries read from o_x and written to the new buffer
+//  o_x          - in/out: old storage is freed and the handle rebound to the new buffer
+//  gfloatString - "float"/"double"; no-op if equal to dfloatString; other names reset to it
+void occaConvertType(platform_t &platform, dlong N, occa::memory &o_x, string &gfloatString){
+
+  if(gfloatString==dfloatString) return;
+
+  // validate before touching o_x so an unknown type can never leave it freed
+  const bool toFloat  = (gfloatString=="float");
+  const bool toDouble = (gfloatString=="double");
+  if(!toFloat && !toDouble){ gfloatString = dfloatString; return; }
+
+  // bring exactly N entries back to host, then free device storage
+  dfloat *h_x = (dfloat*) calloc(N, sizeof(dfloat));
+  o_x.copyTo(h_x, N*sizeof(dfloat));
+  o_x.free();
+
+  // type convert on host and copy back to device
+  o_x = toFloat ? occaUploadAs<float>(platform, N, h_x) : occaUploadAs<double>(platform, N, h_x);
+  free(h_x);
+}
+
 MultiGridPrecon::MultiGridPrecon(elliptic_t& _elliptic):
   elliptic(_elliptic), mesh(_elliptic.mesh), settings(_elliptic.settings),
   parAlmond(elliptic.platform, settings, mesh.comm) {
@@ -52,6 +86,21 @@ MultiGridPrecon::MultiGridPrecon(elliptic_t& _elliptic):
   while(Nc>1) {
     //build mesh and elliptic objects for this degree
     mesh_t &meshF = mesh.SetupNewDegree(Nf);
+
+    // TW: on p-coarsened levels (Nf<mesh.N) re-store o_ggeo in the MULTIGRID GEOFAC TYPE precision and record it in meshF.gfloatString
+    if(Nf<mesh.N){ //
+      meshF.gfloatString = dfloatString;
+      settings.getSetting("MULTIGRID GEOFAC TYPE", meshF.gfloatString);
+
+      dlong M = 0;
+      if(meshF.elementType==TRIANGLES || meshF.elementType==TETRAHEDRA)
+	M = meshF.Nelements*meshF.Nggeo;
+      else
+	M = meshF.Nelements*meshF.Np*meshF.Nggeo;
+
+      occaConvertType(meshF.platform, M, meshF.o_ggeo, meshF.gfloatString);
+    }
+
     elliptic_t &ellipticF = elliptic.SetupNewDegree(meshF);
 
     //share masking data with previous MG level

diff --git a/solvers/elliptic/src/ellipticSettings.cpp b/solvers/elliptic/src/ellipticSettings.cpp
@@ -91,6 +91,11 @@ void ellipticAddSettings(settings_t& settings,
                       "2",
                       "Smoothing iterations in Chebyshev smoother");
 
+  settings.newSetting(prefix+"MULTIGRID GEOFAC TYPE",
+		      "double",
+		      "Precision of geometric factors to use for p coarsened p-multigrid levels",
+		      {"float", "double"});
+
   settings.newSetting(prefix+"VERBOSE",
                       "FALSE",
                       "Enable verbose output",
@@ -116,6 +121,7 @@ void ellipticSettings_t::report() {
       reportSetting("MULTIGRID SMOOTHER");
       if (compareSetting("MULTIGRID SMOOTHER","CHEBYSHEV"))
         reportSetting("MULTIGRID CHEBYSHEV DEGREE");
+      reportSetting("MULTIGRID GEOFAC TYPE");
     }
 
     if (compareSetting("PRECONDITIONER","MULTIGRID")

diff --git a/solvers/elliptic/src/ellipticSetup.cpp b/solvers/elliptic/src/ellipticSetup.cpp
@@ -113,7 +113,8 @@ elliptic_t& elliptic_t::Setup(platform_t& platform, mesh_t& mesh,
 
   int NblockV = mymax(1,blockMax/mesh.Np);
   kernelInfo["defines/" "p_NblockV"]= NblockV;
-
+  kernelInfo["defines/" "gfloat"] = mesh.gfloatString;
+
   // Ax kernel
   if (settings.compareSetting("DISCRETIZATION","CONTINUOUS")) {
     sprintf(fileName,  DELLIPTIC "/okl/ellipticAx%s.okl", suffix);

diff --git a/solvers/elliptic/src/ellipticSetupNewDegree.cpp b/solvers/elliptic/src/ellipticSetupNewDegree.cpp
@@ -107,6 +107,7 @@ elliptic_t& elliptic_t::SetupNewDegree(mesh_t& meshC){
 
   int NblockV = mymax(1,blockMax/meshC.Np);
   kernelInfo["defines/" "p_NblockV"]= NblockV;
+  kernelInfo["defines/" "gfloat"] = meshC.gfloatString;
 
   // Ax kernel
   if (settings.compareSetting("DISCRETIZATION","CONTINUOUS")) {

diff --git a/test/testElliptic.py b/test/testElliptic.py
@@ -39,6 +39,7 @@ def ellipticSettings(rcformat="2.0", data_file=ellipticData2D,
                      linear_solver="PCG",
                      precon="MULTIGRID",
                      multigrid_smoother="CHEBYSHEV",
+                     multigrid_geofac_type="float",
                      paralmond_cycle="VCYCLE",
                      paralmond_strength="SYMMETRIC",
                      paralmond_aggregation="UNSMOOTHED",
@@ -64,6 +65,7 @@ def ellipticSettings(rcformat="2.0", data_file=ellipticData2D,
           setting_t("LINEAR SOLVER", linear_solver),
           setting_t("PRECONDITIONER", precon),
           setting_t("MULTIGRID SMOOTHER", multigrid_smoother),
+          setting_t("MULTIGRID GEOFAC TYPE", multigrid_geofac_type),
           setting_t("PARALMOND CYCLE", paralmond_cycle),
           setting_t("PARALMOND STRENGTH", paralmond_strength),
           setting_t("PARALMOND AGGREGATION", paralmond_aggregation),