From df0005a65cc27c5504802250082bf6786a5bc5e2 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Mon, 13 May 2024 20:29:43 +0200
Subject: [PATCH 01/11] updates workflow script (macOS)

---
 .github/workflows/CI.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 007bca8fa..6713988bf 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -107,6 +107,12 @@ jobs:
         echo "PATH=/usr/local/opt/gnu-sed/libexec/gnubin:$PATH" >> $GITHUB_ENV
         ## OpenMP
         echo "OMP_NUM_THREADS=2" >> $GITHUB_ENV
+        ## MPI
+        echo "MPI environment:"
+        echo "mpif90 on path: $(which mpif90)"
+        echo
+        # add OpenMPI path to have ./configure detect mpi.h
+        echo "MPI_INC=/opt/homebrew/include" >> $GITHUB_ENV
         ## avoids MPI issue with number of slots
         echo "OMPI_MCA_rmaps_base_oversubscribe=1" >> $GITHUB_ENV
         echo "OMPI_MCA_rmaps_base_inherit=1" >> $GITHUB_ENV

From 8253955b797d047f0f2daf3a8a861c16b789b11e Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Tue, 14 May 2024 15:20:00 +0200
Subject: [PATCH 02/11] updates workflow (sed path for macOS)

---
 .github/workflows/CI.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 6713988bf..5a4e42773 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -104,10 +104,11 @@ jobs:
         echo ""
         ## avoids sed -i '' issue on MacOS, using gnu sed to have the same sed command lines as in linux
         brew install gnu-sed
-        echo "PATH=/usr/local/opt/gnu-sed/libexec/gnubin:$PATH" >> $GITHUB_ENV
+        echo "PATH=/opt/homebrew/opt/gnu-sed/libexec/gnubin:$PATH" >> $GITHUB_ENV
         ## OpenMP
         echo "OMP_NUM_THREADS=2" >> $GITHUB_ENV
         ## MPI
+        echo
         echo "MPI environment:"
         echo "mpif90 on path: $(which mpif90)"
         echo

From 71b40840b580cfa15cecd47ac1cd73d9b5e0a911 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Thu, 16 May 2024 11:30:46 +0200
Subject: [PATCH 03/11] adds more explicit type conversions

---
 src/auxiliaries/create_movie_AVS_DX.f90       |   8 +-
 src/auxiliaries/create_movie_GMT_global.f90   |  46 ++--
 src/auxiliaries/write_profile.f90             |  22 +-
 src/meshfem3D/add_topography_410_650.f90      |   5 +-
 src/meshfem3D/check_mesh_resolution.f90       | 249 ++++++++++--------
 src/meshfem3D/get_absorb.f90                  |  10 +-
 src/meshfem3D/model_atten3D_QRFSI12.f90       |  10 +-
 src/meshfem3D/model_full_sh.f90               |   5 +-
 src/meshfem3D/model_gll.f90                   |  14 +-
 src/meshfem3D/model_ppm.f90                   | 121 ++++-----
 src/meshfem3D/model_scattering.f90            |  29 +-
 src/meshfem3D/model_spiral.f90                |   6 +-
 src/meshfem3D/save_model_meshfiles.f90        |   8 +-
 .../write_AVS_DX_global_chunks_data.f90       |  64 ++---
 src/shared/fft.f90                            |  16 +-
 src/shared/interpolate.f90                    |   4 +-
 src/shared/model_topo_bathy.f90               |   2 +-
 src/shared/read_compute_parameters.f90        |   2 +-
 src/specfem3D/compute_add_sources.f90         |   4 +-
 src/specfem3D/compute_adj_source_frechet.f90  |  12 +-
 src/specfem3D/compute_arrays_source.f90       |   4 +-
 src/specfem3D/compute_coupling.f90            | 140 +++++-----
 src/specfem3D/compute_seismograms.F90         |  12 +-
 src/specfem3D/define_derivation_matrices.f90  |   2 +-
 src/specfem3D/locate_regular_points.f90       |  12 +-
 src/specfem3D/locate_sources.f90              |   2 +-
 src/specfem3D/noise_tomography.f90            |  10 +-
 src/specfem3D/prepare_attenuation.f90         |  26 +-
 src/specfem3D/prepare_elastic_elements.F90    |   6 +-
 src/specfem3D/prepare_timerun.F90             |  12 +-
 src/specfem3D/save_kernels.F90                |  54 ++--
 src/specfem3D/save_regular_kernels.f90        |  10 +-
 src/specfem3D/setup_sources_receivers.f90     |   8 +-
 src/specfem3D/write_movie_surface.f90         |  12 +-
 src/specfem3D/write_movie_volume.f90          |  10 +-
 src/specfem3D/write_seismograms.f90           |   4 +-
 src/tomography/compute_kernel_integral.f90    |   6 +-
 src/tomography/get_cg_direction.f90           |   6 +-
 src/tomography/get_sd_direction.f90           |  12 +-
 .../create_cross_section.F90                  |  36 +--
 .../laplacian_smoothing_sem.F90               |  45 ++--
 .../smooth_sem.F90                            |  12 +-
 42 files changed, 558 insertions(+), 520 deletions(-)

diff --git a/src/auxiliaries/create_movie_AVS_DX.f90 b/src/auxiliaries/create_movie_AVS_DX.f90
index 6d4c8493c..53e00cb47 100644
--- a/src/auxiliaries/create_movie_AVS_DX.f90
+++ b/src/auxiliaries/create_movie_AVS_DX.f90
@@ -894,11 +894,11 @@ program xcreate_movie_AVS_DX
             ipoin = ipoin + 1
             ireorder(ibool_number) = ipoin
             ! point value
-            total_dat(ipoin) = field_display(ilocnum+ieoff)
+            total_dat(ipoin) = real(field_display(ilocnum+ieoff),kind=CUSTOM_REAL)
             ! point location
-            total_dat_xyz(1,ipoin) = xp_save(ilocnum+ieoff)
-            total_dat_xyz(2,ipoin) = yp_save(ilocnum+ieoff)
-            total_dat_xyz(3,ipoin) = zp_save(ilocnum+ieoff)
+            total_dat_xyz(1,ipoin) = real(xp_save(ilocnum+ieoff),kind=CUSTOM_REAL)
+            total_dat_xyz(2,ipoin) = real(yp_save(ilocnum+ieoff),kind=CUSTOM_REAL)
+            total_dat_xyz(3,ipoin) = real(zp_save(ilocnum+ieoff),kind=CUSTOM_REAL)
           endif
           mask_point(ibool_number) = .true.
         enddo
diff --git a/src/auxiliaries/create_movie_GMT_global.f90 b/src/auxiliaries/create_movie_GMT_global.f90
index 178bb12a4..9696b0d7b 100644
--- a/src/auxiliaries/create_movie_GMT_global.f90
+++ b/src/auxiliaries/create_movie_GMT_global.f90
@@ -358,9 +358,9 @@ program create_movie_GMT_global
       close(IIN)
     endif
     ! effective half duration in movie runs
-    hdur = sqrt( cmt_hdur**2 + HDUR_MOVIE**2)
+    hdur = real(sqrt( cmt_hdur**2 + HDUR_MOVIE**2),kind=CUSTOM_REAL)
     ! start time of simulation
-    t0 = - 1.5d0*( cmt_t_shift - hdur )
+    t0 = real(-1.5d0*( cmt_t_shift - hdur ),kind=CUSTOM_REAL)
 
     ! becomes time (s) from hypocenter to reach surface (using average 8 km/s s-wave speed)
     ! note: especially for deep sources, this helps determine a better starttime to mute
@@ -383,15 +383,15 @@ program create_movie_GMT_global
 
     ! converts values into radians
     ! colatitude [0, PI]
-    LAT_SOURCE = (90.0 - LAT_SOURCE)*DEGREES_TO_RADIANS
+    LAT_SOURCE = real((90.d0 - LAT_SOURCE)*DEGREES_TO_RADIANS,kind=CUSTOM_REAL)
 
     ! longitude [-PI, PI]
     if (LON_SOURCE < -180.0 ) LON_SOURCE = LON_SOURCE + 360.0
     if (LON_SOURCE > 180.0 ) LON_SOURCE = LON_SOURCE - 360.0
-    LON_SOURCE = LON_SOURCE * DEGREES_TO_RADIANS
+    LON_SOURCE = real(LON_SOURCE * DEGREES_TO_RADIANS,kind=CUSTOM_REAL)
 
     ! mute radius in rad
-    RADIUS_TO_MUTE = RADIUS_TO_MUTE * DEGREES_TO_RADIANS
+    RADIUS_TO_MUTE = real(RADIUS_TO_MUTE * DEGREES_TO_RADIANS,kind=CUSTOM_REAL)
   endif
 
   print *,'--------'
@@ -468,7 +468,7 @@ program create_movie_GMT_global
 
         ! approximate wavefront travel distance in degrees
         ! (~3.5 km/s wave speed for surface waves)
-        distance = SURFACE_WAVE_VELOCITY * ((it-1)*DT-t0) / (R_PLANET/1000.d0) * RADIANS_TO_DEGREES
+        distance = real(SURFACE_WAVE_VELOCITY * ((it-1)*DT-t0) / (R_PLANET/1000.d0) * RADIANS_TO_DEGREES,kind=CUSTOM_REAL)
 
         print *,'distance approximate: ',distance,'(degrees)'
 
@@ -498,7 +498,7 @@ program create_movie_GMT_global
         print *,'muting radius: ',0.7 * distance,'(degrees)'
 
         ! new radius of mute area (in rad)
-        RADIUS_TO_MUTE = 0.7 * distance * DEGREES_TO_RADIANS
+        RADIUS_TO_MUTE = real(0.7d0 * distance * DEGREES_TO_RADIANS,kind=CUSTOM_REAL)
       else
         ! mute_factor used at the beginning for scaling displacement values
         if (STARTTIME_TO_MUTE > TINYVAL) then
@@ -507,9 +507,9 @@ program create_movie_GMT_global
           ! linear scaling between [0,1]:
           ! from 0 (simulation time equal to zero )
           ! to 1 (simulation time equals starttime_to_mute)
-          mute_factor = 1.0 - ( STARTTIME_TO_MUTE - ((it-1)*DT-t0) ) / (STARTTIME_TO_MUTE+t0)
+          mute_factor = real(1.d0 - ( STARTTIME_TO_MUTE - ((it-1)*DT-t0) ) / (STARTTIME_TO_MUTE+t0),kind=CUSTOM_REAL)
           ! threshold value for mute_factor
-          if (mute_factor < TINYVAL ) mute_factor = TINYVAL
+          if (mute_factor < TINYVAL ) mute_factor = real(TINYVAL,kind=CUSTOM_REAL)
           if (mute_factor > 1.0 ) mute_factor = 1.0
         endif
       endif
@@ -611,17 +611,17 @@ program create_movie_GMT_global
                   ! checks source longitude range
                   if (LON_SOURCE - RADIUS_TO_MUTE < -PI .or. LON_SOURCE + RADIUS_TO_MUTE > PI) then
                     ! source close to 180. longitudes, shifts range to [0, 2PI]
-                    if (phival < 0.0 ) phival = phival + TWO_PI
+                    if (phival < 0.0 ) phival = phival + real(TWO_PI,kind=CUSTOM_REAL)
                     if (LON_SOURCE < 0.0) then
-                      dist_lon = phival - (LON_SOURCE + TWO_PI)
+                      dist_lon = phival - real((LON_SOURCE + TWO_PI),kind=CUSTOM_REAL)
                     else
                       dist_lon = phival - LON_SOURCE
                     endif
                   else
                     ! source well between range to [-PI, PI]
                     ! shifts phival to be like LON_SOURCE between [-PI,PI]
-                    if (phival > PI ) phival = phival - TWO_PI
-                    if (phival < -PI ) phival = phival + TWO_PI
+                    if (phival > PI ) phival = phival - real(TWO_PI,kind=CUSTOM_REAL)
+                    if (phival < -PI ) phival = phival + real(TWO_PI,kind=CUSTOM_REAL)
 
                     dist_lon = phival - LON_SOURCE
                   endif
@@ -634,7 +634,7 @@ program create_movie_GMT_global
                     if ((it-1)*DT-t0 > STARTTIME_TO_MUTE) then
                       ! wavefield will be tapered to mask out noise in source area
                       ! factor from 0 to 1
-                      mute_factor = ( 0.5*(1.0 - cos(distance/RADIUS_TO_MUTE*PI)) )**6
+                      mute_factor = real( ( 0.d5*(1.d0 - cos(distance/RADIUS_TO_MUTE*PI)) )**6,kind=CUSTOM_REAL)
                       ! factor from 0.01 to 1
                       mute_factor = mute_factor * 0.99 + 0.01
                       displn(i,j) = displn(i,j) * mute_factor
@@ -721,11 +721,11 @@ program create_movie_GMT_global
 
                 ! determines North / South pole index for stamping maximum values
                 if (USE_AVERAGED_MAXIMUM .and. AVERAGE_NORMALIZE_VALUES) then
-                  xmesh = xp(ieoff)
-                  ymesh = yp(ieoff)
-                  zmesh = zp(ieoff)
-                  if (zmesh > -SMALL_VAL_ANGLE .and. zmesh <= ZERO) zmesh = -SMALL_VAL_ANGLE
-                  if (zmesh < SMALL_VAL_ANGLE .and. zmesh >= ZERO) zmesh = SMALL_VAL_ANGLE
+                  xmesh = real(xp(ieoff),kind=CUSTOM_REAL)
+                  ymesh = real(yp(ieoff),kind=CUSTOM_REAL)
+                  zmesh = real(zp(ieoff),kind=CUSTOM_REAL)
+                  if (zmesh > -SMALL_VAL_ANGLE .and. zmesh <= ZERO) zmesh = - real(SMALL_VAL_ANGLE,kind=CUSTOM_REAL)
+                  if (zmesh < SMALL_VAL_ANGLE .and. zmesh >= ZERO) zmesh = real(SMALL_VAL_ANGLE,kind=CUSTOM_REAL)
                   thetaval = atan2(sqrt(xmesh*xmesh+ymesh*ymesh),zmesh)
                   ! thetaval between 0 and PI / 2
                   !print *,'thetaval:',thetaval * 180. / PI
@@ -812,7 +812,7 @@ program create_movie_GMT_global
 
         if (max_absol < max_average) then
           ! distance (in degree) of surface waves travelled
-          distance = SURFACE_WAVE_VELOCITY * ((it-1)*DT-t0) / (R_PLANET/1000.d0) * RADIANS_TO_DEGREES
+          distance = real(SURFACE_WAVE_VELOCITY * ((it-1)*DT-t0) / (R_PLANET/1000.d0) * RADIANS_TO_DEGREES,kind=CUSTOM_REAL)
           if (distance > 10.0 .and. distance <= 20.0) then
             ! smooth transition between 10 and 20 degrees
             ! sets positive and negative maximum
@@ -866,15 +866,15 @@ program create_movie_GMT_global
               ! linear scaling between [0,1]:
               ! from 0 (simulation time equal to -t0 )
               ! to 1 (simulation time equals starttime_to_mute)
-              mute_factor = 1.0 - ( STARTTIME_TO_MUTE - ((it-1)*DT-t0) ) / (STARTTIME_TO_MUTE+t0)
+              mute_factor = real(1.d0 - ( STARTTIME_TO_MUTE - ((it-1)*DT-t0) ) / (STARTTIME_TO_MUTE+t0),kind=CUSTOM_REAL)
               ! takes complement and shifts scale to (1,100)
               ! thus, mute factor is 100 at simulation start and 1.0 at starttime_to_mute
               mute_factor = abs(1.0 - mute_factor) * 99.0 + 1.0
               ! positive and negative maximum reach average when wavefield appears
-              val = mute_factor * max_average
+              val = real(mute_factor * max_average,kind=CUSTOM_REAL)
             else
               ! uses a constant factor
-              val = 100.0 * max_average
+              val = real(100.d0 * max_average,kind=CUSTOM_REAL)
             endif
             ! positive and negative maximum
             field_display(istamp1) = + val
diff --git a/src/auxiliaries/write_profile.f90 b/src/auxiliaries/write_profile.f90
index 2434332bf..c0c65fbb6 100644
--- a/src/auxiliaries/write_profile.f90
+++ b/src/auxiliaries/write_profile.f90
@@ -1211,8 +1211,8 @@ subroutine load_GLL_mesh(theta_degrees,phi_degrees)
   integer,save :: slice_proc_old = 0
 
   ! converts to lat/lon
-  lat = 90.0 - theta_degrees
-  lon = phi_degrees
+  lat = real(90.d0 - theta_degrees,kind=CUSTOM_REAL)
+  lon = real(phi_degrees,kind=CUSTOM_REAL)
   ! limits value range
   if (lat < -90.0) lat = -90.0
   if (lat > 90.0) lat = 90.0
@@ -1226,8 +1226,8 @@ subroutine load_GLL_mesh(theta_degrees,phi_degrees)
     xi_width = 90.0
     eta_width = 90.0
   else
-    xi_width = ANGULAR_WIDTH_XI_IN_DEGREES
-    eta_width = ANGULAR_WIDTH_ETA_IN_DEGREES
+    xi_width = real(ANGULAR_WIDTH_XI_IN_DEGREES,kind=CUSTOM_REAL)
+    eta_width = real(ANGULAR_WIDTH_ETA_IN_DEGREES,kind=CUSTOM_REAL)
   endif
 
   ! converts chunk width to radians
@@ -1306,8 +1306,8 @@ subroutine get_latlon_chunk_location(lat,lon,nchunks,chunk,xi,eta)
   integer :: k
 
   ! converts lat/lon from degrees to radians (colatitute/longitude)
-  theta = (90.0 - lat)/180.0 * PI
-  phi = lon/180.0 * PI
+  theta = real((90.d0 - lat)/180.d0 * PI,kind=CUSTOM_REAL)
+  phi = real(lon/180.d0 * PI,kind=CUSTOM_REAL)
 
   ! converts (r,theta,phi) to (x,y,z) on unit sphere
   r = 1.0_CUSTOM_REAL
@@ -1317,9 +1317,9 @@ subroutine get_latlon_chunk_location(lat,lon,nchunks,chunk,xi,eta)
     ! compute rotation matrix from Euler angles
     call euler_angles(rotation_matrix,CENTER_LONGITUDE_IN_DEGREES,CENTER_LATITUDE_IN_DEGREES,GAMMA_ROTATION_AZIMUTH)
 
-    xn = x * rotation_matrix(1,1) + y * rotation_matrix(2,1) + z * rotation_matrix(3,1)
-    yn = x * rotation_matrix(1,2) + y * rotation_matrix(2,2) + z * rotation_matrix(3,2)
-    zn = x * rotation_matrix(1,3) + y * rotation_matrix(2,3) + z * rotation_matrix(3,3)
+    xn = real(x * rotation_matrix(1,1) + y * rotation_matrix(2,1) + z * rotation_matrix(3,1),kind=CUSTOM_REAL)
+    yn = real(x * rotation_matrix(1,2) + y * rotation_matrix(2,2) + z * rotation_matrix(3,2),kind=CUSTOM_REAL)
+    zn = real(x * rotation_matrix(1,3) + y * rotation_matrix(2,3) + z * rotation_matrix(3,3),kind=CUSTOM_REAL)
 
     x = xn; y = yn; z = zn
   endif
@@ -1513,8 +1513,8 @@ subroutine get_process_slice_number(chunk,xi,eta,nproc_xi,xi_width,eta_width,sli
   integer :: proc_xi,proc_eta
 
   ! converts chunk width to radians
-  xi_width_rad = xi_width/180.0 * PI
-  eta_width_rad = eta_width/180.0 * PI
+  xi_width_rad = real(xi_width/180.d0 * PI,kind=CUSTOM_REAL)
+  eta_width_rad = real(eta_width/180.d0 * PI,kind=CUSTOM_REAL)
 
   ! gets process/slice number
   xi1 = xi / xi_width_rad * 2.0
diff --git a/src/meshfem3D/add_topography_410_650.f90 b/src/meshfem3D/add_topography_410_650.f90
index 4940e2e8d..04bfa8c7a 100644
--- a/src/meshfem3D/add_topography_410_650.f90
+++ b/src/meshfem3D/add_topography_410_650.f90
@@ -48,8 +48,9 @@ subroutine add_topography_410_650(xelm,yelm,zelm)
 
   !statistics
   logical,parameter :: DEBUG_STATISTICS = .false.
-  real(kind=CUSTOM_REAL),save :: min_410 = HUGEVAL,max_410 = - HUGEVAL
-  real(kind=CUSTOM_REAL),save :: min_650 = HUGEVAL,max_650 = - HUGEVAL
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+  real(kind=CUSTOM_REAL),save :: min_410 = HUGEVAL_REAL,max_410 = - HUGEVAL_REAL
+  real(kind=CUSTOM_REAL),save :: min_650 = HUGEVAL_REAL,max_650 = - HUGEVAL_REAL
   real(kind=CUSTOM_REAL) :: min_410_all,max_410_all
   real(kind=CUSTOM_REAL) :: min_650_all,max_650_all
 
diff --git a/src/meshfem3D/check_mesh_resolution.f90 b/src/meshfem3D/check_mesh_resolution.f90
index b086386c5..0d0110cb9 100644
--- a/src/meshfem3D/check_mesh_resolution.f90
+++ b/src/meshfem3D/check_mesh_resolution.f90
@@ -87,6 +87,9 @@ subroutine check_mesh_resolution(iregion_code,xstore,ystore,zstore, &
   !double precision, external :: wtime
   !double precision :: tstart,tCPU
 
+  ! to avoid conversion warnings
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+
   ! note: the mesh and time step check is only approximative
 
   !debug timing
@@ -124,23 +127,23 @@ subroutine check_mesh_resolution(iregion_code,xstore,ystore,zstore, &
 
   ! initializes global min/max values only when first called
   if (iregion_code == 1) then
-    dt_max_glob = HUGEVAL
-    pmax_glob = - HUGEVAL
+    dt_max_glob = HUGEVAL_REAL
+    pmax_glob = - HUGEVAL_REAL
   endif
 
   ! statistics for this region
-  elemsize_min_reg = HUGEVAL
-  elemsize_max_reg = -HUGEVAL
+  elemsize_min_reg = HUGEVAL_REAL
+  elemsize_max_reg = - HUGEVAL_REAL
 
-  eig_ratio_min_reg = HUGEVAL
-  eig_ratio_max_reg = -HUGEVAL
+  eig_ratio_min_reg = HUGEVAL_REAL
+  eig_ratio_max_reg = - HUGEVAL_REAL
 
-  pmax_reg = - HUGEVAL
-  dt_max_reg = HUGEVAL
-  cmax_reg = - HUGEVAL
+  pmax_reg = - HUGEVAL_REAL
+  dt_max_reg = HUGEVAL_REAL
+  cmax_reg = - HUGEVAL_REAL
 
-  vsmin_reg = HUGEVAL
-  vpmax_reg = - HUGEVAL
+  vsmin_reg = HUGEVAL_REAL
+  vpmax_reg = - HUGEVAL_REAL
 
 ! openmp mesher
 !$OMP PARALLEL DEFAULT(SHARED) &
@@ -207,7 +210,7 @@ subroutine check_mesh_resolution(iregion_code,xstore,ystore,zstore, &
     pmax_reg = max(pmax_reg,pmax)
 
     ! computes minimum and maximum distance of neighbor GLL points in this grid cell
-    distance_min = elemsize_min * percent_GLL(NGLLX)
+    distance_min = elemsize_min * real(percent_GLL(NGLLX),kind=CUSTOM_REAL)
 
     ! distance at skewed corner across
     ! if the angle at corner less than 60 degrees, then the distance between the second GLL points (B-C) becomes
@@ -230,7 +233,7 @@ subroutine check_mesh_resolution(iregion_code,xstore,ystore,zstore, &
     call get_timestep_limit_significant_digit(deltat_suggested)
 
     ! maximum time step size
-    dt_max = deltat_suggested
+    dt_max = real(deltat_suggested,kind=CUSTOM_REAL)
 
     ! debug
     !if (dt_max_reg > dt_max) then
@@ -246,7 +249,7 @@ subroutine check_mesh_resolution(iregion_code,xstore,ystore,zstore, &
     ! Courant number
     ! based on minimum GLL point distance and maximum velocity
     ! i.e. on the maximum ratio of ( velocity / gridsize )
-    cmax = vpmax * DT / distance_min
+    cmax = vpmax * real(DT,kind=CUSTOM_REAL) / distance_min
 
     ! sets region stability number
     cmax_reg = max(cmax_reg,cmax)
@@ -295,7 +298,7 @@ subroutine check_mesh_resolution(iregion_code,xstore,ystore,zstore, &
   call max_all_cr(eig_ratio_max,eig_ratio_max_reg)
 
   ! empirical minimum period resolved by mesh
-  pmax_empirical = T_min_period
+  pmax_empirical = real(T_min_period,kind=CUSTOM_REAL)
 
   !debug timing
   !tCPU = wtime() - tstart
@@ -422,6 +425,9 @@ subroutine get_vpvs_minmax(vpmax,vsmin,ispec,nspec,iregion_code,kappavstore,kapp
   ! scaling factors to re-dimensionalize units
   real(kind=CUSTOM_REAL) :: scaleval
 
+  ! to avoid conversion warnings
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+
   ! checks if anything to do for this region
   if (iregion_code == IREGION_TRINFINITE .or. &
       iregion_code == IREGION_INFINITE) then
@@ -432,8 +438,8 @@ subroutine get_vpvs_minmax(vpmax,vsmin,ispec,nspec,iregion_code,kappavstore,kapp
     return
   endif
 
-  vpmax = - HUGEVAL
-  vsmin = HUGEVAL
+  vpmax = - HUGEVAL_REAL
+  vsmin = HUGEVAL_REAL
 
   do k = 1, NGLLZ, NGLLZ-1
     do j = 1, NGLLY, NGLLY-1
@@ -567,20 +573,23 @@ subroutine get_elem_minmaxsize(elemsize_min,elemsize_max,ispec,nspec,xstore,ysto
   integer :: i1,i2,j1,j2,k1,k2
   integer :: i,j,k
 
-  elemsize_min = HUGEVAL
-  elemsize_max = -HUGEVAL
+  ! to avoid conversion warnings
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+
+  elemsize_min = HUGEVAL_REAL
+  elemsize_max = -HUGEVAL_REAL
   ! loops over the four edges that are along X
   i1 = 1
   i2 = NGLLX
   do k = 1, NGLLZ, NGLLZ-1
     do j = 1, NGLLY, NGLLY-1
-      x1 = xstore(i1,j,k,ispec)
-      y1 = ystore(i1,j,k,ispec)
-      z1 = zstore(i1,j,k,ispec)
+      x1 = real(xstore(i1,j,k,ispec),kind=CUSTOM_REAL)
+      y1 = real(ystore(i1,j,k,ispec),kind=CUSTOM_REAL)
+      z1 = real(zstore(i1,j,k,ispec),kind=CUSTOM_REAL)
 
-      x2 = xstore(i2,j,k,ispec)
-      y2 = ystore(i2,j,k,ispec)
-      z2 = zstore(i2,j,k,ispec)
+      x2 = real(xstore(i2,j,k,ispec),kind=CUSTOM_REAL)
+      y2 = real(ystore(i2,j,k,ispec),kind=CUSTOM_REAL)
+      z2 = real(zstore(i2,j,k,ispec),kind=CUSTOM_REAL)
 
       dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
       if (elemsize_min > dist) elemsize_min = dist
@@ -593,13 +602,13 @@ subroutine get_elem_minmaxsize(elemsize_min,elemsize_max,ispec,nspec,xstore,ysto
   j2 = NGLLY
   do k = 1, NGLLZ, NGLLZ-1
     do i = 1, NGLLX, NGLLX-1
-      x1 = xstore(i,j1,k,ispec)
-      y1 = ystore(i,j1,k,ispec)
-      z1 = zstore(i,j1,k,ispec)
+      x1 = real(xstore(i,j1,k,ispec),kind=CUSTOM_REAL)
+      y1 = real(ystore(i,j1,k,ispec),kind=CUSTOM_REAL)
+      z1 = real(zstore(i,j1,k,ispec),kind=CUSTOM_REAL)
 
-      x2 = xstore(i,j2,k,ispec)
-      y2 = ystore(i,j2,k,ispec)
-      z2 = zstore(i,j2,k,ispec)
+      x2 = real(xstore(i,j2,k,ispec),kind=CUSTOM_REAL)
+      y2 = real(ystore(i,j2,k,ispec),kind=CUSTOM_REAL)
+      z2 = real(zstore(i,j2,k,ispec),kind=CUSTOM_REAL)
 
       dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
       if (elemsize_min > dist) elemsize_min = dist
@@ -612,13 +621,13 @@ subroutine get_elem_minmaxsize(elemsize_min,elemsize_max,ispec,nspec,xstore,ysto
   k2 = NGLLZ
   do j = 1, NGLLY, NGLLY-1
     do i = 1, NGLLX, NGLLX-1
-      x1 = xstore(i,j,k1,ispec)
-      y1 = ystore(i,j,k1,ispec)
-      z1 = zstore(i,j,k1,ispec)
+      x1 = real(xstore(i,j,k1,ispec),kind=CUSTOM_REAL)
+      y1 = real(ystore(i,j,k1,ispec),kind=CUSTOM_REAL)
+      z1 = real(zstore(i,j,k1,ispec),kind=CUSTOM_REAL)
 
-      x2 = xstore(i,j,k2,ispec)
-      y2 = ystore(i,j,k2,ispec)
-      z2 = zstore(i,j,k2,ispec)
+      x2 = real(xstore(i,j,k2,ispec),kind=CUSTOM_REAL)
+      y2 = real(ystore(i,j,k2,ispec),kind=CUSTOM_REAL)
+      z2 = real(zstore(i,j,k2,ispec),kind=CUSTOM_REAL)
 
       dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
       if (elemsize_min > dist ) elemsize_min = dist
@@ -626,8 +635,8 @@ subroutine get_elem_minmaxsize(elemsize_min,elemsize_max,ispec,nspec,xstore,ysto
     enddo
   enddo
   ! size (in km)
-  elemsize_min = sqrt(elemsize_min) * R_PLANET_KM
-  elemsize_max = sqrt(elemsize_max) * R_PLANET_KM
+  elemsize_min = sqrt(elemsize_min) * real(R_PLANET_KM,kind=CUSTOM_REAL)
+  elemsize_max = sqrt(elemsize_max) * real(R_PLANET_KM,kind=CUSTOM_REAL)
 
   end subroutine get_elem_minmaxsize
 
@@ -651,53 +660,56 @@ subroutine get_min_distance_from_second_GLL_points(dx,ispec,nspec,xstore,ystore,
   real(kind=CUSTOM_REAL) :: x1,y1,z1,x2,y2,z2,dist
   integer :: i,j,k
 
-  dx = HUGEVAL
+  ! to avoid conversion warnings
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+
+  dx = HUGEVAL_REAL
   ! loops over the four edges that are along Z
   do k = 1, NGLLZ, NGLLZ-1
     ! front-left
-    x1 = xstore(1,2,k,ispec)
-    y1 = ystore(1,2,k,ispec)
-    z1 = zstore(1,2,k,ispec)
+    x1 = real(xstore(1,2,k,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(1,2,k,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(1,2,k,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(2,1,k,ispec)
-    y2 = ystore(2,1,k,ispec)
-    z2 = zstore(2,1,k,ispec)
+    x2 = real(xstore(2,1,k,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(2,1,k,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(2,1,k,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! rear-left
-    x1 = xstore(1,NGLLY-1,k,ispec)
-    y1 = ystore(1,NGLLY-1,k,ispec)
-    z1 = zstore(1,NGLLY-1,k,ispec)
+    x1 = real(xstore(1,NGLLY-1,k,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(1,NGLLY-1,k,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(1,NGLLY-1,k,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(2,NGLLY,k,ispec)
-    y2 = ystore(2,NGLLY,k,ispec)
-    z2 = zstore(2,NGLLY,k,ispec)
+    x2 = real(xstore(2,NGLLY,k,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(2,NGLLY,k,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(2,NGLLY,k,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! front-right
-    x1 = xstore(NGLLX,2,k,ispec)
-    y1 = ystore(NGLLX,2,k,ispec)
-    z1 = zstore(NGLLX,2,k,ispec)
+    x1 = real(xstore(NGLLX,2,k,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(NGLLX,2,k,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(NGLLX,2,k,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(NGLLX-1,1,k,ispec)
-    y2 = ystore(NGLLX-1,1,k,ispec)
-    z2 = zstore(NGLLX-1,1,k,ispec)
+    x2 = real(xstore(NGLLX-1,1,k,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(NGLLX-1,1,k,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(NGLLX-1,1,k,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! rear-right
-    x1 = xstore(NGLLX,NGLLY-1,k,ispec)
-    y1 = ystore(NGLLX,NGLLY-1,k,ispec)
-    z1 = zstore(NGLLX,NGLLY-1,k,ispec)
+    x1 = real(xstore(NGLLX,NGLLY-1,k,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(NGLLX,NGLLY-1,k,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(NGLLX,NGLLY-1,k,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(NGLLX-1,NGLLY,k,ispec)
-    y2 = ystore(NGLLX-1,NGLLY,k,ispec)
-    z2 = zstore(NGLLX-1,NGLLY,k,ispec)
+    x2 = real(xstore(NGLLX-1,NGLLY,k,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(NGLLX-1,NGLLY,k,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(NGLLX-1,NGLLY,k,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
@@ -706,49 +718,49 @@ subroutine get_min_distance_from_second_GLL_points(dx,ispec,nspec,xstore,ystore,
   ! loops over the four edges that are along Y
   do j = 1, NGLLY, NGLLY-1
     ! front-left
-    x1 = xstore(1,j,2,ispec)
-    y1 = ystore(1,j,2,ispec)
-    z1 = zstore(1,j,2,ispec)
+    x1 = real(xstore(1,j,2,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(1,j,2,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(1,j,2,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(2,j,1,ispec)
-    y2 = ystore(2,j,1,ispec)
-    z2 = zstore(2,j,1,ispec)
+    x2 = real(xstore(2,j,1,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(2,j,1,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(2,j,1,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! rear-left
-    x1 = xstore(1,j,NGLLZ-1,ispec)
-    y1 = ystore(1,j,NGLLZ-1,ispec)
-    z1 = zstore(1,j,NGLLZ-1,ispec)
+    x1 = real(xstore(1,j,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(1,j,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(1,j,NGLLZ-1,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(2,j,NGLLZ,ispec)
-    y2 = ystore(2,j,NGLLZ,ispec)
-    z2 = zstore(2,j,NGLLZ,ispec)
+    x2 = real(xstore(2,j,NGLLZ,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(2,j,NGLLZ,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(2,j,NGLLZ,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! front-right
-    x1 = xstore(NGLLX,j,2,ispec)
-    y1 = ystore(NGLLX,j,2,ispec)
-    z1 = zstore(NGLLX,j,2,ispec)
+    x1 = real(xstore(NGLLX,j,2,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(NGLLX,j,2,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(NGLLX,j,2,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(NGLLX-1,j,1,ispec)
-    y2 = ystore(NGLLX-1,j,1,ispec)
-    z2 = zstore(NGLLX-1,j,1,ispec)
+    x2 = real(xstore(NGLLX-1,j,1,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(NGLLX-1,j,1,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(NGLLX-1,j,1,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! rear-right
-    x1 = xstore(NGLLX,j,NGLLZ-1,ispec)
-    y1 = ystore(NGLLX,j,NGLLZ-1,ispec)
-    z1 = zstore(NGLLX,j,NGLLZ-1,ispec)
+    x1 = real(xstore(NGLLX,j,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(NGLLX,j,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(NGLLX,j,NGLLZ-1,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(NGLLX-1,j,NGLLZ,ispec)
-    y2 = ystore(NGLLX-1,j,NGLLZ,ispec)
-    z2 = zstore(NGLLX-1,j,NGLLZ,ispec)
+    x2 = real(xstore(NGLLX-1,j,NGLLZ,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(NGLLX-1,j,NGLLZ,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(NGLLX-1,j,NGLLZ,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
@@ -757,55 +769,55 @@ subroutine get_min_distance_from_second_GLL_points(dx,ispec,nspec,xstore,ystore,
   ! loops over the four edges that are along X
   do i = 1, NGLLX, NGLLX-1
     ! front-left
-    x1 = xstore(i,1,2,ispec)
-    y1 = ystore(i,1,2,ispec)
-    z1 = zstore(i,1,2,ispec)
+    x1 = real(xstore(i,1,2,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(i,1,2,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(i,1,2,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(i,2,1,ispec)
-    y2 = ystore(i,2,1,ispec)
-    z2 = zstore(i,2,1,ispec)
+    x2 = real(xstore(i,2,1,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(i,2,1,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(i,2,1,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! rear-left
-    x1 = xstore(i,1,NGLLZ-1,ispec)
-    y1 = ystore(i,1,NGLLZ-1,ispec)
-    z1 = zstore(i,1,NGLLZ-1,ispec)
+    x1 = real(xstore(i,1,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(i,1,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(i,1,NGLLZ-1,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(i,2,NGLLZ,ispec)
-    y2 = ystore(i,2,NGLLZ,ispec)
-    z2 = zstore(i,2,NGLLZ,ispec)
+    x2 = real(xstore(i,2,NGLLZ,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(i,2,NGLLZ,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(i,2,NGLLZ,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! front-right
-    x1 = xstore(i,NGLLY,2,ispec)
-    y1 = ystore(i,NGLLY,2,ispec)
-    z1 = zstore(i,NGLLY,2,ispec)
+    x1 = real(xstore(i,NGLLY,2,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(i,NGLLY,2,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(i,NGLLY,2,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(i,NGLLY-1,1,ispec)
-    y2 = ystore(i,NGLLY-1,1,ispec)
-    z2 = zstore(i,NGLLY-1,1,ispec)
+    x2 = real(xstore(i,NGLLY-1,1,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(i,NGLLY-1,1,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(i,NGLLY-1,1,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
 
     ! rear-right
-    x1 = xstore(i,NGLLY,NGLLZ-1,ispec)
-    y1 = ystore(i,NGLLY,NGLLZ-1,ispec)
-    z1 = zstore(i,NGLLY,NGLLZ-1,ispec)
+    x1 = real(xstore(i,NGLLY,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    y1 = real(ystore(i,NGLLY,NGLLZ-1,ispec),kind=CUSTOM_REAL)
+    z1 = real(zstore(i,NGLLY,NGLLZ-1,ispec),kind=CUSTOM_REAL)
 
-    x2 = xstore(i,NGLLY-1,NGLLZ,ispec)
-    y2 = ystore(i,NGLLY-1,NGLLZ,ispec)
-    z2 = zstore(i,NGLLY-1,NGLLZ,ispec)
+    x2 = real(xstore(i,NGLLY-1,NGLLZ,ispec),kind=CUSTOM_REAL)
+    y2 = real(ystore(i,NGLLY-1,NGLLZ,ispec),kind=CUSTOM_REAL)
+    z2 = real(zstore(i,NGLLY-1,NGLLZ,ispec),kind=CUSTOM_REAL)
 
     dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
     if (dx > dist ) dx = dist
   enddo
   ! size (in km)
-  dx = sqrt(dx) * R_PLANET_KM
+  dx = sqrt(dx) * real(R_PLANET_KM,kind=CUSTOM_REAL)
 
   end subroutine get_min_distance_from_second_GLL_points
 
@@ -834,8 +846,11 @@ subroutine get_eigenvalues_min_ratio(eig_ratio_min,eig_ratio_max,ispec)
   double precision :: e1,e2,e3
   double precision :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
 
-  eig_ratio_min = HUGEVAL
-  eig_ratio_max = - HUGEVAL
+  ! to avoid conversion warnings
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+
+  eig_ratio_min = HUGEVAL_REAL
+  eig_ratio_max = - HUGEVAL_REAL
 
   do k = 1,NGLLZ
     do j = 1,NGLLY
@@ -898,8 +913,8 @@ subroutine get_eigenvalues_min_ratio(eig_ratio_min,eig_ratio_max,ispec)
         e3 = sqrt(e3)
 
         ! ratio of smallest vs. largest eigenvalue ( == 1 for no distortion)
-        if (eig_ratio_min > e3/e1) eig_ratio_min = e3/e1
-        if (eig_ratio_max < e3/e1) eig_ratio_max = e3/e1
+        if (eig_ratio_min > e3/e1) eig_ratio_min = real(e3/e1,kind=CUSTOM_REAL)
+        if (eig_ratio_max < e3/e1) eig_ratio_max = real(e3/e1,kind=CUSTOM_REAL)
 
       enddo
     enddo
diff --git a/src/meshfem3D/get_absorb.f90 b/src/meshfem3D/get_absorb.f90
index 6b8112917..28d6e3672 100644
--- a/src/meshfem3D/get_absorb.f90
+++ b/src/meshfem3D/get_absorb.f90
@@ -357,7 +357,7 @@ subroutine get_absorb_create_Stacey_boundary_arrays(iregion,NSPEC2D_BOTTOM)
           abs_boundary_normal(1,igll,iface) = nx
           abs_boundary_normal(2,igll,iface) = ny
           abs_boundary_normal(3,igll,iface) = nz
-          abs_boundary_jacobian2Dw(igll,iface) = weight
+          abs_boundary_jacobian2Dw(igll,iface) = real(weight,kind=CUSTOM_REAL)
         enddo
       enddo
       abs_boundary_npoin(iface) = igll
@@ -393,7 +393,7 @@ subroutine get_absorb_create_Stacey_boundary_arrays(iregion,NSPEC2D_BOTTOM)
           abs_boundary_normal(1,igll,iface) = nx
           abs_boundary_normal(2,igll,iface) = ny
           abs_boundary_normal(3,igll,iface) = nz
-          abs_boundary_jacobian2Dw(igll,iface) = weight
+          abs_boundary_jacobian2Dw(igll,iface) = real(weight,kind=CUSTOM_REAL)
         enddo
       enddo
       abs_boundary_npoin(iface) = igll
@@ -427,7 +427,7 @@ subroutine get_absorb_create_Stacey_boundary_arrays(iregion,NSPEC2D_BOTTOM)
         abs_boundary_normal(1,igll,iface) = nx
         abs_boundary_normal(2,igll,iface) = ny
         abs_boundary_normal(3,igll,iface) = nz
-        abs_boundary_jacobian2Dw(igll,iface) = weight
+        abs_boundary_jacobian2Dw(igll,iface) = real(weight,kind=CUSTOM_REAL)
       enddo
     enddo
     abs_boundary_npoin(iface) = igll
@@ -460,7 +460,7 @@ subroutine get_absorb_create_Stacey_boundary_arrays(iregion,NSPEC2D_BOTTOM)
         abs_boundary_normal(1,igll,iface) = nx
         abs_boundary_normal(2,igll,iface) = ny
         abs_boundary_normal(3,igll,iface) = nz
-        abs_boundary_jacobian2Dw(igll,iface) = weight
+        abs_boundary_jacobian2Dw(igll,iface) = real(weight,kind=CUSTOM_REAL)
       enddo
     enddo
     abs_boundary_npoin(iface) = igll
@@ -492,7 +492,7 @@ subroutine get_absorb_create_Stacey_boundary_arrays(iregion,NSPEC2D_BOTTOM)
           abs_boundary_normal(1,igll,iface) = nx
           abs_boundary_normal(2,igll,iface) = ny
           abs_boundary_normal(3,igll,iface) = nz
-          abs_boundary_jacobian2Dw(igll,iface) = weight
+          abs_boundary_jacobian2Dw(igll,iface) = real(weight,kind=CUSTOM_REAL)
         enddo
       enddo
       abs_boundary_npoin(iface) = igll
diff --git a/src/meshfem3D/model_atten3D_QRFSI12.f90 b/src/meshfem3D/model_atten3D_QRFSI12.f90
index ced3144bb..044bab19d 100644
--- a/src/meshfem3D/model_atten3D_QRFSI12.f90
+++ b/src/meshfem3D/model_atten3D_QRFSI12.f90
@@ -221,8 +221,8 @@ subroutine model_atten3D_QRFSI12(radius,theta,phi,Qmu,idoubling)
   ! debug
   !  print *,'entering QRFSI12 subroutine'
 
-  ylat = 90.0d0 - theta
-  xlon = phi
+  ylat = real(90.0d0 - theta,kind=4)
+  xlon = real(phi,kind=4)
 
   ! only checks radius for crust, idoubling is misleading for oceanic crust
   ! when we want to expand mantle up to surface...
@@ -256,7 +256,7 @@ subroutine model_atten3D_QRFSI12(radius,theta,phi,Qmu,idoubling)
 
     ! we are in the mantle
 
-    depth = R_PLANET_KM - radius
+    depth = real(R_PLANET_KM - radius,kind=4)
 
     !debug
     !   print *,'QRFSI12: we are in the mantle at depth',depth
@@ -280,14 +280,14 @@ subroutine model_atten3D_QRFSI12(radius,theta,phi,Qmu,idoubling)
         shdep(j) = 0.0
       enddo
       do n = 1,NKQ
-        splpts(n) = QRFSI12_Q_spknt(n)
+        splpts(n) = real(QRFSI12_Q_spknt(n),kind=4)
       enddo
 
       call vbspl(depth,NKQ,splpts,splcon,splcond)
 
       do n = 1,NKQ
         do j = 1,NSQ
-          shdep(j) = shdep(j)+(splcon(n)*QRFSI12_Q_dqmu(n,j))
+          shdep(j) = shdep(j) + real(splcon(n) * QRFSI12_Q_dqmu(n,j),kind=4)
         enddo
       enddo
 
diff --git a/src/meshfem3D/model_full_sh.f90 b/src/meshfem3D/model_full_sh.f90
index e4e882476..451e8166d 100644
--- a/src/meshfem3D/model_full_sh.f90
+++ b/src/meshfem3D/model_full_sh.f90
@@ -1102,8 +1102,9 @@ subroutine add_topography_sh_mantle(xelm,yelm,zelm)
 
   !statistics
   logical,parameter :: DEBUG_STATISTICS = .false.
-  real(kind=CUSTOM_REAL),save :: min_410 = HUGEVAL,max_410 = - HUGEVAL
-  real(kind=CUSTOM_REAL),save :: min_650 = HUGEVAL,max_650 = - HUGEVAL
+  real(kind=CUSTOM_REAL), parameter :: HUGEVAL_REAL = real(HUGEVAL,kind=CUSTOM_REAL)
+  real(kind=CUSTOM_REAL),save :: min_410 = HUGEVAL_REAL, max_410 = - HUGEVAL_REAL
+  real(kind=CUSTOM_REAL),save :: min_650 = HUGEVAL_REAL, max_650 = - HUGEVAL_REAL
   real(kind=CUSTOM_REAL) :: min_410_all,max_410_all
   real(kind=CUSTOM_REAL) :: min_650_all,max_650_all
 
diff --git a/src/meshfem3D/model_gll.f90 b/src/meshfem3D/model_gll.f90
index e68ae5e12..649b214b2 100644
--- a/src/meshfem3D/model_gll.f90
+++ b/src/meshfem3D/model_gll.f90
@@ -41,7 +41,7 @@ module model_gll_par
   type model_gll_variables
     sequence
     ! tomographic iteration model on GLL points
-    double precision :: scale_velocity,scale_density,scale_GPa
+    real(kind=CUSTOM_REAL) :: scale_velocity,scale_density,scale_GPa
 
     ! isotropic model
     real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: vs_new,vp_new,rho_new
@@ -239,10 +239,11 @@ subroutine model_gll_broadcast()
   ! scaling values
   ! (model velocities must be given as km/s)
   scaleval = dsqrt(PI*GRAV*RHOAV)
-  MGLL_V%scale_velocity = 1000.0d0/(R_PLANET*scaleval)
-  MGLL_V%scale_density  =  1000.0d0/RHOAV
+  MGLL_V%scale_velocity = real(1000.0d0/(R_PLANET*scaleval),kind=CUSTOM_REAL)
+  MGLL_V%scale_density  = real(1000.0d0/RHOAV,kind=CUSTOM_REAL)
   ! non-dimensionalize the elastic coefficients using the scale of GPa--[g/cm^3][(km/s)^2]
-  MGLL_V%scale_GPa      = 1.d0/( (RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2) )  ! equal to scale_density * scale_velocity**2
+  ! equal to scale_density * scale_velocity**2
+  MGLL_V%scale_GPa      = real(1.d0/( (RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2) ),kind=CUSTOM_REAL)
 
   select case(MGLL_TYPE)
   case (1)
@@ -288,8 +289,9 @@ subroutine model_gll_broadcast()
       call flush_IMAIN()
     endif
     ! non-dimensionalizes
-    MGLL_V_IC%scale_velocity = 1000.0d0/(R_PLANET*scaleval)
-    MGLL_V_IC%scale_density  =  1000.0d0/RHOAV
+    MGLL_V_IC%scale_velocity = real(1000.0d0/(R_PLANET*scaleval),kind=CUSTOM_REAL)
+    MGLL_V_IC%scale_density  = real(1000.0d0/RHOAV,kind=CUSTOM_REAL)
+
     MGLL_V_IC%vp_new = MGLL_V_IC%vp_new * MGLL_V_IC%scale_velocity
     MGLL_V_IC%vs_new = MGLL_V_IC%vs_new * MGLL_V_IC%scale_velocity
     MGLL_V_IC%rho_new = MGLL_V_IC%rho_new * MGLL_V_IC%scale_density
diff --git a/src/meshfem3D/model_ppm.f90 b/src/meshfem3D/model_ppm.f90
index affe5521d..bc6e8c479 100644
--- a/src/meshfem3D/model_ppm.f90
+++ b/src/meshfem3D/model_ppm.f90
@@ -525,34 +525,35 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
 
   implicit none
 
-  integer :: nproc_xi, nproc_eta
+  integer, intent(in) :: nproc_xi, nproc_eta
 
-  integer NEX_XI
+  integer, intent(in) :: NEX_XI
 
-  integer nspec,nspec_stacey,NCHUNKS
+  integer, intent(in) :: nspec,nspec_stacey,NCHUNKS
 
-  logical ABSORBING_CONDITIONS
+  logical, intent(in) :: ABSORBING_CONDITIONS
 
-! arrays with Jacobian matrix
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+  ! arrays with Jacobian matrix
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(in) :: &
     xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore
 
-! arrays with mesh parameters
-  double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
-  double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
-  double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
+  ! arrays with mesh parameters
+  double precision, intent(in) :: xstore(NGLLX,NGLLY,NGLLZ,nspec)
+  double precision, intent(in) :: ystore(NGLLX,NGLLY,NGLLZ,nspec)
+  double precision, intent(in) :: zstore(NGLLX,NGLLY,NGLLZ,nspec)
 
-! for anisotropy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: rhostore,kappavstore,kappahstore, &
+  ! for anisotropy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(inout) :: rhostore,kappavstore,kappahstore, &
         muvstore,muhstore,eta_anisostore
 
-! Stacey
-  real(kind=CUSTOM_REAL) rho_vp(NGLLX,NGLLY,NGLLZ,nspec_stacey)
-  real(kind=CUSTOM_REAL) rho_vs(NGLLX,NGLLY,NGLLZ,nspec_stacey)
+  ! Stacey
+  real(kind=CUSTOM_REAL), intent(inout) :: rho_vp(NGLLX,NGLLY,NGLLZ,nspec_stacey)
+  real(kind=CUSTOM_REAL), intent(inout) :: rho_vs(NGLLX,NGLLY,NGLLZ,nspec_stacey)
+
+  integer, intent(in) :: iregion_code
 
   ! local parameters
-  integer i,j,k,ispec
-  integer iregion_code
+  integer :: i,j,k,ispec
 
   ! only include the neighboring 3 x 3 slices
   integer, parameter :: NSLICES = 3
@@ -577,7 +578,7 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: tk_rho,tk_kv,tk_kh,tk_muv,tk_muh,tk_eta,tk_dvp,tk_rhovp,tk_rhovs
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: bk
 
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
 
   real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable:: xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
 
@@ -595,21 +596,21 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
   ! array with all the weights in the cube
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
 
-  real(kind=CUSTOM_REAL), parameter :: ZERO_ = 0.0_CUSTOM_REAL
+  real(kind=CUSTOM_REAL), parameter :: ZERO_REAL = 0.0_CUSTOM_REAL
 
-  real(kind=CUSTOM_REAL) maxlat,maxlon,maxdepth
-  real(kind=CUSTOM_REAL) minlat,minlon,mindepth
-  real(kind=CUSTOM_REAL) radius,theta,phi,lat,lon,r_depth,margin_v,margin_h
-  real(kind=CUSTOM_REAL) dist_h,dist_v
+  real(kind=CUSTOM_REAL) :: maxlat,maxlon,maxdepth
+  real(kind=CUSTOM_REAL) :: minlat,minlon,mindepth
+  real(kind=CUSTOM_REAL) :: radius,theta,phi,lat,lon,r_depth,margin_v,margin_h
+  real(kind=CUSTOM_REAL) :: dist_h,dist_v
 
   double precision,external :: wtime
 
 !----------------------------------------------------------------------------------------------------
   ! smoothing parameters
-  logical,parameter:: GAUSS_SMOOTHING = .false. ! set to true to use this smoothing routine
+  logical,parameter :: GAUSS_SMOOTHING = .false. ! set to true to use this smoothing routine
 
-  sigma_h = 100.0  ! km, horizontal
-  sigma_v = 100.0   ! km, vertical
+  sigma_h = 100.0_CUSTOM_REAL  ! km, horizontal
+  sigma_v = 100.0_CUSTOM_REAL   ! km, vertical
 
   ! check if smoothing applies
   if (.not. GAUSS_SMOOTHING ) return
@@ -620,7 +621,7 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
 
 
   sizeprocs = NCHUNKS*NPROC_XI*NPROC_ETA
-  element_size = (TWO_PI*R_PLANET/1000.d0)/(4*NEX_XI)
+  element_size = real((TWO_PI*R_PLANET/1000.d0)/(4*NEX_XI),kind=CUSTOM_REAL)
 
   if (myrank == 0) then
     write(IMAIN, *) "model smoothing defaults:"
@@ -634,25 +635,25 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
   if (nchunks == 0) call exit_mpi(myrank,'no chunks')
 
   element_size = element_size * 1000  ! e.g. 9 km on the surface, 36 km at CMB
-  element_size = element_size / R_PLANET
+  element_size = real(element_size / R_PLANET,kind=CUSTOM_REAL)
 
-  sigma_h = sigma_h * 1000.0 ! m
-  sigma_h = sigma_h / R_PLANET ! scale
-  sigma_v = sigma_v * 1000.0 ! m
-  sigma_v = sigma_v / R_PLANET ! scale
+  sigma_h = sigma_h * 1000.0_CUSTOM_REAL ! m
+  sigma_h = real(sigma_h / R_PLANET,kind=CUSTOM_REAL) ! scale
+  sigma_v = sigma_v * 1000.0_CUSTOM_REAL ! m
+  sigma_v = real(sigma_v / R_PLANET,kind=CUSTOM_REAL) ! scale
 
   sigma_h2 = sigma_h ** 2
   sigma_v2 = sigma_v ** 2
 
   ! search radius
-  sigma_h3 = 3.0  * sigma_h + element_size
+  sigma_h3 = 3.0_CUSTOM_REAL * sigma_h + element_size
   sigma_h3 = sigma_h3 ** 2
-  sigma_v3 = 3.0  * sigma_v + element_size
+  sigma_v3 = 3.0_CUSTOM_REAL * sigma_v + element_size
   sigma_v3 = sigma_v3 ** 2
   ! theoretic normal value
   ! (see integral over -inf to +inf of exp[- x*x/(2*sigma) ] = sigma * sqrt(2*pi) )
-  norm_h = 2.0*PI*sigma_h**2
-  norm_v = sqrt(2.0*PI) * sigma_v
+  norm_h = real(2.d0*PI*sigma_h**2,kind=CUSTOM_REAL)
+  norm_v = real(sqrt(2.d0*PI) * sigma_v,kind=CUSTOM_REAL)
   norm   = norm_h * norm_v
 
   if (myrank == 0) then
@@ -667,7 +668,7 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
   do k = 1,NGLLZ
     do j = 1,NGLLY
       do i = 1,NGLLX
-        wgll_cube(i,j,k) = wxgll(i)*wygll(j)*wzgll(k)
+        wgll_cube(i,j,k) = real(wxgll(i)*wygll(j)*wzgll(k),kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -702,9 +703,9 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
 
   ! read in the topology files of the current and neighboring slices
   ! read in myrank slice
-  xl(:,:,:,:) = xstore(:,:,:,:)
-  yl(:,:,:,:) = ystore(:,:,:,:)
-  zl(:,:,:,:) = zstore(:,:,:,:)
+  xl(:,:,:,:) = real(xstore(:,:,:,:),kind=CUSTOM_REAL)
+  yl(:,:,:,:) = real(ystore(:,:,:,:),kind=CUSTOM_REAL)
+  zl(:,:,:,:) = real(zstore(:,:,:,:),kind=CUSTOM_REAL)
 
   ! build Jacobian
   allocate(xix(NGLLX,NGLLY,NGLLZ,nspec),xiy(NGLLX,NGLLY,NGLLZ,nspec),xiz(NGLLX,NGLLY,NGLLZ,nspec))
@@ -746,7 +747,7 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
           if (abs(jacobianl) > 1.e-25) then
             jacobianl = 1.0_CUSTOM_REAL / jacobianl
           else
-            jacobianl = ZERO_
+            jacobianl = ZERO_REAL
           endif
 
           jacobian(i,j,k,ispec) = jacobianl
@@ -772,9 +773,9 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
   do rank = 0,sizeprocs-1
     if (rank == myrank) then
       jacobian(:,:,:,:) = jacobian0(:,:,:,:)
-      x(:,:,:,:) = xstore(:,:,:,:)
-      y(:,:,:,:) = ystore(:,:,:,:)
-      z(:,:,:,:) = zstore(:,:,:,:)
+      x(:,:,:,:) = real(xstore(:,:,:,:),kind=CUSTOM_REAL)
+      y(:,:,:,:) = real(ystore(:,:,:,:),kind=CUSTOM_REAL)
+      z(:,:,:,:) = real(zstore(:,:,:,:),kind=CUSTOM_REAL)
     endif
     ! every process broadcasts its info
     call bcast_all_cr(x,NGLLX*NGLLY*NGLLZ*NSPEC)
@@ -957,31 +958,31 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
   if (myrank == 0) write(IMAIN, *) 'Done with integration ...'
 
   ! gets depths (in km) of upper and lower limit
-  maxlat = PPM_maxlat
-  minlat = PPM_minlat
+  maxlat = real(PPM_maxlat,kind=CUSTOM_REAL)
+  minlat = real(PPM_minlat,kind=CUSTOM_REAL)
 
-  maxlon = PPM_maxlon
-  minlon = PPM_minlon
+  maxlon = real(PPM_maxlon,kind=CUSTOM_REAL)
+  minlon = real(PPM_minlon,kind=CUSTOM_REAL)
 
-  maxdepth = PPM_maxdepth
-  mindepth = PPM_mindepth
+  maxdepth = real(PPM_maxdepth,kind=CUSTOM_REAL)
+  mindepth = real(PPM_mindepth,kind=CUSTOM_REAL)
 
-  margin_v = sigma_v*R_PLANET/1000.0 ! in km
-  margin_h = sigma_h*R_PLANET/1000.0 * 180.0/(R_PLANET_KM*PI) ! in degree
+  margin_v = real(sigma_v*R_PLANET/1000.d0,kind=CUSTOM_REAL) ! in km
+  margin_h = real(sigma_h*R_PLANET/1000.d0 * 180.d0/(R_PLANET_KM*PI),kind=CUSTOM_REAL) ! in degree
 
   ! computes the smoothed values
   do ispec = 1, nspec
 
     ! depth of given radius (in km)
     call xyz_2_rthetaphi(cx0(ispec),cy0(ispec),cz0(ispec),radius,theta,phi)
-    r_depth = R_PLANET_KM - radius*R_PLANET_KM  ! radius is normalized between [0,1]
+    r_depth = real(R_PLANET_KM - radius*R_PLANET_KM,kind=CUSTOM_REAL)  ! radius is normalized between [0,1]
     if (r_depth >= maxdepth+margin_v .or. r_depth+margin_v < mindepth) cycle
 
-    lat=(PI/2.0d0-theta)*180.0d0/PI
+    lat = real((PI/2.0d0-theta)*180.0d0/PI,kind=CUSTOM_REAL)
     if (lat < minlat-margin_h .or. lat > maxlat+margin_h ) cycle
 
-    lon = phi*180.0d0/PI
-    if (lon > 180.0d0) lon = lon-360.0d0
+    lon = real(phi*180.0d0/PI,kind=CUSTOM_REAL)
+    if (lon > 180.0_CUSTOM_REAL) lon = lon - 360.0_CUSTOM_REAL
     if (lon < minlon-margin_h .or. lon > maxlon+margin_h ) cycle
 
     do k = 1, NGLLZ
@@ -1016,14 +1017,14 @@ subroutine smooth_model(nproc_xi,nproc_eta, &
 
         ! depth of given radius (in km)
         call xyz_2_rthetaphi(cx0(ispec),cy0(ispec),cz0(ispec),radius,theta,phi)
-        r_depth = R_PLANET_KM - radius*R_PLANET_KM  ! radius is normalized between [0,1]
+        r_depth = real(R_PLANET_KM - radius*R_PLANET_KM,kind=CUSTOM_REAL)  ! radius is normalized between [0,1]
         if (r_depth >= maxdepth+margin_v .or. r_depth+margin_v < mindepth) cycle
 
-        lat=(PI/2.0d0-theta)*180.0d0/PI
+        lat = real((PI/2.0d0-theta)*180.0d0/PI,kind=CUSTOM_REAL)
         if (lat < minlat-margin_h .or. lat > maxlat+margin_h ) cycle
 
-        lon = phi*180.0d0/PI
-        if (lon > 180.0d0) lon = lon-360.0d0
+        lon = real(phi*180.0d0/PI,kind=CUSTOM_REAL)
+        if (lon > 180.0_CUSTOM_REAL) lon = lon - 360.0_CUSTOM_REAL
         if (lon < minlon-margin_h .or. lon > maxlon+margin_h ) cycle
 
         do k = 1, NGLLZ
diff --git a/src/meshfem3D/model_scattering.f90 b/src/meshfem3D/model_scattering.f90
index 4faad9d9f..41ecaa419 100644
--- a/src/meshfem3D/model_scattering.f90
+++ b/src/meshfem3D/model_scattering.f90
@@ -144,8 +144,8 @@ real function psd_vonKarman_3D(a_in,kx,ky,kz)
   real(kind=CUSTOM_REAL) :: a,H,sigma
   real(kind=CUSTOM_REAL) :: g1,g2,amp,k,psd
 
-  real(kind=CUSTOM_REAL), parameter :: PI = acos(-1.d0)
-  real(kind=CUSTOM_REAL), parameter :: CONST_3HALF = 3.0 / 2.0
+  real(kind=CUSTOM_REAL), parameter :: PI = real(acos(-1.d0),kind=CUSTOM_REAL)
+  real(kind=CUSTOM_REAL), parameter :: CONST_3HALF = real(3.d0 / 2.d0,kind=CUSTOM_REAL)
 
   a = a_in    ! correlation length
   H = 0.3     ! Hurst exponent: Imperatori & Mai use H = 0.1 and 0.3
@@ -238,13 +238,13 @@ subroutine generate_perturbations()
   time_start = wtime()
 
   ! estimated minimum wavelength resolved by mesh
-  lambda_min = estimated_min_wavelength
+  lambda_min = real(estimated_min_wavelength,kind=CUSTOM_REAL)
 
   ! normalized wavelength
-  lambda_min_norm = lambda_min / R_PLANET_KM
+  lambda_min_norm = real(lambda_min / R_PLANET_KM,kind=CUSTOM_REAL)
 
   ! quarter of wavelength for grid estimate (at least 5 grid points per wavelength)
-  min_length = lambda_min_norm / 4.0
+  min_length = lambda_min_norm / 4.0_CUSTOM_REAL
 
   ! total length
   length = grid_length
@@ -291,21 +291,22 @@ subroutine generate_perturbations()
 
   ! wavenumbers
   ! min/max: k_max = 2 pi / (2 dx)
-  k_min = 2.0 * PI / (N * dx)
-  k_max = 2.0 * PI / (2.0 * dx)
+  k_min = real(2.d0 * PI / (N * dx),kind=CUSTOM_REAL)
+  k_max = real(2.d0 * PI / (2.0 * dx),kind=CUSTOM_REAL)
 
   ! wavenumber increment
-  dk = 2.0 * PI / (N * dx)
+  dk = real(2.d0 * PI / (N * dx),kind=CUSTOM_REAL)
 
   ! maximum index for k_max
   !index_k_max = N / 2 # k_max / dk = (2 pi / (2 dx) ) / (2 pi / (N dx) ) = (N * dx) / (2 * dx) = N / 2
 
   ! index for k_lambda_min
-  k_lambda = 2.0 * PI / (lambda_min_norm)
+  k_lambda = real(2.d0 * PI / (lambda_min_norm),kind=CUSTOM_REAL)
   index_k_lambda = int(k_lambda / dk)
 
   ! correlation length
-  a_corr = lambda_min_norm / (2.0 * PI) * SCATTERING_CORRELATION    ! such that k * a ~ 1 (for correlation factor == 1)
+  ! such that k * a ~ 1 (for correlation factor == 1)
+  a_corr = real(lambda_min_norm / (2.d0 * PI) * SCATTERING_CORRELATION,kind=CUSTOM_REAL)
 
   ! user output
   if (myrank == 0) then
@@ -401,7 +402,7 @@ subroutine generate_perturbations()
           ! random phase
           call random_number(rand_phase)
           ! range [0,2pi]
-          rand_phase = rand_phase * 2.0 * PI
+          rand_phase = real(rand_phase * 2.d0 * PI,kind=CUSTOM_REAL)
           k_random = cmplx( cos(rand_phase), sin(rand_phase) )
 
           ! stores wavenumber distribution
@@ -456,7 +457,7 @@ subroutine generate_perturbations()
       do j = 1,N
         do i = 1,N
           ! stores perturbations array
-          perturbation_grid(i,j,k) = real(kxyz_dist(i,j,k))
+          perturbation_grid(i,j,k) = real(kxyz_dist(i,j,k),kind=CUSTOM_REAL)
         enddo
       enddo
     enddo
@@ -478,12 +479,12 @@ subroutine generate_perturbations()
 
     ! normalizes to range [-1,1]
     val_max = maxval(abs(perturbation_grid))
-    if (val_max > 0.00_CUSTOM_REAL) then
+    if (val_max > 0.0_CUSTOM_REAL) then
       perturbation_grid(:,:,:) = perturbation_grid(:,:,:) / val_max
     endif
 
     ! scales with maximum strength
-    perturbation_grid(:,:,:) = perturbation_grid(:,:,:) * SCATTERING_STRENGTH
+    perturbation_grid(:,:,:) = real(perturbation_grid(:,:,:) * SCATTERING_STRENGTH,kind=CUSTOM_REAL)
 
     ! debug
     !print *,'scattering perturbation: min/max = ',minval(perturbation_grid),'/',maxval(perturbation_grid)
diff --git a/src/meshfem3D/model_spiral.f90 b/src/meshfem3D/model_spiral.f90
index d1dc69a6f..ffc635a33 100644
--- a/src/meshfem3D/model_spiral.f90
+++ b/src/meshfem3D/model_spiral.f90
@@ -938,7 +938,7 @@ subroutine read_crust_spiral(lat,lon,coefs,rhos,thicks)
   ! interpolation variables
   double precision :: a,b  ! weights
   integer :: rec_read  ! position of the record to read in model.dat (direct access file)
-  double precision :: i_min, i_max, j_min, j_max ! upper and lower bound indices
+  integer :: i_min, i_max, j_min, j_max ! upper and lower bound indices
   double precision, dimension(CRUST_NP) :: thick1,thick2,thick3,thick4   ! thickness corner values in model.dat
   double precision, dimension(CRUST_NP) :: rho1,rho2,rho3,rho4   ! rho corner values in model.dat
   double precision, dimension(5,CRUST_NP) :: coef1,coef2,coef3,coef4   ! CIJ corner values in model.dat
@@ -1799,7 +1799,7 @@ subroutine read_mantle_spiral(r_in,lat_in,lon_in,mtle_coefs,mtle_rho)
   ! interpolation variables
   double precision :: a,b,c  ! weights
   integer :: rec_read  ! position of the record to read in model.dat (direct access file)
-  double precision :: i_min, i_max, j_min, j_max, k_min, k_max ! upper and lower bound indices
+  integer :: i_min, i_max, j_min, j_max, k_min, k_max ! upper and lower bound indices
   double precision :: rho1,rho2,rho3,rho4,rho5,rho6,rho7,rho8    ! rho corner values in model.dat
   double precision, dimension(5) :: coef1,coef2,coef3,coef4,coef5,coef6,coef7,coef8   ! CIJ corner values in model.dat
 
@@ -2097,7 +2097,7 @@ subroutine subtopo_spiral(lat_in,lon_in,topo410,topo660)
   ! interpolation variables
   double precision :: a,b  ! weights
   integer :: rec_read  ! position of the record to read in model.dat (direct access file)
-  double precision :: i_min, i_max, j_min, j_max ! upper and lower bound indices
+  integer :: i_min, i_max, j_min, j_max ! upper and lower bound indices
   double precision :: t410_1,t410_2,t410_3,t410_4   ! t410 corner values in model.dat
   double precision :: t660_1,t660_2,t660_3,t660_4   ! t660 corner values in model.dat
 
diff --git a/src/meshfem3D/save_model_meshfiles.f90 b/src/meshfem3D/save_model_meshfiles.f90
index 731d5c62b..532cb0262 100644
--- a/src/meshfem3D/save_model_meshfiles.f90
+++ b/src/meshfem3D/save_model_meshfiles.f90
@@ -52,8 +52,8 @@ subroutine save_model_meshfiles()
   real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: temp_store
 
   ! scaling factors to re-dimensionalize units
-  scaleval1 = sngl( sqrt(PI*GRAV*RHOAV)*(R_PLANET/1000.0d0) )
-  scaleval2 = sngl( RHOAV/1000.0d0 )
+  scaleval1 = real(sqrt(PI*GRAV*RHOAV)*(R_PLANET/1000.0d0),kind=CUSTOM_REAL)
+  scaleval2 = real(RHOAV/1000.0d0,kind=CUSTOM_REAL)
 
   ! uses temporary array
   allocate(temp_store(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
@@ -149,8 +149,8 @@ subroutine save_model_meshfiles()
   ! anisotropic values
   if (ANISOTROPIC_3D_MANTLE .and. iregion_code == IREGION_CRUST_MANTLE) then
     ! the scale of GPa--[g/cm^3][(km/s)^2]
-    scaleval = dsqrt(PI*GRAV*RHOAV)
-    scale_GPa = (RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2)
+    scaleval = real(sqrt(PI*GRAV*RHOAV),kind=CUSTOM_REAL)
+    scale_GPa = real((RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2),kind=CUSTOM_REAL)
 
     ! Gc_prime
     open(unit=IOUT,file=prname(1:len_trim(prname))//'Gc_prime.bin', &
diff --git a/src/meshfem3D/write_AVS_DX_global_chunks_data.f90 b/src/meshfem3D/write_AVS_DX_global_chunks_data.f90
index 7380ff728..2df4f137d 100644
--- a/src/meshfem3D/write_AVS_DX_global_chunks_data.f90
+++ b/src/meshfem3D/write_AVS_DX_global_chunks_data.f90
@@ -186,8 +186,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,1,1,ispec)**2 + ystore(1,1,1,ispec)**2 + zstore(1,1,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
           if (vmin == 0.0) vmin=vmax
           write(11,*) numpoin,vmin,vmax
@@ -204,8 +204,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,NGLLY,1,ispec)**2 + ystore(1,NGLLY,1,ispec)**2 + zstore(1,NGLLY,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
           if (vmin == 0.0) vmin=vmax
           write(11,*) numpoin,vmin,vmax
@@ -222,8 +222,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
         if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
           r = dsqrt(xstore(1,NGLLY,NGLLZ,ispec)**2 + ystore(1,NGLLY,NGLLZ,ispec)**2 + zstore(1,NGLLY,NGLLZ,ispec)**2)
           call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-          vmax = vp
-          vmin = vp
+          vmax = real(vp,kind=CUSTOM_REAL)
+          vmin = real(vp,kind=CUSTOM_REAL)
         endif
         if (vmin == 0.0) vmin=vmax
         write(11,*) numpoin,vmin,vmax
@@ -240,8 +240,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
         if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
           r = dsqrt(xstore(1,1,NGLLZ,ispec)**2 + ystore(1,1,NGLLZ,ispec)**2 + zstore(1,1,NGLLZ,ispec)**2)
           call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-          vmax = vp
-          vmin = vp
+          vmax = real(vp,kind=CUSTOM_REAL)
+          vmin = real(vp,kind=CUSTOM_REAL)
         endif
         if (vmin == 0.0) vmin=vmax
         write(11,*) numpoin,vmin,vmax
@@ -267,8 +267,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,1,1,ispec)**2 + ystore(NGLLX,1,1,ispec)**2 + zstore(NGLLX,1,1,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -285,8 +285,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,NGLLY,1,ispec)**2 + ystore(NGLLX,NGLLY,1,ispec)**2 + zstore(NGLLX,NGLLY,1,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -303,8 +303,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,NGLLY,NGLLZ,ispec)**2 + ystore(NGLLX,NGLLY,NGLLZ,ispec)**2 + zstore(NGLLX,NGLLY,NGLLZ,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -321,8 +321,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,1,NGLLZ,ispec)**2 + ystore(NGLLX,1,NGLLZ,ispec)**2 + zstore(NGLLX,1,NGLLZ,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -348,8 +348,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(1,1,1,ispec)**2 + ystore(1,1,1,ispec)**2 + zstore(1,1,1,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -366,8 +366,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,1,1,ispec)**2 + ystore(NGLLX,1,1,ispec)**2 + zstore(NGLLX,1,1,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -384,8 +384,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,1,NGLLZ,ispec)**2 + ystore(NGLLX,1,NGLLZ,ispec)**2 + zstore(NGLLX,1,NGLLZ,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -402,8 +402,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(1,1,NGLLZ,ispec)**2 + ystore(1,1,NGLLZ,ispec)**2 + zstore(1,1,NGLLZ,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -429,8 +429,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(1,NGLLY,1,ispec)**2 + ystore(1,NGLLY,1,ispec)**2 + zstore(1,NGLLY,1,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -447,8 +447,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,NGLLY,1,ispec)**2 + ystore(NGLLX,NGLLY,1,ispec)**2 + zstore(NGLLX,NGLLY,1,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -465,8 +465,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(NGLLX,NGLLY,NGLLZ,ispec)**2 + ystore(NGLLX,NGLLY,NGLLZ,ispec)**2 + zstore(NGLLX,NGLLY,NGLLZ,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
@@ -483,8 +483,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
     r = dsqrt(xstore(1,NGLLY,NGLLZ,ispec)**2 + ystore(1,NGLLY,NGLLZ,ispec)**2 + zstore(1,NGLLY,NGLLZ,ispec)**2)
     call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-    vmax = vp
-    vmin = vp
+    vmax = real(vp,kind=CUSTOM_REAL)
+    vmin = real(vp,kind=CUSTOM_REAL)
   endif
       if (vmin == 0.0) vmin=vmax
       write(11,*) numpoin,vmin,vmax
diff --git a/src/shared/fft.f90 b/src/shared/fft.f90
index 40fd3feaf..e89f444f4 100644
--- a/src/shared/fft.f90
+++ b/src/shared/fft.f90
@@ -92,11 +92,11 @@ subroutine FFT(npow,xi,zign,dtt,mpow)
 
       do i = 2,npow
         ii = i
-        if (k < mpow(i)) goto 4
-        k = k-mpow(i)
+        if (k < int(mpow(i))) goto 4
+        k = k - int(mpow(i))
       enddo
 
-    4 k = k+mpow(ii)
+    4 k = k + int(mpow(ii))
 
     enddo
   enddo
@@ -112,11 +112,11 @@ subroutine FFT(npow,xi,zign,dtt,mpow)
 
 5   do i = 1,npow
       ii = i
-      if (k < mpow(i)) goto 7
-      k = k-mpow(i)
+      if (k < int(mpow(i))) goto 7
+      k = k - int(mpow(i))
     enddo
 
-7   k = k+mpow(ii)
+7   k = k + int(mpow(ii))
   enddo
 
   ! final steps deal with dt factors
@@ -169,7 +169,7 @@ subroutine FFTinv(npow,s,zign,dtt,r,mpow)
 
   call FFT(npow,s,zign,dtt,mpow)    ! Fourier transform
 
-  r(1:nsmp) = real(s(1:nsmp))       ! take the real part
+  r(1:nsmp) = real(s(1:nsmp),kind=CUSTOM_REAL)       ! take the real part
 
   end subroutine FFTinv
 
@@ -193,7 +193,7 @@ subroutine rspec(s,np2)
   n1 = np2 + 1
 
   s(n1) = 0.0
-  s(1)  = cmplx(real(s(1)),0.0)
+  s(1)  = cmplx(real(s(1)),0.0,kind=CUSTOM_CMPLX)
 
   do i = 1,np2
     s(np2+i) = conjg(s(np2+2-i))
diff --git a/src/shared/interpolate.f90 b/src/shared/interpolate.f90
index ba36312ad..628419917 100644
--- a/src/shared/interpolate.f90
+++ b/src/shared/interpolate.f90
@@ -67,7 +67,7 @@ subroutine interpolate_element_value(xi,eta,gamma,ielem, &
   do k = 1, NGLLZ
     do j = 1, NGLLY
       do i = 1, NGLLX
-          val = val +  hxir(i) * hetar(j) * hgammar(k) * model_elem(i,j,k)
+          val = val +  real(hxir(i) * hetar(j) * hgammar(k) * model_elem(i,j,k),kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -131,7 +131,7 @@ subroutine interpolate_limited(xi,eta,gamma,ielem, &
   do k = 1, NGLLZ
     do j = 1, NGLLY
       do i = 1, NGLLX
-        val = val + hxir(i) * hetar(j) * hgammar(k) * model_elem(i,j,k)
+        val = val + real(hxir(i) * hetar(j) * hgammar(k) * model_elem(i,j,k),kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
diff --git a/src/shared/model_topo_bathy.f90 b/src/shared/model_topo_bathy.f90
index fa733679e..7666059e3 100644
--- a/src/shared/model_topo_bathy.f90
+++ b/src/shared/model_topo_bathy.f90
@@ -157,7 +157,7 @@ subroutine read_topo_bathy_file(ibathy_topo)
         !ival = ishftc(ival, 8, 16)
         ! work-around
         itmp = ival
-        ival = ishftc(itmp, 8, 16)
+        ival = int(ishftc(itmp, 8, 16),kind=2)
 
         ! stores in array
         ibathy_topo(itopo_x,itopo_y) = ival
diff --git a/src/shared/read_compute_parameters.f90 b/src/shared/read_compute_parameters.f90
index 2395ea540..9f2efe0a9 100644
--- a/src/shared/read_compute_parameters.f90
+++ b/src/shared/read_compute_parameters.f90
@@ -646,7 +646,7 @@ subroutine rcp_SIEM_set_mesh_parameters()
     endif
 
     nspec1layer = NSPEC2D_TOP(iregion0)
-    nspec1d = sqrt(real(nspec1layer))
+    nspec1d = int(sqrt(real(nspec1layer)))
 
     ! checks if nspec2d top is squared
     if (nspec1d*nspec1d /= nspec1layer) then
diff --git a/src/specfem3D/compute_add_sources.f90 b/src/specfem3D/compute_add_sources.f90
index 4f3dfb825..c71349ebf 100644
--- a/src/specfem3D/compute_add_sources.f90
+++ b/src/specfem3D/compute_add_sources.f90
@@ -214,7 +214,9 @@ subroutine compute_add_sources_adjoint()
           do i = 1,NGLLX
             iglob = ibool_crust_mantle(i,j,k,ispec)
 
-            hlagrange = hxir_adjstore(i,irec_local) * hetar_adjstore(j,irec_local) * hgammar_adjstore(k,irec_local)
+            hlagrange = real(hxir_adjstore(i,irec_local) &
+                           * hetar_adjstore(j,irec_local) &
+                           * hgammar_adjstore(k,irec_local),kind=CUSTOM_REAL)
 
             ! adds adjoint source acting at this time step (it):
             !
diff --git a/src/specfem3D/compute_adj_source_frechet.f90 b/src/specfem3D/compute_adj_source_frechet.f90
index e54a6589f..6450610f4 100644
--- a/src/specfem3D/compute_adj_source_frechet.f90
+++ b/src/specfem3D/compute_adj_source_frechet.f90
@@ -143,8 +143,8 @@ subroutine compute_adj_source_frechet(displ_s,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
         eps_array(:,:,i,j,k) = eps(:,:)
 
         ! Mjk eps_jk
-        eps_m_array(i,j,k) = Mxx * eps(1,1) + Myy * eps(2,2) + Mzz * eps(3,3) + &
-                   2 * (Mxy * eps(1,2) + Mxz * eps(1,3) + Myz * eps(2,3))
+        eps_m_array(i,j,k) = real(Mxx * eps(1,1) + Myy * eps(2,2) + Mzz * eps(3,3) + &
+                                  2 * (Mxy * eps( 1,2) + Mxz * eps(1,3) + Myz * eps(2,3)),kind=CUSTOM_REAL)
 
       enddo
     enddo
@@ -162,7 +162,7 @@ subroutine compute_adj_source_frechet(displ_s,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
     do j = 1,NGLLY
       do i = 1,NGLLX
 
-        hlagrange = hxir(i)*hetar(j)*hgammar(k)
+        hlagrange = real(hxir(i)*hetar(j)*hgammar(k),kind=CUSTOM_REAL)
 
         eps_s(1,1) = eps_s(1,1) + eps_array(1,1,i,j,k)*hlagrange
         eps_s(1,2) = eps_s(1,2) + eps_array(1,2,i,j,k)*hlagrange
@@ -197,9 +197,9 @@ subroutine compute_adj_source_frechet(displ_s,Mxx,Myy,Mzz,Mxy,Mxz,Myz, &
     do j = 1,NGLLY
       do i = 1,NGLLX
 
-        hlagrange_xi = hpxir(i)*hetar(j)*hgammar(k)
-        hlagrange_eta = hxir(i)*hpetar(j)*hgammar(k)
-        hlagrange_gamma = hxir(i)*hetar(j)*hpgammar(k)
+        hlagrange_xi = real(hpxir(i)*hetar(j)*hgammar(k),kind=CUSTOM_REAL)
+        hlagrange_eta = real(hxir(i)*hpetar(j)*hgammar(k),kind=CUSTOM_REAL)
+        hlagrange_gamma = real(hxir(i)*hetar(j)*hpgammar(k),kind=CUSTOM_REAL)
 
         eps_m_l_s(1) = eps_m_l_s(1) +  eps_m_array(i,j,k) * (hlagrange_xi * xix_s &
                    + hlagrange_eta * etax_s + hlagrange_gamma * gammax_s)
diff --git a/src/specfem3D/compute_arrays_source.f90 b/src/specfem3D/compute_arrays_source.f90
index 7fdb75843..2d395cc4a 100644
--- a/src/specfem3D/compute_arrays_source.f90
+++ b/src/specfem3D/compute_arrays_source.f90
@@ -365,7 +365,7 @@ subroutine compute_arrays_source_adjoint(adj_source_file,nu,source_adjoint, &
   endif
 
   ! non-dimensionalize
-  adj_src(:,:) = adj_src(:,:) * scale_displ_inv
+  adj_src(:,:) = real(adj_src(:,:) * scale_displ_inv,kind=CUSTOM_REAL)
 
   ! rotates to Cartesian
   do itime = 1, NSTEP_BLOCK
@@ -379,7 +379,7 @@ subroutine compute_arrays_source_adjoint(adj_source_file,nu,source_adjoint, &
 
   ! stores rotated adjoint source
   do icomp = 1, NDIM
-    source_adjoint(icomp,1:NSTEP_BLOCK) = adj_src_u(icomp,1:NSTEP_BLOCK)
+    source_adjoint(icomp,1:NSTEP_BLOCK) = real(adj_src_u(icomp,1:NSTEP_BLOCK),kind=CUSTOM_REAL)
   enddo
 
   ! free temporary arrays
diff --git a/src/specfem3D/compute_coupling.f90 b/src/specfem3D/compute_coupling.f90
index 902e84a2b..76fa101d0 100644
--- a/src/specfem3D/compute_coupling.f90
+++ b/src/specfem3D/compute_coupling.f90
@@ -703,39 +703,41 @@ subroutine compute_coupling_CMB_ICB_fluid(NGLOB_OC,accel_outer_core,wgllwgll_xy,
 
   implicit none
 
-  integer :: NGLOB_OC
-  real(kind=CUSTOM_REAL), dimension(NGLOB_OC) :: accel_outer_core
+  integer, intent(in) :: NGLOB_OC
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OC), intent(in) :: accel_outer_core
 
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY), intent(in) :: wgllwgll_xy
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE), intent(in) :: ibool_outer_core
 
   ! crust/mantel
-  integer :: NGLOB_CM
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM) :: displ_crust_mantle,accel_crust_mantle
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-  integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
+  integer, intent(in) :: NGLOB_CM
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM), intent(in) :: displ_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM), intent(inout) :: accel_crust_mantle
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE), intent(in) :: ibool_crust_mantle
+  integer, dimension(NSPEC2D_BOTTOM_CM), intent(in) :: ibelm_bottom_crust_mantle
 
   ! inner core
-  integer :: NGLOB_IC
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC) :: displ_inner_core,accel_inner_core
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-  integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
+  integer, intent(in) :: NGLOB_IC
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC), intent(in) :: displ_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC), intent(inout) :: accel_inner_core
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
+  integer, dimension(NSPEC2D_TOP_IC), intent(in) :: ibelm_top_inner_core
 
   ! outer core
-  double precision :: RHO_TOP_OC
-  real(kind=CUSTOM_REAL) :: minus_g_cmb
-  double precision :: RHO_BOTTOM_OC
-  real(kind=CUSTOM_REAL) :: minus_g_icb
+  double precision, intent(in) :: RHO_TOP_OC
+  real(kind=CUSTOM_REAL), intent(in) :: minus_g_cmb
+  double precision, intent(in) :: RHO_BOTTOM_OC
+  real(kind=CUSTOM_REAL), intent(in) :: minus_g_icb
 
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC), intent(in) :: normal_top_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC), intent(in) :: jacobian2D_top_outer_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC), intent(in) :: normal_bottom_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC), intent(in) :: jacobian2D_bottom_outer_core
 
-  integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
-  integer :: nspec_bottom
-  integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
-  integer :: nspec2D_top
+  integer, dimension(NSPEC2D_TOP_OC), intent(in) :: ibelm_top_outer_core
+  integer, intent(in) :: nspec_bottom
+  integer, dimension(NSPEC2D_BOTTOM_OC), intent(in) :: ibelm_bottom_outer_core
+  integer, intent(in) :: nspec2D_top
 
   ! local parameters
   real(kind=CUSTOM_REAL) :: pressure,nx,ny,nz,weight
@@ -777,12 +779,12 @@ subroutine compute_coupling_CMB_ICB_fluid(NGLOB_OC,accel_outer_core,wgllwgll_xy,
 
         ! compute pressure, taking gravity into account
         if (GRAVITY_VAL) then
-          pressure = RHO_TOP_OC * (- accel_outer_core(iglob) &
+          pressure = real(RHO_TOP_OC,kind=CUSTOM_REAL) * (- accel_outer_core(iglob) &
              + minus_g_cmb *(displ_crust_mantle(1,iglob_mantle)*nx &
                             + displ_crust_mantle(2,iglob_mantle)*ny &
                             + displ_crust_mantle(3,iglob_mantle)*nz))
         else
-          pressure = - RHO_TOP_OC * accel_outer_core(iglob)
+          pressure = - real(RHO_TOP_OC,kind=CUSTOM_REAL) * accel_outer_core(iglob)
         endif
 
         ! formulation with generalized potential
@@ -824,12 +826,12 @@ subroutine compute_coupling_CMB_ICB_fluid(NGLOB_OC,accel_outer_core,wgllwgll_xy,
 
         ! compute pressure, taking gravity into account
         if (GRAVITY_VAL) then
-          pressure = RHO_BOTTOM_OC * (- accel_outer_core(iglob) &
+          pressure = real(RHO_BOTTOM_OC,kind=CUSTOM_REAL) * (- accel_outer_core(iglob) &
              + minus_g_icb *(displ_inner_core(1,iglob_inner_core)*nx &
                              + displ_inner_core(2,iglob_inner_core)*ny &
                              + displ_inner_core(3,iglob_inner_core)*nz))
         else
-          pressure = - RHO_BOTTOM_OC * accel_outer_core(iglob)
+          pressure = - real(RHO_BOTTOM_OC,kind=CUSTOM_REAL) * accel_outer_core(iglob)
         endif
 
         ! formulation with generalized potential
@@ -867,26 +869,27 @@ subroutine compute_coupling_CMB_fluid(NGLOB_CM,displ_crust_mantle,accel_crust_ma
 
   implicit none
 
-  integer :: NGLOB_CM
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM) :: displ_crust_mantle,accel_crust_mantle
+  integer, intent(in) :: NGLOB_CM
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM), intent(in) :: displ_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM), intent(inout) :: accel_crust_mantle
 
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-  integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE), intent(in) :: ibool_crust_mantle
+  integer, dimension(NSPEC2D_BOTTOM_CM), intent(in) :: ibelm_bottom_crust_mantle
 
-  integer :: NGLOB_OC
-  real(kind=CUSTOM_REAL), dimension(NGLOB_OC) :: accel_outer_core
+  integer, intent(in) :: NGLOB_OC
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OC), intent(in) :: accel_outer_core
 
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC), intent(in) :: normal_top_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC), intent(in) :: jacobian2D_top_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY), intent(in) :: wgllwgll_xy
 
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
-  integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE), intent(in) :: ibool_outer_core
+  integer, dimension(NSPEC2D_TOP_OC), intent(in) :: ibelm_top_outer_core
 
-  double precision :: RHO_TOP_OC
-  real(kind=CUSTOM_REAL) :: minus_g_cmb
+  double precision, intent(in) :: RHO_TOP_OC
+  real(kind=CUSTOM_REAL), intent(in) :: minus_g_cmb
 
-  integer :: nspec_bottom
+  integer, intent(in) :: nspec_bottom
 
   ! local parameters
   real(kind=CUSTOM_REAL) :: pressure,nx,ny,nz,weight
@@ -928,12 +931,12 @@ subroutine compute_coupling_CMB_fluid(NGLOB_CM,displ_crust_mantle,accel_crust_ma
 
         ! compute pressure, taking gravity into account
         if (GRAVITY_VAL) then
-          pressure = RHO_TOP_OC * (- accel_outer_core(iglob) &
+          pressure = real(RHO_TOP_OC,kind=CUSTOM_REAL) * (- accel_outer_core(iglob) &
              + minus_g_cmb *(displ_crust_mantle(1,iglob_mantle)*nx &
                             + displ_crust_mantle(2,iglob_mantle)*ny &
                             + displ_crust_mantle(3,iglob_mantle)*nz))
         else
-          pressure = - RHO_TOP_OC * accel_outer_core(iglob)
+          pressure = - real(RHO_TOP_OC,kind=CUSTOM_REAL) * accel_outer_core(iglob)
         endif
 
         ! formulation with generalized potential
@@ -970,26 +973,27 @@ subroutine compute_coupling_ICB_fluid(NGLOB_IC,displ_inner_core,accel_inner_core
 
   implicit none
 
-  integer :: NGLOB_IC
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC) :: displ_inner_core,accel_inner_core
+  integer, intent(in) :: NGLOB_IC
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC), intent(in) :: displ_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC), intent(inout) :: accel_inner_core
 
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-  integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
+  integer, dimension(NSPEC2D_TOP_IC), intent(in) :: ibelm_top_inner_core
 
   integer :: NGLOB_OC
-  real(kind=CUSTOM_REAL), dimension(NGLOB_OC) :: accel_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OC), intent(in) :: accel_outer_core
 
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC), intent(in) :: normal_bottom_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC), intent(in) :: jacobian2D_bottom_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY), intent(in) :: wgllwgll_xy
 
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
-  integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE), intent(in) :: ibool_outer_core
+  integer, dimension(NSPEC2D_BOTTOM_OC), intent(in) :: ibelm_bottom_outer_core
 
-  double precision :: RHO_BOTTOM_OC
-  real(kind=CUSTOM_REAL) :: minus_g_icb
+  double precision, intent(in) :: RHO_BOTTOM_OC
+  real(kind=CUSTOM_REAL), intent(in) :: minus_g_icb
 
-  integer :: nspec2D_top
+  integer, intent(in) :: nspec2D_top
 
   ! local parameters
   real(kind=CUSTOM_REAL) :: pressure,nx,ny,nz,weight
@@ -1031,12 +1035,12 @@ subroutine compute_coupling_ICB_fluid(NGLOB_IC,displ_inner_core,accel_inner_core
 
         ! compute pressure, taking gravity into account
         if (GRAVITY_VAL) then
-          pressure = RHO_BOTTOM_OC * (- accel_outer_core(iglob) &
+          pressure = real(RHO_BOTTOM_OC,kind=CUSTOM_REAL) * (- accel_outer_core(iglob) &
              + minus_g_icb *(displ_inner_core(1,iglob_inner_core)*nx &
                              + displ_inner_core(2,iglob_inner_core)*ny &
                              + displ_inner_core(3,iglob_inner_core)*nz))
         else
-          pressure = - RHO_BOTTOM_OC * accel_outer_core(iglob)
+          pressure = - real(RHO_BOTTOM_OC,kind=CUSTOM_REAL) * accel_outer_core(iglob)
         endif
 
         ! formulation with generalized potential
@@ -1068,8 +1072,8 @@ subroutine compute_coupling_ocean(NGLOB,accel_crust_mantle, &
 
   implicit none
 
-  integer,intent(in) :: NGLOB
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(inout) :: accel_crust_mantle
+  integer, intent(in) :: NGLOB
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB), intent(inout) :: accel_crust_mantle
 
   ! mass matrices
   !
@@ -1079,15 +1083,15 @@ subroutine compute_coupling_ocean(NGLOB,accel_crust_mantle, &
   !
   ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix is needed
   ! for the sake of performance, only "rmassz" array will be filled and "rmassx" & "rmassy" will be pointers to it
-  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE),intent(in) :: rmassx_crust_mantle
-  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE),intent(in) :: rmassy_crust_mantle
-  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE),intent(in) :: rmassz_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE), intent(in) :: rmassx_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE), intent(in) :: rmassy_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE), intent(in) :: rmassz_crust_mantle
 
   ! oceans arrays
-  integer,intent(in) :: npoin_oceans
-  real(kind=CUSTOM_REAL), dimension(npoin_oceans) :: rmass_ocean_load_selected
-  real(kind=CUSTOM_REAL), dimension(NDIM,npoin_oceans) :: normal_ocean_load
-  integer, dimension(npoin_oceans) :: ibool_ocean_load
+  integer, intent(in) :: npoin_oceans
+  real(kind=CUSTOM_REAL), dimension(npoin_oceans), intent(in) :: rmass_ocean_load_selected
+  real(kind=CUSTOM_REAL), dimension(NDIM,npoin_oceans), intent(in) :: normal_ocean_load
+  integer, dimension(npoin_oceans), intent(in) :: ibool_ocean_load
 
   ! local parameters
   real(kind=CUSTOM_REAL) :: force_normal_comp,rmass
diff --git a/src/specfem3D/compute_seismograms.F90 b/src/specfem3D/compute_seismograms.F90
index 2a8adec7e..24bf236f8 100644
--- a/src/specfem3D/compute_seismograms.F90
+++ b/src/specfem3D/compute_seismograms.F90
@@ -297,8 +297,9 @@ subroutine compute_seismograms_adjoint(displ_crust_mantle, &
     sloc_der(:,irec_local) = sloc_der(:,irec_local) + eps_m_l_s(:) * stf_deltat
 
     ! derivatives for time shift and hduration
-    Kp_deltat= -1.0d0/sqrt(PI)/hdur_Gaussian(irec) * exp(-(timeval/hdur_Gaussian(irec))**2) * deltat * scale_t
-    Hp_deltat= timeval/hdur_Gaussian(irec) * Kp_deltat
+    Kp_deltat = real(-1.0d0/sqrt(PI)/hdur_Gaussian(irec) &
+                     * exp(-(timeval/hdur_Gaussian(irec))**2) * deltat * scale_t,kind=CUSTOM_REAL)
+    Hp_deltat = real(timeval/hdur_Gaussian(irec) * Kp_deltat,kind=CUSTOM_REAL)
 
     stshift_der(irec_local) = stshift_der(irec_local) + eps_m_s * Kp_deltat
     shdur_der(irec_local) = shdur_der(irec_local) + eps_m_s * Hp_deltat
@@ -405,6 +406,8 @@ subroutine compute_seismograms_strain(nglob,displ)
   real(kind=CUSTOM_REAL) :: hlagrange
 
   real(kind=CUSTOM_REAL),dimension(NDIM,NDIM) :: eps_loc,eps_loc_new
+  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM) :: eps_loc_rot
+
   real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ) :: eps_array
   real(kind=CUSTOM_REAL),dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: displ_elem
 
@@ -517,7 +520,7 @@ subroutine compute_seismograms_strain(nglob,displ)
     do k = 1,NGLLZ
       do j = 1,NGLLY
         do i = 1,NGLLX
-          hlagrange = hxir(i)*hetar(j)*hgammar(k)
+          hlagrange = real(hxir(i)*hetar(j)*hgammar(k),kind=CUSTOM_REAL)
           eps_loc(1,1) = eps_loc(1,1) + eps_array(1,1,i,j,k)*hlagrange
           eps_loc(1,2) = eps_loc(1,2) + eps_array(1,2,i,j,k)*hlagrange
           eps_loc(1,3) = eps_loc(1,3) + eps_array(1,3,i,j,k)*hlagrange
@@ -550,7 +553,8 @@ subroutine compute_seismograms_strain(nglob,displ)
     ! eps_xy -> eps_ne
     ! eps_xz -> eps_nz
     ! eps_yz -> eps_ez
-    eps_loc_new(:,:) = matmul(matmul(nu_rec(:,:,irec),eps_loc(:,:)), transpose(nu_rec(:,:,irec)))
+    eps_loc_rot(:,:) = real(matmul(nu_rec(:,:,irec),eps_loc(:,:)),kind=CUSTOM_REAL)
+    eps_loc_new(:,:) = real(matmul(eps_loc_rot(:,:), transpose(nu_rec(:,:,irec))),kind=CUSTOM_REAL)
 
     ! distinguish between single and double precision for reals
     !
diff --git a/src/specfem3D/define_derivation_matrices.f90 b/src/specfem3D/define_derivation_matrices.f90
index e0582283b..ea88d98b0 100644
--- a/src/specfem3D/define_derivation_matrices.f90
+++ b/src/specfem3D/define_derivation_matrices.f90
@@ -106,7 +106,7 @@ subroutine define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
   do i = 1,NGLLX
     do j = 1,NGLLY
       do k = 1,NGLLZ
-        wgll_cube(i,j,k) = wxgll(i)*wygll(j)*wzgll(k)
+        wgll_cube(i,j,k) = real(wxgll(i)*wygll(j)*wzgll(k), kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
diff --git a/src/specfem3D/locate_regular_points.f90 b/src/specfem3D/locate_regular_points.f90
index 23d803358..703479165 100644
--- a/src/specfem3D/locate_regular_points.f90
+++ b/src/specfem3D/locate_regular_points.f90
@@ -142,7 +142,7 @@ subroutine find_regular_grid_slice_number(slice_number, GRID)
     call exit_MPI(myrank, 'Only deal with 6 chunks at this moment')
   endif
 
-  xi_width = PI/2; eta_width = PI/2
+  xi_width = real(PI/2.d0,kind=CUSTOM_REAL); eta_width = real(PI/2.d0,kind=CUSTOM_REAL)
   nproc = NPROC_XI_VAL
   ilayer = 0
 
@@ -153,9 +153,9 @@ subroutine find_regular_grid_slice_number(slice_number, GRID)
 
     ! (lat,lon,radius) for isp point
     lat = KL_REG_MIN_LAT + ilat * GRID%dlat * GRID%ndoubling(ilayer)
-    th = (90 - lat) * DEGREES_TO_RADIANS
+    th = real((90 - lat) * DEGREES_TO_RADIANS)
     lon = KL_REG_MIN_LON + (ilon - 1) * GRID%dlon * GRID%ndoubling(ilayer)
-    ph = lon * DEGREES_TO_RADIANS
+    ph = real(lon * DEGREES_TO_RADIANS)
     x = sin(th) * cos(ph); y = sin(th) * sin(ph); z = cos(th)
 
     ! figure out slice number
@@ -378,9 +378,9 @@ subroutine locate_regular_points(npoints_slice_reg,points_slice_reg,GRID, &
     call lagrange_any2(eta, NGLLY, yigll, hetar)
     call lagrange_any2(gamma, NGLLZ, zigll, hgammar)
 
-    hxir_reg(:,ipoint) = hxir(:)
-    hetar_reg(:,ipoint) = hetar(:)
-    hgammar_reg(:,ipoint) = hgammar(:)
+    hxir_reg(:,ipoint) = real(hxir(:),kind=CUSTOM_REAL)
+    hetar_reg(:,ipoint) = real(hetar(:),kind=CUSTOM_REAL)
+    hgammar_reg(:,ipoint) = real(hgammar(:),kind=CUSTOM_REAL)
 
   enddo ! ipoint
 
diff --git a/src/specfem3D/locate_sources.f90 b/src/specfem3D/locate_sources.f90
index 5442ec625..bb94ee226 100644
--- a/src/specfem3D/locate_sources.f90
+++ b/src/specfem3D/locate_sources.f90
@@ -880,7 +880,7 @@ subroutine calc_mask_source(mask_source,x_target,y_target,z_target)
           ! adds Gaussian function value to mask
           ! (mask value becomes 0 closer to source location, 1 everywhere else )
           mask_source(i,j,k,ispec) = mask_source(i,j,k,ispec) &
-                    * ( 1.0_CUSTOM_REAL - exp( - dist_squared / sigma_squared ) )
+                    * ( 1.0_CUSTOM_REAL - real(exp( - dist_squared / sigma_squared ),kind=CUSTOM_REAL) )
         enddo
       enddo
     enddo
diff --git a/src/specfem3D/noise_tomography.f90 b/src/specfem3D/noise_tomography.f90
index 4a53b5ad8..c55b18b52 100644
--- a/src/specfem3D/noise_tomography.f90
+++ b/src/specfem3D/noise_tomography.f90
@@ -619,9 +619,9 @@ subroutine compute_arrays_source_noise(xi_noise,eta_noise,gamma_noise,nu_single,
 
   ! rotates to Cartesian
   do itime = 1, NSTEP
-    noise_src_u(:,itime) = nu_single(1,:) * noise_src(itime) * nu_main(1) &
-                         + nu_single(2,:) * noise_src(itime) * nu_main(2) &
-                         + nu_single(3,:) * noise_src(itime) * nu_main(3)
+    noise_src_u(:,itime) = real(nu_single(1,:) * noise_src(itime) * nu_main(1) &
+                              + nu_single(2,:) * noise_src(itime) * nu_main(2) &
+                              + nu_single(3,:) * noise_src(itime) * nu_main(3),kind=CUSTOM_REAL)
   enddo
 
   ! receiver interpolators
@@ -634,7 +634,7 @@ subroutine compute_arrays_source_noise(xi_noise,eta_noise,gamma_noise,nu_single,
     do j = 1, NGLLY
       do i = 1, NGLLX
         do itime = 1, NSTEP
-          noise_sourcearray(:,i,j,k,itime) = hxir(i) * hetar(j) * hgammar(k) * noise_src_u(:,itime)
+          noise_sourcearray(:,i,j,k,itime) = real(hxir(i) * hetar(j) * hgammar(k),kind=CUSTOM_REAL) * noise_src_u(:,itime)
         enddo
       enddo
     enddo
@@ -1175,7 +1175,7 @@ subroutine save_kernels_strength_noise()
   real(kind=CUSTOM_REAL) :: scale_kl
 
   ! scaling factor for kernel units [ s / km^3 ]
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
 
   sigma_kl_crust_mantle(:,:,:,:) = sigma_kl_crust_mantle(:,:,:,:) * scale_kl
 
diff --git a/src/specfem3D/prepare_attenuation.f90 b/src/specfem3D/prepare_attenuation.f90
index ddfe179ab..12a1ae1bd 100644
--- a/src/specfem3D/prepare_attenuation.f90
+++ b/src/specfem3D/prepare_attenuation.f90
@@ -277,10 +277,10 @@ subroutine prepare_attenuation()
             L_dble = 0.5d0 * (d44 + d55)
             F_dble = 0.5d0 * (d13 + d23)
 
-            eta_aniso = F_dble / (A_dble - 2.d0*L_dble)   ! eta = F / (A-2L)
+            eta_aniso = real(F_dble / (A_dble - 2.d0*L_dble),kind=CUSTOM_REAL)   ! eta = F / (A-2L)
 
-            muvl = L_dble * scale_factor_minus_one     ! c44 - > L - > muv
-            muhl = N_dble * scale_factor_minus_one     ! c66 - > N - > muh
+            muvl = real(L_dble * scale_factor_minus_one,kind=CUSTOM_REAL)     ! c44 - > L - > muv
+            muhl = real(N_dble * scale_factor_minus_one,kind=CUSTOM_REAL)     ! c66 - > N - > muh
 
             d11 = d11 + FOUR_THIRDS * muhl ! * minus_sum_beta * mul
             d12 = d12 - TWO_THIRDS * muhl
@@ -327,7 +327,7 @@ subroutine prepare_attenuation()
             c66store_crust_mantle(i,j,k,ispec) = real(g66,kind=CUSTOM_REAL)
 
             ! for attenuation
-            muvstore_crust_mantle(i,j,k,ispec) = L_dble * scale_factor
+            muvstore_crust_mantle(i,j,k,ispec) = real(L_dble * scale_factor,kind=CUSTOM_REAL)
 
           else
             ! isotropic or transverse isotropic element
@@ -336,11 +336,11 @@ subroutine prepare_attenuation()
               muvstore_crust_mantle_3dmovie(i,j,k,ispec) = muvstore_crust_mantle(i,j,k,ispec)
             endif
 
-            muvstore_crust_mantle(i,j,k,ispec) = muvstore_crust_mantle(i,j,k,ispec) * scale_factor
+            muvstore_crust_mantle(i,j,k,ispec) = real(muvstore_crust_mantle(i,j,k,ispec) * scale_factor,kind=CUSTOM_REAL)
 
             ! scales transverse isotropic values for mu_h
             if (ispec_is_tiso_crust_mantle(ispec)) then
-              muhstore_crust_mantle(i,j,k,ispec) = muhstore_crust_mantle(i,j,k,ispec) * scale_factor
+              muhstore_crust_mantle(i,j,k,ispec) = real(muhstore_crust_mantle(i,j,k,ispec) * scale_factor,kind=CUSTOM_REAL)
             endif
           endif
 
@@ -369,19 +369,19 @@ subroutine prepare_attenuation()
 
             mul = c44store_inner_core(i,j,k,ispec)
             c11store_inner_core(i,j,k,ispec) = c11store_inner_core(i,j,k,ispec) &
-                    + FOUR_THIRDS * scale_factor_minus_one * mul
+                    + real(FOUR_THIRDS * scale_factor_minus_one * mul,kind=CUSTOM_REAL)
             c12store_inner_core(i,j,k,ispec) = c12store_inner_core(i,j,k,ispec) &
-                    - TWO_THIRDS * scale_factor_minus_one * mul
+                    - real(TWO_THIRDS * scale_factor_minus_one * mul,kind=CUSTOM_REAL)
             c13store_inner_core(i,j,k,ispec) = c13store_inner_core(i,j,k,ispec) &
-                    - TWO_THIRDS * scale_factor_minus_one * mul
+                    - real(TWO_THIRDS * scale_factor_minus_one * mul,kind=CUSTOM_REAL)
             c33store_inner_core(i,j,k,ispec) = c33store_inner_core(i,j,k,ispec) &
-                    + FOUR_THIRDS * scale_factor_minus_one * mul
+                    + real(FOUR_THIRDS * scale_factor_minus_one * mul,kind=CUSTOM_REAL)
             c44store_inner_core(i,j,k,ispec) = c44store_inner_core(i,j,k,ispec) &
-                    + scale_factor_minus_one * mul
+                    + real(scale_factor_minus_one * mul,kind=CUSTOM_REAL)
             ! for attenuation
-            muvstore_inner_core(i,j,k,ispec) = mul * scale_factor
+            muvstore_inner_core(i,j,k,ispec) = real(mul * scale_factor,kind=CUSTOM_REAL)
           else
-            muvstore_inner_core(i,j,k,ispec) = muvstore_inner_core(i,j,k,ispec) * scale_factor
+            muvstore_inner_core(i,j,k,ispec) = real(muvstore_inner_core(i,j,k,ispec) * scale_factor,kind=CUSTOM_REAL)
           endif
         enddo
       enddo
diff --git a/src/specfem3D/prepare_elastic_elements.F90 b/src/specfem3D/prepare_elastic_elements.F90
index e372b1642..f9a6baa57 100644
--- a/src/specfem3D/prepare_elastic_elements.F90
+++ b/src/specfem3D/prepare_elastic_elements.F90
@@ -284,10 +284,10 @@ subroutine prepare_elastic_elements()
           L_dble = 0.5d0 * (d44 + d55)
           F_dble = 0.5d0 * (d13 + d23)
 
-          eta_aniso = F_dble / (A_dble - 2.d0*L_dble)   ! eta = F / (A-2L)
+          eta_aniso = real(F_dble / (A_dble - 2.d0*L_dble),kind=CUSTOM_REAL)   ! eta = F / (A-2L)
 
-          muvl = L_dble * minus_sum_beta     ! c44 - > L - > muv
-          muhl = N_dble * minus_sum_beta     ! c66 - > N - > muh
+          muvl = real(L_dble * minus_sum_beta,kind=CUSTOM_REAL)     ! c44 - > L - > muv
+          muhl = real(N_dble * minus_sum_beta,kind=CUSTOM_REAL)     ! c66 - > N - > muh
 
           d11 = d11 + FOUR_THIRDS * muhl ! * minus_sum_beta * mul
           d12 = d12 - TWO_THIRDS * muhl
diff --git a/src/specfem3D/prepare_timerun.F90 b/src/specfem3D/prepare_timerun.F90
index 15a2558fc..869e57120 100644
--- a/src/specfem3D/prepare_timerun.F90
+++ b/src/specfem3D/prepare_timerun.F90
@@ -595,8 +595,8 @@ subroutine prepare_timerun_constants()
 
   ! distinguish between single and double precision for reals
   deltat = real(DT*scale_t_inv, kind=CUSTOM_REAL)
-  deltatover2 = 0.5d0*deltat
-  deltatsqover2 = 0.5d0*deltat*deltat
+  deltatover2 = real(0.5d0*deltat, kind=CUSTOM_REAL)
+  deltatsqover2 = real(0.5d0*deltat*deltat, kind=CUSTOM_REAL)
 
   if (SIMULATION_TYPE == 3) then
     if (UNDO_ATTENUATION) then
@@ -607,8 +607,8 @@ subroutine prepare_timerun_constants()
     else
       ! reconstructed wavefield moves backward in time from last snapshot
       b_deltat = - real(DT*scale_t_inv, kind=CUSTOM_REAL)
-      b_deltatover2 = 0.5d0*b_deltat
-      b_deltatsqover2 = 0.5d0*b_deltat*b_deltat
+      b_deltatover2 = real(0.5d0*b_deltat, kind=CUSTOM_REAL)
+      b_deltatsqover2 = real(0.5d0*b_deltat*b_deltat, kind=CUSTOM_REAL)
     endif
   else
     ! will not be used, but initialized
@@ -784,9 +784,9 @@ subroutine prepare_simultaneous_event_execution_shift_undoatt()
     ! determines time shift (in millisec) depending on group number
     if (estimated_io_time_in_millisec > 5000) then
       ! limits shifts to 5s
-      millisec_shift = 5000.d0 * mygroup
+      millisec_shift = int(5000.d0 * mygroup)
     else
-      millisec_shift = estimated_io_time_in_millisec * mygroup
+      millisec_shift = int(estimated_io_time_in_millisec * mygroup)
     endif
 
     ! user output
diff --git a/src/specfem3D/save_kernels.F90 b/src/specfem3D/save_kernels.F90
index fa2890f0a..72772ef60 100644
--- a/src/specfem3D/save_kernels.F90
+++ b/src/specfem3D/save_kernels.F90
@@ -491,7 +491,7 @@ subroutine restore_unshifted_reference_moduli()
 
       if (ANISOTROPIC_3D_MANTLE_VAL) then
         ! anisotropic element
-        scale_factor_minus_one = scale_factor - 1.d0
+        scale_factor_minus_one = scale_factor - 1.0_CUSTOM_REAL
 
         ! shifting: (in prepare_attenuation.f90)
 
@@ -700,7 +700,7 @@ subroutine restore_unshifted_reference_moduli()
         !
         !       however, to properly account for shear attenuation, one might have to add also
         !       memory-variables for a modulus defect associated with muh.
-        muvstore_crust_mantle(INDEX_IJK,ispec) = L_dble
+        muvstore_crust_mantle(INDEX_IJK,ispec) = real(L_dble,kind=CUSTOM_REAL)
 
       else
         ! isotropic or transverse isotropic element
@@ -730,7 +730,7 @@ subroutine restore_unshifted_reference_moduli()
         if (abs(scale_factor) < TINYVAL) cycle
 
         if (ANISOTROPIC_INNER_CORE_VAL) then
-          scale_factor_minus_one = scale_factor - 1.d0
+          scale_factor_minus_one = scale_factor - 1.0_CUSTOM_REAL
 
           mul = c44store_inner_core(INDEX_IJK,ispec) / scale_factor
           c44store_inner_core(INDEX_IJK,ispec) = mul
@@ -841,15 +841,15 @@ subroutine save_kernels_crust_mantle_ani()
   !                  with the intent to dimensionalize kernel values to [ s km^(-3) ]
   !
   ! kernel unit [ s / km^3 ]
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
   ! For anisotropic kernels
   ! final unit : [s km^(-3) GPa^(-1)]
-  scale_kl_ani = scale_t**3 / (RHOAV*R_PLANET**3) * 1.d18
+  scale_kl_ani = real(scale_t**3 / (RHOAV*R_PLANET**3) * 1.d18,kind=CUSTOM_REAL)
   ! final unit : [s km^(-3) (kg/m^3)^(-1)]
-  scale_kl_rho = scale_t * scale_displ_inv / RHOAV * 1.d9
+  scale_kl_rho = real(scale_t * scale_displ_inv / RHOAV * 1.d9,kind=CUSTOM_REAL)
   ! the scale of GPa--[g/cm^3][(km/s)^2]
-  scaleval = dsqrt(PI*GRAV*RHOAV)
-  scale_GPa = (RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2)
+  scaleval = real(sqrt(PI*GRAV*RHOAV),kind=CUSTOM_REAL)
+  scale_GPa = real((RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2),kind=CUSTOM_REAL)
 
   ! debug
   !if (myrank == 0) print *,'debug: save kernels: scaling factors',scale_kl,scale_kl_ani,scale_kl_rho
@@ -1575,12 +1575,12 @@ subroutine save_kernels_crust_mantle_iso()
   !                  with the intent to dimensionalize kernel values to [ s km^(-3) ]
   !
   ! kernel unit [ s / km^3 ]
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
   ! For anisotropic kernels
   ! final unit : [s km^(-3) GPa^(-1)]
-  scale_kl_ani = scale_t**3 / (RHOAV*R_PLANET**3) * 1.d18
+  scale_kl_ani = real(scale_t**3 / (RHOAV*R_PLANET**3) * 1.d18,kind=CUSTOM_REAL)
   ! final unit : [s km^(-3) (kg/m^3)^(-1)]
-  scale_kl_rho = scale_t * scale_displ_inv / RHOAV * 1.d9
+  scale_kl_rho = real(scale_t * scale_displ_inv / RHOAV * 1.d9,kind=CUSTOM_REAL)
 
   ! isotropic kernels
   !
@@ -1716,7 +1716,7 @@ subroutine save_kernels_outer_core(rhostore_outer_core,kappavstore_outer_core,rh
   ! saftey check
   if (.not. SAVE_KERNELS_OC) return
 
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
 
   ! outer_core
   do ispec = 1, NSPEC_OUTER_CORE_ADJOINT
@@ -1785,7 +1785,7 @@ subroutine save_kernels_inner_core(rhostore_inner_core,muvstore_inner_core,kappa
   if (.not. SAVE_KERNELS_IC) return
 
   ! scaling to units
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
 
   ! inner_core
   do ispec = 1, NSPEC_INNER_CORE_ADJOINT
@@ -1846,15 +1846,15 @@ subroutine save_kernels_boundary_kl()
   if (.not. SAVE_KERNELS_BOUNDARY) return
 
   ! kernel unit [ s / km^3 ]
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
 
   ! scale the boundary kernels properly: *scale_kl gives s/km^3 and 1.d3 gives
   ! the relative boundary kernels (for every 1 km) in s/km^2
-  moho_kl(:,:,:) = moho_kl(:,:,:) * scale_kl * 1.d3
-  d400_kl(:,:,:) = d400_kl(:,:,:) * scale_kl * 1.d3
-  d670_kl(:,:,:) = d670_kl(:,:,:) * scale_kl * 1.d3
-  cmb_kl(:,:,:) = cmb_kl(:,:,:) * scale_kl * 1.d3
-  icb_kl(:,:,:) = icb_kl(:,:,:) * scale_kl * 1.d3
+  moho_kl(:,:,:) = moho_kl(:,:,:) * real(scale_kl * 1.d3,kind=CUSTOM_REAL)
+  d400_kl(:,:,:) = d400_kl(:,:,:) * real(scale_kl * 1.d3,kind=CUSTOM_REAL)
+  d670_kl(:,:,:) = d670_kl(:,:,:) * real(scale_kl * 1.d3,kind=CUSTOM_REAL)
+  cmb_kl(:,:,:) = cmb_kl(:,:,:) * real(scale_kl * 1.d3,kind=CUSTOM_REAL)
+  icb_kl(:,:,:) = icb_kl(:,:,:) * real(scale_kl * 1.d3,kind=CUSTOM_REAL)
 
   ! writes out kernels to file
   if (ADIOS_FOR_KERNELS) then
@@ -1905,17 +1905,17 @@ subroutine save_kernels_source_derivatives()
   character(len=MAX_STRING_LEN) :: outputname
 
   ! scaling factor
-  scale_mass = RHOAV * (R_EARTH**3)
+  scale_mass = real(RHOAV * (R_EARTH**3),kind=CUSTOM_REAL)
 
   ! computes derivatives
   do irec_local = 1, nrec_local
     ! rotate and scale the location derivatives to correspond to dn,de,dz
-    sloc_der(:,irec_local) = matmul(transpose(nu_source(:,:,irec_local)),sloc_der(:,irec_local)) &
-                             * scale_displ * scale_t
+    sloc_der(:,irec_local) = real(matmul(transpose(nu_source(:,:,irec_local)),sloc_der(:,irec_local)) &
+                                  * scale_displ * scale_t,kind=CUSTOM_REAL)
 
     ! rotate scale the moment derivatives to correspond to M[n,e,z][n,e,z]
-    moment_der(:,:,irec_local) = matmul(matmul(transpose(nu_source(:,:,irec_local)),moment_der(:,:,irec_local)), &
-               nu_source(:,:,irec_local)) * scale_t ** 3 / scale_mass
+    moment_der(:,:,irec_local) = real(matmul(matmul(transpose(nu_source(:,:,irec_local)),moment_der(:,:,irec_local)), &
+               nu_source(:,:,irec_local)) * scale_t ** 3 / scale_mass,kind=CUSTOM_REAL)
 
     ! *nu_source* is the rotation matrix from ECEF to local N-E-UP as defined in src/specfem3D/locate_sources.f90
 
@@ -1934,8 +1934,8 @@ subroutine save_kernels_source_derivatives()
 ! which is in the opposite sense from the transformation of M.
 
     ! derivatives for time shift and hduration
-    stshift_der(irec_local) = stshift_der(irec_local) * scale_displ**2
-    shdur_der(irec_local) = shdur_der(irec_local) * scale_displ**2
+    stshift_der(irec_local) = stshift_der(irec_local) * real(scale_displ**2,kind=CUSTOM_REAL)
+    shdur_der(irec_local) = shdur_der(irec_local) * real(scale_displ**2,kind=CUSTOM_REAL)
   enddo
 
   ! writes out kernels to file
@@ -1990,7 +1990,7 @@ subroutine save_kernels_Hessian()
   real(kind=CUSTOM_REAL) :: scale_kl
 
   ! scaling factors
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
 
   ! scales approximate Hessian
   hess_kl_crust_mantle(:,:,:,:) = 2._CUSTOM_REAL * hess_kl_crust_mantle(:,:,:,:) * scale_kl
diff --git a/src/specfem3D/save_regular_kernels.f90 b/src/specfem3D/save_regular_kernels.f90
index 56bea4dea..d88b4039e 100644
--- a/src/specfem3D/save_regular_kernels.f90
+++ b/src/specfem3D/save_regular_kernels.f90
@@ -46,7 +46,7 @@ subroutine save_regular_kernels_cm()
   real(kind=CUSTOM_REAL) :: alphah_kl,alphav_kl,betah_kl,betav_kl,rhonotprime_kl
   real(kind=CUSTOM_REAL) :: theta,phi
   integer :: ispec,i,j,k,iglob
-  double precision :: hlagrange
+  real(kind=CUSTOM_REAL) :: hlagrange
   integer :: ipoint
 
   ! transverse isotropic parameters
@@ -68,12 +68,12 @@ subroutine save_regular_kernels_cm()
   if (.not. SAVE_REGULAR_KL) return
 
   ! scaling factors
-  scale_kl = scale_t * scale_displ_inv * 1.d9
+  scale_kl = real(scale_t * scale_displ_inv * 1.d9,kind=CUSTOM_REAL)
   ! For anisotropic kernels
   ! final unit : [s km^(-3) GPa^(-1)]
-  scale_kl_ani = scale_t**3 / (RHOAV*R_PLANET**3) * 1.d18
+  scale_kl_ani = real(scale_t**3 / (RHOAV*R_PLANET**3) * 1.d18,kind=CUSTOM_REAL)
   ! final unit : [s km^(-3) (kg/m^3)^(-1)]
-  scale_kl_rho = scale_t * scale_displ_inv / RHOAV * 1.d9
+  scale_kl_rho = real(scale_t * scale_displ_inv / RHOAV * 1.d9,kind=CUSTOM_REAL)
 
   ! allocates temporary arrays
   allocate(rho_kl_crust_mantle_reg(npoints_slice_reg), &
@@ -140,7 +140,7 @@ subroutine save_regular_kernels_cm()
       do j = 1, NGLLY
         do i = 1, NGLLX
 
-          hlagrange = hxir_reg(i,ipoint) * hetar_reg(j,ipoint) * hgammar_reg(k,ipoint)
+          hlagrange = real(hxir_reg(i,ipoint) * hetar_reg(j,ipoint) * hgammar_reg(k,ipoint),kind=CUSTOM_REAL)
 
           if (ANISOTROPIC_KL) then
 
diff --git a/src/specfem3D/setup_sources_receivers.f90 b/src/specfem3D/setup_sources_receivers.f90
index 97256573f..296348d06 100644
--- a/src/specfem3D/setup_sources_receivers.f90
+++ b/src/specfem3D/setup_sources_receivers.f90
@@ -1904,12 +1904,12 @@ subroutine setup_receivers_precompute_intp()
         do k = 1,NGLLZ
           do j = 1,NGLLY
             do i = 1,NGLLX
-              hxi = hxir_adjstore(i,irec_local)
-              heta = hetar_adjstore(j,irec_local)
-              hgamma = hgammar_adjstore(k,irec_local)
+              hxi = real(hxir_adjstore(i,irec_local),kind=CUSTOM_REAL)
+              heta = real(hetar_adjstore(j,irec_local),kind=CUSTOM_REAL)
+              hgamma = real(hgammar_adjstore(k,irec_local),kind=CUSTOM_REAL)
               ! checks if array values valid
               ! Lagrange interpolators shoud be about in a range ~ [-0.2,1.2]
-              if (abs(hxi) > 2.0 .or. abs(heta) > 2.0 .or. abs(hgamma) > 2.0) then
+              if (abs(hxi) > 2.0_CUSTOM_REAL .or. abs(heta) > 2.0_CUSTOM_REAL .or. abs(hgamma) > 2.0_CUSTOM_REAL) then
                 print *,'hxi/heta/hgamma = ',hxi,heta,hgamma,irec_local,i,j,k
                 print *,'ERROR: trying to use arrays hxir_adjstore/hetar_adjstore/hgammar_adjstore with irec_local = ', &
                         irec_local,' but these array values are invalid!'
diff --git a/src/specfem3D/write_movie_surface.f90 b/src/specfem3D/write_movie_surface.f90
index bb37a0e0b..8e70ca87c 100644
--- a/src/specfem3D/write_movie_surface.f90
+++ b/src/specfem3D/write_movie_surface.f90
@@ -186,14 +186,14 @@ subroutine write_movie_surface()
         ! wavefield values
         if (MOVIE_VOLUME_TYPE == 5) then
           ! stores displacement
-          store_val_ux(ipoin) = displ_crust_mantle(1,iglob)*scale_displ
-          store_val_uy(ipoin) = displ_crust_mantle(2,iglob)*scale_displ
-          store_val_uz(ipoin) = displ_crust_mantle(3,iglob)*scale_displ
+          store_val_ux(ipoin) = displ_crust_mantle(1,iglob) * real(scale_displ,kind=CUSTOM_REAL)
+          store_val_uy(ipoin) = displ_crust_mantle(2,iglob) * real(scale_displ,kind=CUSTOM_REAL)
+          store_val_uz(ipoin) = displ_crust_mantle(3,iglob) * real(scale_displ,kind=CUSTOM_REAL)
         else
           ! stores velocity
-          store_val_ux(ipoin) = veloc_crust_mantle(1,iglob)*scale_veloc
-          store_val_uy(ipoin) = veloc_crust_mantle(2,iglob)*scale_veloc
-          store_val_uz(ipoin) = veloc_crust_mantle(3,iglob)*scale_veloc
+          store_val_ux(ipoin) = veloc_crust_mantle(1,iglob) * real(scale_veloc,kind=CUSTOM_REAL)
+          store_val_uy(ipoin) = veloc_crust_mantle(2,iglob) * real(scale_veloc,kind=CUSTOM_REAL)
+          store_val_uz(ipoin) = veloc_crust_mantle(3,iglob) * real(scale_veloc,kind=CUSTOM_REAL)
         endif
 
       enddo
diff --git a/src/specfem3D/write_movie_volume.f90 b/src/specfem3D/write_movie_volume.f90
index 0d67a1305..3e128c53f 100644
--- a/src/specfem3D/write_movie_volume.f90
+++ b/src/specfem3D/write_movie_volume.f90
@@ -877,7 +877,7 @@ subroutine write_movie_volume_displnorm(displ_crust_mantle,displ_inner_core,disp
           do i = 1, NGLLX
             iglob = ibool_crust_mantle(i,j,k,ispec)
             ! norm
-            tmp_data(i,j,k,ispec) = scale_displ * sqrt( displ_crust_mantle(1,iglob)**2 &
+            tmp_data(i,j,k,ispec) = real(scale_displ,kind=CUSTOM_REAL) * sqrt( displ_crust_mantle(1,iglob)**2 &
                                           + displ_crust_mantle(2,iglob)**2 &
                                           + displ_crust_mantle(3,iglob)**2 )
           enddo
@@ -929,7 +929,7 @@ subroutine write_movie_volume_displnorm(displ_crust_mantle,displ_inner_core,disp
           do i = 1, NGLLX
             iglob = ibool_inner_core(i,j,k,ispec)
             ! norm
-            tmp_data(i,j,k,ispec) = scale_displ * sqrt( displ_inner_core(1,iglob)**2 &
+            tmp_data(i,j,k,ispec) = real(scale_displ,kind=CUSTOM_REAL) * sqrt( displ_inner_core(1,iglob)**2 &
                                           + displ_inner_core(2,iglob)**2 &
                                           + displ_inner_core(3,iglob)**2 )
           enddo
@@ -994,7 +994,7 @@ subroutine write_movie_volume_velnorm(veloc_crust_mantle,veloc_inner_core,veloc_
           do i = 1, NGLLX
             iglob = ibool_crust_mantle(i,j,k,ispec)
             ! norm of velocity
-            tmp_data(i,j,k,ispec) = scale_veloc * sqrt( veloc_crust_mantle(1,iglob)**2 &
+            tmp_data(i,j,k,ispec) = real(scale_veloc,kind=CUSTOM_REAL) * sqrt( veloc_crust_mantle(1,iglob)**2 &
                                           + veloc_crust_mantle(2,iglob)**2 &
                                           + veloc_crust_mantle(3,iglob)**2 )
           enddo
@@ -1046,7 +1046,7 @@ subroutine write_movie_volume_velnorm(veloc_crust_mantle,veloc_inner_core,veloc_
           do i = 1, NGLLX
             iglob = ibool_inner_core(i,j,k,ispec)
             ! norm of velocity
-            tmp_data(i,j,k,ispec) = scale_veloc * sqrt( veloc_inner_core(1,iglob)**2 &
+            tmp_data(i,j,k,ispec) = real(scale_veloc,kind=CUSTOM_REAL) * sqrt( veloc_inner_core(1,iglob)**2 &
                                           + veloc_inner_core(2,iglob)**2 &
                                           + veloc_inner_core(3,iglob)**2 )
           enddo
@@ -1099,7 +1099,7 @@ subroutine write_movie_volume_accelnorm(accel_crust_mantle,accel_inner_core,acce
   logical,parameter :: OUTPUT_INNER_CORE = .true.
 
   ! dimensionalized scaling
-  scale_accel = scale_veloc * scale_t_inv
+  scale_accel = real(scale_veloc * scale_t_inv,kind=CUSTOM_REAL)
 
   ! outputs norm of acceleration
   if (OUTPUT_CRUST_MANTLE) then
diff --git a/src/specfem3D/write_seismograms.f90 b/src/specfem3D/write_seismograms.f90
index a413d1af4..94e3e3454 100644
--- a/src/specfem3D/write_seismograms.f90
+++ b/src/specfem3D/write_seismograms.f90
@@ -589,8 +589,8 @@ subroutine write_one_seismogram(one_seismogram,irec,irec_local,is_for_asdf)
          phi = 0.d0
       endif
 
-      cphi = cos(phi*DEGREES_TO_RADIANS)
-      sphi = sin(phi*DEGREES_TO_RADIANS)
+      cphi = real(cos(phi*DEGREES_TO_RADIANS),kind=CUSTOM_REAL)
+      sphi = real(sin(phi*DEGREES_TO_RADIANS),kind=CUSTOM_REAL)
 
       ! do the rotation of the components and put result in
       ! new variable seismogram_tmp
diff --git a/src/tomography/compute_kernel_integral.f90 b/src/tomography/compute_kernel_integral.f90
index ace9a6239..aacdb1581 100644
--- a/src/tomography/compute_kernel_integral.f90
+++ b/src/tomography/compute_kernel_integral.f90
@@ -83,7 +83,7 @@ subroutine compute_kernel_integral_iso()
   do k = 1,NGLLZ
     do j = 1,NGLLY
       do i = 1,NGLLX
-        wgll_cube(i,j,k) = wxgll(i)*wygll(j)*wzgll(k)
+        wgll_cube(i,j,k) = real(wxgll(i)*wygll(j)*wzgll(k),kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -285,7 +285,7 @@ subroutine compute_kernel_integral_tiso()
   do k = 1,NGLLZ
     do j = 1,NGLLY
       do i = 1,NGLLX
-        wgll_cube(i,j,k) = wxgll(i)*wygll(j)*wzgll(k)
+        wgll_cube(i,j,k) = real(wxgll(i)*wygll(j)*wzgll(k),kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -515,7 +515,7 @@ subroutine compute_kernel_integral_tiso_iso()
   do k = 1,NGLLZ
     do j = 1,NGLLY
       do i = 1,NGLLX
-        wgll_cube(i,j,k) = wxgll(i)*wygll(j)*wzgll(k)
+        wgll_cube(i,j,k) = real(wxgll(i)*wygll(j)*wzgll(k),kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
diff --git a/src/tomography/get_cg_direction.f90 b/src/tomography/get_cg_direction.f90
index 1eee0c157..f4da49d4f 100644
--- a/src/tomography/get_cg_direction.f90
+++ b/src/tomography/get_cg_direction.f90
@@ -64,9 +64,9 @@ subroutine get_gradient_cg_tiso()
 
   ! normalized radii
   ! top at 50km depth
-  KERNEL_R_TOP = (R_PLANET_KM - 50.0 ) / R_PLANET_KM ! shallow depth
+  KERNEL_R_TOP = real((R_PLANET_KM - 50.0 ) / R_PLANET_KM,kind=CUSTOM_REAL) ! shallow depth
   ! bottom at 100km depth
-  KERNEL_R_BOTTOM = (R_PLANET_KM - 100.0 ) / R_PLANET_KM ! deep depth
+  KERNEL_R_BOTTOM = real((R_PLANET_KM - 100.0 ) / R_PLANET_KM,kind=CUSTOM_REAL) ! deep depth
 
   ! allocate arrays for storing gradient
   ! transversely isotropic arrays
@@ -397,7 +397,7 @@ subroutine get_gradient_cg_tiso()
       max = maxval(depthmax)
       maxindex = maxloc(depthmax)
       depthmax_depth = depthmax_radius(maxindex(1))
-      depthmax_depth = R_PLANET_KM *( 1.0 - depthmax_depth )
+      depthmax_depth = real(R_PLANET_KM *( 1.d0 - depthmax_depth ),kind=CUSTOM_REAL)
       ! maximum in given depth range
       print *,'  using depth maximum: '
       print *,'  between depths (top/bottom)   : ',KERNEL_R_top,KERNEL_R_bottom
diff --git a/src/tomography/get_sd_direction.f90 b/src/tomography/get_sd_direction.f90
index 753e8e455..c213f5ace 100644
--- a/src/tomography/get_sd_direction.f90
+++ b/src/tomography/get_sd_direction.f90
@@ -46,9 +46,9 @@ subroutine get_gradient_steepest_iso()
 
   ! normalized radii
   ! top at 50km depth
-  KERNEL_R_TOP = (R_PLANET_KM - 50.0 ) / R_PLANET_KM ! shallow depth
+  KERNEL_R_TOP = real((R_PLANET_KM - 50.0 ) / R_PLANET_KM,kind=CUSTOM_REAL) ! shallow depth
   ! bottom at 100km depth
-  KERNEL_R_BOTTOM = (R_PLANET_KM - 100.0 ) / R_PLANET_KM ! deep depth
+  KERNEL_R_BOTTOM = real((R_PLANET_KM - 100.0 ) / R_PLANET_KM,kind=CUSTOM_REAL) ! deep depth
 
   ! allocate arrays for storing gradient
   ! isotropic arrays
@@ -136,7 +136,7 @@ subroutine get_gradient_steepest_iso()
     ! maximum of all processes stored in max_vsv
     call max_all_cr(max,max_beta)
     max = max_beta
-    depth_max = R_PLANET_KM *( 1.0 - depth_max )
+    depth_max = real(R_PLANET_KM *( 1.d0 - depth_max ),kind=CUSTOM_REAL)
   endif
 
   ! determines step length based on maximum gradient value (either shear or bulk)
@@ -263,9 +263,9 @@ subroutine get_gradient_steepest_tiso()
 
   ! normalized radii
   ! top at 50km depth
-  KERNEL_R_TOP = (R_PLANET_KM - 50.0 ) / R_PLANET_KM ! shallow depth
+  KERNEL_R_TOP = real((R_PLANET_KM - 50.0 ) / R_PLANET_KM,kind=CUSTOM_REAL) ! shallow depth
   ! bottom at 100km depth
-  KERNEL_R_BOTTOM = (R_PLANET_KM - 100.0 ) / R_PLANET_KM ! deep depth
+  KERNEL_R_BOTTOM = real((R_PLANET_KM - 100.0 ) / R_PLANET_KM,kind=CUSTOM_REAL) ! deep depth
 
   ! allocate arrays for storing gradient
   ! transversely isotropic arrays
@@ -352,7 +352,7 @@ subroutine get_gradient_steepest_tiso()
     ! maximum of all processes stored in max_vsv
     call max_all_cr(max,max_vsv)
     max = max_vsv
-    depth_max = R_PLANET_KM *( 1.0 - depth_max )
+    depth_max = real(R_PLANET_KM *( 1.d0 - depth_max ),kind=CUSTOM_REAL)
   endif
 
   ! determines step length
diff --git a/src/tomography/postprocess_sensitivity_kernels/create_cross_section.F90 b/src/tomography/postprocess_sensitivity_kernels/create_cross_section.F90
index b0e1200c5..38f08e737 100644
--- a/src/tomography/postprocess_sensitivity_kernels/create_cross_section.F90
+++ b/src/tomography/postprocess_sensitivity_kernels/create_cross_section.F90
@@ -483,7 +483,8 @@ program cross_section
              model2(nglob_target),stat=ier)
     if (ier /= 0) stop 'Error allocating target model point arrays'
     x2(:) = 0.0_CUSTOM_REAL; y2(:) = 0.0_CUSTOM_REAL; z2(:) = 0.0_CUSTOM_REAL
-    model_distance2(:) = HUGEVAL; model2(:) = 0.0_CUSTOM_REAL
+    model_distance2(:) = real(HUGEVAL,kind=CUSTOM_REAL)
+    model2(:) = 0.0_CUSTOM_REAL
 
     ! creates cross-section points
     call set_horiz_cross_section_points(myrank,nglob_target,x2,y2,z2, &
@@ -555,7 +556,8 @@ program cross_section
              model2(nglob_target),stat=ier)
     if (ier /= 0) stop 'Error allocating target model point arrays'
     x2(:) = 0.0_CUSTOM_REAL; y2(:) = 0.0_CUSTOM_REAL; z2(:) = 0.0_CUSTOM_REAL
-    model_distance2(:) = HUGEVAL; model2(:) = 0.0_CUSTOM_REAL
+    model_distance2(:) = real(HUGEVAL,kind=CUSTOM_REAL)
+    model2(:) = 0.0_CUSTOM_REAL
 
     ! creates cross-section points
     call set_vertical_cross_section_points(myrank,nglob_target,x2,y2,z2, &
@@ -992,9 +994,9 @@ subroutine set_horiz_cross_section_points(myrank,nglob_target,x2,y2,z2, &
         iglob = iglob + 1
         if (iglob > nglob_target) stop 'Error iglob exceeds total size'
 
-        x2(iglob) = x_target
-        y2(iglob) = y_target
-        z2(iglob) = z_target
+        x2(iglob) = real(x_target,kind=CUSTOM_REAL)
+        y2(iglob) = real(y_target,kind=CUSTOM_REAL)
+        z2(iglob) = real(z_target,kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -1304,9 +1306,9 @@ subroutine set_vertical_cross_section_points(myrank,nglob_target,x2,y2,z2, &
       iglob = iglob + 1
       if (iglob > nglob_target) stop 'Error iglob exceeds total size'
 
-      x2(iglob) = x_target
-      y2(iglob) = y_target
-      z2(iglob) = z_target
+      x2(iglob) = real(x_target,kind=CUSTOM_REAL)
+      y2(iglob) = real(y_target,kind=CUSTOM_REAL)
+      z2(iglob) = real(z_target,kind=CUSTOM_REAL)
     enddo
   enddo
   ! checks point count
@@ -1470,7 +1472,7 @@ subroutine get_model_values_cross_section(nglob_target,x2,y2,z2,model2,model_dis
   call synchronize_all()
 
   ! initializes closest distance
-  model_distance2(:) = HUGEVAL
+  model_distance2(:) = real(HUGEVAL,kind=CUSTOM_REAL)
 
   ! normalized search radius (around target)
   r_search = 1.d0 * typical_size
@@ -1830,7 +1832,7 @@ subroutine set_interpolated_value(dist_min,nglob_target,model2,model_distance2,
   if (dist_min < model_distance2(iglob)) then
 
     ! sets new minimum distance
-    model_distance2(iglob) = dist_min
+    model_distance2(iglob) = real(dist_min,kind=CUSTOM_REAL)
 
     ! interpolate model values
     call interpolate_element_value(xi,eta,gamma,ispec_selected, &
@@ -2188,7 +2190,7 @@ subroutine write_cross_section(nglob_target,x2,y2,z2,model2,model_distance2, &
   logical, parameter :: PLOT_ALL_POINTS = .false.
 
   ! minimum distance allowed
-  distance_limit = 1.0 * typical_size
+  distance_limit = real(1.d0 * typical_size,kind=CUSTOM_REAL)
 
   ! opens file
   open(IOUT,file=trim(filename),status='unknown', action='write',iostat=ier)
@@ -2320,7 +2322,7 @@ subroutine get_cross_section_avg(nglob_target,x2,y2,z2,model2,model_distance2, &
   ipoints_integral = 0
 
   ! minimum distance allowed
-  distance_limit = 1.0 * typical_size
+  distance_limit = real(1.d0 * typical_size,kind=CUSTOM_REAL)
 
   ! loops over all cross-section points
   do iglob = 1,nglob_target
@@ -2335,9 +2337,9 @@ subroutine get_cross_section_avg(nglob_target,x2,y2,z2,model2,model_distance2, &
 
     ! stores lat/lon/r in x2,y2,z2 arrays
     ! such that we don't need to recompute these when writing out
-    x2(iglob) = lat
-    y2(iglob) = lon
-    z2(iglob) = r
+    x2(iglob) = real(lat,kind=CUSTOM_REAL)
+    y2(iglob) = real(lon,kind=CUSTOM_REAL)
+    z2(iglob) = real(r,kind=CUSTOM_REAL)
 
     ! distance to closest mesh point
     dist_min = model_distance2(iglob)
@@ -2407,7 +2409,7 @@ subroutine get_cross_section_avg(nglob_target,x2,y2,z2,model2,model_distance2, &
 
     ! difference
     diff = m_val - m_avg_total
-    model_diff(iglob) = diff
+    model_diff(iglob) = real(diff,kind=CUSTOM_REAL)
 
     ! relative perturbations
     ! logarithmic perturbation: log( m_new) - log( m_avg) = log( m_new / m_avg )
@@ -2423,7 +2425,7 @@ subroutine get_cross_section_avg(nglob_target,x2,y2,z2,model2,model_distance2, &
         pert = (m_val - m_avg_total) / abs(m_avg_total)
       endif
     endif
-    model_pert(iglob) = pert
+    model_pert(iglob) = real(pert,kind=CUSTOM_REAL)
 
     ! only points within close distance to mesh points will be considered for statistics
     if (dist_min < distance_limit) then
diff --git a/src/tomography/postprocess_sensitivity_kernels/laplacian_smoothing_sem.F90 b/src/tomography/postprocess_sensitivity_kernels/laplacian_smoothing_sem.F90
index 5bd887061..04ffecdd6 100644
--- a/src/tomography/postprocess_sensitivity_kernels/laplacian_smoothing_sem.F90
+++ b/src/tomography/postprocess_sensitivity_kernels/laplacian_smoothing_sem.F90
@@ -121,11 +121,11 @@ program smooth_laplacian_sem
 
   ! Hessian
   logical :: is_hess
+  logical :: is_kernel
 
   ! ADIOS
 #ifdef USE_ADIOS_INSTEAD_OF_MESH
   integer(kind=8) :: group_size_inc,local_dim
-  logical :: is_kernel
 #endif
 
   ! number of steps to reach 100 percent, i.e. 10 outputs info for every 10 percent
@@ -587,16 +587,16 @@ program smooth_laplacian_sem
           !   Ly = Lh2 * (1 - e2 * (sin(theta) * sin(phi)) ** 2 )
 
           ! Apply scaling
-          dxsi_dx(i,j,k,iel)  = dxsi_dxl * Lh2
-          deta_dx(i,j,k,iel)  = deta_dxl * Lh2
-          dgam_dx(i,j,k,iel)  = dgam_dxl * Lv2
-          dxsi_dy(i,j,k,iel)  = dxsi_dyl * Lh2
-          deta_dy(i,j,k,iel)  = deta_dyl * Lh2
-          dgam_dy(i,j,k,iel)  = dgam_dyl * Lv2
-          dxsi_dz(i,j,k,iel)  = dxsi_dzl * Lh2
-          deta_dz(i,j,k,iel)  = deta_dzl * Lh2
-          dgam_dz(i,j,k,iel)  = dgam_dzl * Lv2
-          jacobian(i,j,k,iel) = jacobianl / (Lh*Lh*Lv)
+          dxsi_dx(i,j,k,iel)  = real(dxsi_dxl * Lh2,kind=CUSTOM_REAL)
+          deta_dx(i,j,k,iel)  = real(deta_dxl * Lh2,kind=CUSTOM_REAL)
+          dgam_dx(i,j,k,iel)  = real(dgam_dxl * Lv2,kind=CUSTOM_REAL)
+          dxsi_dy(i,j,k,iel)  = real(dxsi_dyl * Lh2,kind=CUSTOM_REAL)
+          deta_dy(i,j,k,iel)  = real(deta_dyl * Lh2,kind=CUSTOM_REAL)
+          dgam_dy(i,j,k,iel)  = real(dgam_dyl * Lv2,kind=CUSTOM_REAL)
+          dxsi_dz(i,j,k,iel)  = real(dxsi_dzl * Lh2,kind=CUSTOM_REAL)
+          deta_dz(i,j,k,iel)  = real(deta_dzl * Lh2,kind=CUSTOM_REAL)
+          dgam_dz(i,j,k,iel)  = real(dgam_dzl * Lv2,kind=CUSTOM_REAL)
+          jacobian(i,j,k,iel) = real(jacobianl / (Lh*Lh*Lv),kind=CUSTOM_REAL)
         enddo
       enddo
     enddo
@@ -641,18 +641,13 @@ program smooth_laplacian_sem
   !         must solve A A s = M m
   !         where A = (M + K)
   !    done with two conjugate gradients
+  is_hess = .false.
+  is_kernel = .false.
+
   do iker = 1, nker
     !! Read input kernels
     kernel_name = kernel_names(iker)
-#ifdef USE_ADIOS_INSTEAD_OF_MESH
-    ! ADIOS single file opening
-    ! user output
-    if (myrank == 0) then
-      print *, 'reading in ADIOS input file : ',trim(input_file)
-    endif
-    call init_adios_group(myadios_val_group, "ValReader")
-    call open_file_adios_read(myadios_val_file, myadios_val_group, trim(input_file))
-    ! ADIOS array name
+
     ! determines if parameter name is for a kernel
     is_kernel = .false.
     if (len_trim(kernel_name) > 3) then
@@ -666,6 +661,16 @@ program smooth_laplacian_sem
         is_hess = .true.
       endif
     endif
+
+#ifdef USE_ADIOS_INSTEAD_OF_MESH
+    ! ADIOS single file opening
+    ! user output
+    if (myrank == 0) then
+      print *, 'reading in ADIOS input file : ',trim(input_file)
+    endif
+    call init_adios_group(myadios_val_group, "ValReader")
+    call open_file_adios_read(myadios_val_file, myadios_val_group, trim(input_file))
+    ! ADIOS array name
     if (is_kernel) then
       ! NOTE: reg1 => crust_mantle, others are not implemented
       varname = trim(kernel_name) // "_crust_mantle"
diff --git a/src/tomography/postprocess_sensitivity_kernels/smooth_sem.F90 b/src/tomography/postprocess_sensitivity_kernels/smooth_sem.F90
index 96e69bb3c..f3016146d 100644
--- a/src/tomography/postprocess_sensitivity_kernels/smooth_sem.F90
+++ b/src/tomography/postprocess_sensitivity_kernels/smooth_sem.F90
@@ -396,8 +396,8 @@ program smooth_sem_globe
   if (NCHUNKS_VAL == 6) then
     element_size = real(TWO_PI / dble(4) * R_PLANET_KM / dble(NEX_XI_VAL),kind=CUSTOM_REAL)
   else
-    ANGULAR_WIDTH_XI_RAD = ANGULAR_WIDTH_XI_IN_DEGREES_VAL * DEGREES_TO_RADIANS
-    ANGULAR_WIDTH_ETA_RAD = ANGULAR_WIDTH_ETA_IN_DEGREES_VAL * DEGREES_TO_RADIANS
+    ANGULAR_WIDTH_XI_RAD = real(ANGULAR_WIDTH_XI_IN_DEGREES_VAL * DEGREES_TO_RADIANS,kind=CUSTOM_REAL)
+    ANGULAR_WIDTH_ETA_RAD = real(ANGULAR_WIDTH_ETA_IN_DEGREES_VAL * DEGREES_TO_RADIANS,kind=CUSTOM_REAL)
     element_size = max( ANGULAR_WIDTH_XI_RAD/NEX_XI_VAL,ANGULAR_WIDTH_ETA_RAD/NEX_ETA_VAL ) * real(R_PLANET_KM,kind=CUSTOM_REAL)
   endif
 
@@ -500,9 +500,9 @@ program smooth_sem_globe
 !          and vertical distance as radial distance?
 
 ! not squared since epicentral distance is taken? values from bk seem to be closer to squared ones...
-  norm_h = sqrt(2.0*PI) * sigma_h
+  norm_h = real(sqrt(2.d0*PI) * sigma_h,kind=CUSTOM_REAL)
   norm_h = norm_h * norm_h ! squared since 2 horizontal directions
-  norm_v = sqrt(2.0*PI) * sigma_v
+  norm_v = real(sqrt(2.d0*PI) * sigma_v,kind=CUSTOM_REAL)
   norm   = norm_h * norm_v
   !norm = (sqrt(2.0*PI) * sigma) ** 3 ! for sigma_h = sigma_v = sigma
 
@@ -1514,7 +1514,7 @@ subroutine compute_smooth(nker,bk,tk, &
   ! kd-tree search uses either a spherical or ellipsoid search
   logical,parameter :: DO_SEARCH_ELLIP = .true.
 
-  real(kind=CUSTOM_REAL),parameter :: PI2 = PI * PI ! squared
+  real(kind=CUSTOM_REAL),parameter :: PI2 = real(PI * PI,kind=CUSTOM_REAL) ! squared
 
   ! debugging
   !logical, parameter :: DEBUG = .false.
@@ -1873,7 +1873,7 @@ subroutine get_distance_vec_squared(dist_h,dist_v,x0,y0,z0,x1,y1,z1)
   real(kind=CUSTOM_REAL) :: theta,ratio,alpha
   real(kind=CUSTOM_REAL) :: vx,vy,vz,r0,r1
 
-  real(kind=CUSTOM_REAL),parameter :: PI2 = PI * PI ! squared
+  real(kind=CUSTOM_REAL),parameter :: PI2 = real(PI * PI,kind=CUSTOM_REAL) ! squared
 
   ! note: instead of distance we use distance squared to avoid too many sqrt() operations
 

From 3e8dfaa1344cc3da8e546ee1edbdd02095b5b617 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 00:36:31 +0200
Subject: [PATCH 04/11] adds read_mesh_* routines for infinite element mesh

---
 src/shared/memory_eval.f90                  |    1 +
 src/specfem3D/bcast_mesh_databases.f90      | 1015 ++++++++++++
 src/specfem3D/initialize_simulation.F90     |   13 +-
 src/specfem3D/prepare_timerun.F90           |    3 +
 src/specfem3D/read_arrays_solver.f90        |    9 +-
 src/specfem3D/read_arrays_solver_adios.F90  |   27 +-
 src/specfem3D/read_mesh_databases.F90       | 1543 ++++++++++---------
 src/specfem3D/read_mesh_databases_adios.f90 |  231 ++-
 src/specfem3D/read_mesh_parameters.F90      |   14 +-
 src/specfem3D/rules.mk                      |    7 +
 src/specfem3D/specfem3D_par.F90             |   57 +-
 11 files changed, 2158 insertions(+), 762 deletions(-)
 create mode 100644 src/specfem3D/bcast_mesh_databases.f90

diff --git a/src/shared/memory_eval.f90 b/src/shared/memory_eval.f90
index 08216dcbd..4854585e7 100644
--- a/src/shared/memory_eval.f90
+++ b/src/shared/memory_eval.f90
@@ -178,6 +178,7 @@ subroutine memory_eval(NEX_PER_PROC_XI,NEX_PER_PROC_ETA, &
     NSPEC_INNER_CORE_STRAIN_ONLY = 0
   endif
 
+  ! adjoint sizes
   if ((SIMULATION_TYPE == 1 .and. SAVE_FORWARD) .or. SIMULATION_TYPE == 3) then
     NSPEC_CRUST_MANTLE_ADJOINT = NSPEC_REGIONS(IREGION_CRUST_MANTLE)
     NSPEC_OUTER_CORE_ADJOINT = NSPEC_REGIONS(IREGION_OUTER_CORE)
diff --git a/src/specfem3D/bcast_mesh_databases.f90 b/src/specfem3D/bcast_mesh_databases.f90
new file mode 100644
index 000000000..9ede276f9
--- /dev/null
+++ b/src/specfem3D/bcast_mesh_databases.f90
@@ -0,0 +1,1015 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine bcast_mesh_databases_CM()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  implicit none
+
+! note: the size(..) function returns either integer(kind=4) or integer(kind=8)
+!       depending on compiler flags (-mcmedium), thus adding a kind argument to have integer(kind=4) output
+
+  !call bcast_all_i_for_database(NSPEC_CRUST_MANTLE, 1)
+  !call bcast_all_i_for_database(NGLOB_CRUST_MANTLE, 1)
+  !call bcast_all_i_for_database(NGLOB_XY_CM, 1)
+
+  if (size(rho_vp_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(rho_vp_crust_mantle(1,1,1,1), size(rho_vp_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(rho_vs_crust_mantle(1,1,1,1), size(rho_vs_crust_mantle,kind=4))
+  endif
+
+  if (size(xstore_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(xstore_crust_mantle(1), size(xstore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(ystore_crust_mantle(1), size(ystore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(zstore_crust_mantle(1), size(zstore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(xix_crust_mantle(1,1,1,1), size(xix_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(xiy_crust_mantle(1,1,1,1), size(xiy_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(xiz_crust_mantle(1,1,1,1), size(xiz_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(etax_crust_mantle(1,1,1,1), size(etax_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(etay_crust_mantle(1,1,1,1), size(etay_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(etaz_crust_mantle(1,1,1,1), size(etaz_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(gammax_crust_mantle(1,1,1,1), size(gammax_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(gammay_crust_mantle(1,1,1,1), size(gammay_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(gammaz_crust_mantle(1,1,1,1), size(gammaz_crust_mantle,kind=4))
+  endif
+
+  if (size(rhostore_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(rhostore_crust_mantle(1,1,1,1), size(rhostore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(kappavstore_crust_mantle(1,1,1,1), size(kappavstore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(muvstore_crust_mantle(1,1,1,1), size(muvstore_crust_mantle,kind=4))
+  endif
+
+  if (size(kappahstore_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(kappahstore_crust_mantle(1,1,1,1), size(kappahstore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(muhstore_crust_mantle(1,1,1,1), size(muhstore_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(eta_anisostore_crust_mantle(1,1,1,1), size(eta_anisostore_crust_mantle,kind=4))
+  endif
+
+  if (size(c11store_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(c11store_crust_mantle(1,1,1,1), size(c11store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c12store_crust_mantle(1,1,1,1), size(c12store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c13store_crust_mantle(1,1,1,1), size(c13store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c14store_crust_mantle(1,1,1,1), size(c14store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c15store_crust_mantle(1,1,1,1), size(c15store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c16store_crust_mantle(1,1,1,1), size(c16store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c22store_crust_mantle(1,1,1,1), size(c22store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c23store_crust_mantle(1,1,1,1), size(c23store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c24store_crust_mantle(1,1,1,1), size(c24store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c25store_crust_mantle(1,1,1,1), size(c25store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c26store_crust_mantle(1,1,1,1), size(c26store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c33store_crust_mantle(1,1,1,1), size(c33store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c34store_crust_mantle(1,1,1,1), size(c34store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c35store_crust_mantle(1,1,1,1), size(c35store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c36store_crust_mantle(1,1,1,1), size(c36store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c44store_crust_mantle(1,1,1,1), size(c44store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c45store_crust_mantle(1,1,1,1), size(c45store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c46store_crust_mantle(1,1,1,1), size(c46store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c55store_crust_mantle(1,1,1,1), size(c55store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c56store_crust_mantle(1,1,1,1), size(c56store_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(c66store_crust_mantle(1,1,1,1), size(c66store_crust_mantle,kind=4))
+  endif
+
+  if (size(mu0store_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(mu0store_crust_mantle(1,1,1,1), size(mu0store_crust_mantle,kind=4))
+  endif
+
+  if (size(ibool_crust_mantle) > 0) then
+    call bcast_all_i_for_database(ibool_crust_mantle(1,1,1,1), size(ibool_crust_mantle,kind=4))
+    call bcast_all_l_for_database(ispec_is_tiso_crust_mantle(1), size(ispec_is_tiso_crust_mantle,kind=4))
+  endif
+
+  if (size(rmassx_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(rmassx_crust_mantle(1), size(rmassx_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(rmassy_crust_mantle(1), size(rmassy_crust_mantle,kind=4))
+  endif
+
+  if (size(rmassz_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(rmassz_crust_mantle(1), size(rmassz_crust_mantle,kind=4))
+  endif
+
+  !call bcast_all_i_for_database(NGLOB_CRUST_MANTLE_OCEANS, 1)
+  if (size(rmass_ocean_load) > 0) then
+    call bcast_all_cr_for_database(rmass_ocean_load(1), size(rmass_ocean_load,kind=4))
+  endif
+
+  if (size(b_rmassx_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(b_rmassx_crust_mantle(1), size(b_rmassx_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(b_rmassy_crust_mantle(1), size(b_rmassy_crust_mantle,kind=4))
+  endif
+
+  end subroutine bcast_mesh_databases_CM
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_OC()
+
+  use specfem_par
+  use specfem_par_outercore
+  implicit none
+
+  !call bcast_all_i_for_database(NSPEC_OUTER_CORE, 1)
+  !call bcast_all_i_for_database(NGLOB_OUTER_CORE, 1)
+
+  if (size(vp_outer_core) > 0) then
+    call bcast_all_cr_for_database(vp_outer_core(1,1,1,1), size(vp_outer_core,kind=4))
+  endif
+
+  if (size(xstore_outer_core) > 0) then
+    call bcast_all_cr_for_database(xstore_outer_core(1), size(xstore_outer_core,kind=4))
+    call bcast_all_cr_for_database(ystore_outer_core(1), size(ystore_outer_core,kind=4))
+    call bcast_all_cr_for_database(zstore_outer_core(1), size(zstore_outer_core,kind=4))
+  endif
+
+  if (size(xix_outer_core) > 0) then
+    call bcast_all_cr_for_database(xix_outer_core(1,1,1,1), size(xix_outer_core,kind=4))
+    call bcast_all_cr_for_database(xiy_outer_core(1,1,1,1), size(xiy_outer_core,kind=4))
+    call bcast_all_cr_for_database(xiz_outer_core(1,1,1,1), size(xiz_outer_core,kind=4))
+    call bcast_all_cr_for_database(etax_outer_core(1,1,1,1), size(etax_outer_core,kind=4))
+    call bcast_all_cr_for_database(etay_outer_core(1,1,1,1), size(etay_outer_core,kind=4))
+    call bcast_all_cr_for_database(etaz_outer_core(1,1,1,1), size(etaz_outer_core,kind=4))
+    call bcast_all_cr_for_database(gammax_outer_core(1,1,1,1), size(gammax_outer_core,kind=4))
+    call bcast_all_cr_for_database(gammay_outer_core(1,1,1,1), size(gammay_outer_core,kind=4))
+    call bcast_all_cr_for_database(gammaz_outer_core(1,1,1,1), size(gammaz_outer_core,kind=4))
+  endif
+
+  if (size(rhostore_outer_core) > 0) then
+    call bcast_all_cr_for_database(rhostore_outer_core(1,1,1,1), size(rhostore_outer_core,kind=4))
+    call bcast_all_cr_for_database(kappavstore_outer_core(1,1,1,1), size(kappavstore_outer_core,kind=4))
+  endif
+
+  if (size(ibool_outer_core) > 0) then
+    call bcast_all_i_for_database(ibool_outer_core(1,1,1,1), size(ibool_outer_core,kind=4))
+  endif
+
+  if (size(rmass_outer_core) > 0) then
+    call bcast_all_cr_for_database(rmass_outer_core(1), size(rmass_outer_core,kind=4))
+  endif
+
+  end subroutine bcast_mesh_databases_OC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_IC()
+
+  use specfem_par
+  use specfem_par_innercore
+  implicit none
+
+  !call bcast_all_i_for_database(NSPEC_INNER_CORE, 1)
+  !call bcast_all_i_for_database(NGLOB_INNER_CORE, 1)
+  !call bcast_all_i_for_database(NGLOB_XY_IC, 1)
+
+  if (size(xstore_inner_core) > 0) then
+    call bcast_all_cr_for_database(xstore_inner_core(1), size(xstore_inner_core,kind=4))
+    call bcast_all_cr_for_database(ystore_inner_core(1), size(ystore_inner_core,kind=4))
+    call bcast_all_cr_for_database(zstore_inner_core(1), size(zstore_inner_core,kind=4))
+  endif
+
+  if (size(xix_inner_core) > 0) then
+    call bcast_all_cr_for_database(xix_inner_core(1,1,1,1), size(xix_inner_core,kind=4))
+    call bcast_all_cr_for_database(xiy_inner_core(1,1,1,1), size(xiy_inner_core,kind=4))
+    call bcast_all_cr_for_database(xiz_inner_core(1,1,1,1), size(xiz_inner_core,kind=4))
+    call bcast_all_cr_for_database(etax_inner_core(1,1,1,1), size(etax_inner_core,kind=4))
+    call bcast_all_cr_for_database(etay_inner_core(1,1,1,1), size(etay_inner_core,kind=4))
+    call bcast_all_cr_for_database(etaz_inner_core(1,1,1,1), size(etaz_inner_core,kind=4))
+    call bcast_all_cr_for_database(gammax_inner_core(1,1,1,1), size(gammax_inner_core,kind=4))
+    call bcast_all_cr_for_database(gammay_inner_core(1,1,1,1), size(gammay_inner_core,kind=4))
+    call bcast_all_cr_for_database(gammaz_inner_core(1,1,1,1), size(gammaz_inner_core,kind=4))
+  endif
+
+  if (size(rhostore_inner_core) > 0) then
+    call bcast_all_cr_for_database(rhostore_inner_core(1,1,1,1), size(rhostore_inner_core,kind=4))
+    call bcast_all_cr_for_database(kappavstore_inner_core(1,1,1,1), size(kappavstore_inner_core,kind=4))
+    call bcast_all_cr_for_database(muvstore_inner_core(1,1,1,1), size(muvstore_inner_core,kind=4))
+  endif
+
+  if (size(c11store_inner_core) > 0) then
+    call bcast_all_cr_for_database(c11store_inner_core(1,1,1,1), size(c11store_inner_core,kind=4))
+    call bcast_all_cr_for_database(c12store_inner_core(1,1,1,1), size(c12store_inner_core,kind=4))
+    call bcast_all_cr_for_database(c13store_inner_core(1,1,1,1), size(c13store_inner_core,kind=4))
+    call bcast_all_cr_for_database(c33store_inner_core(1,1,1,1), size(c33store_inner_core,kind=4))
+    call bcast_all_cr_for_database(c44store_inner_core(1,1,1,1), size(c44store_inner_core,kind=4))
+  endif
+
+  if (size(ibool_inner_core) > 0) then
+    call bcast_all_i_for_database(ibool_inner_core(1,1,1,1), size(ibool_inner_core,kind=4))
+    call bcast_all_i_for_database(idoubling_inner_core(1), size(idoubling_inner_core,kind=4))
+  endif
+
+  if (size(rmassx_inner_core) > 0) then
+    call bcast_all_cr_for_database(rmassx_inner_core(1), size(rmassx_inner_core,kind=4))
+    call bcast_all_cr_for_database(rmassy_inner_core(1), size(rmassy_inner_core,kind=4))
+  endif
+
+  if (size(rmassz_inner_core) > 0) then
+    call bcast_all_cr_for_database(rmassz_inner_core(1), size(rmassz_inner_core,kind=4))
+  endif
+
+  if (size(b_rmassx_inner_core) > 0) then
+    call bcast_all_cr_for_database(b_rmassx_inner_core(1), size(b_rmassx_inner_core,kind=4))
+    call bcast_all_cr_for_database(b_rmassy_inner_core(1), size(b_rmassy_inner_core,kind=4))
+  endif
+
+  end subroutine bcast_mesh_databases_IC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_TRINF()
+
+  use specfem_par
+  use specfem_par_trinfinite
+  implicit none
+
+  if (size(xstore_trinfinite) > 0) then
+    call bcast_all_cr_for_database(xstore_trinfinite(1), size(xstore_trinfinite,kind=4))
+    call bcast_all_cr_for_database(ystore_trinfinite(1), size(ystore_trinfinite,kind=4))
+    call bcast_all_cr_for_database(zstore_trinfinite(1), size(zstore_trinfinite,kind=4))
+  endif
+
+  if (size(xix_trinfinite) > 0) then
+    call bcast_all_cr_for_database(xix_trinfinite(1,1,1,1), size(xix_trinfinite,kind=4))
+    call bcast_all_cr_for_database(xiy_trinfinite(1,1,1,1), size(xiy_trinfinite,kind=4))
+    call bcast_all_cr_for_database(xiz_trinfinite(1,1,1,1), size(xiz_trinfinite,kind=4))
+    call bcast_all_cr_for_database(etax_trinfinite(1,1,1,1), size(etax_trinfinite,kind=4))
+    call bcast_all_cr_for_database(etay_trinfinite(1,1,1,1), size(etay_trinfinite,kind=4))
+    call bcast_all_cr_for_database(etaz_trinfinite(1,1,1,1), size(etaz_trinfinite,kind=4))
+    call bcast_all_cr_for_database(gammax_trinfinite(1,1,1,1), size(gammax_trinfinite,kind=4))
+    call bcast_all_cr_for_database(gammay_trinfinite(1,1,1,1), size(gammay_trinfinite,kind=4))
+    call bcast_all_cr_for_database(gammaz_trinfinite(1,1,1,1), size(gammaz_trinfinite,kind=4))
+  endif
+
+  if (size(ibool_trinfinite) > 0) then
+    call bcast_all_i_for_database(ibool_trinfinite(1,1,1,1), size(ibool_trinfinite,kind=4))
+  endif
+
+  end subroutine bcast_mesh_databases_TRINF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_INF()
+
+  use specfem_par
+  use specfem_par_infinite
+  implicit none
+
+  if (size(xstore_infinite) > 0) then
+    call bcast_all_cr_for_database(xstore_infinite(1), size(xstore_infinite,kind=4))
+    call bcast_all_cr_for_database(ystore_infinite(1), size(ystore_infinite,kind=4))
+    call bcast_all_cr_for_database(zstore_infinite(1), size(zstore_infinite,kind=4))
+  endif
+
+  if (size(xix_infinite) > 0) then
+    call bcast_all_cr_for_database(xix_infinite(1,1,1,1), size(xix_infinite,kind=4))
+    call bcast_all_cr_for_database(xiy_infinite(1,1,1,1), size(xiy_infinite,kind=4))
+    call bcast_all_cr_for_database(xiz_infinite(1,1,1,1), size(xiz_infinite,kind=4))
+    call bcast_all_cr_for_database(etax_infinite(1,1,1,1), size(etax_infinite,kind=4))
+    call bcast_all_cr_for_database(etay_infinite(1,1,1,1), size(etay_infinite,kind=4))
+    call bcast_all_cr_for_database(etaz_infinite(1,1,1,1), size(etaz_infinite,kind=4))
+    call bcast_all_cr_for_database(gammax_infinite(1,1,1,1), size(gammax_infinite,kind=4))
+    call bcast_all_cr_for_database(gammay_infinite(1,1,1,1), size(gammay_infinite,kind=4))
+    call bcast_all_cr_for_database(gammaz_infinite(1,1,1,1), size(gammaz_infinite,kind=4))
+  endif
+
+  if (size(ibool_infinite) > 0) then
+    call bcast_all_i_for_database(ibool_infinite(1,1,1,1), size(ibool_infinite,kind=4))
+  endif
+
+  end subroutine bcast_mesh_databases_INF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_coupling()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+
+  implicit none
+
+  ! crust/mantle
+  call bcast_all_i_for_database(nspec2D_xmin_crust_mantle, 1)
+  call bcast_all_i_for_database(nspec2D_xmax_crust_mantle, 1)
+  call bcast_all_i_for_database(nspec2D_ymin_crust_mantle, 1)
+  call bcast_all_i_for_database(nspec2D_ymax_crust_mantle, 1)
+  !call bcast_all_i_for_database(nspec2D_zmin_crust_mantle, 1)
+
+  if (size(ibelm_xmin_crust_mantle) > 0) then
+    call bcast_all_i_for_database(ibelm_xmin_crust_mantle(1), size(ibelm_xmin_crust_mantle,kind=4))
+    call bcast_all_i_for_database(ibelm_xmax_crust_mantle(1), size(ibelm_xmax_crust_mantle,kind=4))
+    call bcast_all_i_for_database(ibelm_ymin_crust_mantle(1), size(ibelm_ymin_crust_mantle,kind=4))
+    call bcast_all_i_for_database(ibelm_ymax_crust_mantle(1), size(ibelm_ymax_crust_mantle,kind=4))
+    call bcast_all_i_for_database(ibelm_bottom_crust_mantle(1), size(ibelm_bottom_crust_mantle,kind=4))
+    call bcast_all_i_for_database(ibelm_top_crust_mantle(1), size(ibelm_top_crust_mantle,kind=4))
+  endif
+
+  if (size(normal_xmin_crust_mantle) > 0) then
+    call bcast_all_cr_for_database(normal_xmin_crust_mantle(1,1,1,1), size(normal_xmin_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(normal_xmax_crust_mantle(1,1,1,1), size(normal_xmax_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(normal_ymin_crust_mantle(1,1,1,1), size(normal_ymin_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(normal_ymax_crust_mantle(1,1,1,1), size(normal_ymax_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(normal_bottom_crust_mantle(1,1,1,1), size(normal_bottom_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(normal_top_crust_mantle(1,1,1,1), size(normal_top_crust_mantle,kind=4))
+
+    call bcast_all_cr_for_database(jacobian2D_xmin_crust_mantle(1,1,1), size(jacobian2D_xmin_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_xmax_crust_mantle(1,1,1), size(jacobian2D_xmax_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_ymin_crust_mantle(1,1,1), size(jacobian2D_ymin_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_ymax_crust_mantle(1,1,1), size(jacobian2D_ymax_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_bottom_crust_mantle(1,1,1), size(jacobian2D_bottom_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_top_crust_mantle(1,1,1), size(jacobian2D_top_crust_mantle,kind=4))
+  endif
+
+  ! outer core
+  call bcast_all_i_for_database(nspec2D_xmin_outer_core, 1)
+  call bcast_all_i_for_database(nspec2D_xmax_outer_core, 1)
+  call bcast_all_i_for_database(nspec2D_ymin_outer_core, 1)
+  call bcast_all_i_for_database(nspec2D_ymax_outer_core, 1)
+
+  if (size(ibelm_xmin_outer_core) > 0) then
+    call bcast_all_i_for_database(ibelm_xmin_outer_core(1), size(ibelm_xmin_outer_core,kind=4))
+    call bcast_all_i_for_database(ibelm_xmax_outer_core(1), size(ibelm_xmax_outer_core,kind=4))
+    call bcast_all_i_for_database(ibelm_ymin_outer_core(1), size(ibelm_ymin_outer_core,kind=4))
+    call bcast_all_i_for_database(ibelm_ymax_outer_core(1), size(ibelm_ymax_outer_core,kind=4))
+    call bcast_all_i_for_database(ibelm_bottom_outer_core(1), size(ibelm_bottom_outer_core,kind=4))
+    call bcast_all_i_for_database(ibelm_top_outer_core(1), size(ibelm_top_outer_core,kind=4))
+  endif
+
+  if (size(normal_xmin_outer_core) > 0) then
+    call bcast_all_cr_for_database(normal_xmin_outer_core(1,1,1,1), size(normal_xmin_outer_core,kind=4))
+    call bcast_all_cr_for_database(normal_xmax_outer_core(1,1,1,1), size(normal_xmax_outer_core,kind=4))
+    call bcast_all_cr_for_database(normal_ymin_outer_core(1,1,1,1), size(normal_ymin_outer_core,kind=4))
+    call bcast_all_cr_for_database(normal_ymax_outer_core(1,1,1,1), size(normal_ymax_outer_core,kind=4))
+    call bcast_all_cr_for_database(normal_bottom_outer_core(1,1,1,1), size(normal_bottom_outer_core,kind=4))
+    call bcast_all_cr_for_database(normal_top_outer_core(1,1,1,1), size(normal_top_outer_core,kind=4))
+
+    call bcast_all_cr_for_database(jacobian2D_xmin_outer_core(1,1,1), size(jacobian2D_xmin_outer_core,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_xmax_outer_core(1,1,1), size(jacobian2D_xmax_outer_core,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_ymin_outer_core(1,1,1), size(jacobian2D_ymin_outer_core,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_ymax_outer_core(1,1,1), size(jacobian2D_ymax_outer_core,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_bottom_outer_core(1,1,1), size(jacobian2D_bottom_outer_core,kind=4))
+    call bcast_all_cr_for_database(jacobian2D_top_outer_core(1,1,1), size(jacobian2D_top_outer_core,kind=4))
+  endif
+
+  ! inner core
+  call bcast_all_i_for_database(nspec2D_xmin_inner_core, 1)
+  call bcast_all_i_for_database(nspec2D_xmax_inner_core, 1)
+  call bcast_all_i_for_database(nspec2D_ymin_inner_core, 1)
+  call bcast_all_i_for_database(nspec2D_ymax_inner_core, 1)
+
+  ! boundary parameters
+  if (size(ibelm_xmin_inner_core) > 0) then
+    call bcast_all_i_for_database(ibelm_xmin_inner_core(1), size(ibelm_xmin_inner_core,kind=4))
+    call bcast_all_i_for_database(ibelm_xmax_inner_core(1), size(ibelm_xmax_inner_core,kind=4))
+    call bcast_all_i_for_database(ibelm_ymin_inner_core(1), size(ibelm_ymin_inner_core,kind=4))
+    call bcast_all_i_for_database(ibelm_ymax_inner_core(1), size(ibelm_ymax_inner_core,kind=4))
+    call bcast_all_i_for_database(ibelm_bottom_inner_core(1), size(ibelm_bottom_inner_core,kind=4))
+    call bcast_all_i_for_database(ibelm_top_inner_core(1), size(ibelm_top_inner_core,kind=4))
+  endif
+
+  if (FULL_GRAVITY) then
+    if (ADD_TRINF) then
+      ! transition-to-infinite
+      call bcast_all_i_for_database(nspec2D_xmin_trinfinite, 1)
+      call bcast_all_i_for_database(nspec2D_xmax_trinfinite, 1)
+      call bcast_all_i_for_database(nspec2D_ymin_trinfinite, 1)
+      call bcast_all_i_for_database(nspec2D_ymax_trinfinite, 1)
+      if (size(ibelm_xmin_trinfinite) > 0) then
+        call bcast_all_i_for_database(ibelm_xmin_trinfinite(1), size(ibelm_xmin_trinfinite,kind=4))
+        call bcast_all_i_for_database(ibelm_xmax_trinfinite(1), size(ibelm_xmax_trinfinite,kind=4))
+        call bcast_all_i_for_database(ibelm_ymin_trinfinite(1), size(ibelm_ymin_trinfinite,kind=4))
+        call bcast_all_i_for_database(ibelm_ymax_trinfinite(1), size(ibelm_ymax_trinfinite,kind=4))
+        call bcast_all_i_for_database(ibelm_bottom_trinfinite(1), size(ibelm_bottom_trinfinite,kind=4))
+        call bcast_all_i_for_database(ibelm_top_trinfinite(1), size(ibelm_top_trinfinite,kind=4))
+      endif
+    endif
+    ! infinite
+    call bcast_all_i_for_database(nspec2D_xmin_infinite, 1)
+    call bcast_all_i_for_database(nspec2D_xmax_infinite, 1)
+    call bcast_all_i_for_database(nspec2D_ymin_infinite, 1)
+    call bcast_all_i_for_database(nspec2D_ymax_infinite, 1)
+    if (size(ibelm_xmin_infinite) > 0) then
+      call bcast_all_i_for_database(ibelm_xmin_infinite(1), size(ibelm_xmin_infinite,kind=4))
+      call bcast_all_i_for_database(ibelm_xmax_infinite(1), size(ibelm_xmax_infinite,kind=4))
+      call bcast_all_i_for_database(ibelm_ymin_infinite(1), size(ibelm_ymin_infinite,kind=4))
+      call bcast_all_i_for_database(ibelm_ymax_infinite(1), size(ibelm_ymax_infinite,kind=4))
+      call bcast_all_i_for_database(ibelm_bottom_infinite(1), size(ibelm_bottom_infinite,kind=4))
+      call bcast_all_i_for_database(ibelm_top_infinite(1), size(ibelm_top_infinite,kind=4))
+    endif
+  endif
+
+  ! -- Boundary Mesh for crust and mantle ---
+  if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
+    if (size(ibelm_moho_top) > 0) then
+      call bcast_all_i_for_database(ibelm_moho_top(1), size(ibelm_moho_top,kind=4))
+      call bcast_all_i_for_database(ibelm_moho_bot(1), size(ibelm_moho_bot,kind=4))
+      call bcast_all_cr_for_database(normal_moho(1,1,1,1), size(normal_moho,kind=4))
+    endif
+    if (size(ibelm_400_top) > 0) then
+      call bcast_all_i_for_database(ibelm_400_top(1), size(ibelm_400_top,kind=4))
+      call bcast_all_i_for_database(ibelm_400_bot(1), size(ibelm_400_bot,kind=4))
+      call bcast_all_cr_for_database(normal_400(1,1,1,1), size(normal_400,kind=4))
+    endif
+    if (size(ibelm_670_top) > 0) then
+      call bcast_all_i_for_database(ibelm_670_top(1), size(ibelm_670_top,kind=4))
+      call bcast_all_i_for_database(ibelm_670_bot(1), size(ibelm_670_bot,kind=4))
+      call bcast_all_cr_for_database(normal_670(1,1,1,1), size(normal_670,kind=4))
+    endif
+  endif
+
+  end subroutine bcast_mesh_databases_coupling
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_MPI_CM()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! MPI interfaces
+  call bcast_all_i_for_database(num_interfaces_crust_mantle, 1)
+
+  ! could also test for not allocated, only reader processes have
+  ! allocated these arrays.
+  if (.not. I_should_read_the_database) then
+    allocate(my_neighbors_crust_mantle(num_interfaces_crust_mantle), &
+            nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_crust_mantle etc.')
+    my_neighbors_crust_mantle(:) = 0; nibool_interfaces_crust_mantle(:) = 0
+  endif
+  if (num_interfaces_crust_mantle > 0) then
+    call bcast_all_i_for_database(max_nibool_interfaces_cm, 1)
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_crust_mantle')
+      ibool_interfaces_crust_mantle(:,:) = 0
+    endif
+
+    call bcast_all_i_for_database(my_neighbors_crust_mantle(1), size(my_neighbors_crust_mantle,kind=4))
+    call bcast_all_i_for_database(nibool_interfaces_crust_mantle(1), size(nibool_interfaces_crust_mantle,kind=4))
+    call bcast_all_i_for_database(ibool_interfaces_crust_mantle(1,1), size(ibool_interfaces_crust_mantle,kind=4))
+  else
+    ! dummy array
+    max_nibool_interfaces_cm = 0
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_crust_mantle')
+      ibool_interfaces_crust_mantle(:,:) = 0
+    endif
+  endif
+
+  ! inner / outer elements
+  call bcast_all_i_for_database(nspec_inner_crust_mantle, 1)
+  call bcast_all_i_for_database(nspec_outer_crust_mantle, 1)
+  call bcast_all_i_for_database(num_phase_ispec_crust_mantle, 1)
+  if (num_phase_ispec_crust_mantle < 0 ) &
+    call exit_mpi(myrank,'Error num_phase_ispec_crust_mantle is < zero')
+
+  if (.not. I_should_read_the_database) then
+    allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_crust_mantle')
+    phase_ispec_inner_crust_mantle(:,:) = 0
+  endif
+
+  if (num_phase_ispec_crust_mantle > 0) then
+    call bcast_all_i_for_database(phase_ispec_inner_crust_mantle(1,1), size(phase_ispec_inner_crust_mantle,kind=4))
+  endif
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    call bcast_all_i_for_database(num_colors_outer_crust_mantle, 1)
+    call bcast_all_i_for_database(num_colors_inner_crust_mantle, 1)
+
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_crust_mantle array')
+      num_elem_colors_crust_mantle(:) = 0
+    endif
+
+    call bcast_all_i_for_database(num_elem_colors_crust_mantle(1), size(num_elem_colors_crust_mantle,kind=4))
+  else
+    ! allocates dummy arrays
+    num_colors_outer_crust_mantle = 0
+    num_colors_inner_crust_mantle = 0
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_crust_mantle array')
+      num_elem_colors_crust_mantle(:) = 0
+    endif
+  endif
+
+  end subroutine bcast_mesh_databases_MPI_CM
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_MPI_OC()
+
+  use specfem_par
+  use specfem_par_outercore
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! MPI interfaces
+  call bcast_all_i_for_database(num_interfaces_outer_core, 1)
+  if (.not. I_should_read_the_database) then
+    allocate(my_neighbors_outer_core(num_interfaces_outer_core), &
+             nibool_interfaces_outer_core(num_interfaces_outer_core), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_outer_core etc.')
+    my_neighbors_outer_core(:) = 0; nibool_interfaces_outer_core(:) = 0
+  endif
+
+  if (num_interfaces_outer_core > 0) then
+    call bcast_all_i_for_database(max_nibool_interfaces_oc, 1)
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
+               stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_outer_core')
+      ibool_interfaces_outer_core(:,:) = 0
+    endif
+  endif
+
+  if (num_interfaces_outer_core > 0) then
+    call bcast_all_i_for_database(my_neighbors_outer_core(1), size(my_neighbors_outer_core,kind=4))
+    call bcast_all_i_for_database(nibool_interfaces_outer_core(1), size(nibool_interfaces_outer_core,kind=4))
+    call bcast_all_i_for_database(ibool_interfaces_outer_core(1,1), size(ibool_interfaces_outer_core,kind=4))
+  else
+    ! dummy array
+    max_nibool_interfaces_oc = 0
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_outer_core(0,0),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_outer_core')
+      ibool_interfaces_outer_core(:,:) = 0
+    endif
+  endif
+
+  ! inner / outer elements
+  call bcast_all_i_for_database(nspec_inner_outer_core, 1)
+  call bcast_all_i_for_database(nspec_outer_outer_core, 1)
+  call bcast_all_i_for_database(num_phase_ispec_outer_core, 1)
+  if (num_phase_ispec_outer_core < 0 ) &
+    call exit_mpi(myrank,'Error num_phase_ispec_outer_core is < zero')
+
+  if (.not. I_should_read_the_database) then
+    allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_outer_core')
+    phase_ispec_inner_outer_core(:,:) = 0
+  endif
+
+  if (num_phase_ispec_outer_core > 0) then
+    call bcast_all_i_for_database(phase_ispec_inner_outer_core(1,1), size(phase_ispec_inner_outer_core,kind=4))
+  endif
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    call bcast_all_i_for_database(num_colors_outer_outer_core, 1)
+    call bcast_all_i_for_database(num_colors_inner_outer_core, 1)
+
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
+               stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_outer_core array')
+      num_elem_colors_outer_core(:) = 0
+    endif
+
+    call bcast_all_i_for_database(num_elem_colors_outer_core(1), size(num_elem_colors_outer_core,kind=4))
+  else
+    ! allocates dummy arrays
+    num_colors_outer_outer_core = 0
+    num_colors_inner_outer_core = 0
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
+               stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_outer_core array')
+      num_elem_colors_outer_core(:) = 0
+    endif
+  endif
+
+  end subroutine bcast_mesh_databases_MPI_OC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_MPI_IC()
+
+  use specfem_par
+  use specfem_par_innercore
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! MPI interfaces
+  call bcast_all_i_for_database(num_interfaces_inner_core, 1)
+  if (.not. I_should_read_the_database) then
+    allocate(my_neighbors_inner_core(num_interfaces_inner_core), &
+            nibool_interfaces_inner_core(num_interfaces_inner_core), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_inner_core etc.')
+    my_neighbors_inner_core(:) = 0; nibool_interfaces_inner_core(:) = 0
+  endif
+
+  if (num_interfaces_inner_core > 0) then
+    call bcast_all_i_for_database(max_nibool_interfaces_ic, 1)
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_inner_core(max_nibool_interfaces_ic,num_interfaces_inner_core), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_inner_core')
+      ibool_interfaces_inner_core(:,:) = 0
+    endif
+
+    call bcast_all_i_for_database(my_neighbors_inner_core(1), size(my_neighbors_inner_core,kind=4))
+    call bcast_all_i_for_database(nibool_interfaces_inner_core(1), size(nibool_interfaces_inner_core,kind=4))
+    call bcast_all_i_for_database(ibool_interfaces_inner_core(1,1), size(ibool_interfaces_inner_core,kind=4))
+  else
+    ! dummy array
+    max_nibool_interfaces_ic = 0
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_inner_core(0,0),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_inner_core')
+      ibool_interfaces_inner_core(:,:) = 0
+    endif
+  endif
+
+  ! inner / outer elements
+  call bcast_all_i_for_database(nspec_inner_inner_core, 1)
+  call bcast_all_i_for_database(nspec_outer_inner_core, 1)
+  call bcast_all_i_for_database(num_phase_ispec_inner_core, 1)
+  if (num_phase_ispec_inner_core < 0 ) &
+    call exit_mpi(myrank,'Error num_phase_ispec_inner_core is < zero')
+
+  if (.not. I_should_read_the_database) then
+    allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_inner_core')
+    phase_ispec_inner_inner_core(:,:) = 0
+  endif
+
+  if (num_phase_ispec_inner_core > 0) then
+    call bcast_all_i_for_database(phase_ispec_inner_inner_core(1,1), size(phase_ispec_inner_inner_core,kind=4))
+  endif
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    call bcast_all_i_for_database(num_colors_outer_inner_core, 1)
+    call bcast_all_i_for_database(num_colors_inner_inner_core, 1)
+
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_inner_core array')
+      num_elem_colors_inner_core(:) = 0
+    endif
+
+    call bcast_all_i_for_database(num_elem_colors_inner_core(1), size(num_elem_colors_inner_core,kind=4))
+  else
+    ! allocates dummy arrays
+    num_colors_outer_inner_core = 0
+    num_colors_inner_inner_core = 0
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_inner_core array')
+      num_elem_colors_inner_core(:) = 0
+    endif
+  endif
+
+  end subroutine bcast_mesh_databases_MPI_IC
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_MPI_TRINF()
+
+  use specfem_par
+  use specfem_par_trinfinite
+  use specfem_par_full_gravity
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! MPI interfaces
+  call bcast_all_i_for_database(num_interfaces_trinfinite, 1)
+  if (.not. I_should_read_the_database) then
+    allocate(my_neighbors_trinfinite(num_interfaces_trinfinite), &
+            nibool_interfaces_trinfinite(num_interfaces_trinfinite), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_trinfinite etc.')
+    my_neighbors_trinfinite(:) = 0; nibool_interfaces_trinfinite(:) = 0
+  endif
+
+  if (num_interfaces_trinfinite > 0) then
+    call bcast_all_i_for_database(max_nibool_interfaces_trinfinite, 1)
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_trinfinite(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_trinfinite')
+      ibool_interfaces_trinfinite(:,:) = 0
+    endif
+
+    call bcast_all_i_for_database(my_neighbors_trinfinite(1), size(my_neighbors_trinfinite,kind=4))
+    call bcast_all_i_for_database(nibool_interfaces_trinfinite(1), size(nibool_interfaces_trinfinite,kind=4))
+    call bcast_all_i_for_database(ibool_interfaces_trinfinite(1,1), size(ibool_interfaces_trinfinite,kind=4))
+  else
+    ! dummy array
+    max_nibool_interfaces_trinfinite = 0
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_trinfinite(0,0),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_trinfinite')
+      ibool_interfaces_trinfinite(:,:) = 0
+    endif
+  endif
+
+  ! inner / outer elements
+  call bcast_all_i_for_database(nspec_inner_trinfinite, 1)
+  call bcast_all_i_for_database(nspec_outer_trinfinite, 1)
+  call bcast_all_i_for_database(num_phase_ispec_trinfinite, 1)
+  if (num_phase_ispec_trinfinite < 0 ) &
+    call exit_mpi(myrank,'Error num_phase_ispec_trinfinite is < zero')
+
+  if (.not. I_should_read_the_database) then
+    allocate(phase_ispec_inner_trinfinite(num_phase_ispec_trinfinite,2), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_trinfinite')
+    phase_ispec_inner_trinfinite(:,:) = 0
+  endif
+
+  if (num_phase_ispec_trinfinite > 0) then
+    call bcast_all_i_for_database(phase_ispec_inner_trinfinite(1,1), size(phase_ispec_inner_trinfinite,kind=4))
+  endif
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    call bcast_all_i_for_database(num_colors_outer_trinfinite, 1)
+    call bcast_all_i_for_database(num_colors_inner_trinfinite, 1)
+
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_trinfinite(num_colors_outer_trinfinite + num_colors_inner_trinfinite), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_trinfinite array')
+      num_elem_colors_trinfinite(:) = 0
+    endif
+
+    call bcast_all_i_for_database(num_elem_colors_trinfinite(1), size(num_elem_colors_trinfinite,kind=4))
+  else
+    ! allocates dummy arrays
+    num_colors_outer_trinfinite = 0
+    num_colors_inner_trinfinite = 0
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_trinfinite(num_colors_outer_trinfinite + num_colors_inner_trinfinite), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_trinfinite array')
+      num_elem_colors_trinfinite(:) = 0
+    endif
+  endif
+
+  end subroutine bcast_mesh_databases_MPI_TRINF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_MPI_INF()
+
+  use specfem_par
+  use specfem_par_infinite
+  use specfem_par_full_gravity
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! MPI interfaces
+  call bcast_all_i_for_database(num_interfaces_infinite, 1)
+  if (.not. I_should_read_the_database) then
+    allocate(my_neighbors_infinite(num_interfaces_infinite), &
+            nibool_interfaces_infinite(num_interfaces_infinite), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_infinite etc.')
+    my_neighbors_infinite(:) = 0; nibool_interfaces_infinite(:) = 0
+  endif
+
+  if (num_interfaces_infinite > 0) then
+    call bcast_all_i_for_database(max_nibool_interfaces_infinite, 1)
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_infinite(max_nibool_interfaces_infinite,num_interfaces_infinite), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_infinite')
+      ibool_interfaces_infinite(:,:) = 0
+    endif
+
+    call bcast_all_i_for_database(my_neighbors_infinite(1), size(my_neighbors_infinite,kind=4))
+    call bcast_all_i_for_database(nibool_interfaces_infinite(1), size(nibool_interfaces_infinite,kind=4))
+    call bcast_all_i_for_database(ibool_interfaces_infinite(1,1), size(ibool_interfaces_infinite,kind=4))
+  else
+    ! dummy array
+    max_nibool_interfaces_infinite = 0
+    if (.not. I_should_read_the_database) then
+      allocate(ibool_interfaces_infinite(0,0),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_infinite')
+      ibool_interfaces_infinite(:,:) = 0
+    endif
+  endif
+
+  ! inner / outer elements
+  call bcast_all_i_for_database(nspec_inner_infinite, 1)
+  call bcast_all_i_for_database(nspec_outer_infinite, 1)
+  call bcast_all_i_for_database(num_phase_ispec_infinite, 1)
+  if (num_phase_ispec_infinite < 0 ) &
+    call exit_mpi(myrank,'Error num_phase_ispec_infinite is < zero')
+
+  if (.not. I_should_read_the_database) then
+    allocate(phase_ispec_inner_infinite(num_phase_ispec_infinite,2), &
+            stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_infinite')
+    phase_ispec_inner_infinite(:,:) = 0
+  endif
+
+  if (num_phase_ispec_infinite > 0) then
+    call bcast_all_i_for_database(phase_ispec_inner_infinite(1,1), size(phase_ispec_inner_infinite,kind=4))
+  endif
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    call bcast_all_i_for_database(num_colors_outer_infinite, 1)
+    call bcast_all_i_for_database(num_colors_inner_infinite, 1)
+
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_infinite(num_colors_outer_infinite + num_colors_inner_infinite), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_infinite array')
+      num_elem_colors_infinite(:) = 0
+    endif
+
+    call bcast_all_i_for_database(num_elem_colors_infinite(1), size(num_elem_colors_infinite,kind=4))
+  else
+    ! allocates dummy arrays
+    num_colors_outer_infinite = 0
+    num_colors_inner_infinite = 0
+    if (.not. I_should_read_the_database) then
+      allocate(num_elem_colors_infinite(num_colors_outer_infinite + num_colors_inner_infinite), &
+              stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_infinite array')
+      num_elem_colors_infinite(:) = 0
+    endif
+  endif
+
+  end subroutine bcast_mesh_databases_MPI_INF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine bcast_mesh_databases_stacey()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! crust and mantle
+  if (NSPEC_CRUST_MANTLE > 0) then
+    call bcast_all_i_for_database(num_abs_boundary_faces_crust_mantle,1)
+
+    if (.not. I_should_read_the_database) then
+      ! allocates absorbing boundary arrays
+      if (num_abs_boundary_faces_crust_mantle > 0) then
+        allocate(abs_boundary_ispec_crust_mantle(num_abs_boundary_faces_crust_mantle),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
+        allocate(abs_boundary_ijk_crust_mantle(3,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
+        allocate(abs_boundary_jacobian2Dw_crust_mantle(NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
+        allocate(abs_boundary_normal_crust_mantle(NDIM,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_normal')
+        allocate(abs_boundary_npoin_crust_mantle(num_abs_boundary_faces_crust_mantle),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
+        if (ier /= 0) stop 'Error allocating array abs_boundary_ispec etc.'
+      else
+        ! dummy arrays
+        allocate(abs_boundary_ispec_crust_mantle(1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
+        allocate(abs_boundary_ijk_crust_mantle(1,1,1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
+        allocate(abs_boundary_jacobian2Dw_crust_mantle(1,1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
+        allocate(abs_boundary_normal_crust_mantle(1,1,1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_normal')
+        allocate(abs_boundary_npoin_crust_mantle(1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
+      endif
+      abs_boundary_ispec_crust_mantle(:) = 0; abs_boundary_npoin_crust_mantle(:) = 0
+      abs_boundary_ijk_crust_mantle(:,:,:) = 0
+      abs_boundary_jacobian2Dw_crust_mantle(:,:) = 0.0; abs_boundary_normal_crust_mantle(:,:,:) = 0.0
+    endif
+    call bcast_all_i_for_database(abs_boundary_ispec_crust_mantle(1), size(abs_boundary_ispec_crust_mantle,kind=4))
+    call bcast_all_i_for_database(abs_boundary_npoin_crust_mantle(1), size(abs_boundary_npoin_crust_mantle,kind=4))
+    call bcast_all_i_for_database(abs_boundary_ijk_crust_mantle(1,1,1), size(abs_boundary_ijk_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(abs_boundary_jacobian2Dw_crust_mantle(1,1), size(abs_boundary_jacobian2Dw_crust_mantle,kind=4))
+    call bcast_all_cr_for_database(abs_boundary_normal_crust_mantle(1,1,1), size(abs_boundary_normal_crust_mantle,kind=4))
+  endif
+
+  ! outer core
+  if (NSPEC_OUTER_CORE > 0) then
+    call bcast_all_i_for_database(num_abs_boundary_faces_outer_core,1)
+
+    if (.not. I_should_read_the_database) then
+      ! allocates absorbing boundary arrays
+      if (num_abs_boundary_faces_outer_core > 0) then
+        allocate(abs_boundary_ispec_outer_core(num_abs_boundary_faces_outer_core),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
+        allocate(abs_boundary_ijk_outer_core(3,NGLLSQUARE,num_abs_boundary_faces_outer_core),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
+        allocate(abs_boundary_jacobian2Dw_outer_core(NGLLSQUARE,num_abs_boundary_faces_outer_core),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
+        allocate(abs_boundary_npoin_outer_core(num_abs_boundary_faces_outer_core),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
+        if (ier /= 0) stop 'Error allocating array abs_boundary_ispec etc.'
+      else
+        ! dummy arrays
+        allocate(abs_boundary_ispec_outer_core(1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
+        allocate(abs_boundary_ijk_outer_core(1,1,1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
+        allocate(abs_boundary_jacobian2Dw_outer_core(1,1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
+        allocate(abs_boundary_npoin_outer_core(1),stat=ier)
+        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
+      endif
+      abs_boundary_ispec_outer_core(:) = 0; abs_boundary_npoin_outer_core(:) = 0
+      abs_boundary_ijk_outer_core(:,:,:) = 0
+      abs_boundary_jacobian2Dw_outer_core(:,:) = 0.0
+    endif
+    call bcast_all_i_for_database(abs_boundary_ispec_outer_core(1), size(abs_boundary_ispec_outer_core,kind=4))
+    call bcast_all_i_for_database(abs_boundary_npoin_outer_core(1), size(abs_boundary_npoin_outer_core,kind=4))
+    call bcast_all_i_for_database(abs_boundary_ijk_outer_core(1,1,1), size(abs_boundary_ijk_outer_core,kind=4))
+    call bcast_all_cr_for_database(abs_boundary_jacobian2Dw_outer_core(1,1), size(abs_boundary_jacobian2Dw_outer_core,kind=4))
+  endif
+
+  end subroutine bcast_mesh_databases_stacey
+
diff --git a/src/specfem3D/initialize_simulation.F90 b/src/specfem3D/initialize_simulation.F90
index 807380f2e..f2c81d7f2 100644
--- a/src/specfem3D/initialize_simulation.F90
+++ b/src/specfem3D/initialize_simulation.F90
@@ -267,6 +267,17 @@ subroutine initialize_simulation()
     COMPUTE_AND_STORE_STRAIN = .false.
   endif
 
+  ! no full gravity
+  if (.not. FULL_GRAVITY) then
+    ! zero-out region sizes
+    ! transition-to-infinite
+    NSPEC_TRINFINITE = 0
+    NGLOB_TRINFINITE = 0
+    ! infinite
+    NSPEC_INFINITE = 0
+    NGLOB_INFINITE = 0
+  endif
+
   ! checks flags
   call initialize_simulation_check()
 
@@ -595,8 +606,6 @@ subroutine initialize_simulation_check()
         .or. NGLOB_INFINITE_ADJOINT /= NGLOB_INFINITE) &
         call exit_MPI(myrank, 'improper dimensions of adjoint arrays for infinite regions, please recompile solver')
     endif
-    ! safety stop
-    stop 'FULL_GRAVITY is not fully implemented yet, please set it to .false. for now...'
   endif
 
   end subroutine initialize_simulation_check
diff --git a/src/specfem3D/prepare_timerun.F90 b/src/specfem3D/prepare_timerun.F90
index 869e57120..79ba3fd0f 100644
--- a/src/specfem3D/prepare_timerun.F90
+++ b/src/specfem3D/prepare_timerun.F90
@@ -63,6 +63,9 @@ subroutine prepare_timerun()
   ! precomputes gravity factors
   call prepare_gravity()
 
+  ! full gravity preparation
+  if (FULL_GRAVITY) call SIEM_prepare_solver()
+
   ! precomputes attenuation factors
   call prepare_attenuation()
 
diff --git a/src/specfem3D/read_arrays_solver.f90 b/src/specfem3D/read_arrays_solver.f90
index 25e65c795..e429bbfc3 100644
--- a/src/specfem3D/read_arrays_solver.f90
+++ b/src/specfem3D/read_arrays_solver.f90
@@ -30,7 +30,8 @@
   subroutine read_arrays_solver(iregion_code, &
                                 nspec,nglob,nglob_xy, &
                                 nspec_iso,nspec_tiso,nspec_ani, &
-                                rho_vp,rho_vs,xstore,ystore,zstore, &
+                                rho_vp,rho_vs, &
+                                xstore,ystore,zstore, &
                                 xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                 rhostore, kappavstore,muvstore,kappahstore,muhstore,eta_anisostore, &
                                 c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
@@ -142,6 +143,12 @@ subroutine read_arrays_solver(iregion_code, &
   read(IIN) gammay
   read(IIN) gammaz
 
+  ! checks if anything else to do for infinite regions
+  if (iregion_code == IREGION_TRINFINITE .or. iregion_code == IREGION_INFINITE) then
+    close(IIN)
+    return
+  endif
+
   ! model arrays
   read(IIN) rhostore
   read(IIN) kappavstore
diff --git a/src/specfem3D/read_arrays_solver_adios.F90 b/src/specfem3D/read_arrays_solver_adios.F90
index 5782f63a8..209887b23 100644
--- a/src/specfem3D/read_arrays_solver_adios.F90
+++ b/src/specfem3D/read_arrays_solver_adios.F90
@@ -27,10 +27,12 @@
 
 !===============================================================================
 !> \brief Read adios arrays created by the mesher (file: regX_solver_data.bp)
+
   subroutine read_arrays_solver_adios(iregion_code, &
                                       nspec,nglob,nglob_xy, &
                                       nspec_iso,nspec_tiso,nspec_ani, &
-                                      rho_vp,rho_vs,xstore,ystore,zstore, &
+                                      rho_vp,rho_vs, &
+                                      xstore,ystore,zstore, &
                                       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                                       rhostore, kappavstore,muvstore,kappahstore,muhstore,eta_anisostore, &
                                       c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
@@ -215,6 +217,23 @@ subroutine read_arrays_solver_adios(iregion_code, &
   ! perform actual reading
   call read_adios_perform(myadios_file)
 
+  ! checks if anything else to do for infinite regions
+  if (iregion_code == IREGION_TRINFINITE .or. iregion_code == IREGION_INFINITE) then
+    ! ends step for this region
+    call read_adios_end_step(myadios_file)
+
+    ! Clean everything and close the ADIOS file
+    do i = 1, sel_num
+      sel => selections(i)
+      call delete_adios_selection(sel)
+    enddo
+
+    ! closes adios file & cleans/removes group object
+    call close_file_adios_read_and_finalize_method(myadios_file)
+    call delete_adios_group(myadios_group,"SolverReader")
+    return
+  endif
+
   local_dim = NGLLX * NGLLY * NGLLZ * nspec_iso  ! see read_mesh_databases for settings of nspec_iso
   start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
   sel_num = sel_num+1
@@ -391,7 +410,6 @@ subroutine read_arrays_solver_adios(iregion_code, &
   ! perform actual reading
   call read_adios_perform(myadios_file)
 
-
   if ((ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION_VAL .and. iregion_code == IREGION_CRUST_MANTLE) .or. &
       (ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION_VAL .and. iregion_code == IREGION_INNER_CORE)) then
     local_dim = nglob_xy
@@ -418,14 +436,13 @@ subroutine read_arrays_solver_adios(iregion_code, &
 
     call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
                                    trim(region_name) // "rmass_ocean_load/array", rmass_ocean_load)
+    ! perform actual reading
+    call read_adios_perform(myadios_file)
   endif
 
   ! ends step for this region
   call read_adios_end_step(myadios_file)
 
-  ! perform actual reading
-  call read_adios_perform(myadios_file)
-
   ! Clean everything and close the ADIOS file
   do i = 1, sel_num
     sel => selections(i)
diff --git a/src/specfem3D/read_mesh_databases.F90 b/src/specfem3D/read_mesh_databases.F90
index f9df360fe..f12e7eedb 100644
--- a/src/specfem3D/read_mesh_databases.F90
+++ b/src/specfem3D/read_mesh_databases.F90
@@ -87,6 +87,27 @@ subroutine read_mesh_databases()
   endif
   call read_mesh_databases_IC()
 
+  ! full gravity arrays
+  if (FULL_GRAVITY_VAL) then
+    ! transition-to-infinite region
+    if (ADD_TRINF) then
+      if (SYNC_READING) call synchronize_all()
+      if (myrank == 0) then
+        write(IMAIN,*) '  reading in transition-to-infinite databases...'
+        call flush_IMAIN()
+      endif
+      call read_mesh_databases_TRINF()
+    endif
+
+    ! infinite region
+    if (SYNC_READING) call synchronize_all()
+    if (myrank == 0) then
+      write(IMAIN,*) '  reading in infinite databases...'
+      call flush_IMAIN()
+    endif
+    call read_mesh_databases_INF()
+  endif
+
   ! reads "boundary.bin" files to couple mantle with outer core and inner core boundaries
   if (SYNC_READING) call synchronize_all()
   if (myrank == 0) then
@@ -821,6 +842,266 @@ subroutine read_mesh_databases_IC()
 
   end subroutine read_mesh_databases_IC
 
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine read_mesh_databases_TRINF()
+
+! mesh for TRINFINITE region
+
+  use specfem_par
+  use specfem_par_trinfinite
+  implicit none
+
+  ! local parameters
+  integer :: nspec_iso,nspec_tiso,nspec_ani,NGLOB_XY_dummy
+  integer :: ier
+
+  ! dummy array that does not need to be actually read
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+  real(kind=CUSTOM_REAL), dimension(1) :: dummy_rmass
+
+  logical, dimension(:), allocatable :: dummy_ispec_is_tiso
+  integer, dimension(:), allocatable :: dummy_idoubling
+
+  ! transition-to-infinite region
+  ! (no isotropy, no tiso, no anisotropy)
+  ! (no rmass)
+  nspec_iso = 0
+  nspec_tiso = 0
+  nspec_ani = 0
+
+  ! dummy allocation
+  allocate(dummy_ispec_is_tiso(NSPEC_TRINFINITE), &
+           dummy_idoubling(NSPEC_TRINFINITE), &
+           stat=ier)
+  if (ier /= 0) stop 'Error allocating dummy rmass and dummy ispec/idoubling in trinfinite region'
+  dummy_ispec_is_tiso(:) = .false.; dummy_idoubling(:) = 0
+
+  ! x/y/z locations
+  allocate(xstore_trinfinite(NGLOB_TRINFINITE), &
+           ystore_trinfinite(NGLOB_TRINFINITE), &
+           zstore_trinfinite(NGLOB_TRINFINITE),stat=ier)
+  if (ier /= 0) stop 'Error allocating x/y/zstore in trinfinite'
+  xstore_trinfinite(:) = 0.0; ystore_trinfinite(:) = 0.0; zstore_trinfinite(:) = 0.0
+
+  allocate(ibool_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           xix_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           xiy_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           xiz_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           etax_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           etay_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           etaz_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           gammax_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           gammay_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE), &
+           gammaz_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE),stat=ier)
+  if (ier /= 0) stop 'Error allocating arrays ibool_trinfinite,..'
+  ibool_trinfinite(:,:,:,:) = 0
+  xix_trinfinite(:,:,:,:) = 0.0; xiy_trinfinite(:,:,:,:) = 0.0; xiz_trinfinite(:,:,:,:) = 0.0
+  etax_trinfinite(:,:,:,:) = 0.0; etay_trinfinite(:,:,:,:) = 0.0; etaz_trinfinite(:,:,:,:) = 0.0
+  gammax_trinfinite(:,:,:,:) = 0.0; gammay_trinfinite(:,:,:,:) = 0.0; gammaz_trinfinite(:,:,:,:) = 0.0
+
+  ! dummy rmass
+  NGLOB_XY_dummy = 0
+  dummy_rmass(:) = 0.0
+
+  ! reads in arrays
+  if (I_should_read_the_database .and. NSPEC_TRINFINITE > 0) then
+    if (ADIOS_FOR_ARRAYS_SOLVER) then
+      call read_arrays_solver_adios(IREGION_TRINFINITE, &
+                                    NSPEC_TRINFINITE,NGLOB_TRINFINITE,NGLOB_XY_dummy, &
+                                    nspec_iso,nspec_tiso,nspec_ani, &
+                                    dummy_array,dummy_array, &
+                                    xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
+                                    xix_trinfinite,xiy_trinfinite,xiz_trinfinite, &
+                                    etax_trinfinite,etay_trinfinite,etaz_trinfinite, &
+                                    gammax_trinfinite,gammay_trinfinite,gammaz_trinfinite, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array, &
+                                    ibool_trinfinite,dummy_idoubling,dummy_ispec_is_tiso, &
+                                    dummy_rmass,dummy_rmass,dummy_rmass, &
+                                    1,dummy_array, &
+                                    dummy_rmass,dummy_rmass)
+    else
+      call read_arrays_solver(IREGION_TRINFINITE, &
+                                    NSPEC_TRINFINITE,NGLOB_TRINFINITE,NGLOB_XY_dummy, &
+                                    nspec_iso,nspec_tiso,nspec_ani, &
+                                    dummy_array,dummy_array, &
+                                    xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
+                                    xix_trinfinite,xiy_trinfinite,xiz_trinfinite, &
+                                    etax_trinfinite,etay_trinfinite,etaz_trinfinite, &
+                                    gammax_trinfinite,gammay_trinfinite,gammaz_trinfinite, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array, &
+                                    ibool_trinfinite,dummy_idoubling,dummy_ispec_is_tiso, &
+                                    dummy_rmass,dummy_rmass,dummy_rmass, &
+                                    1,dummy_array, &
+                                    dummy_rmass,dummy_rmass)
+    endif
+  endif
+  ! broadcast to other processes
+  call bcast_mesh_databases_TRINF()
+
+  ! free dummy arrays
+  deallocate(dummy_ispec_is_tiso,dummy_idoubling)
+
+  ! check
+  if (NSPEC_TRINFINITE > 0) then
+    ! check that the number of points in this slice is correct
+    if (minval(ibool_trinfinite(:,:,:,:)) /= 1 .or. &
+        maxval(ibool_trinfinite(:,:,:,:)) /= NGLOB_TRINFINITE) &
+      call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in transition infinite region')
+  endif
+
+  end subroutine read_mesh_databases_TRINF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine read_mesh_databases_INF()
+
+! mesh for INFINITE region
+
+  use specfem_par
+  use specfem_par_infinite
+  implicit none
+
+  ! local parameters
+  integer :: nspec_iso,nspec_tiso,nspec_ani,NGLOB_XY_dummy
+  integer :: ier
+
+  ! dummy array that does not need to be actually read
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+  real(kind=CUSTOM_REAL), dimension(1) :: dummy_rmass
+
+  logical, dimension(:), allocatable :: dummy_ispec_is_tiso
+  integer, dimension(:), allocatable :: dummy_idoubling
+
+  ! infinite region
+  ! (no isotropy, no tiso, no anisotropy)
+  ! (no rmass)
+  nspec_iso = 0
+  nspec_tiso = 0
+  nspec_ani = 0
+
+  ! dummy allocation
+  allocate(dummy_ispec_is_tiso(NSPEC_INFINITE), &
+           dummy_idoubling(NSPEC_INFINITE), &
+           stat=ier)
+  if (ier /= 0) stop 'Error allocating dummy rmass and dummy ispec/idoubling in infinite region'
+  dummy_ispec_is_tiso(:) = .false.; dummy_idoubling(:) = 0
+
+  ! x/y/z locations
+  allocate(xstore_infinite(NGLOB_INFINITE), &
+           ystore_infinite(NGLOB_INFINITE), &
+           zstore_infinite(NGLOB_INFINITE),stat=ier)
+  if (ier /= 0) stop 'Error allocating x/y/zstore in infinite'
+  xstore_infinite(:) = 0.0; ystore_infinite(:) = 0.0; zstore_infinite(:) = 0.0
+
+  allocate(ibool_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           xix_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           xiy_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           xiz_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           etax_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           etay_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           etaz_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           gammax_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           gammay_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE), &
+           gammaz_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE),stat=ier)
+  if (ier /= 0) stop 'Error allocating arrays ibool_infinite,..'
+  ibool_infinite(:,:,:,:) = 0
+  xix_infinite(:,:,:,:) = 0.0; xiy_infinite(:,:,:,:) = 0.0; xiz_infinite(:,:,:,:) = 0.0
+  etax_infinite(:,:,:,:) = 0.0; etay_infinite(:,:,:,:) = 0.0; etaz_infinite(:,:,:,:) = 0.0
+  gammax_infinite(:,:,:,:) = 0.0; gammay_infinite(:,:,:,:) = 0.0; gammaz_infinite(:,:,:,:) = 0.0
+
+  ! dummy rmass
+  NGLOB_XY_dummy = 0
+  dummy_rmass(:) = 0.0
+
+  ! reads in arrays
+  if (I_should_read_the_database .and. NSPEC_INFINITE > 0) then
+    if (ADIOS_FOR_ARRAYS_SOLVER) then
+      call read_arrays_solver_adios(IREGION_INFINITE, &
+                                    NSPEC_INFINITE,NGLOB_INFINITE,NGLOB_XY_dummy, &
+                                    nspec_iso,nspec_tiso,nspec_ani, &
+                                    dummy_array,dummy_array, &
+                                    xstore_infinite,ystore_infinite,zstore_infinite, &
+                                    xix_infinite,xiy_infinite,xiz_infinite, &
+                                    etax_infinite,etay_infinite,etaz_infinite, &
+                                    gammax_infinite,gammay_infinite,gammaz_infinite, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array, &
+                                    ibool_infinite,dummy_idoubling,dummy_ispec_is_tiso, &
+                                    dummy_rmass,dummy_rmass,dummy_rmass, &
+                                    1,dummy_array, &
+                                    dummy_rmass,dummy_rmass)
+    else
+      call read_arrays_solver(IREGION_INFINITE, &
+                                    NSPEC_INFINITE,NGLOB_INFINITE,NGLOB_XY_dummy, &
+                                    nspec_iso,nspec_tiso,nspec_ani, &
+                                    dummy_array,dummy_array, &
+                                    xstore_infinite,ystore_infinite,zstore_infinite, &
+                                    xix_infinite,xiy_infinite,xiz_infinite, &
+                                    etax_infinite,etay_infinite,etaz_infinite, &
+                                    gammax_infinite,gammay_infinite,gammaz_infinite, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array,dummy_array,dummy_array, &
+                                    dummy_array, &
+                                    ibool_infinite,dummy_idoubling,dummy_ispec_is_tiso, &
+                                    dummy_rmass,dummy_rmass,dummy_rmass, &
+                                    1,dummy_array, &
+                                    dummy_rmass,dummy_rmass)
+    endif
+  endif
+  ! broadcast to other processes
+  call bcast_mesh_databases_INF()
+
+  ! free dummy arrays
+  deallocate(dummy_ispec_is_tiso,dummy_idoubling)
+
+  ! check
+  if (NSPEC_INFINITE > 0) then
+    ! check that the number of points in this slice is correct
+    if (minval(ibool_infinite(:,:,:,:)) /= 1 .or. &
+       maxval(ibool_infinite(:,:,:,:)) /= NGLOB_INFINITE) &
+      call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in infinite region')
+  endif
+
+  end subroutine read_mesh_databases_INF
+
 !
 !-------------------------------------------------------------------------------------------------
 !
@@ -834,6 +1115,9 @@ subroutine read_mesh_databases_coupling()
   use specfem_par_innercore
   use specfem_par_outercore
 
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+
   implicit none
 
   ! local parameters
@@ -922,6 +1206,34 @@ subroutine read_mesh_databases_coupling()
   nspec2D_xmin_inner_core = 0; nspec2D_xmax_inner_core = 0
   nspec2D_ymin_inner_core = 0; nspec2D_ymax_inner_core = 0
 
+  ! infinite regions
+  if (FULL_GRAVITY) then
+    ! transition-to-infinite
+    if (ADD_TRINF) then
+      allocate(ibelm_xmin_trinfinite(NSPEC2DMAX_XMIN_XMAX_TRINF),ibelm_xmax_trinfinite(NSPEC2DMAX_XMIN_XMAX_TRINF), &
+               ibelm_ymin_trinfinite(NSPEC2DMAX_YMIN_YMAX_TRINF),ibelm_ymax_trinfinite(NSPEC2DMAX_YMIN_YMAX_TRINF), &
+               ibelm_bottom_trinfinite(NSPEC2D_BOTTOM_TRINF),ibelm_top_trinfinite(NSPEC2D_TOP_TRINF),stat=ier)
+      if (ier /= 0) stop 'Error allocating arrays ibelm_xmin_trinfinite,..'
+      ibelm_xmin_trinfinite(:) = 0; ibelm_xmax_trinfinite(:) = 0
+      ibelm_ymin_trinfinite(:) = 0; ibelm_ymax_trinfinite(:) = 0
+      ibelm_bottom_trinfinite(:) = 0; ibelm_top_trinfinite(:) = 0
+    endif
+    nspec2D_xmin_trinfinite = 0; nspec2D_xmax_trinfinite = 0
+    nspec2D_ymin_trinfinite = 0; nspec2D_ymax_trinfinite = 0
+
+    ! infinite
+    allocate(ibelm_xmin_infinite(NSPEC2DMAX_XMIN_XMAX_INF),ibelm_xmax_infinite(NSPEC2DMAX_XMIN_XMAX_INF), &
+             ibelm_ymin_infinite(NSPEC2DMAX_YMIN_YMAX_INF),ibelm_ymax_infinite(NSPEC2DMAX_YMIN_YMAX_INF), &
+             ibelm_bottom_infinite(NSPEC2D_BOTTOM_INF),ibelm_top_infinite(NSPEC2D_TOP_INF),stat=ier)
+    if (ier /= 0) stop 'Error allocating arrays ibelm_xmin_infinite,..'
+    ibelm_xmin_infinite(:) = 0; ibelm_xmax_infinite(:) = 0
+    ibelm_ymin_infinite(:) = 0; ibelm_ymax_infinite(:) = 0
+    ibelm_bottom_infinite(:) = 0; ibelm_top_infinite(:) = 0
+
+    nspec2D_xmin_infinite = 0; nspec2D_xmax_infinite = 0
+    nspec2D_ymin_infinite = 0; nspec2D_ymax_infinite = 0
+  endif
+
   ! reads in arrays
   if (I_should_read_the_database) then
     if (ADIOS_FOR_ARRAYS_SOLVER) then
@@ -1044,6 +1356,69 @@ subroutine read_mesh_databases_coupling()
         close(IIN)
       endif
 
+      ! full gravity
+      if (FULL_GRAVITY) then
+        if (ADD_TRINF) then
+          !
+          ! transition-to-infinite
+          !
+          if (NSPEC_TRINFINITE > 0) then
+            ! create name of database
+            call create_name_database(prname,myrank,IREGION_TRINFINITE,LOCAL_PATH)
+
+            ! read info
+            open(unit=IIN,file=prname(1:len_trim(prname))//'boundary.bin', &
+                  status='old',form='unformatted',action='read',iostat=ier)
+            if (ier /= 0 ) call exit_mpi(myrank,'Error opening trinfinite boundary.bin file')
+
+            read(IIN) nspec2D_xmin_trinfinite
+            read(IIN) nspec2D_xmax_trinfinite
+            read(IIN) nspec2D_ymin_trinfinite
+            read(IIN) nspec2D_ymax_trinfinite
+            read(IIN) njunk1
+            read(IIN) njunk2
+
+            ! boundary parameters
+            read(IIN) ibelm_xmin_trinfinite
+            read(IIN) ibelm_xmax_trinfinite
+            read(IIN) ibelm_ymin_trinfinite
+            read(IIN) ibelm_ymax_trinfinite
+            read(IIN) ibelm_bottom_trinfinite
+            read(IIN) ibelm_top_trinfinite
+            close(IIN)
+          endif
+        endif
+
+        !
+        ! infinite
+        !
+        if (NSPEC_INFINITE > 0) then
+          ! create name of database
+          call create_name_database(prname,myrank,IREGION_INFINITE,LOCAL_PATH)
+
+          ! read info
+          open(unit=IIN,file=prname(1:len_trim(prname))//'boundary.bin', &
+                status='old',form='unformatted',action='read',iostat=ier)
+          if (ier /= 0 ) call exit_mpi(myrank,'Error opening infinite boundary.bin file')
+
+          read(IIN) nspec2D_xmin_infinite
+          read(IIN) nspec2D_xmax_infinite
+          read(IIN) nspec2D_ymin_infinite
+          read(IIN) nspec2D_ymax_infinite
+          read(IIN) njunk1
+          read(IIN) njunk2
+
+          ! boundary parameters
+          read(IIN) ibelm_xmin_infinite
+          read(IIN) ibelm_xmax_infinite
+          read(IIN) ibelm_ymin_infinite
+          read(IIN) ibelm_ymax_infinite
+          read(IIN) ibelm_bottom_infinite
+          read(IIN) ibelm_top_infinite
+          close(IIN)
+        endif
+      endif
+
       ! -- Boundary Mesh for crust and mantle ---
       if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
         if (NSPEC_CRUST_MANTLE > 0) then
@@ -1099,6 +1474,23 @@ subroutine read_mesh_databases_coupling()
       nspec2D_ymax_inner_core < 0 .or. nspec2d_ymax_inner_core > NSPEC2DMAX_YMIN_YMAX_IC ) &
     call exit_mpi(myrank, 'Error reading inner core boundary')
 
+  if (FULL_GRAVITY) then
+    if (ADD_TRINF) then
+      ! transition-to-infinite
+      if (nspec2D_xmin_trinfinite < 0 .or. nspec2d_xmin_trinfinite > NSPEC2DMAX_XMIN_XMAX_TRINF .or. &
+          nspec2D_xmax_trinfinite < 0 .or. nspec2d_xmax_trinfinite > NSPEC2DMAX_XMIN_XMAX_TRINF .or. &
+          nspec2D_ymin_trinfinite < 0 .or. nspec2d_ymin_trinfinite > NSPEC2DMAX_YMIN_YMAX_TRINF .or. &
+          nspec2D_ymax_trinfinite < 0 .or. nspec2d_ymax_trinfinite > NSPEC2DMAX_YMIN_YMAX_TRINF ) &
+        call exit_mpi(myrank, 'Error reading trinfinite boundary')
+    endif
+    ! infinite
+    if (nspec2D_xmin_infinite < 0 .or. nspec2d_xmin_infinite > NSPEC2DMAX_XMIN_XMAX_INF .or. &
+        nspec2D_xmax_infinite < 0 .or. nspec2d_xmax_infinite > NSPEC2DMAX_XMIN_XMAX_INF .or. &
+        nspec2D_ymin_infinite < 0 .or. nspec2d_ymin_infinite > NSPEC2DMAX_YMIN_YMAX_INF .or. &
+        nspec2D_ymax_infinite < 0 .or. nspec2d_ymax_infinite > NSPEC2DMAX_YMIN_YMAX_INF ) &
+      call exit_mpi(myrank, 'Error reading infinite boundary')
+  endif
+
   ! Boundary Mesh for crust and mantle
   if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
     allocate(moho_kl(NGLLX,NGLLY,NSPEC2D_MOHO), &
@@ -1247,6 +1639,9 @@ subroutine read_mesh_databases_MPI()
   use specfem_par_crustmantle
   use specfem_par_innercore
   use specfem_par_outercore
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+  use specfem_par_full_gravity
   implicit none
 
   ! local parameters
@@ -1257,12 +1652,16 @@ subroutine read_mesh_databases_MPI()
   integer :: shape_2d(2), shape_3d(3)
 
   ! read MPI interfaces from file
-
   num_interfaces_crust_mantle = 0
   num_interfaces_outer_core = 0
   num_interfaces_inner_core = 0
 
+  num_interfaces_trinfinite = 0
+  num_interfaces_infinite = 0
+
+  !
   ! crust mantle
+  !
   if (I_should_read_the_database .and. NSPEC_CRUST_MANTLE > 0) then
     if (ADIOS_FOR_MPI_ARRAYS) then
       call read_mesh_databases_MPI_adios(IREGION_CRUST_MANTLE)
@@ -1357,7 +1756,9 @@ subroutine read_mesh_databases_MPI()
     b_request_send_vector_cm(:) = 0; b_request_recv_vector_cm(:) = 0
   endif
 
+  !
   ! outer core
+  !
   if (I_should_read_the_database .and. NSPEC_OUTER_CORE > 0) then
     if (ADIOS_FOR_MPI_ARRAYS) then
       call read_mesh_databases_MPI_adios(IREGION_OUTER_CORE)
@@ -1449,7 +1850,9 @@ subroutine read_mesh_databases_MPI()
     b_request_send_scalar_oc(:) = 0; b_request_recv_scalar_oc(:) = 0
   endif
 
+  !
   ! inner core
+  !
   if (I_should_read_the_database .and. NSPEC_INNER_CORE > 0) then
     if (ADIOS_FOR_MPI_ARRAYS) then
       call read_mesh_databases_MPI_adios(IREGION_INNER_CORE)
@@ -1544,6 +1947,199 @@ subroutine read_mesh_databases_MPI()
     b_request_send_vector_ic(:) = 0; b_request_recv_vector_ic(:) = 0
   endif
 
+  ! full gravity
+  if (FULL_GRAVITY) then
+    if (ADD_TRINF) then
+      !
+      ! transition-to-infinite
+      !
+      if (I_should_read_the_database .and. NSPEC_TRINFINITE > 0) then
+        if (ADIOS_FOR_MPI_ARRAYS) then
+          call read_mesh_databases_MPI_adios(IREGION_TRINFINITE)
+        else
+          call read_mesh_databases_MPI_TRINF()
+        endif
+      endif
+      call bcast_mesh_databases_MPI_TRINF()
+
+      ! checks interface values read
+      if (NSPEC_TRINFINITE > 0) then
+        if (minval(my_neighbors_trinfinite) < 0 .or. maxval(my_neighbors_trinfinite) >= NPROCTOT) then
+          print *,'Error: invalid MPI neighbors min/max',minval(my_neighbors_trinfinite),maxval(my_neighbors_trinfinite),NPROCTOT
+          call exit_mpi(myrank,'Error invalid MPI neighbors trinfinite')
+        endif
+      endif
+
+      do i = 1,num_interfaces_trinfinite
+        ! number of points on interface
+        num_poin = nibool_interfaces_trinfinite(i)
+        if (num_poin <= 0 .or. num_poin > NGLOB_TRINFINITE) then
+          print *,'Error: invalid nibool_interfaces_trinfinite ',num_poin,'interface',i,'nglob',NGLOB_TRINFINITE
+          call exit_mpi(myrank,'Error invalid nibool_interfaces_trinfinite')
+        endif
+        ! iglob min/max
+        iglob_min = minval(ibool_interfaces_trinfinite(1:num_poin,i))
+        iglob_max = maxval(ibool_interfaces_trinfinite(1:num_poin,i))
+        if (iglob_min <= 0 .or. iglob_max > NGLOB_TRINFINITE) then
+          print *,'Error: invalid ibool_interfaces_trinfinite min/max ',iglob_min,iglob_max,'interface',i,'nglob',NGLOB_TRINFINITE
+          call exit_mpi(myrank,'Error invalid ibool_interfaces_trinfinite')
+        endif
+      enddo
+      ! checks min/max
+      if (num_phase_ispec_trinfinite > 0) then
+        do i = 1,2
+          if (i == 1) then
+            num_elements = nspec_outer_trinfinite
+          else
+            num_elements = nspec_inner_trinfinite
+          endif
+          ispec_min = minval(phase_ispec_inner_trinfinite(1:num_elements,i))
+          ispec_max = maxval(phase_ispec_inner_trinfinite(1:num_elements,i))
+          if (ispec_min <= 0 .or. ispec_max > NSPEC_TRINFINITE) then
+            print *,'Error: invalid phase_ispec_inner_trinfinite min/max ',ispec_min,ispec_max,'phase',i, &
+                    nspec_outer_trinfinite,nspec_inner_trinfinite,'nspec',NSPEC_TRINFINITE
+            call exit_mpi(myrank,'Error invalid phase_ispec_inner_trinfinite')
+          endif
+        enddo
+      endif
+
+      ! MPI buffers
+      if (USE_CUDA_AWARE_MPI) then
+        ! allocates buffers on GPU
+        shape_2d(1) = max_nibool_interfaces_trinfinite
+        shape_2d(2) = num_interfaces_trinfinite
+        call allocate_gpu_buffer_2d(buffer_send_scalar_trinfinite,shape_2d)
+        call allocate_gpu_buffer_2d(buffer_recv_scalar_trinfinite,shape_2d)
+      else
+        ! allocates buffers on CPU
+        allocate(buffer_send_scalar_trinfinite(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite), &
+                 buffer_recv_scalar_trinfinite(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite),stat=ier)
+        if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array buffer_send_scalar_trinfinite etc.')
+        buffer_send_scalar_trinfinite(:,:) = 0.0; buffer_recv_scalar_trinfinite(:,:) = 0.0
+      endif
+      ! request buffers
+      allocate(request_send_scalar_trinfinite(num_interfaces_trinfinite), &
+               request_recv_scalar_trinfinite(num_interfaces_trinfinite),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array request_send_scalar_trinf etc.')
+      request_send_scalar_trinfinite(:) = 0; request_recv_scalar_trinfinite(:) = 0
+
+      if (SIMULATION_TYPE == 3) then
+        if (USE_CUDA_AWARE_MPI) then
+          ! allocates buffers on GPU
+          shape_2d(1) = max_nibool_interfaces_trinfinite
+          shape_2d(2) = num_interfaces_trinfinite
+          call allocate_gpu_buffer_2d(b_buffer_send_scalar_trinfinite,shape_2d)
+          call allocate_gpu_buffer_2d(b_buffer_recv_scalar_trinfinite,shape_2d)
+        else
+          ! allocates buffers on CPU
+          allocate(b_buffer_send_scalar_trinfinite(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite), &
+                   b_buffer_recv_scalar_trinfinite(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite),stat=ier)
+          if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array b_buffer_send_scalar_trinfinite etc.')
+          b_buffer_send_scalar_trinfinite(:,:) = 0.0; b_buffer_recv_scalar_trinfinite(:,:) = 0.0
+        endif
+        ! request buffers
+        allocate(b_request_send_scalar_trinfinite(num_interfaces_trinfinite), &
+                 b_request_recv_scalar_trinfinite(num_interfaces_trinfinite),stat=ier)
+        if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array b_request_send_scalar_trinfinite etc.')
+        b_request_send_scalar_trinfinite(:) = 0; b_request_recv_scalar_trinfinite(:) = 0
+      endif
+    endif
+
+    !
+    ! infinite
+    !
+    if (I_should_read_the_database .and. NSPEC_INFINITE > 0) then
+      if (ADIOS_FOR_MPI_ARRAYS) then
+        call read_mesh_databases_MPI_adios(IREGION_INFINITE)
+      else
+        call read_mesh_databases_MPI_INF()
+      endif
+    endif
+    call bcast_mesh_databases_MPI_INF()
+
+    ! checks interface values read
+    if (NSPEC_INFINITE > 0) then
+      if (minval(my_neighbors_infinite) < 0 .or. maxval(my_neighbors_infinite) >= NPROCTOT) then
+        print *,'Error: invalid MPI neighbors min/max',minval(my_neighbors_infinite),maxval(my_neighbors_infinite),NPROCTOT
+        call exit_mpi(myrank,'Error invalid MPI neighbors infinite')
+      endif
+    endif
+
+    do i = 1,num_interfaces_infinite
+      ! number of points on interface
+      num_poin = nibool_interfaces_infinite(i)
+      if (num_poin <= 0 .or. num_poin > NGLOB_INFINITE) then
+        print *,'Error: invalid nibool_interfaces_infinite ',num_poin,'interface',i,'nglob',NGLOB_INFINITE
+        call exit_mpi(myrank,'Error invalid nibool_interfaces_infinite')
+      endif
+      ! iglob min/max
+      iglob_min = minval(ibool_interfaces_infinite(1:num_poin,i))
+      iglob_max = maxval(ibool_interfaces_infinite(1:num_poin,i))
+      if (iglob_min <= 0 .or. iglob_max > NGLOB_INFINITE) then
+        print *,'Error: invalid ibool_interfaces_infinite min/max ',iglob_min,iglob_max,'interface',i,'nglob',NGLOB_INFINITE
+        call exit_mpi(myrank,'Error invalid ibool_interfaces_infinite')
+      endif
+    enddo
+    ! checks min/max
+    if (num_phase_ispec_infinite > 0) then
+      do i = 1,2
+        if (i == 1) then
+          num_elements = nspec_outer_infinite
+        else
+          num_elements = nspec_inner_infinite
+        endif
+        ispec_min = minval(phase_ispec_inner_infinite(1:num_elements,i))
+        ispec_max = maxval(phase_ispec_inner_infinite(1:num_elements,i))
+        if (ispec_min <= 0 .or. ispec_max > NSPEC_INFINITE) then
+          print *,'Error: invalid phase_ispec_inner_infinite min/max ',ispec_min,ispec_max,'phase',i, &
+                  nspec_outer_infinite,nspec_inner_infinite,'nspec',NSPEC_INFINITE
+          call exit_mpi(myrank,'Error invalid phase_ispec_inner_infinite')
+        endif
+      enddo
+    endif
+
+    ! MPI buffers
+    if (USE_CUDA_AWARE_MPI) then
+      ! allocates buffers on GPU
+      shape_2d(1) = max_nibool_interfaces_infinite
+      shape_2d(2) = num_interfaces_infinite
+      call allocate_gpu_buffer_2d(buffer_send_scalar_infinite,shape_2d)
+      call allocate_gpu_buffer_2d(buffer_recv_scalar_infinite,shape_2d)
+    else
+      ! allocates buffers on CPU
+      allocate(buffer_send_scalar_infinite(max_nibool_interfaces_infinite,num_interfaces_infinite), &
+               buffer_recv_scalar_infinite(max_nibool_interfaces_infinite,num_interfaces_infinite),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array buffer_send_scalar_infinite etc.')
+      buffer_send_scalar_infinite(:,:) = 0.0; buffer_recv_scalar_infinite(:,:) = 0.0
+    endif
+    ! request buffers
+    allocate(request_send_scalar_infinite(num_interfaces_infinite), &
+             request_recv_scalar_infinite(num_interfaces_infinite),stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array request_send_scalar_infinite etc.')
+    request_send_scalar_infinite(:) = 0; request_recv_scalar_infinite(:) = 0
+
+    if (SIMULATION_TYPE == 3) then
+      if (USE_CUDA_AWARE_MPI) then
+        ! allocates buffers on GPU
+        shape_2d(1) = max_nibool_interfaces_infinite
+        shape_2d(2) = num_interfaces_infinite
+        call allocate_gpu_buffer_2d(b_buffer_send_scalar_infinite,shape_2d)
+        call allocate_gpu_buffer_2d(b_buffer_recv_scalar_infinite,shape_2d)
+      else
+        ! allocates buffers on CPU
+        allocate(b_buffer_send_scalar_infinite(max_nibool_interfaces_infinite,num_interfaces_infinite), &
+                 b_buffer_recv_scalar_infinite(max_nibool_interfaces_infinite,num_interfaces_infinite),stat=ier)
+        if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array b_buffer_send_scalar_infinite etc.')
+        b_buffer_send_scalar_infinite(:,:) = 0.0; b_buffer_recv_scalar_infinite(:,:) = 0.0
+      endif
+      ! request buffers
+      allocate(b_request_send_scalar_infinite(num_interfaces_infinite), &
+               b_request_recv_scalar_infinite(num_interfaces_infinite),stat=ier)
+      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array b_request_send_scalar_infinite etc.')
+      b_request_send_scalar_infinite(:) = 0; b_request_recv_scalar_infinite(:) = 0
+    endif
+  endif
+
   ! user output
   if (myrank == 0) then
     write(IMAIN,*) '  for overlapping of communications with calculations:'
@@ -1566,6 +2162,22 @@ subroutine read_mesh_databases_MPI()
       write(IMAIN,*) '  percentage of volume elements in inner core ',100. - percentage_edge,'%'
       write(IMAIN,*)
     endif
+
+    if (FULL_GRAVITY) then
+      if (ADD_TRINF .and. NSPEC_TRINFINITE > 0) then
+        percentage_edge = 100. * nspec_trinfinite / real(NSPEC_TRINFINITE)
+        write(IMAIN,*) '  percentage of edge elements in trinfinite layer ',percentage_edge,'%'
+        write(IMAIN,*) '  percentage of volume elements in trinfinite layer ',100. - percentage_edge,'%'
+        write(IMAIN,*)
+      endif
+      if (NSPEC_INFINITE > 0) then
+        percentage_edge = 100. * nspec_infinite / real(NSPEC_INFINITE)
+        write(IMAIN,*) '  percentage of edge elements in infinite layer ',percentage_edge,'%'
+        write(IMAIN,*) '  percentage of volume elements in infinite layer ',100. - percentage_edge,'%'
+        write(IMAIN,*)
+      endif
+    endif
+
     call flush_IMAIN()
   endif
   ! synchronizes MPI processes
@@ -1860,50 +2472,219 @@ subroutine read_mesh_databases_MPI_IC()
 
   end subroutine read_mesh_databases_MPI_IC
 
-
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine read_mesh_databases_stacey()
+  subroutine read_mesh_databases_MPI_TRINF()
 
   use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_innercore
-  use specfem_par_outercore
-
+  use specfem_par_trinfinite
+  use specfem_par_full_gravity
   implicit none
 
   ! local parameters
   integer :: ier
 
-  ! initializes
-  num_abs_boundary_faces_crust_mantle = 0
-  num_abs_boundary_faces_outer_core = 0
+  ! transition-to-infinite region
 
-  ! reads in arrays
-  if (I_should_read_the_database) then
-    if (ADIOS_FOR_ARRAYS_SOLVER) then
-      call read_mesh_databases_stacey_adios()
-    else
-      ! crust and mantle
-      if (NSPEC_CRUST_MANTLE > 0) then
-        ! create name of database
-        call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+  ! create the name for the database of the current slide and region
+  call create_name_database(prname,myrank,IREGION_TRINFINITE,LOCAL_PATH)
 
-        ! read arrays for Stacey conditions
-        open(unit=IIN,file=prname(1:len_trim(prname))//'stacey.bin', &
-              status='old',form='unformatted',action='read',iostat=ier)
-        if (ier /= 0 ) call exit_MPI(myrank,'Error opening stacey.bin file for crust mantle')
+  open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+       status='old',action='read',form='unformatted',iostat=ier)
+  if (ier /= 0 ) call exit_mpi(myrank,'Error opening solver_data_mpi.bin')
 
-        read(IIN) num_abs_boundary_faces_crust_mantle
+  ! MPI interfaces
+  read(IIN) num_interfaces_trinfinite
+  allocate(my_neighbors_trinfinite(num_interfaces_trinfinite), &
+           nibool_interfaces_trinfinite(num_interfaces_trinfinite), &
+           stat=ier)
+  if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_trinfinite etc.')
+  my_neighbors_trinfinite(:) = 0; nibool_interfaces_trinfinite(:) = 0
 
-        ! allocates absorbing boundary arrays
-        if (num_abs_boundary_faces_crust_mantle > 0) then
-          allocate(abs_boundary_ispec_crust_mantle(num_abs_boundary_faces_crust_mantle),stat=ier)
-          if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
-          allocate(abs_boundary_ijk_crust_mantle(3,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
-          if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
+  if (num_interfaces_trinfinite > 0) then
+    read(IIN) max_nibool_interfaces_trinfinite
+    allocate(ibool_interfaces_trinfinite(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_trinfinite')
+    ibool_interfaces_trinfinite(:,:) = 0
+
+    read(IIN) my_neighbors_trinfinite
+    read(IIN) nibool_interfaces_trinfinite
+    read(IIN) ibool_interfaces_trinfinite
+  else
+    ! dummy array
+    max_nibool_interfaces_trinfinite = 0
+    allocate(ibool_interfaces_trinfinite(0,0),stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_trinfinite')
+    ibool_interfaces_trinfinite(:,:) = 0
+  endif
+
+  ! inner / outer elements
+  read(IIN) nspec_inner_trinfinite,nspec_outer_trinfinite
+  read(IIN) num_phase_ispec_trinfinite
+  if (num_phase_ispec_trinfinite < 0 ) call exit_mpi(myrank,'Error num_phase_ispec_trinfinite is < zero')
+
+  allocate(phase_ispec_inner_trinfinite(num_phase_ispec_trinfinite,2),stat=ier)
+  if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_trinfinite')
+  phase_ispec_inner_trinfinite(:,:) = 0
+
+  if (num_phase_ispec_trinfinite > 0 ) read(IIN) phase_ispec_inner_trinfinite
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    read(IIN) num_colors_outer_trinfinite,num_colors_inner_trinfinite
+
+    allocate(num_elem_colors_trinfinite(num_colors_outer_trinfinite + num_colors_inner_trinfinite), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_trinfinite array')
+    num_elem_colors_trinfinite(:) = 0
+
+    read(IIN) num_elem_colors_trinfinite
+  else
+    ! allocates dummy arrays
+    num_colors_outer_trinfinite = 0
+    num_colors_inner_trinfinite = 0
+    allocate(num_elem_colors_trinfinite(num_colors_outer_trinfinite + num_colors_inner_trinfinite), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_trinfinite array')
+    num_elem_colors_trinfinite(:) = 0
+  endif
+
+  close(IIN)
+
+  end subroutine read_mesh_databases_MPI_TRINF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine read_mesh_databases_MPI_INF()
+
+  use specfem_par
+  use specfem_par_infinite
+  use specfem_par_full_gravity
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! infinite region
+
+  ! create the name for the database of the current slide and region
+  call create_name_database(prname,myrank,IREGION_INFINITE,LOCAL_PATH)
+
+  open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+       status='old',action='read',form='unformatted',iostat=ier)
+  if (ier /= 0 ) call exit_mpi(myrank,'Error opening solver_data_mpi.bin')
+
+  ! MPI interfaces
+  read(IIN) num_interfaces_infinite
+  allocate(my_neighbors_infinite(num_interfaces_infinite), &
+           nibool_interfaces_infinite(num_interfaces_infinite), &
+           stat=ier)
+  if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_infinite etc.')
+  my_neighbors_infinite(:) = 0; nibool_interfaces_infinite(:) = 0
+
+  if (num_interfaces_infinite > 0) then
+    read(IIN) max_nibool_interfaces_infinite
+    allocate(ibool_interfaces_infinite(max_nibool_interfaces_infinite,num_interfaces_infinite), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_infinite')
+    ibool_interfaces_infinite(:,:) = 0
+
+    read(IIN) my_neighbors_infinite
+    read(IIN) nibool_interfaces_infinite
+    read(IIN) ibool_interfaces_infinite
+  else
+    ! dummy array
+    max_nibool_interfaces_infinite = 0
+    allocate(ibool_interfaces_infinite(0,0),stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_infinite')
+    ibool_interfaces_infinite(:,:) = 0
+  endif
+
+  ! inner / outer elements
+  read(IIN) nspec_inner_infinite,nspec_outer_infinite
+  read(IIN) num_phase_ispec_infinite
+  if (num_phase_ispec_infinite < 0 ) call exit_mpi(myrank,'Error num_phase_ispec_infinite is < zero')
+
+  allocate(phase_ispec_inner_infinite(num_phase_ispec_infinite,2),stat=ier)
+  if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_infinite')
+  phase_ispec_inner_infinite(:,:) = 0
+
+  if (num_phase_ispec_infinite > 0 ) read(IIN) phase_ispec_inner_infinite
+
+  ! mesh coloring for GPUs
+  if (USE_MESH_COLORING_GPU) then
+    ! colors
+    read(IIN) num_colors_outer_infinite,num_colors_inner_infinite
+
+    allocate(num_elem_colors_infinite(num_colors_outer_infinite + num_colors_inner_infinite), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_infinite array')
+    num_elem_colors_infinite(:) = 0
+
+    read(IIN) num_elem_colors_infinite
+  else
+    ! allocates dummy arrays
+    num_colors_outer_infinite = 0
+    num_colors_inner_infinite = 0
+    allocate(num_elem_colors_infinite(num_colors_outer_infinite + num_colors_inner_infinite), &
+             stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_infinite array')
+    num_elem_colors_infinite(:) = 0
+  endif
+
+  close(IIN)
+
+  end subroutine read_mesh_databases_MPI_INF
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine read_mesh_databases_stacey()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  ! local parameters
+  integer :: ier
+
+  ! initializes
+  num_abs_boundary_faces_crust_mantle = 0
+  num_abs_boundary_faces_outer_core = 0
+
+  ! reads in arrays
+  if (I_should_read_the_database) then
+    if (ADIOS_FOR_ARRAYS_SOLVER) then
+      call read_mesh_databases_stacey_adios()
+    else
+      ! crust and mantle
+      if (NSPEC_CRUST_MANTLE > 0) then
+        ! create name of database
+        call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+        ! read arrays for Stacey conditions
+        open(unit=IIN,file=prname(1:len_trim(prname))//'stacey.bin', &
+              status='old',form='unformatted',action='read',iostat=ier)
+        if (ier /= 0 ) call exit_MPI(myrank,'Error opening stacey.bin file for crust mantle')
+
+        read(IIN) num_abs_boundary_faces_crust_mantle
+
+        ! allocates absorbing boundary arrays
+        if (num_abs_boundary_faces_crust_mantle > 0) then
+          allocate(abs_boundary_ispec_crust_mantle(num_abs_boundary_faces_crust_mantle),stat=ier)
+          if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
+          allocate(abs_boundary_ijk_crust_mantle(3,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
+          if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
           allocate(abs_boundary_jacobian2Dw_crust_mantle(NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
           if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
           allocate(abs_boundary_normal_crust_mantle(NDIM,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
@@ -2106,705 +2887,3 @@ subroutine read_mesh_databases_regular_kl()
   endif
 
   end subroutine read_mesh_databases_regular_kl
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_CM()
-
-  use specfem_par
-  use specfem_par_crustmantle
-  implicit none
-
-! note: the size(..) function returns either integer(kind=4) or integer(kind=8)
-!       depending on compiler flags (-mcmedium), thus adding a kind argument to have integer(kind=4) output
-
-  !call bcast_all_i_for_database(NSPEC_CRUST_MANTLE, 1)
-  !call bcast_all_i_for_database(NGLOB_CRUST_MANTLE, 1)
-  !call bcast_all_i_for_database(NGLOB_XY_CM, 1)
-
-  if (size(rho_vp_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(rho_vp_crust_mantle(1,1,1,1), size(rho_vp_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(rho_vs_crust_mantle(1,1,1,1), size(rho_vs_crust_mantle,kind=4))
-  endif
-
-  if (size(xstore_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(xstore_crust_mantle(1), size(xstore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(ystore_crust_mantle(1), size(ystore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(zstore_crust_mantle(1), size(zstore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(xix_crust_mantle(1,1,1,1), size(xix_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(xiy_crust_mantle(1,1,1,1), size(xiy_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(xiz_crust_mantle(1,1,1,1), size(xiz_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(etax_crust_mantle(1,1,1,1), size(etax_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(etay_crust_mantle(1,1,1,1), size(etay_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(etaz_crust_mantle(1,1,1,1), size(etaz_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(gammax_crust_mantle(1,1,1,1), size(gammax_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(gammay_crust_mantle(1,1,1,1), size(gammay_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(gammaz_crust_mantle(1,1,1,1), size(gammaz_crust_mantle,kind=4))
-  endif
-
-  if (size(rhostore_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(rhostore_crust_mantle(1,1,1,1), size(rhostore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(kappavstore_crust_mantle(1,1,1,1), size(kappavstore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(muvstore_crust_mantle(1,1,1,1), size(muvstore_crust_mantle,kind=4))
-  endif
-
-  if (size(kappahstore_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(kappahstore_crust_mantle(1,1,1,1), size(kappahstore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(muhstore_crust_mantle(1,1,1,1), size(muhstore_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(eta_anisostore_crust_mantle(1,1,1,1), size(eta_anisostore_crust_mantle,kind=4))
-  endif
-
-  if (size(c11store_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(c11store_crust_mantle(1,1,1,1), size(c11store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c12store_crust_mantle(1,1,1,1), size(c12store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c13store_crust_mantle(1,1,1,1), size(c13store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c14store_crust_mantle(1,1,1,1), size(c14store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c15store_crust_mantle(1,1,1,1), size(c15store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c16store_crust_mantle(1,1,1,1), size(c16store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c22store_crust_mantle(1,1,1,1), size(c22store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c23store_crust_mantle(1,1,1,1), size(c23store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c24store_crust_mantle(1,1,1,1), size(c24store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c25store_crust_mantle(1,1,1,1), size(c25store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c26store_crust_mantle(1,1,1,1), size(c26store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c33store_crust_mantle(1,1,1,1), size(c33store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c34store_crust_mantle(1,1,1,1), size(c34store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c35store_crust_mantle(1,1,1,1), size(c35store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c36store_crust_mantle(1,1,1,1), size(c36store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c44store_crust_mantle(1,1,1,1), size(c44store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c45store_crust_mantle(1,1,1,1), size(c45store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c46store_crust_mantle(1,1,1,1), size(c46store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c55store_crust_mantle(1,1,1,1), size(c55store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c56store_crust_mantle(1,1,1,1), size(c56store_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(c66store_crust_mantle(1,1,1,1), size(c66store_crust_mantle,kind=4))
-  endif
-
-  if (size(mu0store_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(mu0store_crust_mantle(1,1,1,1), size(mu0store_crust_mantle,kind=4))
-  endif
-
-  if (size(ibool_crust_mantle) > 0) then
-    call bcast_all_i_for_database(ibool_crust_mantle(1,1,1,1), size(ibool_crust_mantle,kind=4))
-    call bcast_all_l_for_database(ispec_is_tiso_crust_mantle(1), size(ispec_is_tiso_crust_mantle,kind=4))
-  endif
-
-  if (size(rmassx_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(rmassx_crust_mantle(1), size(rmassx_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(rmassy_crust_mantle(1), size(rmassy_crust_mantle,kind=4))
-  endif
-
-  if (size(rmassz_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(rmassz_crust_mantle(1), size(rmassz_crust_mantle,kind=4))
-  endif
-
-  !call bcast_all_i_for_database(NGLOB_CRUST_MANTLE_OCEANS, 1)
-  if (size(rmass_ocean_load) > 0) then
-    call bcast_all_cr_for_database(rmass_ocean_load(1), size(rmass_ocean_load,kind=4))
-  endif
-
-  if (size(b_rmassx_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(b_rmassx_crust_mantle(1), size(b_rmassx_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(b_rmassy_crust_mantle(1), size(b_rmassy_crust_mantle,kind=4))
-  endif
-
-  end subroutine bcast_mesh_databases_CM
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_OC()
-
-  use specfem_par
-  use specfem_par_outercore
-  implicit none
-
-  !call bcast_all_i_for_database(NSPEC_OUTER_CORE, 1)
-  !call bcast_all_i_for_database(NGLOB_OUTER_CORE, 1)
-
-  if (size(vp_outer_core) > 0) then
-    call bcast_all_cr_for_database(vp_outer_core(1,1,1,1), size(vp_outer_core,kind=4))
-  endif
-
-  if (size(xstore_outer_core) > 0) then
-    call bcast_all_cr_for_database(xstore_outer_core(1), size(xstore_outer_core,kind=4))
-    call bcast_all_cr_for_database(ystore_outer_core(1), size(ystore_outer_core,kind=4))
-    call bcast_all_cr_for_database(zstore_outer_core(1), size(zstore_outer_core,kind=4))
-  endif
-
-  if (size(xix_outer_core) > 0) then
-    call bcast_all_cr_for_database(xix_outer_core(1,1,1,1), size(xix_outer_core,kind=4))
-    call bcast_all_cr_for_database(xiy_outer_core(1,1,1,1), size(xiy_outer_core,kind=4))
-    call bcast_all_cr_for_database(xiz_outer_core(1,1,1,1), size(xiz_outer_core,kind=4))
-    call bcast_all_cr_for_database(etax_outer_core(1,1,1,1), size(etax_outer_core,kind=4))
-    call bcast_all_cr_for_database(etay_outer_core(1,1,1,1), size(etay_outer_core,kind=4))
-    call bcast_all_cr_for_database(etaz_outer_core(1,1,1,1), size(etaz_outer_core,kind=4))
-    call bcast_all_cr_for_database(gammax_outer_core(1,1,1,1), size(gammax_outer_core,kind=4))
-    call bcast_all_cr_for_database(gammay_outer_core(1,1,1,1), size(gammay_outer_core,kind=4))
-    call bcast_all_cr_for_database(gammaz_outer_core(1,1,1,1), size(gammaz_outer_core,kind=4))
-  endif
-
-  if (size(rhostore_outer_core) > 0) then
-    call bcast_all_cr_for_database(rhostore_outer_core(1,1,1,1), size(rhostore_outer_core,kind=4))
-    call bcast_all_cr_for_database(kappavstore_outer_core(1,1,1,1), size(kappavstore_outer_core,kind=4))
-  endif
-
-  if (size(ibool_outer_core) > 0) then
-    call bcast_all_i_for_database(ibool_outer_core(1,1,1,1), size(ibool_outer_core,kind=4))
-  endif
-
-  if (size(rmass_outer_core) > 0) then
-    call bcast_all_cr_for_database(rmass_outer_core(1), size(rmass_outer_core,kind=4))
-  endif
-
-  end subroutine bcast_mesh_databases_OC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_IC()
-
-  use specfem_par
-  use specfem_par_innercore
-  implicit none
-
-  !call bcast_all_i_for_database(NSPEC_INNER_CORE, 1)
-  !call bcast_all_i_for_database(NGLOB_INNER_CORE, 1)
-  !call bcast_all_i_for_database(NGLOB_XY_IC, 1)
-
-  if (size(xstore_inner_core) > 0) then
-    call bcast_all_cr_for_database(xstore_inner_core(1), size(xstore_inner_core,kind=4))
-    call bcast_all_cr_for_database(ystore_inner_core(1), size(ystore_inner_core,kind=4))
-    call bcast_all_cr_for_database(zstore_inner_core(1), size(zstore_inner_core,kind=4))
-  endif
-
-  if (size(xix_inner_core) > 0) then
-    call bcast_all_cr_for_database(xix_inner_core(1,1,1,1), size(xix_inner_core,kind=4))
-    call bcast_all_cr_for_database(xiy_inner_core(1,1,1,1), size(xiy_inner_core,kind=4))
-    call bcast_all_cr_for_database(xiz_inner_core(1,1,1,1), size(xiz_inner_core,kind=4))
-    call bcast_all_cr_for_database(etax_inner_core(1,1,1,1), size(etax_inner_core,kind=4))
-    call bcast_all_cr_for_database(etay_inner_core(1,1,1,1), size(etay_inner_core,kind=4))
-    call bcast_all_cr_for_database(etaz_inner_core(1,1,1,1), size(etaz_inner_core,kind=4))
-    call bcast_all_cr_for_database(gammax_inner_core(1,1,1,1), size(gammax_inner_core,kind=4))
-    call bcast_all_cr_for_database(gammay_inner_core(1,1,1,1), size(gammay_inner_core,kind=4))
-    call bcast_all_cr_for_database(gammaz_inner_core(1,1,1,1), size(gammaz_inner_core,kind=4))
-  endif
-
-  if (size(rhostore_inner_core) > 0) then
-    call bcast_all_cr_for_database(rhostore_inner_core(1,1,1,1), size(rhostore_inner_core,kind=4))
-    call bcast_all_cr_for_database(kappavstore_inner_core(1,1,1,1), size(kappavstore_inner_core,kind=4))
-    call bcast_all_cr_for_database(muvstore_inner_core(1,1,1,1), size(muvstore_inner_core,kind=4))
-  endif
-
-  if (size(c11store_inner_core) > 0) then
-    call bcast_all_cr_for_database(c11store_inner_core(1,1,1,1), size(c11store_inner_core,kind=4))
-    call bcast_all_cr_for_database(c12store_inner_core(1,1,1,1), size(c12store_inner_core,kind=4))
-    call bcast_all_cr_for_database(c13store_inner_core(1,1,1,1), size(c13store_inner_core,kind=4))
-    call bcast_all_cr_for_database(c33store_inner_core(1,1,1,1), size(c33store_inner_core,kind=4))
-    call bcast_all_cr_for_database(c44store_inner_core(1,1,1,1), size(c44store_inner_core,kind=4))
-  endif
-
-  if (size(ibool_inner_core) > 0) then
-    call bcast_all_i_for_database(ibool_inner_core(1,1,1,1), size(ibool_inner_core,kind=4))
-    call bcast_all_i_for_database(idoubling_inner_core(1), size(idoubling_inner_core,kind=4))
-  endif
-
-  if (size(rmassx_inner_core) > 0) then
-    call bcast_all_cr_for_database(rmassx_inner_core(1), size(rmassx_inner_core,kind=4))
-    call bcast_all_cr_for_database(rmassy_inner_core(1), size(rmassy_inner_core,kind=4))
-  endif
-
-  if (size(rmassz_inner_core) > 0) then
-    call bcast_all_cr_for_database(rmassz_inner_core(1), size(rmassz_inner_core,kind=4))
-  endif
-
-  if (size(b_rmassx_inner_core) > 0) then
-    call bcast_all_cr_for_database(b_rmassx_inner_core(1), size(b_rmassx_inner_core,kind=4))
-    call bcast_all_cr_for_database(b_rmassy_inner_core(1), size(b_rmassy_inner_core,kind=4))
-  endif
-
-  end subroutine bcast_mesh_databases_IC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_coupling()
-
-  use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_innercore
-  use specfem_par_outercore
-
-  implicit none
-
-  call bcast_all_i_for_database(nspec2D_xmin_crust_mantle, 1)
-  call bcast_all_i_for_database(nspec2D_xmax_crust_mantle, 1)
-  call bcast_all_i_for_database(nspec2D_ymin_crust_mantle, 1)
-  call bcast_all_i_for_database(nspec2D_ymax_crust_mantle, 1)
-  !call bcast_all_i_for_database(nspec2D_zmin_crust_mantle, 1)
-
-  if (size(ibelm_xmin_crust_mantle) > 0) then
-    call bcast_all_i_for_database(ibelm_xmin_crust_mantle(1), size(ibelm_xmin_crust_mantle,kind=4))
-    call bcast_all_i_for_database(ibelm_xmax_crust_mantle(1), size(ibelm_xmax_crust_mantle,kind=4))
-    call bcast_all_i_for_database(ibelm_ymin_crust_mantle(1), size(ibelm_ymin_crust_mantle,kind=4))
-    call bcast_all_i_for_database(ibelm_ymax_crust_mantle(1), size(ibelm_ymax_crust_mantle,kind=4))
-    call bcast_all_i_for_database(ibelm_bottom_crust_mantle(1), size(ibelm_bottom_crust_mantle,kind=4))
-    call bcast_all_i_for_database(ibelm_top_crust_mantle(1), size(ibelm_top_crust_mantle,kind=4))
-  endif
-
-  if (size(normal_xmin_crust_mantle) > 0) then
-    call bcast_all_cr_for_database(normal_xmin_crust_mantle(1,1,1,1), size(normal_xmin_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(normal_xmax_crust_mantle(1,1,1,1), size(normal_xmax_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(normal_ymin_crust_mantle(1,1,1,1), size(normal_ymin_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(normal_ymax_crust_mantle(1,1,1,1), size(normal_ymax_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(normal_bottom_crust_mantle(1,1,1,1), size(normal_bottom_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(normal_top_crust_mantle(1,1,1,1), size(normal_top_crust_mantle,kind=4))
-
-    call bcast_all_cr_for_database(jacobian2D_xmin_crust_mantle(1,1,1), size(jacobian2D_xmin_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_xmax_crust_mantle(1,1,1), size(jacobian2D_xmax_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_ymin_crust_mantle(1,1,1), size(jacobian2D_ymin_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_ymax_crust_mantle(1,1,1), size(jacobian2D_ymax_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_bottom_crust_mantle(1,1,1), size(jacobian2D_bottom_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_top_crust_mantle(1,1,1), size(jacobian2D_top_crust_mantle,kind=4))
-  endif
-
-  call bcast_all_i_for_database(nspec2D_xmin_outer_core, 1)
-  call bcast_all_i_for_database(nspec2D_xmax_outer_core, 1)
-  call bcast_all_i_for_database(nspec2D_ymin_outer_core, 1)
-  call bcast_all_i_for_database(nspec2D_ymax_outer_core, 1)
-
-  if (size(ibelm_xmin_outer_core) > 0) then
-    call bcast_all_i_for_database(ibelm_xmin_outer_core(1), size(ibelm_xmin_outer_core,kind=4))
-    call bcast_all_i_for_database(ibelm_xmax_outer_core(1), size(ibelm_xmax_outer_core,kind=4))
-    call bcast_all_i_for_database(ibelm_ymin_outer_core(1), size(ibelm_ymin_outer_core,kind=4))
-    call bcast_all_i_for_database(ibelm_ymax_outer_core(1), size(ibelm_ymax_outer_core,kind=4))
-    call bcast_all_i_for_database(ibelm_bottom_outer_core(1), size(ibelm_bottom_outer_core,kind=4))
-    call bcast_all_i_for_database(ibelm_top_outer_core(1), size(ibelm_top_outer_core,kind=4))
-  endif
-
-  if (size(normal_xmin_outer_core) > 0) then
-    call bcast_all_cr_for_database(normal_xmin_outer_core(1,1,1,1), size(normal_xmin_outer_core,kind=4))
-    call bcast_all_cr_for_database(normal_xmax_outer_core(1,1,1,1), size(normal_xmax_outer_core,kind=4))
-    call bcast_all_cr_for_database(normal_ymin_outer_core(1,1,1,1), size(normal_ymin_outer_core,kind=4))
-    call bcast_all_cr_for_database(normal_ymax_outer_core(1,1,1,1), size(normal_ymax_outer_core,kind=4))
-    call bcast_all_cr_for_database(normal_bottom_outer_core(1,1,1,1), size(normal_bottom_outer_core,kind=4))
-    call bcast_all_cr_for_database(normal_top_outer_core(1,1,1,1), size(normal_top_outer_core,kind=4))
-
-    call bcast_all_cr_for_database(jacobian2D_xmin_outer_core(1,1,1), size(jacobian2D_xmin_outer_core,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_xmax_outer_core(1,1,1), size(jacobian2D_xmax_outer_core,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_ymin_outer_core(1,1,1), size(jacobian2D_ymin_outer_core,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_ymax_outer_core(1,1,1), size(jacobian2D_ymax_outer_core,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_bottom_outer_core(1,1,1), size(jacobian2D_bottom_outer_core,kind=4))
-    call bcast_all_cr_for_database(jacobian2D_top_outer_core(1,1,1), size(jacobian2D_top_outer_core,kind=4))
-  endif
-
-  call bcast_all_i_for_database(nspec2D_xmin_inner_core, 1)
-  call bcast_all_i_for_database(nspec2D_xmax_inner_core, 1)
-  call bcast_all_i_for_database(nspec2D_ymin_inner_core, 1)
-  call bcast_all_i_for_database(nspec2D_ymax_inner_core, 1)
-
-  ! boundary parameters
-  if (size(ibelm_xmin_inner_core) > 0) then
-    call bcast_all_i_for_database(ibelm_xmin_inner_core(1), size(ibelm_xmin_inner_core,kind=4))
-    call bcast_all_i_for_database(ibelm_xmax_inner_core(1), size(ibelm_xmax_inner_core,kind=4))
-    call bcast_all_i_for_database(ibelm_ymin_inner_core(1), size(ibelm_ymin_inner_core,kind=4))
-    call bcast_all_i_for_database(ibelm_ymax_inner_core(1), size(ibelm_ymax_inner_core,kind=4))
-    call bcast_all_i_for_database(ibelm_bottom_inner_core(1), size(ibelm_bottom_inner_core,kind=4))
-    call bcast_all_i_for_database(ibelm_top_inner_core(1), size(ibelm_top_inner_core,kind=4))
-  endif
-
-  ! -- Boundary Mesh for crust and mantle ---
-  if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
-    if (size(ibelm_moho_top) > 0) then
-      call bcast_all_i_for_database(ibelm_moho_top(1), size(ibelm_moho_top,kind=4))
-      call bcast_all_i_for_database(ibelm_moho_bot(1), size(ibelm_moho_bot,kind=4))
-      call bcast_all_cr_for_database(normal_moho(1,1,1,1), size(normal_moho,kind=4))
-    endif
-    if (size(ibelm_400_top) > 0) then
-      call bcast_all_i_for_database(ibelm_400_top(1), size(ibelm_400_top,kind=4))
-      call bcast_all_i_for_database(ibelm_400_bot(1), size(ibelm_400_bot,kind=4))
-      call bcast_all_cr_for_database(normal_400(1,1,1,1), size(normal_400,kind=4))
-    endif
-    if (size(ibelm_670_top) > 0) then
-      call bcast_all_i_for_database(ibelm_670_top(1), size(ibelm_670_top,kind=4))
-      call bcast_all_i_for_database(ibelm_670_bot(1), size(ibelm_670_bot,kind=4))
-      call bcast_all_cr_for_database(normal_670(1,1,1,1), size(normal_670,kind=4))
-    endif
-  endif
-
-  end subroutine bcast_mesh_databases_coupling
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_MPI_CM()
-
-  use specfem_par
-  use specfem_par_crustmantle
-  implicit none
-
-  ! local parameters
-  integer :: ier
-
-  ! MPI interfaces
-  call bcast_all_i_for_database(num_interfaces_crust_mantle, 1)
-
-  ! could also test for not allocated, only reader processes have
-  ! allocated these arrays.
-  if (.not. I_should_read_the_database) then
-    allocate(my_neighbors_crust_mantle(num_interfaces_crust_mantle), &
-            nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
-            stat=ier)
-    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_crust_mantle etc.')
-    my_neighbors_crust_mantle(:) = 0; nibool_interfaces_crust_mantle(:) = 0
-  endif
-  if (num_interfaces_crust_mantle > 0) then
-    call bcast_all_i_for_database(max_nibool_interfaces_cm, 1)
-    if (.not. I_should_read_the_database) then
-      allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
-              stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_crust_mantle')
-      ibool_interfaces_crust_mantle(:,:) = 0
-    endif
-
-    call bcast_all_i_for_database(my_neighbors_crust_mantle(1), size(my_neighbors_crust_mantle,kind=4))
-    call bcast_all_i_for_database(nibool_interfaces_crust_mantle(1), size(nibool_interfaces_crust_mantle,kind=4))
-    call bcast_all_i_for_database(ibool_interfaces_crust_mantle(1,1), size(ibool_interfaces_crust_mantle,kind=4))
-  else
-    ! dummy array
-    max_nibool_interfaces_cm = 0
-    if (.not. I_should_read_the_database) then
-      allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_crust_mantle')
-      ibool_interfaces_crust_mantle(:,:) = 0
-    endif
-  endif
-
-  ! inner / outer elements
-  call bcast_all_i_for_database(nspec_inner_crust_mantle, 1)
-  call bcast_all_i_for_database(nspec_outer_crust_mantle, 1)
-  call bcast_all_i_for_database(num_phase_ispec_crust_mantle, 1)
-  if (num_phase_ispec_crust_mantle < 0 ) &
-    call exit_mpi(myrank,'Error num_phase_ispec_crust_mantle is < zero')
-
-  if (.not. I_should_read_the_database) then
-    allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),stat=ier)
-    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_crust_mantle')
-    phase_ispec_inner_crust_mantle(:,:) = 0
-  endif
-
-  if (num_phase_ispec_crust_mantle > 0) then
-    call bcast_all_i_for_database(phase_ispec_inner_crust_mantle(1,1), size(phase_ispec_inner_crust_mantle,kind=4))
-  endif
-
-  ! mesh coloring for GPUs
-  if (USE_MESH_COLORING_GPU) then
-    ! colors
-    call bcast_all_i_for_database(num_colors_outer_crust_mantle, 1)
-    call bcast_all_i_for_database(num_colors_inner_crust_mantle, 1)
-
-    if (.not. I_should_read_the_database) then
-      allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
-              stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_crust_mantle array')
-      num_elem_colors_crust_mantle(:) = 0
-    endif
-
-    call bcast_all_i_for_database(num_elem_colors_crust_mantle(1), size(num_elem_colors_crust_mantle,kind=4))
-  else
-    ! allocates dummy arrays
-    num_colors_outer_crust_mantle = 0
-    num_colors_inner_crust_mantle = 0
-    if (.not. I_should_read_the_database) then
-      allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
-              stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_crust_mantle array')
-      num_elem_colors_crust_mantle(:) = 0
-    endif
-  endif
-
-  end subroutine bcast_mesh_databases_MPI_CM
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_MPI_OC()
-
-  use specfem_par
-  use specfem_par_outercore
-  implicit none
-
-  ! local parameters
-  integer :: ier
-
-  ! MPI interfaces
-  call bcast_all_i_for_database(num_interfaces_outer_core, 1)
-  if (.not. I_should_read_the_database) then
-    allocate(my_neighbors_outer_core(num_interfaces_outer_core), &
-             nibool_interfaces_outer_core(num_interfaces_outer_core), &
-             stat=ier)
-    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_outer_core etc.')
-    my_neighbors_outer_core(:) = 0; nibool_interfaces_outer_core(:) = 0
-  endif
-
-  if (num_interfaces_outer_core > 0) then
-    call bcast_all_i_for_database(max_nibool_interfaces_oc, 1)
-    if (.not. I_should_read_the_database) then
-      allocate(ibool_interfaces_outer_core(max_nibool_interfaces_oc,num_interfaces_outer_core), &
-               stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_outer_core')
-      ibool_interfaces_outer_core(:,:) = 0
-    endif
-  endif
-
-  if (num_interfaces_outer_core > 0) then
-    call bcast_all_i_for_database(my_neighbors_outer_core(1), size(my_neighbors_outer_core,kind=4))
-    call bcast_all_i_for_database(nibool_interfaces_outer_core(1), size(nibool_interfaces_outer_core,kind=4))
-    call bcast_all_i_for_database(ibool_interfaces_outer_core(1,1), size(ibool_interfaces_outer_core,kind=4))
-  else
-    ! dummy array
-    max_nibool_interfaces_oc = 0
-    if (.not. I_should_read_the_database) then
-      allocate(ibool_interfaces_outer_core(0,0),stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_outer_core')
-      ibool_interfaces_outer_core(:,:) = 0
-    endif
-  endif
-
-  ! inner / outer elements
-  call bcast_all_i_for_database(nspec_inner_outer_core, 1)
-  call bcast_all_i_for_database(nspec_outer_outer_core, 1)
-  call bcast_all_i_for_database(num_phase_ispec_outer_core, 1)
-  if (num_phase_ispec_outer_core < 0 ) &
-    call exit_mpi(myrank,'Error num_phase_ispec_outer_core is < zero')
-
-  if (.not. I_should_read_the_database) then
-    allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),stat=ier)
-    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_outer_core')
-    phase_ispec_inner_outer_core(:,:) = 0
-  endif
-
-  if (num_phase_ispec_outer_core > 0) then
-    call bcast_all_i_for_database(phase_ispec_inner_outer_core(1,1), size(phase_ispec_inner_outer_core,kind=4))
-  endif
-
-  ! mesh coloring for GPUs
-  if (USE_MESH_COLORING_GPU) then
-    ! colors
-    call bcast_all_i_for_database(num_colors_outer_outer_core, 1)
-    call bcast_all_i_for_database(num_colors_inner_outer_core, 1)
-
-    if (.not. I_should_read_the_database) then
-      allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
-               stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_outer_core array')
-      num_elem_colors_outer_core(:) = 0
-    endif
-
-    call bcast_all_i_for_database(num_elem_colors_outer_core(1), size(num_elem_colors_outer_core,kind=4))
-  else
-    ! allocates dummy arrays
-    num_colors_outer_outer_core = 0
-    num_colors_inner_outer_core = 0
-    if (.not. I_should_read_the_database) then
-      allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
-               stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_outer_core array')
-      num_elem_colors_outer_core(:) = 0
-    endif
-  endif
-
-  end subroutine bcast_mesh_databases_MPI_OC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_MPI_IC()
-
-  use specfem_par
-  use specfem_par_innercore
-  implicit none
-
-  ! local parameters
-  integer :: ier
-
-  ! MPI interfaces
-  call bcast_all_i_for_database(num_interfaces_inner_core, 1)
-  if (.not. I_should_read_the_database) then
-    allocate(my_neighbors_inner_core(num_interfaces_inner_core), &
-            nibool_interfaces_inner_core(num_interfaces_inner_core), &
-            stat=ier)
-    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_inner_core etc.')
-    my_neighbors_inner_core(:) = 0; nibool_interfaces_inner_core(:) = 0
-  endif
-
-  if (num_interfaces_inner_core > 0) then
-    call bcast_all_i_for_database(max_nibool_interfaces_ic, 1)
-    if (.not. I_should_read_the_database) then
-      allocate(ibool_interfaces_inner_core(max_nibool_interfaces_ic,num_interfaces_inner_core), &
-              stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_inner_core')
-      ibool_interfaces_inner_core(:,:) = 0
-    endif
-
-    call bcast_all_i_for_database(my_neighbors_inner_core(1), size(my_neighbors_inner_core,kind=4))
-    call bcast_all_i_for_database(nibool_interfaces_inner_core(1), size(nibool_interfaces_inner_core,kind=4))
-    call bcast_all_i_for_database(ibool_interfaces_inner_core(1,1), size(ibool_interfaces_inner_core,kind=4))
-  else
-    ! dummy array
-    max_nibool_interfaces_ic = 0
-    if (.not. I_should_read_the_database) then
-      allocate(ibool_interfaces_inner_core(0,0),stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array dummy ibool_interfaces_inner_core')
-      ibool_interfaces_inner_core(:,:) = 0
-    endif
-  endif
-
-  ! inner / outer elements
-  call bcast_all_i_for_database(nspec_inner_inner_core, 1)
-  call bcast_all_i_for_database(nspec_outer_inner_core, 1)
-  call bcast_all_i_for_database(num_phase_ispec_inner_core, 1)
-  if (num_phase_ispec_inner_core < 0 ) &
-    call exit_mpi(myrank,'Error num_phase_ispec_inner_core is < zero')
-
-  if (.not. I_should_read_the_database) then
-    allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2), &
-            stat=ier)
-    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_inner_core')
-    phase_ispec_inner_inner_core(:,:) = 0
-  endif
-
-  if (num_phase_ispec_inner_core > 0) then
-    call bcast_all_i_for_database(phase_ispec_inner_inner_core(1,1), size(phase_ispec_inner_inner_core,kind=4))
-  endif
-
-  ! mesh coloring for GPUs
-  if (USE_MESH_COLORING_GPU) then
-    ! colors
-    call bcast_all_i_for_database(num_colors_outer_inner_core, 1)
-    call bcast_all_i_for_database(num_colors_inner_inner_core, 1)
-
-    if (.not. I_should_read_the_database) then
-      allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
-              stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_inner_core array')
-      num_elem_colors_inner_core(:) = 0
-    endif
-
-    call bcast_all_i_for_database(num_elem_colors_inner_core(1), size(num_elem_colors_inner_core,kind=4))
-  else
-    ! allocates dummy arrays
-    num_colors_outer_inner_core = 0
-    num_colors_inner_inner_core = 0
-    if (.not. I_should_read_the_database) then
-      allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
-              stat=ier)
-      if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_inner_core array')
-      num_elem_colors_inner_core(:) = 0
-    endif
-  endif
-
-  end subroutine bcast_mesh_databases_MPI_IC
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine bcast_mesh_databases_stacey()
-
-  use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_innercore
-  use specfem_par_outercore
-
-  implicit none
-
-  ! local parameters
-  integer :: ier
-
-  ! crust and mantle
-  if (NSPEC_CRUST_MANTLE > 0) then
-    call bcast_all_i_for_database(num_abs_boundary_faces_crust_mantle,1)
-
-    if (.not. I_should_read_the_database) then
-      ! allocates absorbing boundary arrays
-      if (num_abs_boundary_faces_crust_mantle > 0) then
-        allocate(abs_boundary_ispec_crust_mantle(num_abs_boundary_faces_crust_mantle),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
-        allocate(abs_boundary_ijk_crust_mantle(3,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
-        allocate(abs_boundary_jacobian2Dw_crust_mantle(NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
-        allocate(abs_boundary_normal_crust_mantle(NDIM,NGLLSQUARE,num_abs_boundary_faces_crust_mantle),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_normal')
-        allocate(abs_boundary_npoin_crust_mantle(num_abs_boundary_faces_crust_mantle),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
-        if (ier /= 0) stop 'Error allocating array abs_boundary_ispec etc.'
-      else
-        ! dummy arrays
-        allocate(abs_boundary_ispec_crust_mantle(1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
-        allocate(abs_boundary_ijk_crust_mantle(1,1,1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
-        allocate(abs_boundary_jacobian2Dw_crust_mantle(1,1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
-        allocate(abs_boundary_normal_crust_mantle(1,1,1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_normal')
-        allocate(abs_boundary_npoin_crust_mantle(1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
-      endif
-      abs_boundary_ispec_crust_mantle(:) = 0; abs_boundary_npoin_crust_mantle(:) = 0
-      abs_boundary_ijk_crust_mantle(:,:,:) = 0
-      abs_boundary_jacobian2Dw_crust_mantle(:,:) = 0.0; abs_boundary_normal_crust_mantle(:,:,:) = 0.0
-    endif
-    call bcast_all_i_for_database(abs_boundary_ispec_crust_mantle(1), size(abs_boundary_ispec_crust_mantle,kind=4))
-    call bcast_all_i_for_database(abs_boundary_npoin_crust_mantle(1), size(abs_boundary_npoin_crust_mantle,kind=4))
-    call bcast_all_i_for_database(abs_boundary_ijk_crust_mantle(1,1,1), size(abs_boundary_ijk_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(abs_boundary_jacobian2Dw_crust_mantle(1,1), size(abs_boundary_jacobian2Dw_crust_mantle,kind=4))
-    call bcast_all_cr_for_database(abs_boundary_normal_crust_mantle(1,1,1), size(abs_boundary_normal_crust_mantle,kind=4))
-  endif
-
-  ! outer core
-  if (NSPEC_OUTER_CORE > 0) then
-    call bcast_all_i_for_database(num_abs_boundary_faces_outer_core,1)
-
-    if (.not. I_should_read_the_database) then
-      ! allocates absorbing boundary arrays
-      if (num_abs_boundary_faces_outer_core > 0) then
-        allocate(abs_boundary_ispec_outer_core(num_abs_boundary_faces_outer_core),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
-        allocate(abs_boundary_ijk_outer_core(3,NGLLSQUARE,num_abs_boundary_faces_outer_core),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
-        allocate(abs_boundary_jacobian2Dw_outer_core(NGLLSQUARE,num_abs_boundary_faces_outer_core),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
-        allocate(abs_boundary_npoin_outer_core(num_abs_boundary_faces_outer_core),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
-        if (ier /= 0) stop 'Error allocating array abs_boundary_ispec etc.'
-      else
-        ! dummy arrays
-        allocate(abs_boundary_ispec_outer_core(1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ispec')
-        allocate(abs_boundary_ijk_outer_core(1,1,1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_ijk')
-        allocate(abs_boundary_jacobian2Dw_outer_core(1,1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_jacobian2Dw')
-        allocate(abs_boundary_npoin_outer_core(1),stat=ier)
-        if (ier /= 0) call exit_mpi(myrank,'Error allocating array abs_boundary_npoin')
-      endif
-      abs_boundary_ispec_outer_core(:) = 0; abs_boundary_npoin_outer_core(:) = 0
-      abs_boundary_ijk_outer_core(:,:,:) = 0
-      abs_boundary_jacobian2Dw_outer_core(:,:) = 0.0
-    endif
-    call bcast_all_i_for_database(abs_boundary_ispec_outer_core(1), size(abs_boundary_ispec_outer_core,kind=4))
-    call bcast_all_i_for_database(abs_boundary_npoin_outer_core(1), size(abs_boundary_npoin_outer_core,kind=4))
-    call bcast_all_i_for_database(abs_boundary_ijk_outer_core(1,1,1), size(abs_boundary_ijk_outer_core,kind=4))
-    call bcast_all_cr_for_database(abs_boundary_jacobian2Dw_outer_core(1,1), size(abs_boundary_jacobian2Dw_outer_core,kind=4))
-  endif
-
-  end subroutine bcast_mesh_databases_stacey
diff --git a/src/specfem3D/read_mesh_databases_adios.f90 b/src/specfem3D/read_mesh_databases_adios.f90
index 4ee48db82..e57aa4720 100644
--- a/src/specfem3D/read_mesh_databases_adios.f90
+++ b/src/specfem3D/read_mesh_databases_adios.f90
@@ -37,6 +37,9 @@ subroutine read_mesh_databases_coupling_adios()
   use specfem_par_innercore
   use specfem_par_outercore
 
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+
   use adios_helpers_mod
   use manager_adios
 
@@ -407,6 +410,118 @@ subroutine read_mesh_databases_coupling_adios()
     call delete_adios_selection(sel)
   endif
 
+  ! transition-to-infinite
+  if (NSPEC_TRINFINITE > 0) then
+    write(region_name,"('reg',i1, '/')") IREGION_TRINFINITE
+
+    ! number of elements
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_xmin",nspec2D_xmin_trinfinite)
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_xmax",nspec2D_xmax_trinfinite)
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_ymin",nspec2D_ymin_trinfinite)
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_ymax",nspec2D_ymax_trinfinite)
+
+    ! boundary elements
+    local_dim = NSPEC2DMAX_XMIN_XMAX_TRINF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_xmin/array", ibelm_xmin_trinfinite)
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_xmax/array", ibelm_xmax_trinfinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+
+    local_dim = NSPEC2DMAX_YMIN_YMAX_TRINF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_ymin/array", ibelm_ymin_trinfinite)
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_ymax/array", ibelm_ymax_trinfinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+
+    local_dim = NSPEC2D_BOTTOM_TRINF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_bottom/array", ibelm_bottom_trinfinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+
+    local_dim = NSPEC2D_TOP_TRINF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_top/array", ibelm_top_trinfinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+  endif
+
+  ! infinite
+  if (NSPEC_INFINITE > 0) then
+    write(region_name,"('reg',i1, '/')") IREGION_INFINITE
+
+    ! number of elements
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_xmin",nspec2D_xmin_infinite)
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_xmax",nspec2D_xmax_infinite)
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_ymin",nspec2D_ymin_infinite)
+    call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "nspec2D_ymax",nspec2D_ymax_infinite)
+
+    ! boundary elements
+    local_dim = NSPEC2DMAX_XMIN_XMAX_INF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_xmin/array", ibelm_xmin_infinite)
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_xmax/array", ibelm_xmax_infinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+
+    local_dim = NSPEC2DMAX_YMIN_YMAX_INF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_ymin/array", ibelm_ymin_infinite)
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_ymax/array", ibelm_ymax_infinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+
+    local_dim = NSPEC2D_BOTTOM_INF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_bottom/array", ibelm_bottom_infinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+
+    local_dim = NSPEC2D_TOP_INF
+    start(1) = local_dim * int(myrank,kind=8); count(1) = local_dim
+    call set_selection_boundingbox(sel, start, count)
+
+    call read_adios_schedule_array(myadios_file, myadios_group, sel, start, count, &
+                                   trim(region_name) // "ibelm_top/array", ibelm_top_infinite)
+
+    call read_adios_perform(myadios_file)
+    call delete_adios_selection(sel)
+  endif
+
   ! closes adios file
   call close_file_adios_read_and_finalize_method(myadios_file)
   call delete_adios_group(myadios_group,"BoundaryReader")
@@ -624,6 +739,10 @@ subroutine read_mesh_databases_MPI_adios(iregion_code)
   use specfem_par_outercore
   use specfem_par_innercore
 
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+  use specfem_par_full_gravity
+
   use adios_helpers_mod
   use manager_adios
 
@@ -683,16 +802,26 @@ subroutine read_mesh_databases_MPI_adios(iregion_code)
   call read_adios_scalar(myadios_file,myadios_group,myrank,trim(region_name) // "num_colors_inner",num_colors_inner)
 
   ! checks
-  if (iregion_code == IREGION_CRUST_MANTLE) then
+  select case(iregion_code)
+  case (IREGION_CRUST_MANTLE)
     nglob_tmp = NGLOB_CRUST_MANTLE
     nspec_tmp = NSPEC_CRUST_MANTLE
-  else if (iregion_code == IREGION_OUTER_CORE) then
+  case (IREGION_OUTER_CORE)
     nglob_tmp = NGLOB_OUTER_CORE
     nspec_tmp = NSPEC_OUTER_CORE
-  else
+  case (IREGION_INNER_CORE)
     nglob_tmp = NGLOB_INNER_CORE
     nspec_tmp = NSPEC_INNER_CORE
-  endif
+  case (IREGION_TRINFINITE)
+    nglob_tmp = NGLOB_TRINFINITE
+    nspec_tmp = NSPEC_TRINFINITE
+  case (IREGION_INFINITE)
+    nglob_tmp = NGLOB_INFINITE
+    nspec_tmp = NSPEC_INFINITE
+  case default
+    call exit_mpi(myrank,'Error invalid iregion_code in read_mesh_databases_MPI_adios() routine')
+  end select
+
   if (num_interfaces < 0) then
     print *,'Error: adios rank ',myrank,' num_interfaces: ',num_interfaces,'should be positive'
     call exit_mpi(myrank,'Error invalid value reading num_interfaces')
@@ -955,6 +1084,100 @@ subroutine read_mesh_databases_MPI_adios(iregion_code)
               tmp_num_elem_colors(1:(num_colors_outer + num_colors_inner))
     endif
 
+  case (IREGION_TRINFINITE)
+    ! transition-to-infinite
+    num_interfaces_trinfinite = num_interfaces
+    max_nibool_interfaces_trinfinite = max_nibool_interfaces
+    num_phase_ispec_trinfinite = num_phase_ispec
+    nspec_inner_trinfinite = nspec_inner
+    nspec_outer_trinfinite = nspec_outer
+    num_colors_outer_trinfinite = num_colors_outer
+    num_colors_inner_trinfinite = num_colors_inner
+
+    ! MPI arrays
+    allocate(my_neighbors_trinfinite(num_interfaces), &
+             nibool_interfaces_trinfinite(num_interfaces),stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_trinfinite etc.')
+    if (num_interfaces > 0) then
+      my_neighbors_trinfinite(1:num_interfaces) = tmp_my_neighbors(1:num_interfaces)
+      nibool_interfaces_trinfinite(1:num_interfaces) = tmp_nibool_interfaces(1:num_interfaces)
+    endif
+
+    allocate(ibool_interfaces_trinfinite(max_nibool_interfaces,num_interfaces), stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_trinfinite')
+    if (num_interfaces > 0) then
+      ibool_interfaces_trinfinite(:,:) = 0
+      do i = 1,num_interfaces
+        ibool_interfaces_trinfinite(1:max_nibool_interfaces,i) = tmp_ibool_interfaces(1:max_nibool_interfaces,i)
+      enddo
+    endif
+
+    allocate(phase_ispec_inner_trinfinite(num_phase_ispec,2),stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_trinfinite')
+    if (num_phase_ispec > 0) then
+      phase_ispec_inner_trinfinite(:,:) = 0
+      ! fills actual values
+      do i = 1,2
+        phase_ispec_inner_trinfinite(1:num_phase_ispec,i) = tmp_phase_ispec_inner(1:num_phase_ispec,i)
+      enddo
+    endif
+
+    ! mesh coloring for GPUs
+    allocate(num_elem_colors_trinfinite(num_colors_outer + num_colors_inner), stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_trinfinite array')
+    if (USE_MESH_COLORING_GPU) then
+      ! colors
+      num_elem_colors_trinfinite(1:(num_colors_outer + num_colors_inner)) = &
+              tmp_num_elem_colors(1:(num_colors_outer + num_colors_inner))
+    endif
+
+  case (IREGION_INFINITE)
+    ! infinite
+    num_interfaces_infinite = num_interfaces
+    max_nibool_interfaces_infinite = max_nibool_interfaces
+    num_phase_ispec_infinite = num_phase_ispec
+    nspec_inner_infinite = nspec_inner
+    nspec_outer_infinite = nspec_outer
+    num_colors_outer_infinite = num_colors_outer
+    num_colors_inner_infinite = num_colors_inner
+
+    ! MPI arrays
+    allocate(my_neighbors_infinite(num_interfaces), &
+             nibool_interfaces_infinite(num_interfaces),stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbors_infinite etc.')
+    if (num_interfaces > 0) then
+      my_neighbors_infinite(1:num_interfaces) = tmp_my_neighbors(1:num_interfaces)
+      nibool_interfaces_infinite(1:num_interfaces) = tmp_nibool_interfaces(1:num_interfaces)
+    endif
+
+    allocate(ibool_interfaces_infinite(max_nibool_interfaces,num_interfaces), stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_infinite')
+    if (num_interfaces > 0) then
+      ibool_interfaces_infinite(:,:) = 0
+      do i = 1,num_interfaces
+        ibool_interfaces_infinite(1:max_nibool_interfaces,i) = tmp_ibool_interfaces(1:max_nibool_interfaces,i)
+      enddo
+    endif
+
+    allocate(phase_ispec_inner_infinite(num_phase_ispec,2),stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating array phase_ispec_inner_infinite')
+    if (num_phase_ispec > 0) then
+      phase_ispec_inner_infinite(:,:) = 0
+      ! fills actual values
+      do i = 1,2
+        phase_ispec_inner_infinite(1:num_phase_ispec,i) = tmp_phase_ispec_inner(1:num_phase_ispec,i)
+      enddo
+    endif
+
+    ! mesh coloring for GPUs
+    allocate(num_elem_colors_infinite(num_colors_outer + num_colors_inner), stat=ierr)
+    if (ierr /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_infinite array')
+    if (USE_MESH_COLORING_GPU) then
+      ! colors
+      num_elem_colors_infinite(1:(num_colors_outer + num_colors_inner)) = &
+              tmp_num_elem_colors(1:(num_colors_outer + num_colors_inner))
+    endif
+
   case default
     stop 'Invalid region case in read_mesh_databases_MPI_arrays_adios'
   end select
diff --git a/src/specfem3D/read_mesh_parameters.F90 b/src/specfem3D/read_mesh_parameters.F90
index 99459b37a..318f71ab2 100644
--- a/src/specfem3D/read_mesh_parameters.F90
+++ b/src/specfem3D/read_mesh_parameters.F90
@@ -43,7 +43,7 @@ subroutine read_mesh_parameters()
   use shared_parameters, only: LOCAL_PATH,SIMULATION_TYPE,SAVE_FORWARD, &
     ABSORBING_CONDITIONS,ATTENUATION, &
     MOVIE_VOLUME,MOVIE_VOLUME_TYPE, &
-    OCEANS,ROTATION
+    OCEANS,ROTATION,FULL_GRAVITY
 
   use constants_solver
 
@@ -122,12 +122,22 @@ subroutine read_mesh_parameters()
     NSPEC_INNER_CORE_STRAIN_ONLY = 0
   endif
 
+  ! full gravity region sizes
+  if (.not. FULL_GRAVITY) then
+    NSPEC_TRINFINITE = 0
+    NGLOB_TRINFINITE = 0
+
+    NSPEC_INFINITE = 0
+    NGLOB_INFINITE = 0
+  endif
+
+  ! adjoint sizes
   if ((SIMULATION_TYPE == 1 .and. SAVE_FORWARD) .or. SIMULATION_TYPE == 3) then
     NSPEC_CRUST_MANTLE_ADJOINT = NSPEC_CRUST_MANTLE       ! NSPEC_REGIONS(IREGION_CRUST_MANTLE)
     NSPEC_OUTER_CORE_ADJOINT = NSPEC_OUTER_CORE           ! NSPEC_REGIONS(IREGION_OUTER_CORE)
     NSPEC_INNER_CORE_ADJOINT = NSPEC_INNER_CORE           ! NSPEC_REGIONS(IREGION_INNER_CORE)
     NSPEC_TRINFINITE_ADJOINT = NSPEC_TRINFINITE
-    NSPEC_INFINITE_ADJOINT   = NSPEC_TRINFINITE
+    NSPEC_INFINITE_ADJOINT   = NSPEC_INFINITE
 
     NGLOB_CRUST_MANTLE_ADJOINT = NGLOB_CRUST_MANTLE       ! NGLOB_REGIONS(IREGION_CRUST_MANTLE)
     NGLOB_OUTER_CORE_ADJOINT = NGLOB_OUTER_CORE           ! NGLOB_REGIONS(IREGION_OUTER_CORE)
diff --git a/src/specfem3D/rules.mk b/src/specfem3D/rules.mk
index cc02190ae..ebd2dbb53 100644
--- a/src/specfem3D/rules.mk
+++ b/src/specfem3D/rules.mk
@@ -61,6 +61,7 @@ specfem3D_SOLVER_OBJECTS = \
 
 specfem3D_SOLVER_OBJECTS += \
 	$O/asdf_data.solverstatic_module.o \
+	$O/bcast_mesh_databases.solverstatic.o \
 	$O/check_stability.solverstatic.o \
 	$O/comp_source_time_function.solverstatic.o \
 	$O/compute_add_sources.solverstatic.o \
@@ -113,6 +114,12 @@ specfem3D_SOLVER_OBJECTS += \
 	$O/read_mesh_parameters.solverstatic.o \
 	$O/read_mesh_databases.solverstatic.o \
 	$O/read_topography_bathymetry.solverstatic.o \
+	$O/SIEM_infinite_element.solverstatic.o \
+	$O/SIEM_math_library.solverstatic.o \
+	$O/SIEM_poisson.solverstatic.o \
+	$O/SIEM_prepare_solver.solverstatic.o \
+	$O/SIEM_solver_mpi.solverstatic.o \
+	$O/SIEM_solver_petsc.solverstatic.o \
 	$O/save_forward_arrays.solverstatic.o \
 	$O/save_kernels.solverstatic.o \
 	$O/save_regular_kernels.solverstatic.o \
diff --git a/src/specfem3D/specfem3D_par.F90 b/src/specfem3D/specfem3D_par.F90
index aed6efbef..52017d70c 100644
--- a/src/specfem3D/specfem3D_par.F90
+++ b/src/specfem3D/specfem3D_par.F90
@@ -1141,8 +1141,8 @@ module specfem_par_full_gravity
   integer, dimension(:), allocatable :: my_neighbors_trinfinite,nibool_interfaces_trinfinite
   integer, dimension(:,:), allocatable :: ibool_interfaces_trinfinite
 
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_trinfinite,buffer_recv_scalar_trinfinite
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: b_buffer_send_scalar_trinfinite,b_buffer_recv_scalar_trinfinite
+  real(kind=CUSTOM_REAL), dimension(:,:), pointer :: buffer_send_scalar_trinfinite,buffer_recv_scalar_trinfinite
+  real(kind=CUSTOM_REAL), dimension(:,:), pointer :: b_buffer_send_scalar_trinfinite,b_buffer_recv_scalar_trinfinite
 
   integer, dimension(:), allocatable :: request_send_scalar_trinfinite,request_recv_scalar_trinfinite
   integer, dimension(:), allocatable :: b_request_send_scalar_trinfinite,b_request_recv_scalar_trinfinite
@@ -1153,8 +1153,8 @@ module specfem_par_full_gravity
   integer, dimension(:), allocatable :: my_neighbors_infinite,nibool_interfaces_infinite
   integer, dimension(:,:), allocatable :: ibool_interfaces_infinite
 
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_infinite,buffer_recv_scalar_infinite
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: b_buffer_send_scalar_infinite,b_buffer_recv_scalar_infinite
+  real(kind=CUSTOM_REAL), dimension(:,:), pointer :: buffer_send_scalar_infinite,buffer_recv_scalar_infinite
+  real(kind=CUSTOM_REAL), dimension(:,:), pointer :: b_buffer_send_scalar_infinite,b_buffer_recv_scalar_infinite
 
   integer, dimension(:), allocatable :: request_send_scalar_infinite,request_recv_scalar_infinite
   integer, dimension(:), allocatable :: b_request_send_scalar_infinite,b_request_recv_scalar_infinite
@@ -1190,8 +1190,8 @@ module specfem_par_full_gravity
   integer, dimension(:), allocatable :: my_neighbors_outer_core1,nibool_interfaces_outer_core1
   integer, dimension(:,:), allocatable :: ibool_interfaces_outer_core1
 
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_outer_core1,buffer_recv_scalar_outer_core1
-  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: b_buffer_send_scalar_outer_core1,b_buffer_recv_scalar_outer_core1
+  real(kind=CUSTOM_REAL), dimension(:,:), pointer :: buffer_send_scalar_outer_core1,buffer_recv_scalar_outer_core1
+  real(kind=CUSTOM_REAL), dimension(:,:), pointer :: b_buffer_send_scalar_outer_core1,b_buffer_recv_scalar_outer_core1
 
   integer, dimension(:), allocatable :: request_send_scalar_outer_core1,request_recv_scalar_outer_core1
   integer, dimension(:), allocatable :: b_request_send_scalar_outer_core1,b_request_recv_scalar_outer_core1
@@ -1212,6 +1212,9 @@ module specfem_par_full_gravity
   ! parameters for Poisson's equation
   integer :: neq, neq1, b_neq, b_neq1, nnode, nnode1
 
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
   double precision,dimension(NGLLCUBE,NGLLCUBE) :: lagrange_gll
 
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: pgrav_ic       ! pgrav_ic(NGLOB_INNER_CORE)
@@ -1247,8 +1250,10 @@ module specfem_par_full_gravity
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: kmat_sparse1
 
   double precision,dimension(NGLLCUBE_INF,NGLLCUBE_INF) :: lagrange_gll1
+#endif
 
   integer :: nnode_ic1,nnode_oc1,nnode_cm1,nnode_trinf1,nnode_inf1
+
   integer,dimension(:),allocatable :: nmir_ic, nmir_oc, nmir_cm, nmir_trinf, nmir_inf
 
   integer,dimension(:,:),allocatable :: inode_map_ic      ! inode_map_ic(2,NGLOB_INNER_CORE)
@@ -1273,27 +1278,32 @@ module specfem_par_full_gravity
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: gradg_cm ! (6,NGLOB_CRUST_MANTLE)
   ! parameters for Poisson's solver
   integer,dimension(:,:),allocatable :: inode_elmt_cm   ! (NGLLCUBE,NSPEC_CRUST_MANTLE)
-  integer,dimension(:,:),allocatable :: inode_elmt_cm1  ! (NGLL_INF,NSPEC_CRUST_MANTLE)
+  integer,dimension(:,:),allocatable :: inode_elmt_cm1  ! (NGLLCUBE_INF,NSPEC_CRUST_MANTLE)
   integer,dimension(:),allocatable :: gdof_cm, gdof_cm1
 
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_crust_mantle   ! (NGLLCUBE,NGLLCUBE,NSPEC_CRUST_MANTLE)
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: dprecon_crust_mantle         ! (NGLOB_CRUST_MANTLE)
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_crust_mantle1
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: dprecon_crust_mantle1
 
   real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_cm        ! (NDIM,NGLLCUBE,NGLLCUBE,NSPEC_CRUST_MANTLE)
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_cm            ! (NGLL,NSPEC_CRUST_MANTLE)
-  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_cm1       ! (NDIM,NGLL_INF,NGLL_INF,NSPEC_CRUST_MANTLE)
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_cm1           ! (NGLL_INF,NSPEC_CRUST_MANTLE)
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_cm            ! (NGLLCUBE,NSPEC_CRUST_MANTLE)
+  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_cm1       ! (NDIM,NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_CRUST_MANTLE)
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_cm1           ! (NGLLCUBE_INF,NSPEC_CRUST_MANTLE)
+#endif
 
   ! inner core
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: g_ic       ! g_ic(NDIM,NGLOB_INNER_CORE)
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: gradg_ic   ! gradg_ic(6,NGLOB_INNER_CORE)
   ! parameters for Poisson's solver
   integer,dimension(:,:),allocatable :: inode_elmt_ic     ! inode_elmt_ic(NGLLCUBE,NSPEC_INNER_CORE)
-  integer,dimension(:,:),allocatable :: inode_elmt_ic1    ! inode_elmt_ic1(NGLL_INF,NSPEC_INNER_CORE)
+  integer,dimension(:,:),allocatable :: inode_elmt_ic1    ! inode_elmt_ic1(NGLLCUBE_INF,NSPEC_INNER_CORE)
   integer,dimension(:),allocatable :: gdof_ic,gdof_ic1
 
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_inner_core   ! (NGLLCUBE,NGLLCUBE,NSPEC_INNER_CORE)
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: dprecon_inner_core         ! (NGLOB_INNER_CORE)
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable ::  storekmat_inner_core1
@@ -1301,17 +1311,20 @@ module specfem_par_full_gravity
 
   real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_ic        ! (NDIM,NGLLCUBE,NGLLCUBE,NSPEC_INNER_CORE)
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_ic            ! (NGLLCUBE,NSPEC_INNER_CORE)
-  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_ic1       ! (NDIM,NGLL_INF,NGLL_INF,NSPEC_INNER_CORE)
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_ic1           ! (NGLL_INF,NSPEC_INNER_CORE)
+  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_ic1       ! (NDIM,NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_INNER_CORE)
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_ic1           ! (NGLLCUBE_INF,NSPEC_INNER_CORE)
+#endif
 
   ! outer core
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: g_oc ! (NDIM,NGLOB_OUTER_CORE)
 
   ! parameters for Poisson's solver
   integer,dimension(:,:),allocatable :: inode_elmt_oc   ! (NGLLCUBE,NSPEC_OUTER_CORE)
-  integer,dimension(:,:),allocatable :: inode_elmt_oc1  ! (NGLL_INF,NSPEC_OUTER_CORE)
+  integer,dimension(:,:),allocatable :: inode_elmt_oc1  ! (NGLLCUBE_INF,NSPEC_OUTER_CORE)
   integer,dimension(:),allocatable :: gdof_oc,gdof_oc1
 
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_outer_core ! (NGLLCUBE,NGLLCUBE,NSPEC_OUTER_CORE)
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: dprecon_outer_core   ! (NGLOB_OUTER_CORE)
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_outer_core1
@@ -1319,8 +1332,9 @@ module specfem_par_full_gravity
 
   real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_oc  ! (NDIM,NGLLCUBE,NGLLCUBE,NSPEC_OUTER_CORE)
   real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_oc      ! (NGLLCUBE,NSPEC_OUTER_CORE)
-  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_oc1 ! (NDIM,NGLL_INF,NGLL_INF,NSPEC_OUTER_CORE)
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_oc1     ! (NGLL_INF,NSPEC_OUTER_CORE)
+  real(kind=CUSTOM_REAL),dimension(:,:,:,:),allocatable :: storederiv_oc1 ! (NDIM,NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_OUTER_CORE)
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: storerhojw_oc1     ! (NGLLCUBE_INF,NSPEC_OUTER_CORE)
+#endif
 
 end module specfem_par_full_gravity
 
@@ -1412,6 +1426,9 @@ module specfem_par_trinfinite
   integer,dimension(:,:),allocatable :: inode_elmt_trinf  ! inode_elmt_trinf(NGLLCUBE,NSPEC_TRINFINITE)
   integer,dimension(:,:),allocatable :: inode_elmt_trinf1 ! inode_elmt_trinf1(NGLLCUBE_INF,NSPEC_TRINFINITE)
 
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
   integer,dimension(:),allocatable :: gdof_trinf, gdof_trinf1
   integer,dimension(:,:),allocatable :: ggdof_trinf, ggdof_trinf1
 
@@ -1420,6 +1437,8 @@ module specfem_par_trinfinite
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_trinfinite1
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: dprecon_trinfinite1
 
+#endif
+
 end module specfem_par_trinfinite
 
 !=====================================================================
@@ -1511,6 +1530,10 @@ module specfem_par_infinite
   ! parameters for Poisson's solver
   integer,dimension(:,:),allocatable :: inode_elmt_inf    ! inode_elmt_inf(NGLLCUBE,NSPEC_INFINITE)
   integer,dimension(:,:),allocatable :: inode_elmt_inf1   ! inode_elmt_inf1(NGLLCUBE_INF,NSPEC_INFINITE)
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
   integer,dimension(:),allocatable :: gdof_inf, gdof_inf1
   integer,dimension(:,:),allocatable :: ggdof_inf, ggdof_inf1
 
@@ -1519,6 +1542,8 @@ module specfem_par_infinite
   real(kind=CUSTOM_REAL),dimension(:,:,:),allocatable :: storekmat_infinite1
   real(kind=CUSTOM_REAL),dimension(:),allocatable :: dprecon_infinite1
 
+#endif
+
 end module specfem_par_infinite
 
 !=====================================================================

From fee6e5644525ed75ed34bc20bac320ba1e2a3a9f Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 00:37:42 +0200
Subject: [PATCH 05/11] adds SIEM routines (not working yet)

---
 src/gindex3D/Makefile                   |   56 +
 src/gindex3D/create_gindex.f90          | 2028 ++++++++++++++++
 src/gindex3D/gindex3D.f90               |   48 +
 src/gindex3D/gindex3D_par.f90           |   57 +
 src/gindex3D/initialize_gindex.f90      |  121 +
 src/gindex3D/rules.mk                   |  161 ++
 src/specfem3D/SIEM_infinite_element.F90 |  872 +++++++
 src/specfem3D/SIEM_math_library.F90     | 2831 +++++++++++++++++++++++
 src/specfem3D/SIEM_poisson.F90          | 2361 +++++++++++++++++++
 src/specfem3D/SIEM_prepare_solver.F90   | 1196 ++++++++++
 src/specfem3D/SIEM_solver_mpi.F90       |  876 +++++++
 src/specfem3D/SIEM_solver_petsc.F90     | 1896 +++++++++++++++
 12 files changed, 12503 insertions(+)
 create mode 100644 src/gindex3D/Makefile
 create mode 100644 src/gindex3D/create_gindex.f90
 create mode 100644 src/gindex3D/gindex3D.f90
 create mode 100644 src/gindex3D/gindex3D_par.f90
 create mode 100644 src/gindex3D/initialize_gindex.f90
 create mode 100644 src/gindex3D/rules.mk
 create mode 100644 src/specfem3D/SIEM_infinite_element.F90
 create mode 100644 src/specfem3D/SIEM_math_library.F90
 create mode 100644 src/specfem3D/SIEM_poisson.F90
 create mode 100644 src/specfem3D/SIEM_prepare_solver.F90
 create mode 100644 src/specfem3D/SIEM_solver_mpi.F90
 create mode 100644 src/specfem3D/SIEM_solver_petsc.F90

diff --git a/src/gindex3D/Makefile b/src/gindex3D/Makefile
new file mode 100644
index 000000000..aa70c4011
--- /dev/null
+++ b/src/gindex3D/Makefile
@@ -0,0 +1,56 @@
+#=====================================================================
+#
+#                       S p e c f e m 3 D  G l o b e
+#                       ----------------------------
+#
+#     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+#                        Princeton University, USA
+#                and CNRS / University of Marseille, France
+#                 (there are currently many more authors!)
+# (c) Princeton University and CNRS / University of Marseille, April 2014
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#=====================================================================
+
+DIR = gindex3D
+
+# The rest of this file is generic
+#######################################
+
+####
+#### targets
+####
+
+default:
+	$(MAKE) -C ../.. $(DIR)
+
+all:
+	$(MAKE) -C ../.. all
+
+clean:
+	$(MAKE) -C ../.. CLEAN=$(DIR) clean
+
+cleanall:
+	$(MAKE) -C ../.. clean
+
+backup:
+	mkdir -p bak
+	cp *f90 *h Makefile bak
+
+bak: backup
+
+.PHONY: default all clean cleanall backup bak
+
diff --git a/src/gindex3D/create_gindex.f90 b/src/gindex3D/create_gindex.f90
new file mode 100644
index 000000000..49f3988ec
--- /dev/null
+++ b/src/gindex3D/create_gindex.f90
@@ -0,0 +1,2028 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine create_gindex()
+
+  use gindex_par
+
+  implicit none
+
+  ! local parameters
+  integer :: i_proc
+
+  ! initialize global indices
+  ignode_end = 0 ! global nodes for NGLLX = 5
+  gnf_end = 0    ! global gdof for NGLLX = 5
+  gnf_end1 = 0   ! global gdof for NGLLX_INF = 3
+
+  ! loop through the processors
+  do i_proc = 0,nproc-1
+    ! creates global DOFs for this process
+    call create_gindex_for_process(i_proc)
+  enddo
+
+  ! closes the main output file
+  if (myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) 'End of gindex3D - all done'
+    write(IMAIN,*)
+    call flush_IMAIN()
+    close(IMAIN)
+  endif
+
+  ! synchronizes all the processes to make sure everybody has finished
+  call synchronize_all()
+
+  end subroutine create_gindex
+
+!
+!------------------------------------------------------------------------
+!
+
+  subroutine create_gindex_for_process(i_proc)
+
+  use gindex_par
+
+  implicit none
+
+  integer, intent(in) :: i_proc
+
+  ! local parameters
+  integer :: j_proc
+  integer :: i,j,k,i_elmt,i_node,ispec_ic,ispec_oc,ispec_cm, &
+             ispec_trinf,ispec_inf,ibool_ic,ibool_oc,ibool_cm,ibool_trinf,ibool_inf,k_ic, &
+             k_oc,k_cm,k_trinf,k_inf
+  integer :: ibool,inode,ignode,ispec,nnode_icb,nnode_cmb,nnode_trinfb,nnode_infb
+
+  ! local
+  integer :: inode_ic(NGLOB_INNER_CORE),inode_oc(NGLOB_OUTER_CORE), &
+             inode_cm(NGLOB_CRUST_MANTLE),inode_trinf(NGLOB_TRINFINITE), &
+             inode_inf(NGLOB_INFINITE)
+  logical :: isnode_ic(NGLOB_INNER_CORE),isnode_oc(NGLOB_OUTER_CORE), &
+             isnode_cm(NGLOB_CRUST_MANTLE),isnode_trinf(NGLOB_TRINFINITE), &
+             isnode_inf(NGLOB_INFINITE)
+
+  ! global
+  integer :: ignode_ic(NGLOB_INNER_CORE),ignode_oc(NGLOB_OUTER_CORE), &
+             ignode_cm(NGLOB_CRUST_MANTLE),ignode_trinf(NGLOB_TRINFINITE), &
+             ignode_inf(NGLOB_INFINITE)
+  logical :: isgnode_ic(NGLOB_INNER_CORE),isgnode_oc(NGLOB_OUTER_CORE), &
+             isgnode_cm(NGLOB_CRUST_MANTLE),isgnode_trinf(NGLOB_TRINFINITE), &
+             isgnode_inf(NGLOB_INFINITE)
+
+  integer,allocatable :: inode_oc1(:),inode_ic1(:),inode_cm1(:),inode_trinf1(:), &
+                         inode_inf1(:)
+
+  integer,allocatable :: gdf_ic(:,:),gdf_oc(:,:),gdf_cm(:,:),gdf_trinf(:,:), &
+                         gdf_inf(:,:),gnf(:,:)
+
+  integer :: nnode_ic,nnode_oc,nnode_cm,nnode_trinf,nnode_inf
+  integer :: inode1,inum,igll
+  logical,allocatable :: isnode(:)
+
+  logical,allocatable :: isibool_interface_ic(:,:),isibool_interface_oc(:,:), &
+                         isibool_interface_cm(:,:),isibool_interface_trinf(:,:), &
+                         isibool_interface_inf(:,:),isgnf(:,:)
+
+  integer :: igdof,nibool
+  integer,allocatable :: gghost(:,:),ighost(:)
+  character(len = 20) :: fhead
+  character(len = 12) :: spm,spn
+  character(len = 60) :: fname
+
+  integer,allocatable :: nmir(:)
+  integer,allocatable :: gdf_ic1(:,:),gdf_oc1(:,:),gdf_cm1(:,:),gdf_trinf1(:,:), &
+                         gdf_inf1(:,:),gnf1(:,:)
+
+  logical,allocatable :: isgnf1(:,:)
+
+  integer :: igdof1
+
+  integer,allocatable :: tmpvec(:)
+  integer,allocatable :: tmpmat(:,:)
+
+  integer :: myrank_org
+
+  ! user output
+  if (myrank == 0) then
+    write(IMAIN,*) 'Process: ',i_proc
+    write(IMAIN,*) '  reading databases...'
+    write(IMAIN,*)
+    call flush_IMAIN()
+  endif
+
+  ! to read databases for different processes, we will set myrank to the corresponding process id (i_proc)
+  ! store current myrank
+  myrank_org = myrank
+
+  ! set new process id
+  myrank = i_proc
+
+  ! starts reading the databases
+  call read_mesh_databases()
+
+  ! restores original myrank
+  myrank = myrank_org
+
+  !deallocate unnecessary arrays
+  if (allocated(rmassz_inner_core)) then
+    deallocate(rmassz_inner_core)
+    deallocate(phase_ispec_inner_inner_core)
+    deallocate(num_elem_colors_inner_core)
+    deallocate(buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+               request_send_vector_ic,request_recv_vector_ic)
+  endif
+
+  if (allocated(rmass_outer_core)) then
+    deallocate(rmass_outer_core)
+    deallocate(phase_ispec_inner_outer_core)
+    deallocate(num_elem_colors_outer_core)
+    deallocate(buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+               request_send_scalar_oc,request_recv_scalar_oc)
+  endif
+
+  if (allocated(rmassz_crust_mantle)) then
+    deallocate(rmassz_crust_mantle)
+    deallocate(phase_ispec_inner_crust_mantle)
+    deallocate(num_elem_colors_crust_mantle)
+    deallocate(buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+               request_send_vector_cm,request_recv_vector_cm)
+  endif
+
+  if (allocated(phase_ispec_inner_trinfinite)) then
+    deallocate(phase_ispec_inner_trinfinite)
+    deallocate(num_elem_colors_trinfinite)
+    deallocate(buffer_send_scalar_trinfinite,buffer_recv_scalar_trinfinite, &
+               request_send_scalar_trinfinite,request_recv_scalar_trinfinite)
+  endif
+
+  if (allocated(phase_ispec_inner_infinite)) then
+    deallocate(phase_ispec_inner_infinite)
+    deallocate(num_elem_colors_infinite)
+    deallocate(buffer_send_scalar_infinite,buffer_recv_scalar_infinite, &
+               request_send_scalar_infinite,request_recv_scalar_infinite)
+  endif
+
+  !debug
+  !print *,'Process:',i_proc,' neighbors:',num_interfaces_inner_core
+
+  nnode_ic = NGLOB_INNER_CORE
+  nnode_oc = NGLOB_OUTER_CORE
+  nnode_cm = NGLOB_CRUST_MANTLE
+  nnode_trinf = NGLOB_TRINFINITE
+  nnode_inf = NGLOB_INFINITE
+
+  inode_ic(:) = -1;    isnode_ic(:) = .false.
+  inode_oc(:) = -1;    isnode_oc(:) = .false.
+  inode_cm(:) = -1;    isnode_cm(:) = .false.
+  inode_trinf(:) = -1; isnode_trinf(:) = .false.
+  inode_inf(:) = -1;   isnode_inf(:) = .false.
+
+  ignode_ic(:) = -1;    isgnode_ic(:) = .false.
+  ignode_oc(:) = -1;    isgnode_oc(:) = .false.
+  ignode_cm(:) = -1;    isgnode_cm(:) = .false.
+  ignode_trinf(:) = -1; isgnode_trinf(:) = .false.
+  ignode_inf(:) = -1;   isgnode_inf(:) = .false.
+
+  ! allocate necessary arrays
+  ! trinfinite arrays
+  if (ADD_TRINF) then
+    allocate(ibool_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE))
+    allocate(ibelm_bottom_trinfinite(NSPEC2D_BOTTOM_TRINF))
+    allocate(ibelm_top_trinfinite(NSPEC2D_TOP_TRINF))
+    allocate(inode_elmt_trinf(NGLLCUBE,NSPEC_TRINFINITE))
+    allocate(inode_elmt_trinf1(NGLLCUBE_INF,NSPEC_TRINFINITE))
+  endif
+  ! infinite arrays
+  allocate(ibool_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE))
+  allocate(ibelm_bottom_infinite(NSPEC2D_BOTTOM_INF))
+  allocate(ibelm_top_infinite(NSPEC2D_TOP_INF))
+  allocate(inode_elmt_inf(NGLLCUBE,NSPEC_INFINITE))
+  allocate(inode_elmt_inf1(NGLLCUBE_INF,NSPEC_INFINITE))
+
+  ! count global node numbers
+  nnode = NGLOB_INNER_CORE + NGLOB_OUTER_CORE + NGLOB_CRUST_MANTLE + NGLOB_TRINFINITE + NGLOB_INFINITE
+
+  ! identify duplicate nodes on the boundary
+  ! inner core - outer core boundary (ICB)
+  !isnode_oc=.false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_OC
+    ispec = ibelm_bottom_outer_core(i_elmt)
+    k = 1 ! bottom face
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        isgnode_oc(ibool_outer_core(i,j,k,ispec)) = .true.
+      enddo
+    enddo
+  enddo
+  nnode_icb = count(isgnode_oc)
+
+  ! outer core - crust mantle boundary (CMB)
+  !isnode_cm=.false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_CM
+    ispec = ibelm_bottom_crust_mantle(i_elmt)
+    k = 1 ! bottom face
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        isgnode_cm(ibool_crust_mantle(i,j,k,ispec)) = .true.
+      enddo
+    enddo
+  enddo
+  nnode_cmb = count(isgnode_cm)
+
+  ! crust mantle - transition infinite boundary (FS: free surface)
+  nnode_trinfb = 0
+  !isnode_trinf=.false.
+  if (ADD_TRINF) then
+    do i_elmt = 1,NSPEC2D_BOTTOM_TRINF
+      ispec = ibelm_bottom_trinfinite(i_elmt)
+      k = 1 ! bottom face
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          isgnode_trinf(ibool_trinfinite(i,j,k,ispec)) = .true.
+        enddo
+      enddo
+    enddo
+    nnode_trinfb = count(isgnode_trinf)
+  endif
+
+  ! crust mantle - infinite boundary (FS: free surface)
+  !isnode_inf=.false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_INF
+    ispec = ibelm_bottom_infinite(i_elmt)
+    k = 1 ! bottom face
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        isgnode_inf(ibool_infinite(i,j,k,ispec)) = .true.
+      enddo
+    enddo
+  enddo
+  nnode_infb = count(isgnode_inf)
+
+  ! number of unique global nodes in this processor
+  nnode = nnode-nnode_icb-nnode_cmb-nnode_trinfb-nnode_infb
+
+  print *,'Nodes in a process:',nnode
+
+  ! until this stage both arrays are same
+  !============================================================
+
+  ! local indexing
+  ! indexify regionally assembled local nodes and store in a region array
+  ! inner core
+  inode_ic = (/ (inode, inode = 1,NGLOB_INNER_CORE) /)
+
+  inode = NGLOB_INNER_CORE
+  ! outer core
+  ! ICB
+  ! copy common boundary nodes
+  isnode_oc = .false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_OC
+    ispec_oc = ibelm_bottom_outer_core(i_elmt)
+    ispec_ic = ibelm_top_inner_core(i_elmt)
+    k_oc = 1;    ! bottom face
+    k_ic = NGLLZ ! top face
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc)
+        ibool_ic = ibool_inner_core(i,j,k_ic,ispec_ic)
+        inode_oc(ibool_oc)=inode_ic(ibool_ic)
+        isnode_oc(ibool_oc) = .true.
+      enddo
+    enddo
+  enddo
+  ! now loop through all nodes
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_oc = ibool_outer_core(i,j,k,i_elmt)
+           if (.not. isnode_oc(ibool_oc)) then
+              isnode_oc(ibool_oc) = .true.
+              inode = inode+1
+              inode_oc(ibool_oc)=inode
+           endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! crust-mantle
+  ! CMB
+  ! copy common boundary nodes
+  isnode_cm = .false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_CM
+    ispec_cm = ibelm_bottom_crust_mantle(i_elmt)
+    ispec_oc = ibelm_top_outer_core(i_elmt)
+    k_cm = 1; k_oc = NGLLZ
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm)
+        ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc)
+        inode_cm(ibool_cm)=inode_oc(ibool_oc)
+        isnode_cm(ibool_cm) = .true.
+      enddo
+    enddo
+  enddo
+  ! now loop through all nodes
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_cm = ibool_crust_mantle(i,j,k,i_elmt)
+          if (.not. isnode_cm(ibool_cm)) then
+            isnode_cm(ibool_cm) = .true.
+            inode = inode+1
+            inode_cm(ibool_cm)=inode
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite & infinite
+    ! FS
+    ! copy common boundary nodes
+    isnode_trinf = .false.
+    do i_elmt = 1,NSPEC2D_BOTTOM_TRINF
+      ispec_trinf = ibelm_bottom_trinfinite(i_elmt)
+      ispec_cm = ibelm_top_crust_mantle(i_elmt)
+      k_trinf = 1; k_cm = NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf)
+          ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm)
+          inode_trinf(ibool_trinf)=inode_cm(ibool_cm)
+          isnode_trinf(ibool_trinf) = .true.
+        enddo
+      enddo
+    enddo
+    ! now loop through all nodes
+    do i_elmt = 1,NSPEC_TRINFINITE
+      do k = 1,NGLLZ
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ibool_trinf = ibool_trinfinite(i,j,k,i_elmt)
+            if (.not. isnode_trinf(ibool_trinf)) then
+              isnode_trinf(ibool_trinf) = .true.
+              inode = inode+1
+              inode_trinf(ibool_trinf)=inode
+            endif
+          enddo
+        enddo
+      enddo
+    enddo
+
+    ! infinite
+    ! FS
+    ! copy common boundary nodes
+    isnode_inf = .false.
+    do i_elmt = 1,NSPEC2D_BOTTOM_INF
+      ispec_inf = ibelm_bottom_infinite(i_elmt)
+      ispec_trinf = ibelm_top_trinfinite(i_elmt)
+      k_inf = 1; k_trinf = NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf)
+          ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf)
+          inode_inf(ibool_inf)=inode_trinf(ibool_trinf)
+          isnode_inf(ibool_inf) = .true.
+        enddo
+      enddo
+    enddo
+    ! now loop through all nodes
+    do i_elmt = 1,NSPEC_INFINITE
+      do k = 1,NGLLZ
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ibool_inf = ibool_infinite(i,j,k,i_elmt)
+            if (.not. isnode_inf(ibool_inf)) then
+              isnode_inf(ibool_inf) = .true.
+              inode = inode+1
+              inode_inf(ibool_inf)=inode
+            endif
+          enddo
+        enddo
+      enddo
+    enddo
+  else
+    ! infinite only
+    ! FS
+    ! copy common boundary nodes
+    isnode_inf = .false.
+    do i_elmt = 1,NSPEC2D_BOTTOM_INF
+      ispec_inf = ibelm_bottom_infinite(i_elmt)
+      ispec_cm = ibelm_top_crust_mantle(i_elmt)
+      k_inf = 1; k_cm = NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf)
+          ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm)
+          inode_inf(ibool_inf)=inode_cm(ibool_cm)
+          isnode_inf(ibool_inf) = .true.
+        enddo
+      enddo
+    enddo
+    ! now loop through all nodes
+    do i_elmt = 1,NSPEC_INFINITE
+      do k = 1,NGLLZ
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ibool_inf = ibool_infinite(i,j,k,i_elmt)
+            if (.not. isnode_inf(ibool_inf)) then
+              isnode_inf(ibool_inf) = .true.
+              inode = inode+1
+              inode_inf(ibool_inf)=inode
+            endif
+          enddo
+        enddo
+      enddo
+    enddo
+  endif ! if (ADD_TRINF)
+
+  ! safety check
+  if (inode /= nnode) then
+    write(*,*) 'ERROR: numbers of global nodes mismatch!',inode,nnode
+    stop
+  endif
+
+  !============================================================
+
+  ! global nodal indexing
+
+  ! WARNING: is it correct to put these statements here?
+  isgnode_ic(:) = .false.
+  isgnode_oc(:) = .false.
+  isgnode_cm(:) = .false.
+  isgnode_trinf(:) = .false.
+  isgnode_inf(:) = .false.
+
+  ! copy global indices from preceding partitions
+
+  ! inner core
+  fhead = 'ic'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_inner_core
+    j_proc = my_neighbors_inner_core(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(ighost(nibool))
+      read(10,*) ighost(1:nibool)
+      close(10,status='delete')
+
+      isgnode_ic(ibool_interfaces_inner_core(1:nibool,i)) = .true.
+      ignode_ic(ibool_interfaces_inner_core(1:nibool,i)) = ighost(:)
+      deallocate(ighost)
+    endif
+  enddo
+
+  ! outer core
+  fhead = 'oc'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_outer_core
+    j_proc = my_neighbors_outer_core(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(ighost(nibool))
+      read(10,*) ighost(1:nibool)
+      close(10,status='delete')
+
+      isgnode_oc(ibool_interfaces_outer_core(1:nibool,i)) = .true.
+      ignode_oc(ibool_interfaces_outer_core(1:nibool,i)) = ighost(:)
+      deallocate(ighost)
+    endif
+  enddo
+
+  ! crust mantle
+  fhead = 'cm'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_crust_mantle
+    j_proc = my_neighbors_crust_mantle(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(ighost(nibool))
+      read(10,*) ighost(1:nibool)
+      close(10,status='delete')
+
+      isgnode_cm(ibool_interfaces_crust_mantle(1:nibool,i)) = .true.
+      ignode_cm(ibool_interfaces_crust_mantle(1:nibool,i)) = ighost(:)
+      deallocate(ighost)
+    endif
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    fhead = 'trinf'
+    write(spm,*) i_proc
+    do i = 1,num_interfaces_trinfinite
+      j_proc = my_neighbors_trinfinite(i)
+      if (j_proc < i_proc) then
+        write(spn,*) j_proc
+        fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+        !print *,fname
+        open(10,file=fname,action='read',status='old')
+        read(10,*) nibool
+        allocate(ighost(nibool))
+        read(10,*) ighost(1:nibool)
+        close(10,status='delete')
+
+        isgnode_trinf(ibool_interfaces_trinfinite(1:nibool,i)) = .true.
+        ignode_trinf(ibool_interfaces_trinfinite(1:nibool,i)) = ighost(:)
+        deallocate(ighost)
+      endif
+    enddo
+  endif
+
+  ! infinite
+  fhead = 'inf'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_infinite
+    j_proc = my_neighbors_infinite(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(ighost(nibool))
+      read(10,*) ighost(1:nibool)
+      close(10,status='delete')
+
+      isgnode_inf(ibool_interfaces_infinite(1:nibool,i)) = .true.
+      ignode_inf(ibool_interfaces_infinite(1:nibool,i)) = ighost(:)
+      deallocate(ighost)
+    endif
+  enddo
+
+  print *,'Previous largest node ID:',ignode_end
+
+  ! indexify global nodes and store in a region array
+  ! inner core
+  ignode = ignode_end
+  do i_node = 1,NGLOB_INNER_CORE
+    if (.not. isgnode_ic(i_node)) then
+      ignode = ignode+1
+      isgnode_ic(i_node) = .true.
+      ignode_ic(i_node) = ignode
+    endif
+  enddo
+  if (i_proc == 1) print *,'IC:',ignode
+
+  !ignode_ic=(/ (ignode, ignode=1,NGLOB_INNER_CORE) /)
+  !ignode=NGLOB_INNER_CORE
+
+  ! outer core
+  ! ICB
+  ! copy common boundary nodes
+  !isnode_oc=.false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_OC
+    ispec_oc = ibelm_bottom_outer_core(i_elmt)
+    ispec_ic = ibelm_top_inner_core(i_elmt)
+    k_oc = 1;    ! bottom face
+    k_ic = NGLLZ ! top face
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc)
+        ibool_ic = ibool_inner_core(i,j,k_ic,ispec_ic)
+        ignode_oc(ibool_oc) = ignode_ic(ibool_ic)
+        isgnode_oc(ibool_oc) = .true.
+      enddo
+    enddo
+  enddo
+  ! now loop through all nodes
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_oc = ibool_outer_core(i,j,k,i_elmt)
+           if (.not. isgnode_oc(ibool_oc)) then
+              isgnode_oc(ibool_oc) = .true.
+              ignode = ignode+1
+              ignode_oc(ibool_oc)=ignode
+           endif
+        enddo
+      enddo
+    enddo
+  enddo
+  if (i_proc == 1) print *,'OC:',ignode
+
+  ! crust-mantle
+  ! CMB
+  ! copy common boundary nodes
+  !isnode_cm=.false.
+  do i_elmt = 1,NSPEC2D_BOTTOM_CM
+    ispec_cm = ibelm_bottom_crust_mantle(i_elmt)
+    ispec_oc = ibelm_top_outer_core(i_elmt)
+    k_cm = 1; k_oc = NGLLZ
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm)
+        ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc)
+        ignode_cm(ibool_cm) = ignode_oc(ibool_oc)
+        isgnode_cm(ibool_cm) = .true.
+      enddo
+    enddo
+  enddo
+  ! now loop through all nodes
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_cm = ibool_crust_mantle(i,j,k,i_elmt)
+          if (.not. isgnode_cm(ibool_cm)) then
+            isgnode_cm(ibool_cm) = .true.
+            ignode = ignode+1
+            ignode_cm(ibool_cm)=ignode
+          endif
+        enddo
+      enddo
+    enddo
+  enddo
+  if (i_proc == 1) print *,'CM:',ignode
+
+  if (ADD_TRINF) then
+    ! transition infinite & infinite
+    ! FS
+    ! copy common boundary nodes
+    !isnode_trinf=.false.
+    do i_elmt = 1,NSPEC2D_BOTTOM_TRINF
+      ispec_trinf = ibelm_bottom_trinfinite(i_elmt)
+      ispec_cm = ibelm_top_crust_mantle(i_elmt)
+      k_trinf = 1; k_cm = NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf)
+          ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm)
+          ignode_trinf(ibool_trinf) = ignode_cm(ibool_cm)
+          isgnode_trinf(ibool_trinf) = .true.
+        enddo
+      enddo
+    enddo
+    ! now loop through all nodes
+    do i_elmt = 1,NSPEC_TRINFINITE
+      do k = 1,NGLLZ
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ibool_trinf = ibool_trinfinite(i,j,k,i_elmt)
+            if (.not. isgnode_trinf(ibool_trinf)) then
+              isgnode_trinf(ibool_trinf) = .true.
+              ignode = ignode+1
+              ignode_trinf(ibool_trinf) = ignode
+            endif
+          enddo
+        enddo
+      enddo
+    enddo
+
+    ! infinite
+    ! FS
+    ! copy common boundary nodes
+    !isnode_inf=.false.
+    do i_elmt = 1,NSPEC2D_BOTTOM_INF
+      ispec_inf = ibelm_bottom_infinite(i_elmt)
+      ispec_trinf = ibelm_top_trinfinite(i_elmt)
+      k_inf = 1; k_trinf = NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf)
+          ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf)
+          ignode_inf(ibool_inf) = ignode_trinf(ibool_trinf)
+          isgnode_inf(ibool_inf) = .true.
+        enddo
+      enddo
+    enddo
+    ! now loop through all nodes
+    do i_elmt = 1,NSPEC_INFINITE
+      do k = 1,NGLLZ
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ibool_inf = ibool_infinite(i,j,k,i_elmt)
+            if (.not. isgnode_inf(ibool_inf)) then
+              isgnode_inf(ibool_inf) = .true.
+              ignode = ignode+1
+              ignode_inf(ibool_inf) = ignode
+            endif
+          enddo
+        enddo
+      enddo
+    enddo
+  else
+    ! infinite only
+    ! FS
+    ! copy common boundary nodes
+    !isnode_inf=.false.
+    do i_elmt = 1,NSPEC2D_BOTTOM_INF
+      ispec_inf = ibelm_bottom_infinite(i_elmt)
+      ispec_cm = ibelm_top_crust_mantle(i_elmt)
+      k_inf = 1; k_cm = NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf)
+          ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm)
+          ignode_inf(ibool_inf) = ignode_cm(ibool_cm)
+          isgnode_inf(ibool_inf) = .true.
+        enddo
+      enddo
+    enddo
+    ! now loop through all nodes
+    do i_elmt = 1,NSPEC_INFINITE
+      do k = 1,NGLLZ
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ibool_inf = ibool_infinite(i,j,k,i_elmt)
+            if (.not. isgnode_inf(ibool_inf)) then
+              isgnode_inf(ibool_inf) = .true.
+              ignode = ignode+1
+              ignode_inf(ibool_inf) = ignode
+            endif
+          enddo
+        enddo
+      enddo
+    enddo
+  endif
+  if (i_proc == 1) print *,'INF:',ignode
+
+  !  if (ignode /= nnode) then
+  !    write(*,*) 'ERROR: numbers of global nodes mismatch!',ignode,nnode
+  !    call sync_all
+  !    call close_process
+  !  endif
+
+  ! save global indices for neighboring partitions
+  ! inner core
+  fhead = 'ic'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_inner_core
+    j_proc = my_neighbors_inner_core(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname !,i_proc,j_proc
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_inner_core(i)
+      allocate(tmpvec(nibool_interfaces_inner_core(i)))
+      tmpvec = ignode_ic(ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(i),i))
+      !write(10,*)ignode_ic(ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(i),i))
+      write(10,*) tmpvec
+      deallocate(tmpvec)
+      close(10)
+    endif
+  enddo
+
+  ! outer core
+  fhead = 'oc'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_outer_core
+    j_proc = my_neighbors_outer_core(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_outer_core(i)
+      allocate(tmpvec(nibool_interfaces_outer_core(i)))
+      tmpvec = ignode_oc(ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(i),i))
+      !write(10,*)ignode_oc(ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(i),i))
+      write(10,*) tmpvec
+      deallocate(tmpvec)
+      close(10)
+    endif
+  enddo
+
+  ! crust mantle
+  fhead = 'cm'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_crust_mantle
+    j_proc = my_neighbors_crust_mantle(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_crust_mantle(i)
+      allocate(tmpvec(nibool_interfaces_crust_mantle(i)))
+      tmpvec = ignode_cm(ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(i),i))
+      !write(10,*)ignode_cm(ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(i),i))
+      write(10,*) tmpvec
+      deallocate(tmpvec)
+      close(10)
+    endif
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    fhead = 'trinf'
+    write(spm,*) i_proc
+    do i = 1,num_interfaces_trinfinite
+      j_proc = my_neighbors_trinfinite(i)
+      if (j_proc > i_proc) then
+        write(spn,*) j_proc
+        fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+        !print *,fname
+        open(10,file=fname,action='write',status='replace')
+        write(10,*) nibool_interfaces_trinfinite(i)
+        allocate(tmpvec(nibool_interfaces_trinfinite(i)))
+        tmpvec = ignode_trinf(ibool_interfaces_trinfinite(1:nibool_interfaces_trinfinite(i),i))
+        !write(10,*)ignode_trinf(ibool_interfaces_trinfinite(1:nibool_interfaces_trinfinite(i),i))
+        write(10,*) tmpvec
+        deallocate(tmpvec)
+        close(10)
+      endif
+    enddo
+  endif
+
+  ! infinite
+  fhead = 'inf'
+  write(spm,*)i_proc
+  do i = 1,num_interfaces_infinite
+    j_proc = my_neighbors_infinite(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_infinite(i)
+      allocate(tmpvec(nibool_interfaces_infinite(i)))
+      tmpvec = ignode_inf(ibool_interfaces_infinite(1:nibool_interfaces_infinite(i),i))
+      !write(10,*)ignode_inf(ibool_interfaces_infinite(1:nibool_interfaces_infinite(i),i))
+      write(10,*) tmpvec
+      deallocate(tmpvec)
+      close(10)
+    endif
+  enddo
+  !  print *,'success!'
+
+  ignode_end = maxval( (/ ignode_ic,ignode_oc,ignode_cm,ignode_trinf,ignode_inf /) )
+  print *,'Largest node ID:',ignode_end
+
+  write(spm,*) i_proc
+
+  fname = 'DATABASES_MPI/gibool_proc'//trim(adjustl(spm))
+  open(10,file=fname,action='write',status='replace')
+
+  write(10,*) NGLOB_INNER_CORE
+  write(10,*) ignode_ic
+  write(10,*) NGLOB_OUTER_CORE
+  write(10,*) ignode_oc
+  write(10,*) NGLOB_CRUST_MANTLE
+  write(10,*) ignode_cm
+  write(10,*) NGLOB_TRINFINITE
+  write(10,*) ignode_trinf
+  write(10,*) NGLOB_INFINITE
+  write(10,*) ignode_inf
+  close(10)
+
+  ! global indexing of degrees of freedoms
+  allocate(gnf(NNDOF,nnode),isgnf(NNDOF,nnode))
+  gnf = 0
+  isgnf = .false.
+
+  ! activate freedoms
+
+  ! freedoms of fictitious cube in inner core are deactivated
+  do i_elmt = 1,NSPEC_INNER_CORE
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool = ibool_inner_core(i,j,k,i_elmt)
+          gnf(1,inode_ic(ibool)) = 1
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! outer core
+  ! all freedoms are active
+  gnf(1,inode_oc) = 1
+
+  ! crust-mantle
+  ! all freedoms are active
+  gnf(1,inode_cm) = 1
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    ! all freedoms are active
+    gnf(1,inode_trinf) = 1
+  endif
+
+  ! infinite element
+  ! all but surface nodes are activated
+  do i_elmt = 1,NSPEC_INFINITE
+    do k = 1,NGLLZ-1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ibool = ibool_infinite(i,j,k,i_elmt)
+          gnf(1,inode_inf(ibool)) = 1
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! copy global indices from preceding partitions
+  ! inner core
+  fhead = 'gdof_ic'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_inner_core
+    j_proc = my_neighbors_inner_core(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*) gghost
+      close(10,status='delete')
+
+      isgnf(:,inode_ic(ibool_interfaces_inner_core(1:nibool,i))) = .true.
+      gnf(:,inode_ic(ibool_interfaces_inner_core(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  enddo
+
+  ! outer core
+  fhead = 'gdof_oc'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_outer_core
+    j_proc = my_neighbors_outer_core(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*) gghost
+      close(10,status='delete')
+
+      isgnf(:,inode_oc(ibool_interfaces_outer_core(1:nibool,i))) = .true.
+      gnf(:,inode_oc(ibool_interfaces_outer_core(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  enddo
+
+  ! crust mantle
+  fhead = 'gdof_cm'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_crust_mantle
+    j_proc = my_neighbors_crust_mantle(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*) gghost
+      close(10,status='delete')
+
+      isgnf(:,inode_cm(ibool_interfaces_crust_mantle(1:nibool,i))) = .true.
+      gnf(:,inode_cm(ibool_interfaces_crust_mantle(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    fhead = 'gdof_trinf'
+    write(spm,*) i_proc
+    do i = 1,num_interfaces_trinfinite
+      j_proc = my_neighbors_trinfinite(i)
+      if (j_proc < i_proc) then
+        write(spn,*) j_proc
+        fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+        !print *,fname
+        open(10,file=fname,action='read',status='old')
+        read(10,*) nibool
+        allocate(gghost(NNDOF,nibool))
+        read(10,*) gghost
+        close(10,status='delete')
+
+        isgnf(:,inode_trinf(ibool_interfaces_trinfinite(1:nibool,i))) = .true.
+        gnf(:,inode_trinf(ibool_interfaces_trinfinite(1:nibool,i))) = gghost(:,:)
+        deallocate(gghost)
+      endif
+    enddo
+  endif
+
+  ! infinite
+  fhead = 'gdof_inf'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_infinite
+    j_proc = my_neighbors_infinite(i)
+    if (j_proc < i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      !print *,fname
+      open(10,file=fname,action='read',status='old')
+      read(10,*) nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*) gghost
+      close(10,status='delete')
+
+      isgnf(:,inode_inf(ibool_interfaces_infinite(1:nibool,i))) = .true.
+      gnf(:,inode_inf(ibool_interfaces_infinite(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  enddo
+
+  print *,'Previous largest gnf ID:',gnf_end
+
+  igdof = gnf_end ! gdof
+  do i_node = 1,nnode
+    if (gnf(1,i_node) > 0 .and. .not. isgnf(1,i_node)) then
+      isgnf(1,i_node) = .true.
+      igdof = igdof+1
+      gnf(1,i_node) = igdof
+    endif
+  enddo
+  !neq=inode
+
+  allocate(gdf_ic(NNDOF,NGLOB_INNER_CORE), &
+           gdf_oc(NNDOF,NGLOB_OUTER_CORE), &
+           gdf_cm(NNDOF,NGLOB_CRUST_MANTLE), &
+           gdf_trinf(NNDOF,NGLOB_TRINFINITE), &
+           gdf_inf(NNDOF,NGLOB_INFINITE))
+  ! store gdf in a region array
+  gdf_ic(:,:) = gnf(:,inode_ic)
+  gdf_oc(:,:) = gnf(:,inode_oc)
+  gdf_cm(:,:) = gnf(:,inode_cm)
+  gdf_trinf(:,:) = gnf(:,inode_trinf)
+  gdf_inf(:,:) = gnf(:,inode_inf)
+
+  ! save global degrees of freedom for neighboring partitions
+  ! inner core
+  fhead = 'gdof_ic'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_inner_core
+    j_proc = my_neighbors_inner_core(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname !,i_proc,j_proc
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_inner_core(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_inner_core(i)))
+      tmpmat = gdf_ic(:,ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(i),i))
+      !write(10,*)gdf_ic(:,ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(i),i))
+      write(10,*) tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  ! outer core
+  fhead = 'gdof_oc'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_outer_core
+    j_proc = my_neighbors_outer_core(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*)nibool_interfaces_outer_core(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_outer_core(i)))
+      tmpmat = gdf_oc(:,ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(i),i))
+      !write(10,*)gdf_oc(:,ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(i),i))
+      write(10,*) tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  ! crust mantle
+  fhead = 'gdof_cm'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_crust_mantle
+    j_proc = my_neighbors_crust_mantle(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_crust_mantle(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_crust_mantle(i)))
+      tmpmat = gdf_cm(:,ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(i),i))
+      !write(10,*)gdf_cm(:,ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(i),i))
+      write(10,*) tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    fhead = 'gdof_trinf'
+    write(spm,*) i_proc
+    do i = 1,num_interfaces_trinfinite
+      j_proc = my_neighbors_trinfinite(i)
+      if (j_proc > i_proc) then
+        write(spn,*) j_proc
+        fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+        !print *,fname
+        open(10,file=fname,action='write',status='replace')
+        write(10,*) nibool_interfaces_trinfinite(i)
+        allocate(tmpmat(NNDOF,nibool_interfaces_trinfinite(i)))
+        tmpmat = gdf_trinf(:,ibool_interfaces_trinfinite(1:nibool_interfaces_trinfinite(i),i))
+        !write(10,*)gdf_trinf(:,ibool_interfaces_trinfinite(1:nibool_interfaces_trinfinite(i),i))
+        write(10,*) tmpmat
+        deallocate(tmpmat)
+        close(10)
+      endif
+    enddo
+  endif
+
+  ! infinite
+  fhead = 'gdof_inf'
+  write(spm,*)i_proc
+  do i = 1,num_interfaces_infinite
+    j_proc = my_neighbors_infinite(i)
+    if (j_proc > i_proc) then
+      write(spn,*) j_proc
+      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*) nibool_interfaces_infinite(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_infinite(i)))
+      tmpmat = gdf_inf(:,ibool_interfaces_infinite(1:nibool_interfaces_infinite(i),i))
+      !write(10,*)gdf_inf(:,ibool_interfaces_infinite(1:nibool_interfaces_infinite(i),i))
+      write(10,*) tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  gnf_end = maxval(gnf)
+  print *,'Largest gnf ID:',gnf_end
+
+  write(spm,*) i_proc
+
+  fname='DATABASES_MPI/gdof_proc'//trim(adjustl(spm))
+  open(10,file=fname,action='write',status='replace')
+
+  write(10,*) NGLOB_INNER_CORE
+  write(10,*) gdf_ic
+  write(10,*) NGLOB_OUTER_CORE
+  write(10,*) gdf_oc
+  write(10,*) NGLOB_CRUST_MANTLE
+  write(10,*) gdf_cm
+  write(10,*) NGLOB_TRINFINITE
+  write(10,*) gdf_trinf
+  write(10,*) NGLOB_INFINITE
+  write(10,*) gdf_inf
+  close(10)
+
+  !=============================================================================
+  ! global degrees of freedoms for NGLLX_INF=3
+  !=============================================================================
+  !
+  ! activate GLL points for NGLLX_INF=3 from NGLLX=5
+  isgll = .false.
+  inum = 0
+  do k = 1,NGLLZ,2
+    do j = 1,NGLLY,2
+      do i = 1,NGLLX,2
+        inum = inum+1
+        igll = NGLLY * NGLLX * (k-1) + NGLLX * (j-1) + i
+        isgll(igll) = .true.
+        igll_on(inum) = igll
+      enddo
+    enddo
+  enddo
+
+  ! prepare mesh information
+  ! store ibool in 1D linear mapping
+  ! inner core
+  do i_elmt = 1,NSPEC_INNER_CORE
+    inode_elmt_ic(:,i_elmt) = reshape(ibool_inner_core(:,:,:,i_elmt),(/NGLLCUBE/))
+  enddo
+  ! outer core
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    inode_elmt_oc(:,i_elmt) = reshape(ibool_outer_core(:,:,:,i_elmt),(/NGLLCUBE/))
+  enddo
+  ! inner core
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    inode_elmt_cm(:,i_elmt) = reshape(ibool_crust_mantle(:,:,:,i_elmt),(/NGLLCUBE/))
+  enddo
+  ! transition infinite
+  if (ADD_TRINF) then
+  do i_elmt = 1,NSPEC_TRINFINITE
+    inode_elmt_trinf(:,i_elmt) = reshape(ibool_trinfinite(:,:,:,i_elmt),(/NGLLCUBE/))
+  enddo
+  endif
+  ! infinite
+  do i_elmt = 1,NSPEC_INFINITE
+    inode_elmt_inf(:,i_elmt) = reshape(ibool_infinite(:,:,:,i_elmt),(/NGLLCUBE/))
+  enddo
+
+  isnode_ic(:) = .false.
+  isnode_oc(:) = .false.
+  isnode_cm(:) = .false.
+  isnode_trinf(:) = .false.
+  isnode_inf(:) = .false.
+
+  allocate(isnode(nnode))
+  isnode(:) = .false.
+
+  ! inner core
+  do i_elmt = 1,NSPEC_INNER_CORE
+    do k = 1,NGLLZ,2
+      do j = 1,NGLLY,2
+        do i = 1,NGLLX,2
+          ibool = ibool_inner_core(i,j,k,i_elmt)
+          isnode_ic(ibool) = .true.
+          isnode(inode_ic(ibool)) = .true.
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! outer core
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    do k = 1,NGLLZ,2
+      do j = 1,NGLLY,2
+        do i = 1,NGLLX,2
+          ibool = ibool_outer_core(i,j,k,i_elmt)
+          isnode_oc(ibool) = .true.
+          isnode(inode_oc(ibool)) = .true.
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! crust mantle
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    do k = 1,NGLLZ,2
+      do j = 1,NGLLY,2
+        do i = 1,NGLLX,2
+          ibool = ibool_crust_mantle(i,j,k,i_elmt)
+          isnode_cm(ibool) = .true.
+          isnode(inode_cm(ibool)) = .true.
+        enddo
+      enddo
+    enddo
+  enddo
+
+  if (ADD_TRINF) then
+    ! trinfinite
+    do i_elmt = 1,NSPEC_TRINFINITE
+      do k = 1,NGLLZ,2
+        do j = 1,NGLLY,2
+          do i = 1,NGLLX,2
+            ibool = ibool_trinfinite(i,j,k,i_elmt)
+            isnode_trinf(ibool) = .true.
+            isnode(inode_trinf(ibool)) = .true.
+          enddo
+        enddo
+      enddo
+    enddo
+  endif
+
+  ! infinite
+  do i_elmt = 1,NSPEC_INFINITE
+    do k = 1,NGLLZ,2
+      do j = 1,NGLLY,2
+        do i = 1,NGLLX,2
+          ibool = ibool_infinite(i,j,k,i_elmt)
+          isnode_inf(ibool) = .true.
+          isnode(inode_inf(ibool)) = .true.
+        enddo
+      enddo
+    enddo
+  enddo
+
+  nnode_ic1 = count(isnode_ic)
+  nnode_oc1 = count(isnode_oc)
+  nnode_cm1 = count(isnode_cm)
+  nnode_trinf1 = count(isnode_trinf)
+  nnode_inf1 = count(isnode_inf)
+
+  nnode1 = count(isnode)
+
+  ! node mirror
+  allocate(nmir(nnode))
+  inode1 = 0
+  do i_node = 1,nnode
+    if (isnode(i_node)) then
+      inode1 = inode1+1
+      nmir(i_node) = inode1
+    endif
+  enddo
+  deallocate(isnode)
+
+  if (inode1 /= nnode1) then
+    print *,inode1,nnode1
+    write(*,'(/,a) ') 'ERROR: counted level-1 active nodes mismatch!'
+    stop
+  endif
+
+  ! find active nodes and mirror to orginal nodes
+  ! inner core
+  allocate(nmir_ic(nnode_ic))
+  nmir_ic(:) = 0
+  inode = 0
+  do i_node = 1,nnode_ic
+    if (isnode_ic(i_node)) then
+      inode = inode+1
+      nmir_ic(i_node) = inode
+    endif
+  enddo
+  if (inode /= nnode_ic1) then
+    print *,inode,nnode_ic1,nnode_ic,size(isnode_ic),NGLOB_INNER_CORE
+    write(*,'(/,a) ') 'ERROR: counted level-1 active nodes mismatch in inner core!'
+    stop
+  endif
+
+  ! outer core
+  allocate(nmir_oc(nnode_oc))
+  nmir_oc(:) = 0
+  inode = 0
+  do i_node = 1,nnode_oc
+    if (isnode_oc(i_node)) then
+      inode = inode+1
+      nmir_oc(i_node) = inode
+    endif
+  enddo
+  if (inode /= nnode_oc1) then
+    write(*,'(/,a) ') 'ERROR: counted level-1 active nodes mismatch in outer core!'
+    stop
+  endif
+
+  ! crust mantle
+  allocate(nmir_cm(nnode_cm))
+  nmir_cm(:) = 0
+  inode = 0
+  do i_node = 1,nnode_cm
+    if (isnode_cm(i_node)) then
+      inode = inode+1
+      nmir_cm(i_node) = inode
+    endif
+  enddo
+  if (inode /= nnode_cm1) then
+    write(*,'(/,a) ') 'ERROR: counted level-1 active nodes mismatch in crust mantle!'
+    stop
+  endif
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    allocate(nmir_trinf(nnode_trinf))
+    nmir_trinf(:) = 0
+    inode = 0
+    do i_node = 1,nnode_trinf
+      if (isnode_trinf(i_node)) then
+        inode = inode+1
+        nmir_trinf(i_node) = inode
+      endif
+    enddo
+    if (inode /= nnode_trinf1) then
+      write(*,'(/,a) ') 'ERROR: counted level-1 active nodes mismatch in transition infinite!'
+      stop
+    endif
+  endif
+
+  ! infinite
+  allocate(nmir_inf(nnode_inf))
+  nmir_inf(:) = 0
+  inode = 0
+  do i_node = 1,nnode_inf
+    if (isnode_inf(i_node)) then
+      inode = inode+1
+      nmir_inf(i_node) = inode
+    endif
+  enddo
+  if (inode /= nnode_inf1) then
+    write(*,'(/,a) ') 'ERROR: counted level-1 active nodes mismatch in infinite!'
+    stop
+  endif
+
+  ! store ibool1 in elemental array
+  ! inner core
+  do i_elmt = 1,NSPEC_INNER_CORE
+    !print *,igll_on
+    inode_elmt_ic1(:,i_elmt) = nmir_ic(inode_elmt_ic(igll_on,i_elmt))
+  enddo
+
+  ! outer core
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    inode_elmt_oc1(:,i_elmt) = nmir_oc(inode_elmt_oc(igll_on,i_elmt))
+  enddo
+
+  ! crust mantle
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    inode_elmt_cm1(:,i_elmt) = nmir_cm(inode_elmt_cm(igll_on,i_elmt))
+  enddo
+
+  ! transtion infinite
+  if (ADD_TRINF) then
+  do i_elmt = 1,NSPEC_TRINFINITE
+    inode_elmt_trinf1(:,i_elmt) = nmir_trinf(inode_elmt_trinf(igll_on,i_elmt))
+  enddo
+  endif
+
+  ! infinite
+  do i_elmt = 1,NSPEC_INFINITE
+    inode_elmt_inf1(:,i_elmt) = nmir_inf(inode_elmt_inf(igll_on,i_elmt))
+  enddo
+
+  ! find inode_ic1,inode_oc1,inode_cm1,inode_inf1
+  allocate(inode_ic1(nnode_ic1),inode_oc1(nnode_oc1),inode_cm1(nnode_cm1), &
+  inode_trinf1(nnode_trinf1),inode_inf1(nnode_inf1))
+
+  ! inner core
+  do i_node = 1,nnode_ic
+    if (isnode_ic(i_node)) then
+      inode_ic1(nmir_ic(i_node)) = nmir(inode_ic(i_node))
+    endif
+  enddo
+
+  ! outer core
+  do i_node = 1,nnode_oc
+    if (isnode_oc(i_node)) then
+      inode_oc1(nmir_oc(i_node)) = nmir(inode_oc(i_node))
+    endif
+  enddo
+
+  ! crust mantle
+  do i_node = 1,nnode_cm
+    if (isnode_cm(i_node)) then
+      inode_cm1(nmir_cm(i_node)) = nmir(inode_cm(i_node))
+    endif
+  enddo
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    do i_node = 1,nnode_trinf
+      if (isnode_trinf(i_node)) then
+        inode_trinf1(nmir_trinf(i_node)) = nmir(inode_trinf(i_node))
+      endif
+    enddo
+  endif
+
+  ! infinite
+  do i_node = 1,nnode_inf
+    if (isnode_inf(i_node)) then
+      inode_inf1(nmir_inf(i_node))=nmir(inode_inf(i_node))
+    endif
+  enddo
+  deallocate(nmir)
+
+  ! modify MPI interfaces removing deactivated nodes
+  ! NOTE: this has to be done before isnode_ic, isnode_oc, etc. change
+
+  ! inner core
+  num_interfaces_inner_core1 = num_interfaces_inner_core
+  allocate(my_neighbors_inner_core1(num_interfaces_inner_core1))
+  my_neighbors_inner_core1(:) = my_neighbors_inner_core(:)
+
+  allocate(isibool_interface_ic(max_nibool_interfaces_ic,num_interfaces_inner_core1), &
+           nibool_interfaces_inner_core1(num_interfaces_inner_core1))
+  isibool_interface_ic(:,:) = .false.
+  do i = 1,num_interfaces_inner_core1
+    do j = 1,nibool_interfaces_inner_core(i)
+      isibool_interface_ic(j,i) = isnode_ic(ibool_interfaces_inner_core(j,i))
+    enddo
+    nibool_interfaces_inner_core1(i) = count(isibool_interface_ic(:,i))
+  enddo
+  max_nibool_interfaces_inner_core1 = maxval(nibool_interfaces_inner_core1)
+
+  allocate(ibool_interfaces_inner_core1(max_nibool_interfaces_inner_core1,num_interfaces_inner_core1))
+  ibool_interfaces_inner_core1(:,:) = 0
+  do i = 1,num_interfaces_inner_core1
+    inum = 0
+    do j = 1,nibool_interfaces_inner_core(i)
+      if (isibool_interface_ic(j,i)) then
+        inum = inum+1
+        ibool_interfaces_inner_core1(inum,i) = nmir_ic(ibool_interfaces_inner_core(j,i))
+      endif
+    enddo
+  enddo
+
+  deallocate(nmir_ic)
+
+  ! outer core
+  num_interfaces_outer_core1 = num_interfaces_outer_core
+  allocate(my_neighbors_outer_core1(num_interfaces_outer_core1))
+  my_neighbors_outer_core1(:) = my_neighbors_outer_core(:)
+
+  allocate(isibool_interface_oc(max_nibool_interfaces_oc,num_interfaces_outer_core1), &
+           nibool_interfaces_outer_core1(num_interfaces_outer_core1))
+  isibool_interface_oc(:,:) = .false.
+  do i = 1,num_interfaces_outer_core1
+    do j = 1,nibool_interfaces_outer_core(i)
+      isibool_interface_oc(j,i)=isnode_oc(ibool_interfaces_outer_core(j,i))
+    enddo
+    nibool_interfaces_outer_core1(i)=count(isibool_interface_oc(:,i))
+  enddo
+  max_nibool_interfaces_outer_core1 = maxval(nibool_interfaces_outer_core1)
+
+  allocate(ibool_interfaces_outer_core1(max_nibool_interfaces_outer_core1,num_interfaces_outer_core1))
+  ibool_interfaces_outer_core1(:,:) = 0
+  do i = 1,num_interfaces_outer_core1
+    inum = 0
+    do j = 1,nibool_interfaces_outer_core(i)
+      if (isibool_interface_oc(j,i)) then
+        inum = inum+1
+        ibool_interfaces_outer_core1(inum,i) = nmir_oc(ibool_interfaces_outer_core(j,i))
+      endif
+    enddo
+  enddo
+  deallocate(nmir_oc)
+
+  ! crust mantle
+  num_interfaces_crust_mantle1 = num_interfaces_crust_mantle
+  allocate(my_neighbors_crust_mantle1(num_interfaces_crust_mantle1))
+  my_neighbors_crust_mantle1(:) = my_neighbors_crust_mantle(:)
+
+  allocate(isibool_interface_cm(max_nibool_interfaces_cm,num_interfaces_crust_mantle1), &
+           nibool_interfaces_crust_mantle1(num_interfaces_crust_mantle1))
+  isibool_interface_cm(:,:) = .false.
+  do i = 1,num_interfaces_crust_mantle1
+    do j = 1,nibool_interfaces_crust_mantle(i)
+      isibool_interface_cm(j,i)=isnode_cm(ibool_interfaces_crust_mantle(j,i))
+    enddo
+    nibool_interfaces_crust_mantle1(i)=count(isibool_interface_cm(:,i))
+  enddo
+  max_nibool_interfaces_crust_mantle1 = maxval(nibool_interfaces_crust_mantle1)
+
+  allocate(ibool_interfaces_crust_mantle1(max_nibool_interfaces_crust_mantle1,num_interfaces_crust_mantle1))
+  ibool_interfaces_crust_mantle1(:,:) = 0
+  do i = 1,num_interfaces_crust_mantle1
+    inum = 0
+    do j = 1,nibool_interfaces_crust_mantle(i)
+      if (isibool_interface_cm(j,i)) then
+        inum = inum+1
+        ibool_interfaces_crust_mantle1(inum,i) = nmir_cm(ibool_interfaces_crust_mantle(j,i))
+      endif
+    enddo
+  enddo
+  deallocate(nmir_cm)
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    num_interfaces_trinfinite1 = num_interfaces_trinfinite
+    allocate(my_neighbors_trinfinite1(num_interfaces_trinfinite1))
+    my_neighbors_trinfinite1(:) = my_neighbors_trinfinite(:)
+
+    allocate(isibool_interface_trinf(max_nibool_interfaces_trinfinite,num_interfaces_trinfinite1), &
+             nibool_interfaces_trinfinite1(num_interfaces_trinfinite1))
+    isibool_interface_trinf(:,:) = .false.
+    do i = 1,num_interfaces_trinfinite1
+      do j = 1,nibool_interfaces_trinfinite(i)
+        isibool_interface_trinf(j,i)=isnode_trinf(ibool_interfaces_trinfinite(j,i))
+      enddo
+      nibool_interfaces_trinfinite1(i)=count(isibool_interface_trinf(:,i))
+    enddo
+    max_nibool_interfaces_trinfinite1 = maxval(nibool_interfaces_trinfinite1)
+
+    allocate(ibool_interfaces_trinfinite1(max_nibool_interfaces_trinfinite1,num_interfaces_trinfinite1))
+    ibool_interfaces_trinfinite1(:,:) = 0
+    do i = 1,num_interfaces_trinfinite1
+      inum = 0
+      do j = 1,nibool_interfaces_trinfinite(i)
+        if (isibool_interface_trinf(j,i)) then
+          inum = inum+1
+          ibool_interfaces_trinfinite1(inum,i) = nmir_trinf(ibool_interfaces_trinfinite(j,i))
+        endif
+      enddo
+    enddo
+    deallocate(nmir_trinf)
+  endif
+
+  ! infinite
+  num_interfaces_infinite1 = num_interfaces_infinite
+  allocate(my_neighbors_infinite1(num_interfaces_infinite1))
+  my_neighbors_infinite1(:) = my_neighbors_infinite(:)
+
+  allocate(isibool_interface_inf(max_nibool_interfaces_infinite,num_interfaces_infinite1), &
+           nibool_interfaces_infinite1(num_interfaces_infinite1))
+  isibool_interface_inf(:,:) = .false.
+  do i = 1,num_interfaces_infinite1
+    do j = 1,nibool_interfaces_infinite(i)
+      isibool_interface_inf(j,i) = isnode_inf(ibool_interfaces_infinite(j,i))
+    enddo
+    nibool_interfaces_infinite1(i) = count(isibool_interface_inf(:,i))
+  enddo
+  max_nibool_interfaces_infinite1 = maxval(nibool_interfaces_infinite1)
+
+  allocate(ibool_interfaces_infinite1(max_nibool_interfaces_infinite1,num_interfaces_infinite1))
+  ibool_interfaces_infinite1(:,:) = 0
+  do i = 1,num_interfaces_infinite1
+    inum = 0
+    do j = 1,nibool_interfaces_infinite(i)
+      if (isibool_interface_inf(j,i)) then
+        inum = inum+1
+        ibool_interfaces_infinite1(inum,i) = nmir_inf(ibool_interfaces_infinite(j,i))
+      endif
+    enddo
+  enddo
+  deallocate(nmir_inf)
+
+  deallocate(isibool_interface_ic)
+  deallocate(isibool_interface_oc)
+  deallocate(isibool_interface_cm)
+  deallocate(isibool_interface_trinf)
+  deallocate(isibool_interface_inf)
+
+  allocate(gnf1(NNDOF,nnode1),isgnf1(NNDOF,nnode1))
+  gnf1 = 0
+  isgnf1 = .false.
+
+  ! activate freedoms
+
+  ! freedoms of fictitious cube in inner core are deactivated
+  do i_elmt = 1,NSPEC_INNER_CORE
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+
+    do i = 1,NGLLCUBE_INF
+      ibool = inode_elmt_ic1(i,i_elmt)
+      gnf1(1,inode_ic1(ibool)) = 1
+    enddo
+  enddo
+
+  ! outer core
+  ! all freedoms are active
+  gnf1(1,inode_oc1) = 1
+
+  ! crust-mantle
+  ! all freedoms are active
+  gnf1(1,inode_cm1) = 1
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    ! all freedoms are active
+    gnf1(1,inode_trinf1) = 1
+  endif
+
+  ! infinite element
+  ! all but surface nodes are activated
+  do i_elmt = 1,NSPEC_INFINITE
+    do k = 1,NGLLZ_INF-1
+      do j = 1,NGLLY_INF
+        do i = 1,NGLLX_INF
+          igll = NGLLX_INF * NGLLY_INF * (k-1) + NGLLX_INF * (j-1) + i
+          ibool = inode_elmt_inf1(igll,i_elmt)
+          gnf1(1,inode_inf1(ibool)) = 1
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! copy global indices from preceding partitions
+  ! inner core
+  fhead = 'gdof1_ic'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_inner_core
+    j_proc = my_neighbors_inner_core(i)
+    if (j_proc < i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      open(10,file=fname,action='read',status='old')
+      read(10,*)nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*)gghost
+      close(10,status='delete')
+
+      isgnf1(:,inode_ic1(ibool_interfaces_inner_core1(1:nibool,i))) = .true.
+      gnf1(:,inode_ic1(ibool_interfaces_inner_core1(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  !    ! ndof_p2p
+  !    ndof_p2p(i_proc,j_proc)=ndof_p2p(i_proc,j_proc)+NNDOF*nibool_interfaces_inner_core1(i)
+  enddo
+
+  ! outer core
+  fhead = 'gdof1_oc'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_outer_core
+    j_proc = my_neighbors_outer_core(i)
+    if (j_proc < i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      open(10,file=fname,action='read',status='old')
+      read(10,*)nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*)gghost
+      close(10,status='delete')
+
+      isgnf1(:,inode_oc1(ibool_interfaces_outer_core1(1:nibool,i))) = .true.
+      gnf1(:,inode_oc1(ibool_interfaces_outer_core1(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  !    ! ndof_p2p
+  !    ndof_p2p(i_proc,j_proc)=ndof_p2p(i_proc,j_proc)+NNDOF*nibool_interfaces_outer_core1(i)
+  enddo
+
+  ! crust mantle
+  fhead = 'gdof1_cm'
+  write(spm,*) i_proc
+  do i = 1,num_interfaces_crust_mantle
+    j_proc = my_neighbors_crust_mantle(i)
+    if (j_proc < i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      open(10,file=fname,action='read',status='old')
+      read(10,*)nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*)gghost
+      close(10,status='delete')
+
+      isgnf1(:,inode_cm1(ibool_interfaces_crust_mantle1(1:nibool,i))) = .true.
+      gnf1(:,inode_cm1(ibool_interfaces_crust_mantle1(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  !    ! ndof_p2p
+  !    ndof_p2p(i_proc,j_proc)=ndof_p2p(i_proc,j_proc)+NNDOF*nibool_interfaces_crust_mantle1(i)
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    fhead = 'gdof1_trinf'
+    write(spm,*)i_proc
+    do i = 1,num_interfaces_trinfinite
+      j_proc = my_neighbors_trinfinite(i)
+      if (j_proc < i_proc) then
+        write(spn,*)j_proc
+        fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+        open(10,file=fname,action='read',status='old')
+        read(10,*)nibool
+        allocate(gghost(NNDOF,nibool))
+        read(10,*)gghost
+        close(10,status='delete')
+
+        isgnf1(:,inode_trinf1(ibool_interfaces_trinfinite1(1:nibool,i))) = .true.
+        gnf1(:,inode_trinf1(ibool_interfaces_trinfinite1(1:nibool,i))) = gghost(:,:)
+        deallocate(gghost)
+      endif
+    !    ! ndof_p2p
+    !    ndof_p2p(i_proc,j_proc)=ndof_p2p(i_proc,j_proc)+NNDOF*nibool_interfaces_trinfinite1(i)
+    enddo
+  endif
+
+  ! infinite
+  fhead = 'gdof1_inf'
+  write(spm,*)i_proc
+  do i = 1,num_interfaces_infinite
+    j_proc = my_neighbors_infinite(i)
+    if (j_proc < i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      open(10,file=fname,action='read',status='old')
+      read(10,*)nibool
+      allocate(gghost(NNDOF,nibool))
+      read(10,*)gghost
+      close(10,status='delete')
+
+      isgnf1(:,inode_inf1(ibool_interfaces_infinite1(1:nibool,i))) = .true.
+      gnf1(:,inode_inf1(ibool_interfaces_infinite1(1:nibool,i))) = gghost(:,:)
+      deallocate(gghost)
+    endif
+  !    ! ndof_p2p
+  !    ndof_p2p(i_proc,j_proc)=ndof_p2p(i_proc,j_proc)+NNDOF*nibool_interfaces_infinite1(i)
+  enddo
+  print *,'Previous largest gnf1 ID:',gnf_end1
+
+  igdof1 = gnf_end1 ! gdof
+  do i_node = 1,nnode1
+    if (gnf1(1,i_node) > 0 .and. .not. isgnf1(1,i_node)) then
+      isgnf1(1,i_node) = .true.
+      igdof1 = igdof1+1
+      gnf1(1,i_node)=igdof1
+    endif
+  enddo
+  !neq1=inode1
+
+  allocate(gdf_ic1(NNDOF,nnode_ic1),gdf_oc1(NNDOF,nnode_oc1), &
+  gdf_cm1(NNDOF,nnode_cm1),gdf_trinf1(NNDOF,nnode_trinf1), &
+  gdf_inf1(NNDOF,nnode_inf1))
+
+  ! store gdf in a region array
+  gdf_ic1(:,:) = gnf1(:,inode_ic1)
+  gdf_oc1(:,:) = gnf1(:,inode_oc1)
+  gdf_cm1(:,:) = gnf1(:,inode_cm1)
+  gdf_trinf1(:,:) = gnf1(:,inode_trinf1)
+  gdf_inf1(:,:) = gnf1(:,inode_inf1)
+
+  deallocate(inode_ic1,inode_oc1,inode_cm1,inode_trinf1,inode_inf1)
+
+  ! save global degrees of freedom for neighboring partitions
+  ! inner core
+  fhead='gdof1_ic'
+  write(spm,*)i_proc
+  do i = 1,num_interfaces_inner_core
+    j_proc = my_neighbors_inner_core(i)
+    if (j_proc > i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      open(10,file=fname,action='write',status='replace')
+      write(10,*)nibool_interfaces_inner_core1(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_inner_core1(i)))
+      tmpmat = gdf_ic1(:,ibool_interfaces_inner_core1(1:nibool_interfaces_inner_core1(i),i))
+      !write(10,*)gdf_ic1(:,ibool_interfaces_inner_core1(1:nibool_interfaces_inner_core1(i),i))
+      write(10,*)tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  ! outer core
+  fhead='gdof1_oc'
+  write(spm,*)i_proc
+  do i = 1,num_interfaces_outer_core
+    j_proc = my_neighbors_outer_core(i)
+    if (j_proc > i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      open(10,file=fname,action='write',status='replace')
+      write(10,*)nibool_interfaces_outer_core1(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_outer_core1(i)))
+      tmpmat = gdf_oc1(:,ibool_interfaces_outer_core1(1:nibool_interfaces_outer_core1(i),i))
+      !write(10,*)gdf_oc1(:,ibool_interfaces_outer_core1(1:nibool_interfaces_outer_core1(i),i))
+      write(10,*)tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  ! crust mantle
+  fhead='gdof1_cm'
+  write(spm,*)i_proc
+  do i = 1,num_interfaces_crust_mantle
+    j_proc = my_neighbors_crust_mantle(i)
+    if (j_proc > i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      open(10,file=fname,action='write',status='replace')
+      write(10,*)nibool_interfaces_crust_mantle1(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_crust_mantle1(i)))
+      tmpmat = gdf_cm1(:,ibool_interfaces_crust_mantle1(1:nibool_interfaces_crust_mantle1(i),i))
+      !write(10,*)gdf_cm1(:,ibool_interfaces_crust_mantle1(1:nibool_interfaces_crust_mantle1(i),i))
+      write(10,*)tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    fhead='gdof1_trinf'
+    write(spm,*)i_proc
+
+    do i = 1,num_interfaces_trinfinite
+      j_proc = my_neighbors_trinfinite(i)
+      if (j_proc > i_proc) then
+        write(spn,*)j_proc
+        fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+        !print *,fname
+        open(10,file=fname,action='write',status='replace')
+        write(10,*)nibool_interfaces_trinfinite1(i)
+        allocate(tmpmat(NNDOF,nibool_interfaces_trinfinite1(i)))
+        tmpmat = gdf_trinf1(:,ibool_interfaces_trinfinite1(1:nibool_interfaces_trinfinite1(i),i))
+        !write(10,*)gdf_trinf1(:,ibool_interfaces_trinfinite1(1:nibool_interfaces_trinfinite1(i),i))
+        write(10,*)tmpmat
+        deallocate(tmpmat)
+        close(10)
+      endif
+    enddo
+  endif
+
+  ! infinite
+  fhead='gdof1_inf'
+  write(spm,*) i_proc
+
+  do i = 1,num_interfaces_infinite
+    j_proc = my_neighbors_infinite(i)
+    if (j_proc > i_proc) then
+      write(spn,*)j_proc
+      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      !print *,fname
+      open(10,file=fname,action='write',status='replace')
+      write(10,*)nibool_interfaces_infinite1(i)
+      allocate(tmpmat(NNDOF,nibool_interfaces_infinite1(i)))
+      tmpmat = gdf_inf1(:,ibool_interfaces_infinite1(1:nibool_interfaces_infinite1(i),i))
+      !write(10,*)gdf_inf1(:,ibool_interfaces_infinite1(1:nibool_interfaces_infinite1(i),i))
+      write(10,*)tmpmat
+      deallocate(tmpmat)
+      close(10)
+    endif
+  enddo
+
+  gnf_end1 = maxval(gnf1)
+  print *,'Largest gnf1 ID:',gnf_end1
+  write(spm,*)i_proc
+
+  fname='DATABASES_MPI/gdof1_proc'//trim(adjustl(spm))
+  open(10,file=fname,action='write',status='replace')
+  write(10,*)nnode_ic1
+  write(10,*)gdf_ic1
+  write(10,*)nnode_oc1
+  write(10,*)gdf_oc1
+  write(10,*)nnode_cm1
+  write(10,*)gdf_cm1
+  write(10,*)nnode_trinf1
+  write(10,*)gdf_trinf1
+  write(10,*)nnode_inf1
+  write(10,*)gdf_inf1
+  close(10)
+
+  print *,'*********************************************************'
+
+  ! deallocate variables
+  ! for NGLL=5
+  deallocate(gnf,isgnf)
+  deallocate(gdf_ic,gdf_oc,gdf_cm,gdf_trinf,gdf_inf)
+  !  deallocate(rmass_inner_core)
+  deallocate(my_neighbors_inner_core,nibool_interfaces_inner_core)
+  deallocate(ibool_interfaces_inner_core)
+  !  deallocate(phase_ispec_inner_inner_core)
+  !  deallocate(num_elem_colors_inner_core)
+  !  deallocate(buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+  !  request_send_vector_inner_core,request_recv_vector_inner_core)
+
+  !  deallocate(rmass_outer_core)
+  deallocate(my_neighbors_outer_core,nibool_interfaces_outer_core)
+  deallocate(ibool_interfaces_outer_core)
+  !  deallocate(phase_ispec_inner_outer_core)
+  !  deallocate(num_elem_colors_outer_core)
+  !  deallocate(buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+  !  request_send_scalar_outer_core,request_recv_scalar_outer_core)
+
+  !  deallocate(rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle)
+  deallocate(my_neighbors_crust_mantle,nibool_interfaces_crust_mantle)
+  deallocate(ibool_interfaces_crust_mantle)
+  !  deallocate(phase_ispec_inner_crust_mantle)
+  !  deallocate(num_elem_colors_crust_mantle)
+  !  deallocate(buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+  !  request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
+
+  deallocate(my_neighbors_trinfinite,nibool_interfaces_trinfinite)
+  deallocate(ibool_interfaces_trinfinite)
+  !  deallocate(phase_ispec_inner_trinfinite)
+  !  deallocate(num_elem_colors_trinfinite)
+  !  deallocate(buffer_send_scalar_trinfinite,buffer_recv_scalar_trinfinite, &
+  !  request_send_scalar_trinfinite,request_recv_scalar_trinfinite)
+
+  deallocate(my_neighbors_infinite,nibool_interfaces_infinite)
+  deallocate(ibool_interfaces_infinite)
+  !  deallocate(phase_ispec_inner_infinite)
+  !  deallocate(num_elem_colors_infinite)
+  !  deallocate(buffer_send_scalar_infinite,buffer_recv_scalar_infinite, &
+  !  request_send_scalar_infinite,request_recv_scalar_infinite)
+
+  ! for NGLL=3
+  deallocate(gnf1,isgnf1)
+  deallocate(gdf_ic1,gdf_oc1,gdf_cm1,gdf_trinf1,gdf_inf1)
+  deallocate(my_neighbors_inner_core1,nibool_interfaces_inner_core1)
+  deallocate(ibool_interfaces_inner_core1)
+  !
+  deallocate(my_neighbors_outer_core1,nibool_interfaces_outer_core1)
+  deallocate(ibool_interfaces_outer_core1)
+  !
+  deallocate(my_neighbors_crust_mantle1,nibool_interfaces_crust_mantle1)
+  deallocate(ibool_interfaces_crust_mantle1)
+  !
+  deallocate(my_neighbors_trinfinite1,nibool_interfaces_trinfinite1)
+  deallocate(ibool_interfaces_trinfinite1)
+  !
+  deallocate(my_neighbors_infinite1,nibool_interfaces_infinite1)
+  deallocate(ibool_interfaces_infinite1)
+
+  end subroutine create_gindex_for_process
diff --git a/src/gindex3D/gindex3D.f90 b/src/gindex3D/gindex3D.f90
new file mode 100644
index 000000000..fc51c1710
--- /dev/null
+++ b/src/gindex3D/gindex3D.f90
@@ -0,0 +1,48 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+program xgindex3D
+
+  implicit none
+
+  ! starts mpi
+  call init_mpi()
+
+  ! initializes parameters
+  call initialize_gindex()
+
+  ! creates new global degrees of freedom indexing
+  call create_gindex()
+
+  ! stop all the MPI processes, and exit
+  call finalize_mpi()
+
+end program xgindex3D
+
+
+
diff --git a/src/gindex3D/gindex3D_par.f90 b/src/gindex3D/gindex3D_par.f90
new file mode 100644
index 000000000..e6bd31ba6
--- /dev/null
+++ b/src/gindex3D/gindex3D_par.f90
@@ -0,0 +1,57 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+module gindex_par
+
+  use constants, only: myrank,NGLLX,NGLLY,NGLLZ
+
+  use shared_parameters, only: NPROC
+
+  use specfem_par
+
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+
+  use specfem_par_full_gravity
+
+  !use specfem_par_movie
+  implicit none
+
+  ! global nodes for NGLLX = 5
+  integer :: ignode_end
+  ! global gdof for NGLLX = 5
+  integer :: gnf_end
+  ! global gdof for NGLLX_INF = 3
+  integer :: gnf_end1
+
+end module gindex_par
+
+
diff --git a/src/gindex3D/initialize_gindex.f90 b/src/gindex3D/initialize_gindex.f90
new file mode 100644
index 000000000..21f6aa42f
--- /dev/null
+++ b/src/gindex3D/initialize_gindex.f90
@@ -0,0 +1,121 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine initialize_gindex()
+
+  use gindex_par
+
+  implicit none
+
+  include 'version.fh'
+
+  ! local parameters
+  character(len = 20) :: snproc
+  ! mpi
+  integer :: sizeprocs
+  integer :: ier
+
+  ! runs in single process mode
+  call world_size(sizeprocs)
+  call world_rank(myrank)
+
+  ! checks number of processes
+  ! note: must run as a single process with: mpirun -np 1 ..
+  if (sizeprocs /= 1) then
+    ! usage info
+    if (myrank == 0) then
+      print *, 'xgindex3D requires MPI functionality. However, this program executes as sequential program.'
+      print *, 'Invalid number of processes used: ', sizeprocs, ' procs'
+      print *
+      print *, 'Please run: mpirun -np 1 ./bin/xgindex3D <number_of_solver_processes>'
+      print *
+      print *, '            for example: mpirun -np 1 ./bin/xgindex3D 96'
+      print *
+    endif
+    call abort_mpi()
+  endif
+
+  ! reads input parameters
+  if (command_argument_count() /= 1) then
+    ! usage info
+    print *, 'Usage: mpirun -np 1 ./bin/xgindex3D <number_of_solver_processes>'
+    print *
+    print *, '       for example: mpirun -np 1 ./bin/xgindex3D 96'
+    print *
+    stop 'Wrong number of arguments'
+  endif
+
+  call get_command_argument(1,snproc)
+  read(snproc,*) nproc
+
+  ! open main output file, only written to by process 0
+  if (myrank == 0) then
+    if (IMAIN /= ISTANDARD_OUTPUT) then
+      open(unit=IMAIN,file=trim(OUTPUT_FILES)//'/output_gindex3D.txt',status='unknown',action='write',iostat=ier)
+      if (ier /= 0 ) call exit_MPI(myrank,'Error opening file output_gindex3D.txt for writing output info')
+    endif
+
+    write(IMAIN,*)
+    write(IMAIN,*) '******************************'
+    write(IMAIN,*) '**** Specfem3D gindex3D   ****'
+    write(IMAIN,*) '******************************'
+    write(IMAIN,*)
+    write(IMAIN,*) 'Version: ', git_package_version
+    write(IMAIN,*)
+    call flush_IMAIN()
+  endif
+
+  if (myrank == 0) write(IMAIN,'(a)') '<< xgindex3D...'
+
+  !allocate(ndof_p2p(nproc,nproc))
+  !ndof_p2p=0
+
+  ! initializes simulation parameters
+  if (myrank == 0) write(IMAIN,'(a)',advance='no') '   initialising...'
+
+  ! reads in Par_file and sets compute parameters
+  call read_compute_parameters()
+
+  ! read the mesh parameters for all array setup
+  call read_mesh_parameters()
+
+  ! user output
+  if (myrank == 0) then
+    write(IMAIN,*) 'mesh parameters (from input directory):'
+    write(IMAIN,*) '  NSPEC_CRUST_MANTLE = ',NSPEC_CRUST_MANTLE
+    write(IMAIN,*) '  NSPEC_OUTER_CORE   = ',NSPEC_OUTER_CORE
+    write(IMAIN,*) '  NSPEC_INNER_CORE   = ',NSPEC_INNER_CORE
+    write(IMAIN,*)
+    write(IMAIN,*) '  NSPEC_TRINFINITE   = ',NSPEC_TRINFINITE
+    write(IMAIN,*) '  NSPEC_INFINITE     = ',NSPEC_INFINITE
+    write(IMAIN,*)
+    call flush_IMAIN()
+  endif
+
+  end subroutine initialize_gindex
+
+
diff --git a/src/gindex3D/rules.mk b/src/gindex3D/rules.mk
new file mode 100644
index 000000000..f2de77d91
--- /dev/null
+++ b/src/gindex3D/rules.mk
@@ -0,0 +1,161 @@
+#=====================================================================
+#
+#                       S p e c f e m 3 D  G l o b e
+#                       ----------------------------
+#
+#     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+#                        Princeton University, USA
+#                and CNRS / University of Marseille, France
+#                 (there are currently many more authors!)
+# (c) Princeton University and CNRS / University of Marseille, April 2014
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#=====================================================================
+
+#######################################
+
+gindex3D_TARGETS = \
+	$E/xgindex3D \
+	$(EMPTY_MACRO)
+
+gindex3D_OBJECTS = \
+	$O/gindex3D.gindex.o \
+	$O/gindex3D_par.gindex_module.o \
+	$O/create_gindex.gindex.o \
+	$O/initialize_gindex.gindex.o \
+	$(EMPTY_MACRO)
+
+gindex3D_MODULES = \
+	$(FC_MODDIR)/gindex_par.$(FC_MODEXT) \
+	$(EMPTY_MACRO)
+
+# These files come from the specfem3D directory
+gindex3D_SHARED_OBJECTS = \
+	$O/specfem3D_par.solverstatic_module.o \
+	$O/bcast_mesh_databases.solverstatic.o \
+	$O/locate_regular_points.solverstatic.o \
+	$O/read_arrays_solver.solverstatic.o \
+	$O/read_mesh_parameters.solverstatic.o \
+	$O/read_mesh_databases.solverstatic.o \
+	$(EMPTY_MACRO)
+
+# These files come from the shared directory
+gindex3D_SHARED_OBJECTS += \
+	$O/adios_manager.shared_adios_module.o \
+	$O/shared_par.shared_module.o \
+	$O/auto_ner.shared.o \
+	$O/broadcast_computed_parameters.shared.o \
+	$O/count_elements.shared.o \
+	$O/count_points.shared.o \
+	$O/create_name_database.shared.o \
+	$O/define_all_layers.shared.o \
+	$O/euler_angles.shared.o \
+	$O/exit_mpi.shared.o \
+	$O/flush_system.shared.o \
+	$O/get_model_parameters.shared.o \
+	$O/get_timestep_and_layers.shared.o \
+	$O/hex_nodes.shared.o \
+	$O/memory_eval.shared.o \
+	$O/parallel.sharedmpi.o \
+	$O/param_reader.cc.o \
+	$O/read_compute_parameters.shared.o \
+	$O/read_parameter_file.shared.o \
+	$O/read_value_parameters.shared.o \
+	$O/recompute_jacobian.shared.o \
+	$O/reduce.shared.o \
+	$O/rotate_tensor.shared.o \
+	$O/rthetaphi_xyz.shared.o \
+	$O/save_header_file.shared.o \
+	$O/ylm.shared.o \
+	$(EMPTY_MACRO)
+
+###
+### GPU
+###
+
+gindex3D_SHARED_OBJECTS += $(gpu_OBJECTS)
+
+###
+### ADIOS
+###
+
+# conditional adios linking
+ifeq ($(ADIOS),yes)
+	gindex3D_SHARED_OBJECTS += $(adios_specfem3D_OBJECTS)
+	gindex3D_SHARED_OBJECTS += $(adios_specfem3D_SHARED_OBJECTS)
+else ifeq ($(ADIOS2),yes)
+	gindex3D_SHARED_OBJECTS += $(adios_specfem3D_OBJECTS)
+	gindex3D_SHARED_OBJECTS += $(adios_specfem3D_SHARED_OBJECTS)
+else
+	gindex3D_SHARED_OBJECTS += $(adios_specfem3D_SHARED_STUBS)
+endif
+
+###
+### ASDF
+###
+
+# conditional asdf linking
+ifeq ($(ASDF),yes)
+	gindex3D_SHARED_OBJECTS += $(asdf_specfem3D_OBJECTS)
+	gindex3D_SHARED_OBJECTS += $(asdf_specfem3D_SHARED_OBJECTS)
+else
+	gindex3D_SHARED_OBJECTS += ${asdf_specfem3D_SHARED_STUBS}
+endif
+
+# conditional CEM or EMC model
+ifeq ($(CEM),yes)
+	gindex3D_SHARED_OBJECTS += $O/read_write_netcdf.checknetcdf.o
+else ifeq ($(EMC),yes)
+	gindex3D_SHARED_OBJECTS += $O/read_write_netcdf.checknetcdf.o
+else ifeq ($(NETCDF),yes)
+	gindex3D_SHARED_OBJECTS += $O/read_write_netcdf.checknetcdf.o
+endif
+
+
+
+#######################################
+
+####
+#### rules for executables
+####
+
+${E}/xgindex3D: $(gindex3D_SHARED_OBJECTS) $(gindex3D_OBJECTS)
+	${FCLINK} -o $@ $+ $(SPECFEM_LINK_FLAGS)
+
+#######################################
+
+## compilation directories
+S := ${S_TOP}/src/gindex3D
+$(gindex3D_OBJECTS): S = ${S_TOP}/src/gindex3D
+
+####
+#### rule for each .o file below
+####
+
+## additional module dependencies
+$O/gindex3D_par.gindex_module.o: $O/specfem3D_par.solverstatic_module.o
+
+
+## general rules
+
+$O/%.gindex_module.o: $S/%.f90 $O/shared_par.shared_module.o
+	${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
+
+$O/%.gindex.o: $S/%.f90 $O/shared_par.shared_module.o $O/gindex3D_par.gindex_module.o
+	${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
+
+$O/%.gindex.o: $S/%.F90 $O/shared_par.shared_module.o $O/gindex3D_par.gindex_module.o
+	${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
diff --git a/src/specfem3D/SIEM_infinite_element.F90 b/src/specfem3D/SIEM_infinite_element.F90
new file mode 100644
index 000000000..4354eb399
--- /dev/null
+++ b/src/specfem3D/SIEM_infinite_element.F90
@@ -0,0 +1,872 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+! this module contains infinite-element routines
+! REVISION
+!   HNG, Apr 11,2012; HNG, Jul 12,2011; HNG, Apr 09,2010
+
+module infinite_element
+
+  integer,parameter :: kdble=selected_real_kind(15) ! double precision
+
+contains
+
+  !! this subroutine add a layer of infinite mesh outside the model given the
+  !! reference surface and infinite element information.
+  !! REVISION
+  !!   HNG, Jul 12,2011; ; HNG, Apr 09,2010
+  !subroutine add_infmesh(ismpi,myid,nproc,errcode,errtag)
+  !use global
+  !use math_constants, only: zero
+  !use math_library, only: distance,i_uniinv
+  !implicit none
+  !logical,intent(in) :: ismpi
+  !integer,intent(in) :: myid,nproc
+  !integer,intent(out) :: errcode
+  !character(len=250),intent(out) :: errtag
+  !integer :: bctype,i,ios
+  !integer :: ielmt,iface,nelpart,i_elpart
+  !integer,dimension(6,4) :: node_face ! local node numbers in each face
+  !
+  !real(kind=kdble) :: gaminf,x0(ndim),val
+  !real(kind=kdble),parameter :: one=1.0_kdble
+  !integer :: g_numOLD(8,nelmt),mat_idOLD(nelmt),nelmtOLD,nnodeOLD
+  !real(kind=kdble) :: r1,g_coordOLD(ndim,nnode)
+  !real(kind=kdble),allocatable :: xs(:,:),mirxs(:,:)
+  !
+  !integer :: n1,n2,nelmtINF,nnode_inf,nsnode,nsnode_all
+  !integer,allocatable :: nodelist(:),inode_order(:),g_numinf(:),iface_elmt(:)
+  !logical,allocatable :: isnode(:)
+  !
+  !character(len=20) :: format_str,ptail
+  !character(len=250) :: fname
+  !character(len=150) :: data_path,strline
+  !
+  !integer :: ipart ! partition ID
+  !
+  !errtag="ERROR: unknown!"
+  !errcode=-1
+  !! set data path
+  !if (ismpi) then
+  !  data_path=trim(part_path)
+  !else
+  !  data_path=trim(inp_path)
+  !endif
+  !
+  !ipart=myid-1 ! partition ID starts from 0
+  !if (ismpi) then
+  !  write(format_str,*)ceiling(log10(real(nproc)+1))
+  !  format_str='(a,i'//trim(adjustl(format_str))//'.'//trim(adjustl(format_str))//')'
+  !  write(ptail, fmt=format_str)'_proc',ipart
+  !else
+  !  ptail=""
+  !endif
+  !
+  !! local node numbering in each face CUBIT/EXODUS convention
+  !node_face(1,:)=(/1,2,6,5/) ! counterclockwise w.r.t. outer normal
+  !node_face(2,:)=(/2,3,7,6/) ! counterclockwise w.r.t. outer normal
+  !node_face(3,:)=(/4,3,7,8/) ! clockwise w.r.t. outer normal
+  !node_face(4,:)=(/1,4,8,5/) ! clockwise w.r.t. outer normal
+  !node_face(5,:)=(/1,2,3,4/) ! clockwise w.r.t. outer normal
+  !node_face(6,:)=(/5,6,7,8/) ! counterclockwise w.r.t. outer normal
+  !
+  !fname=trim(data_path)//trim(infrfile)//trim(ptail)
+  !!print *,fname
+  !open(unit=11,file=trim(fname),status='old',action='read',iostat = ios)
+  !if (ios /= 0) then
+  !  write(errtag,*)'ERROR: file "'//trim(fname)//'" cannot be opened!'
+  !  return
+  !endif
+  !
+  !read(11,*,iostat=ios)nelpart
+  !if (ios /= 0) then
+  !  write(errtag,*)'ERROR: bad infrfile!'
+  !  return
+  !endif
+  !nelmtINF=nelpart
+  !allocate(iface_elmt(nelmtINF))
+  !nsnode_all=4*nelmtINF
+  !allocate(nodelist(nsnode_all),inode_order(nsnode_all))
+  !n1=1; n2=4
+  !do i_elpart=1,nelpart
+  !  !print *,n1,n2
+  !  read(11,*)ielmt,iface ! This will read a line and proceed to next line
+  !  iface_elmt(i_elpart)=iface
+  !  nodelist(n1:n2)=g_num(node_face(iface,:),ielmt)
+  !  n1=n2+1; n2=n1+3
+  !enddo
+  !close(11)
+  !!stop
+  !!print *,n2,nsnode_all;! stop
+  !call i_uniinv(nodelist,inode_order)
+  !
+  !!print *,inode_order;
+  !!print *,minval(nodelist),maxval(nodelist);
+  !!print *,minval(abs(inode_order)),maxval(abs(inode_order)); stop
+  !
+  !nsnode=maxval(inode_order)
+  !allocate(isnode(nsnode),xs(ndim,nsnode),mirxs(ndim,nsnode),g_numinf(nsnode))
+  !
+  !isnode=.false.
+  !! assign xs
+  !xs(:,inode_order(1))=g_coord(:,nodelist(1))
+  !isnode(inode_order(1))=.true.
+  !do i=2,nsnode_all
+  !  if (.not.isnode(inode_order(i))) then
+  !     xs(:,inode_order(i))=g_coord(:,nodelist(i))
+  !     isnode(inode_order(i))=.true.
+  !  endif
+  !enddo
+  !deallocate(isnode)
+  !!print *,'Hi:',inod,nsnode_all
+  !!stop
+  !! pole specific to the spherical body which has the center at (0,0,0)
+  !x0=zero
+  !
+  !! compute mirror nodes
+  !do i=1,nsnode
+  !  r1=distance(x0,xs(:,i),ndim)
+  !  if (rinf <= r1) then
+  !    write(errtag,*)'ERROR: reference infinite radius is smaller than the model!'
+  !    return
+  !  endif
+  !  gaminf=r1/(rinf-r1)
+  !  !print *,one+one/gaminf
+  !  ! division formula
+  !  mirxs(:,i)=((gaminf+one)*xs(:,i)-x0)/gaminf
+  !  g_numinf(i)=nnode+i
+  !enddo
+  !!stop
+  !deallocate(xs)
+  !g_numOLD=g_num;
+  !g_coordOLD=g_coord;
+  !mat_idOLD=mat_id
+  !deallocate(g_num,g_coord,mat_id)
+  !
+  !nelmtOLD=nelmt; nnodeOLD=nnode
+  !
+  !nelmt=nelmtOLD+nelmtINF
+  !nnode=nnodeOLD+nsnode
+  !
+  !! reallocate global node - and element-arrays
+  !allocate(g_num(8,nelmt),g_coord(ndim,nnode),mat_id(nelmt))
+  !
+  !! update connectivity, coordinates, and material IDs
+  !g_num(:,1:nelmtOLD)=g_numOLD
+  !g_coord(:,1:nnodeOLD)=g_coordOLD
+  !mat_id(1:nelmtOLD)=mat_idOLD
+  !! add to material list
+  !mat_id(nelmtOLD+1:nelmt)=mat_idINF
+  !!print *,mat_id,'infmat:',mat_idINF; stop
+  !! add to global node list
+  !g_coord(:,nnodeOLD+1:nnode)=mirxs
+  !deallocate(mirxs)
+  !
+  !! add to global element list
+  !! to have face 5 of new element always clockwise w.r.t. outer normal
+  !! face 3 or 4 or 5 of reference element has to be reordered
+  !n1=1; n2=4
+  !do i=1,nelmtINF
+  !  if (iface_elmt(i)==3.or.iface_elmt(i)==4.or.iface_elmt(i)==5) then
+  !    g_num(1:4,nelmtOLD+i)=nodelist(n2:n1:-1)
+  !    g_num(5:8,nelmtOLD+i)=g_numinf(inode_order(n2:n1:-1))
+  !  else
+  !    g_num(1:4,nelmtOLD+i)=nodelist(n1:n2)
+  !    g_num(5:8,nelmtOLD+i)=g_numinf(inode_order(n1:n2))
+  !  endif
+  !  n1=n2+1; n2=n1+3
+  !enddo
+  !deallocate(nodelist,inode_order,g_numinf,iface_elmt)
+  !
+  !ielmtINF1=nelmtOLD+1; ielmtINF2=nelmt
+  !ifaceINF=6
+  !!sync all
+  !!call stop_all()
+  !
+  !! compute nodal to global
+  !errcode=0
+  !return
+  !
+  !end subroutine add_infmesh
+
+!
+!===========================================
+!
+
+! TODO: compute 1D lagrange shape function iusing GEN rotuine since we take
+! equidistant interpolation points along infinite direction. But now I have
+! changed to GLL points so not necessary!
+! this subroutine computes GLL (along finite directions) and Radau (along
+! infinite direction) quadrature points and weights for 3D
+
+  subroutine shape_function_infiniteGLHEX8ZW_GLLR(ndim,ngllx,nglly,ngllz,ngll,nip, &
+                                                  iface,gam,a,shape_infinite,dshape_infinite,lagrange_gl,dlagrange_gl,GLw)
+
+  use gll_library1, only: lagrange1dGLLAS,lagrange1dGENAS,zwgljd
+  implicit none
+  integer,intent(in) :: ndim,ngllx,nglly,ngllz,ngll,nip,iface
+  !of decay
+  !integer,parameter :: ngllinf=ngll-nglly*ngllz
+  real(kind=kdble),intent(in) :: gam,a!,nd !nd: order
+  real(kind=kdble),dimension(nip,8),intent(out) :: shape_infinite
+  real(kind=kdble),dimension(ndim,nip,8),intent(out) :: dshape_infinite
+  real(kind=kdble),dimension(nip,ngll),intent(out) :: lagrange_gl
+  real(kind=kdble),dimension(ndim,nip,ngll),intent(out) :: dlagrange_gl
+  real(kind=kdble),intent(out) :: GLw(nip)
+  real(kind=kdble),parameter :: jacobi_alpha=0.0_kdble,jacobi_beta=0.0_kdble, &
+  one = 1.0_kdble!,five = 5.0_kdble
+  integer :: i,ii,j,k,n,i1,j1,k1,nipx(ndim)
+  real(kind=kdble) :: ddir,xi(ndim) !,eta,zeta
+  real(kind=kdble),dimension(ngllx) :: gllpx,gllwx,igllpx,igllwx ! GLL points and weights
+  real(kind=kdble),dimension(nglly) :: gllpy,gllwy,igllpy,igllwy ! GLL points and weights
+  real(kind=kdble),dimension(ngllz) :: gllpz,gllwz,igllpz,igllwz ! GLL points and weights
+  real(kind=kdble),dimension(ndim,ngllx) :: lagrange_x,lagrange_dx
+  real(kind=kdble),dimension(ndim,2) :: lagrangeINF_x,lagrangeINF_dx
+  !real(kind=kdble),dimension(nglly) :: lagrange_y,lagrange_dy
+  !real(kind=kdble),dimension(ngllz) :: lagrange_z,lagrange_dz
+  integer :: iINF !,ngllxINF(ndim)
+  !print *,iface; stop
+  !ngllxINF(1)=ngllx
+  !ngllxINF(2)=nglly
+  !ngllxINF(3)=ngllz
+  !print *,nip; stop
+  !print *,size(shape_infinite),size(dshape_infinite)
+  !stop
+  ddir = one
+  if (iface == 1) then
+    iINF = 2; ddir=-one
+  else if (iface == 2) then
+    iINF = 1; ddir = one
+  else if (iface == 3) then
+    iINF = 2; ddir = one
+  else if (iface == 4) then
+    iINF = 1; ddir=-one
+  else if (iface == 5) then
+    iINF = 3; ddir=-one
+  else if (iface == 6) then
+    iINF = 3; ddir = one
+  endif
+  !print *,ddir; stop
+  !ngllxINF(iINF)=ngllxINF(iINF)-1
+
+  nipx(1)=ngllx
+  nipx(2)=nglly
+  nipx(3)=ngllz
+  ! compute everything in indexed order
+  ! get GLL points
+  ! for alpha=beta=0, jacobi polynomial is legendre polynomial
+  ! for ngllx=nglly=ngllz, need to call only once
+  ! get GLL points and weights
+  call zwgljd(gllpx,gllwx,ngllx,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpy,gllwy,nglly,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpz,gllwz,ngllz,jacobi_alpha,jacobi_beta)
+
+  ! integration points are the GLL points
+  igllpx = gllpx; igllpy = gllpy; igllpz = gllpz
+  igllwx = gllwx; igllwy = gllwy; igllwz = gllwz;
+
+  ! overwrite GLL points/weights with radau counterpart along infinite direction
+  if (iINF == 1) call radau_quadrature(ngllx,igllpx,igllwx)
+  if (iINF == 2) call radau_quadrature(nglly,igllpy,igllwy)
+  if (iINF == 3) call radau_quadrature(ngllz,igllpz,igllwz)
+
+  !print *,gllpz
+  !print *,gllwz
+  !stop
+  ! gauss-jacobi or gauss-legendre points and weights
+  !call zwgjd(gllpx,gllwx,nipx,jacobi_alpha,jacobi_beta)
+  !print *,nipx
+  !print *,gllpx
+  !print *,gllwx
+  ! for an infinite element we use Gauss-Legendre quadrature
+  !if (nip==8) then
+  !  gllpx(1)=-one/sqrt(3.0_kdble); gllpx(2)=-gllpx(1)
+  !  gllwx(1)=one; gllwx(2)=one;
+  !else if (nip==27) then
+  !  gllpx(1)=-sqrt(3.0_kdble/five); gllpx(2)=0.0_kdble; gllpx(3)=-gllpx(1)
+  !  gllwx(1)=five/9.0_kdble; gllwx(2)=8.0_kdble/9.0_kdble; gllwx(3)=gllwx(1);
+  !else
+  !if (nip /= 8.and.nip /= 27) then
+  !  print *,'ERROR: illegal number of Gauss points:',nip,'!'
+  !  stop
+  !endif
+  !print *,gllpx
+  !print *,gllwx
+  !stop
+  !gllpy=gllpx; gllpz=gllpx;
+  !gllwy=gllwx; gllwz=gllwx;
+
+  !print *,gllpz
+  !print *,gllwz
+  !stop
+  ii = 0
+  do k1 = 1,nipx(3)
+    do j1 = 1,nipx(2)
+      do i1 = 1,nipx(1)
+        ii = ii+1
+        !do ii=1,ngll ! ngllx*nglly*ngllz
+
+        ! integration points
+        xi(1)=igllpx(i1) !xi,   gll_points(1,ii)
+        xi(2)=igllpy(j1) !eta,  gll_points(2,ii)
+        xi(3)=igllpz(k1) !zeta, gll_points(3,ii)
+
+        !xi(iINF)=ddir*xi(iINF)
+
+        ! integration weights
+        GLw(ii)=igllwx(i1)*igllwy(j1)*igllwz(k1)
+
+        call lagrange1dGLLAS(ngllx,gllpx,xi(1),lagrange_x(1,:),lagrange_dx(1,:))
+        call lagrange1dGLLAS(nglly,gllpy,xi(2),lagrange_x(2,:),lagrange_dx(2,:))
+        call lagrange1dGLLAS(ngllz,gllpz,xi(3),lagrange_x(3,:),lagrange_dx(3,:))
+
+        ! interpolation functions
+        n = 0
+        do k = 1,ngllz
+          do j = 1,nglly
+            do i = 1,ngllx
+              n = n+1
+              lagrange_gl(ii,n)=lagrange_x(1,i)*lagrange_x(2,j)*lagrange_x(3,k)
+              dlagrange_gl(1,ii,n)=lagrange_dx(1,i)*lagrange_x(2,j)*lagrange_x(3,k)
+              dlagrange_gl(2,ii,n)=lagrange_x(1,i)*lagrange_dx(2,j)*lagrange_x(3,k)
+              dlagrange_gl(3,ii,n)=lagrange_x(1,i)*lagrange_x(2,j)*lagrange_dx(3,k)
+            enddo
+          enddo
+        enddo
+
+        ! shape functions for HEX8
+        ! compute 1d lagrange polynomials
+        call lagrange1dGENAS(2,xi(1),lagrangeINF_x(1,:),lagrangeINF_dx(1,:))
+        call lagrange1dGENAS(2,xi(2),lagrangeINF_x(2,:),lagrangeINF_dx(2,:))
+        call lagrange1dGENAS(2,xi(3),lagrangeINF_x(3,:),lagrangeINF_dx(3,:))
+        ! consider 3 nodes but compute only at 2 nodes
+        !call lagrange_infinite(3,nd,xi(iINF),ddir,gam,a,lagrangeINF_x(iINF,:),lagrangeINF_dx(iINF,:))
+        call lagrange1d_infiniteZWAS(3,xi(iINF),lagrangeINF_x(iINF,:),lagrangeINF_dx(iINF,:))
+        !call lagrange_infinite(ngllx,nd,1.0_kdble,gam,a,lagrangeINF_x,lagrangeINF_dx)
+        !print *,lagrangeINF_x,lagrangeINF_dx; stop
+        !call lagrange1d_infinite(ngllx,xi,lagrangeINF_x,lagrangeINF_dx)
+        !call lagrange1d_infiniteZW(ngllx,xi,lagrangeINF_x,lagrangeINF_dx)
+        n = 0
+        do k = 1,2
+          do j = 1,2
+            do i = 1,2
+              n = n+1
+              shape_infinite(ii,n)=lagrangeINF_x(1,i)*lagrangeINF_x(2,j)*lagrangeINF_x(3,k)
+              dshape_infinite(1,ii,n)=lagrangeINF_dx(1,i)*lagrangeINF_x(2,j)*lagrangeINF_x(3,k)
+              dshape_infinite(2,ii,n)=lagrangeINF_x(1,i)*lagrangeINF_dx(2,j)*lagrangeINF_x(3,k)
+              dshape_infinite(3,ii,n)=lagrangeINF_x(1,i)*lagrangeINF_x(2,j)*lagrangeINF_dx(3,k)
+            enddo
+          enddo
+        enddo
+
+        !enddo
+      enddo
+    enddo
+  enddo
+  !stop 'Hi!'
+  return
+  end subroutine shape_function_infiniteGLHEX8ZW_GLLR
+
+!
+!===========================================
+!
+! this subroutine computes Gauss quadrature points and weights for 3D
+! TODO: compute 1D lagrange shape function iusing GEN rotuine since we take
+! equidistant interpolation points along infinite direction.  But now I have
+! changed to GLL points so not necessary!
+
+  subroutine shape_function_infiniteGLHEX8ZW_GQ(ndim,ngllx,nglly,ngllz,ngll,nipx, &
+                                                nip,iface,nd,gam,a,shape_infinite,dshape_infinite,lagrange_gl,dlagrange_gl,GLw)
+
+  use gll_library1, only: lagrange1dGLL,lagrange1dGEN,zwgjd,zwgljd
+  implicit none
+  integer,intent(in) :: ndim,ngllx,nglly,ngllz,ngll,nipx,nip,iface
+  real(kind=kdble),intent(in) :: gam,a,nd !nd: order
+  real(kind=kdble),dimension(nip,8),intent(out) :: shape_infinite
+  real(kind=kdble),dimension(ndim,nip,8),intent(out) :: dshape_infinite
+  real(kind=kdble),dimension(nip,ngll),intent(out) :: lagrange_gl
+  real(kind=kdble),dimension(ndim,nip,ngll),intent(out) :: dlagrange_gl
+  real(kind=kdble),intent(out) :: GLw(nip)
+  real(kind=kdble),parameter :: jacobi_alpha=0.0_kdble,jacobi_beta=0.0_kdble,one=1.0_kdble
+  integer :: i,ii,j,k,n,i1,j1,k1
+  real(kind=kdble) :: ddir,xi(ndim),tmp !,eta,zeta
+  real(kind=kdble),dimension(ngllx) :: gllpx,gllwx ! GLL points and weights
+  real(kind=kdble),dimension(nglly) :: gllpy,gllwy ! GLL points and weights
+  real(kind=kdble),dimension(ngllz) :: gllpz,gllwz ! GLL points and weights
+  real(kind=kdble),dimension(nipx) :: igllpx,igllwx ! GLL points and weights
+  real(kind=kdble),dimension(nipx) :: igllpy,igllwy ! GLL points and weights
+  real(kind=kdble),dimension(nipx) :: igllpz,igllwz ! GLL points and weights
+  real(kind=kdble),dimension(ndim,ngllx) :: lagrange_x,lagrange_dx
+  real(kind=kdble),dimension(ndim,2) :: lagrangeINF_x,lagrangeINF_dx
+  !real(kind=kdble),dimension(nglly) :: lagrange_y,lagrange_dy
+  !real(kind=kdble),dimension(ngllz) :: lagrange_z,lagrange_dz
+  integer :: iINF !,ngllxINF(ndim)
+  !print *,iface; stop
+  !ngllxINF(1)=ngllx
+  !ngllxINF(2)=nglly
+  !ngllxINF(3)=ngllz
+
+  ddir = one
+  if (iface == 1) then
+    iINF = 2; ddir=-one
+  else if (iface == 2) then
+    iINF = 1; ddir = one
+  else if (iface == 3) then
+    iINF = 2; ddir = one
+  else if (iface == 4) then
+    iINF = 1; ddir=-one
+  else if (iface == 5) then
+    iINF = 3; ddir=-one
+  else if (iface == 6) then
+    iINF = 3; ddir = one
+  endif
+  !print *,iface,iINF,ddir
+  !ngllxINF(iINF)=ngllxINF(iINF)-1
+
+  ! interpolation points
+  ! compute everything in indexed order
+  ! get GLL points
+  ! for alpha=beta=0, jacobi polynomial is legendre polynomial
+  ! for ngllx=nglly=ngllz, need to call only once
+  call zwgljd(gllpx,gllwx,ngllx,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpy,gllwy,nglly,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpz,gllwz,ngllz,jacobi_alpha,jacobi_beta)
+
+
+  ! integration points
+  ! gauss-jacobi or gauss-legendre points and weights
+  call zwgjd(igllpx,igllwx,nipx,jacobi_alpha,jacobi_beta)
+  !print *,nipx
+  !print *,gllpx
+  !print *,gllwx; stop
+  ! for an infinite element we use Gauss-Legendre quadrature
+  !if (nip==8) then
+  !  gllpx(1)=-one/sqrt(3.0_kdble); gllpx(2)=-gllpx(1)
+  !  gllwx(1)=one; gllwx(2)=one;
+  !else if (nip==27) then
+  !  gllpx(1)=-sqrt(3.0_kdble/five); gllpx(2)=0.0_kdble; gllpx(3)=-gllpx(1)
+  !  gllwx(1)=five/9.0_kdble; gllwx(2)=8.0_kdble/9.0_kdble; gllwx(3)=gllwx(1);
+  !else
+  if (nip /= 8.and.nip /= 27) then
+    print *,'ERROR: illegal number of Gauss points:',nip,'!'
+    stop
+  endif
+  !print *,igllpx
+  !print *,igllwx
+  !stop
+  igllpy = igllpx; igllpz = igllpx;
+  igllwy = igllwx; igllwz = igllwx;
+
+  !print *,iface,iINF,ddir; stop
+  !print *,gllpz
+  !print *,gllwz
+  !stop
+  ii = 0
+  do k1 = 1,nipx
+    do j1 = 1,nipx
+      do i1 = 1,nipx
+        ii = ii+1
+        !do ii=1,ngll ! ngllx*nglly*ngllz
+
+        ! integration points
+        xi(1)=igllpx(i1) !xi,   gll_points(1,ii)
+        xi(2)=igllpy(j1) !eta,  gll_points(2,ii)
+        xi(3)=igllpz(k1) !zeta, gll_points(3,ii)
+
+        !xi(iINF)=ddir*xi(iINF)
+
+        ! integration weights
+        GLw(ii)=igllwx(i1)*igllwy(j1)*igllwz(k1)
+
+        call lagrange1dGLL(ngllx,gllpx,xi(1),lagrange_x(1,:),lagrange_dx(1,:))
+        call lagrange1dGLL(nglly,gllpy,xi(2),lagrange_x(2,:),lagrange_dx(2,:))
+        call lagrange1dGLL(ngllz,gllpz,xi(3),lagrange_x(3,:),lagrange_dx(3,:))
+
+        !call lagrange1dGEN(ngllx,xi(1),lagrange_x(1,:),lagrange_dx(1,:))
+        !call lagrange1dGEN(nglly,xi(2),lagrange_x(2,:),lagrange_dx(2,:))
+        !call lagrange1dGEN(ngllz,xi(3),lagrange_x(3,:),lagrange_dx(3,:))
+
+        ! interpolation functions
+        n = 0
+        do k = 1,ngllz
+          do j = 1,nglly
+            do i = 1,ngllx
+              n = n+1
+              lagrange_gl(ii,n)=lagrange_x(1,i)*lagrange_x(2,j)*lagrange_x(3,k)
+              dlagrange_gl(1,ii,n)=lagrange_dx(1,i)*lagrange_x(2,j)*lagrange_x(3,k)
+              dlagrange_gl(2,ii,n)=lagrange_x(1,i)*lagrange_dx(2,j)*lagrange_x(3,k)
+              dlagrange_gl(3,ii,n)=lagrange_x(1,i)*lagrange_x(2,j)*lagrange_dx(3,k)
+            enddo
+          enddo
+        enddo
+
+        ! shape functions for HEX8
+        ! compute 1d lagrange polynomials
+        call lagrange1dGEN(2,xi(1),lagrangeINF_x(1,:),lagrangeINF_dx(1,:))
+        call lagrange1dGEN(2,xi(2),lagrangeINF_x(2,:),lagrangeINF_dx(2,:))
+        call lagrange1dGEN(2,xi(3),lagrangeINF_x(3,:),lagrangeINF_dx(3,:))
+        ! consider 3 nodes but compute only at 2 nodes
+        call lagrange1d_infiniteZW(3,xi(iINF),lagrangeINF_x(iINF,:),lagrangeINF_dx(iINF,:))
+        !call lagrange_infinite(ngllx,nd,1.0_kdble,gam,a,lagrangeINF_x,lagrangeINF_dx)
+        !print *,lagrangeINF_x,lagrangeINF_dx; stop
+        !call lagrange1d_infinite(ngllx,xi,lagrangeINF_x,lagrangeINF_dx)
+        !call lagrange1d_infiniteZW(ngllx,xi,lagrangeINF_x,lagrangeINF_dx)
+        n = 0
+        do k = 1,2
+          do j = 1,2
+            do i = 1,2
+              n = n+1
+              shape_infinite(ii,n)=lagrangeINF_x(1,i)*lagrangeINF_x(2,j)*lagrangeINF_x(3,k)
+              dshape_infinite(1,ii,n)=lagrangeINF_dx(1,i)*lagrangeINF_x(2,j)*lagrangeINF_x(3,k)
+              dshape_infinite(2,ii,n)=lagrangeINF_x(1,i)*lagrangeINF_dx(2,j)*lagrangeINF_x(3,k)
+              dshape_infinite(3,ii,n)=lagrangeINF_x(1,i)*lagrangeINF_x(2,j)*lagrangeINF_dx(3,k)
+            enddo
+          enddo
+        enddo
+
+        !enddo
+      enddo
+    enddo
+  enddo
+
+  return
+  end subroutine shape_function_infiniteGLHEX8ZW_GQ
+
+!
+!===========================================
+!
+! this subroutine extracts the nodes for HEX8 of the finite region of an infinite element
+
+  subroutine get_gnodinfHEX8(ndim,ngllx,nglly,ngllz,nginf,iface,gnodinf)
+
+  implicit none
+  integer,intent(in) :: ndim,ngllx,nglly,ngllz,nginf,iface
+  integer,intent(out) :: gnodinf(nginf)
+  integer :: i,j,k,inum
+  integer :: inc(ndim),ngllxINF0(ndim),ngllxINF(ndim),iINF
+  real(kind=kdble) :: ddir
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  if (iface < 1.or.iface > 6) then
+    write(*,*) 'ERROR: illegal outer face ID:',iface
+    stop
+  endif
+
+  ! initialize ngllINF indices
+  ngllxINF0 = 1
+  ngllxINF(1)=ngllx
+  ngllxINF(2)=nglly
+  ngllxINF(3)=ngllz
+
+  if (iface == 1) then
+    iINF = 2; ddir=-one
+  else if (iface == 2) then
+    iINF = 1; ddir = one
+  else if (iface == 3) then
+    iINF = 2; ddir = one
+  else if (iface == 4) then
+    iINF = 1; ddir=-one
+  else if (iface == 5) then
+    iINF = 3; ddir=-one
+  else if (iface == 6) then
+    iINF = 3; ddir = one
+  endif
+
+  if (ddir < 0) then
+    ngllxINF0(iINF)=2
+  else
+    ngllxINF(iINF)=ngllxINF(iINF)-1
+  endif
+
+  ! extract only the corner nodes
+  inc = ngllxINF-ngllxINF0
+  inum = 0
+  do k = ngllxINF0(3),ngllxINF(3),inc(3)
+    do j = ngllxINF0(2),ngllxINF(2),inc(2)
+      do i = ngllxINF0(1),ngllxINF(1),inc(1)
+        inum = inum+1
+        gnodinf(inum)=nglly*ngllx*(k-1)+ngllx*(j-1)+i
+      enddo
+    enddo
+  enddo
+  !print *,inum
+  end subroutine get_gnodinfHEX8
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+
+  subroutine lagrange1d_infiniteMO(nenod,nd,xi,ddir,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  !integer :: i,j,k
+  real(kind=kdble),intent(in) :: nd,xi,ddir ! xi: point where to calculate lagrange function and
+  !its derivative
+  !real(kind=kdble),intent(in) :: gam,a
+  real(kind=kdble),dimension(nenod-1),intent(out) :: phi,dphi_dxi
+  real(kind=kdble),dimension(nenod) :: xii
+  real(kind=kdble) :: fac !dx
+  real(kind=kdble),parameter :: one=1.0_kdble,two=2.0_kdble
+
+  if (nenod /= 3) then
+    write(*,*) 'ERROR: infinite element is currently implemented only for 3 nodes!'
+    stop
+  endif
+  !! compute natural coordnates
+  !dx=2.0_kdble/real((nenod-1),kdble)! length = 2.0 as xi is taken -1 to +1
+  !do i=1,nenod
+  !  ! coordinates when origin is in the left
+  !  xii(i)=real((i-1),kdble)*dx
+  !enddo
+
+  !! origin is tranformed to mid point
+  !xii=xii-1.0_kdble
+
+  fac=one/(one-xi)
+
+  phi(1)=-two*xi*fac
+  phi(2)=one-phi(1)
+
+  dphi_dxi(1)=-two*fac*fac
+  dphi_dxi(2)=two*fac*fac
+
+  return
+  end subroutine lagrange1d_infiniteMO
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+! Assumed Shape array: pass pointer, subarray or allocatable array
+
+  subroutine lagrange1d_infiniteZWAS(nenod,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  !integer :: i,j,k
+  real(kind=kdble),intent(in) :: xi ! xi: point where to calculate lagrange
+  !function and its derivative
+  real(kind=kdble),dimension(:),intent(out) :: phi,dphi_dxi !,dimension(nenod-1)
+  real(kind=kdble) :: fac
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  if (nenod /= 3) then
+    write(*,*) 'ERROR: infinite element is currently implemented only for 3 nodes!'
+    stop
+  endif
+
+  fac=one/(one-xi)
+
+  phi(1)=-xi*fac
+  phi(2)=fac !one-phi(1) !+xi*fac
+
+  dphi_dxi(1)=-fac*fac
+  dphi_dxi(2)=fac*fac
+
+  return
+  end subroutine lagrange1d_infiniteZWAS
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+
+  subroutine lagrange1d_infiniteZW(nenod,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  !integer :: i,j,k
+  real(kind=kdble),intent(in) :: xi ! xi: point where to calculate lagrange
+  !function and its derivative
+
+  real(kind=kdble),dimension(nenod-1),intent(out) :: phi,dphi_dxi
+  real(kind=kdble) :: fac !dx
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  if (nenod /= 3) then
+    write(*,*) 'ERROR: infinite element is currently implemented only for 3 nodes!'
+    stop
+  endif
+  !! compute natural coordnates
+  !dx=2.0_kdble/real((nenod-1),kdble)! length = 2.0 as xi is taken -1 to +1
+  !do i=1,nenod
+  !  ! coordinates when origin is in the left
+  !  xii(i)=real((i-1),kdble)*dx
+  !enddo
+
+  !! origin is tranformed to mid point
+  !xii=xii-1.0_kdble
+
+  fac=one/(one-xi)
+
+  phi(1)=-xi*fac
+  phi(2)=fac !one-phi(1) !+xi*fac
+
+  dphi_dxi(1)=-fac*fac
+  dphi_dxi(2)=fac*fac
+
+  return
+  end subroutine lagrange1d_infiniteZW
+
+!
+!===========================================
+!
+
+! Revision:
+!   HNG, Apr 19,2012
+! RADAU_COMPUTE computes a Radau quadrature rule.
+! the Radau rule is distinguished by the fact that the left endpoint
+! (-1) is always an abscissa.
+!
+! the integral:
+! integral ( -1 <= x <= 1 ) f(x) dx
+!
+! the quadrature rule:
+! sum ( 1 <= i <= n ) w(i) * f ( x(i) )
+!
+! the quadrature rule will integrate exactly all polynomials up to
+! X**(2*N-2).
+!
+! Licensing:!
+! this code is distributed under the GNU LGPL license.
+!
+! Modified:
+! 06 February 2007
+!
+! Author:
+!    Original MATLAB code by Greg von Winckel.
+!    This MATLAB version by John Burkardt.
+!
+! References:
+!    Milton Abramowitz, Irene Stegun,
+!    Handbook of Mathematical functions,
+!    National Bureau of Standards, 1964,
+!    ISBN: 0-486-61272-4,
+!    LC: QA47.A34.
+!
+!    Claudio Canuto, Yousuff Hussaini, Alfio Quarteroni, Thomas Zang,
+!    Spectral Methods in Fluid Dynamics,
+!    Springer, 1993,
+!    ISNB13: 978-3540522058,
+!    LC: QA377.S676.
+!
+!    Francis Hildebrand,
+!    Section 8.11,
+!    Introduction to Numerical Analysis,
+!    Dover, 1987,
+!    ISBN13: 978-0486653631,
+!    LC: QA300.H5.
+!
+!    Arthur Stroud, Don Secrest,
+!    Gaussian Quadrature Formulas,
+!    Prentice Hall, 1966,
+!    LC: QA299.4G3S7.
+!
+!    Daniel Zwillinger, editor,
+!    CRC Standard Mathematical Tables and Formulae,
+!    30th Edition,
+!    CRC Press, 1996,
+!    ISBN: 0-8493-2479-3,
+!    LC: QA47.M315.
+!
+! Input:
+! N: the order or the number of integration points (>0, integer)
+! Output:
+! X(N): the abscissas
+! W(N): the weights
+
+  subroutine radau_quadrature(n,x,w)
+
+  implicit none
+  integer,intent(in) :: n
+  integer :: i,j
+  real(kind=kdble),intent(out) :: x(n),w(n)
+  real(kind=kdble) :: rj,rn,twopi,fac,p(n,n+1),xold(n)
+  real(kind=kdble),parameter :: one=1.0_kdble,pi=3.141592653589793_kdble, &
+  two = 2.0_kdble,tol = 1.0e-12_kdble,zero = 0.0_kdble,zerotol = 1.0e-12_kdble
+
+  if (n < 1) then
+    write(*,*) 'ERROR: number of quadrature points must be > 1!'
+    stop
+  endif
+
+  x = zero; w = zero
+  rn=real(n,kdble)
+
+  ! initial estimate for the abscissas is the Chebyshev-Gauss-Radau nodes.
+  fac=two*pi/(two*rn-one)
+
+  ! initialize the Legendre Vandermonde matrix.
+  p = zero
+  p(2:n,1) = one;
+  do i = 1,n
+    x(i)=-cos(fac*real(i-1,kdble))
+    p(1,i) = (-one)**(i-1)
+  enddo
+  p(1,n+1)=(-one)**(n)
+
+  ! compute P using the recursion relation.
+  ! compute its first and second derivatives and
+  ! update X using the Newton-Raphson method.
+  xold = two
+  do i = 1,100
+    if (maxval(abs(x-xold)) <= zerotol)exit
+    if (i >= 100) then
+      write(*,*) 'ERROR: Legendre Vandermonde matrix does not converge!'
+      stop
+    endif
+    xold = x;
+    p(2:n,2) = x(2:n);
+    do j = 2,n
+      rj=real(j,kdble)
+      p(2:n,j+1) = ((two*rj-one)*x(2:n)*p(2:n,j)+(-rj+one)*p(2:n,j-1))/rj
+    enddo
+    x(2:n) = xold(2:n)-((one-xold(2:n))/rn)*(p(2:n,n)+p(2:n,n+1))/(p(2:n,n)-p(2:n,n+1))
+  enddo
+
+  ! compute the weights.
+  w = zero
+  w(1) = two/(rn*rn)
+  w(2:n)=(one-x(2:n))/(rn*p(2:n,n)*rn*p(2:n,n))
+  return
+  end subroutine radau_quadrature
+
+end module infinite_element
+
+#endif
+
diff --git a/src/specfem3D/SIEM_math_library.F90 b/src/specfem3D/SIEM_math_library.F90
new file mode 100644
index 000000000..8bd978da9
--- /dev/null
+++ b/src/specfem3D/SIEM_math_library.F90
@@ -0,0 +1,2831 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+
+! this module contains math constants
+! math parameters
+! REVISION
+!   HNG, Jul 12,2011; HNG, Apr 09,2010
+
+module math_constants
+
+  use constants_solver, only: CUSTOM_REAL
+  implicit none
+
+  integer,parameter :: kreal = CUSTOM_REAL
+  real(kind=kreal),parameter :: zero=0.0_kreal,half=0.5_kreal,one=1.0_kreal, &
+  two = 2.0_kreal
+  real(kind=kreal),parameter :: pi=3.141592653589793_kreal
+  real(kind=kreal),parameter :: deg2rad=pi/180.0_kreal,rad2deg=180.0_kreal/pi
+
+  ! tolerance value for zero
+  real(kind=kreal),parameter :: inftol=1.0e32_kreal,zerotol = 1.0e-12_kreal
+  !real(kind=kreal) :: tmpx
+  !real(kind=kreal),parameter :: NAN = IEEE_VALUE(tmpx, IEEE_QUIET_NAN)
+end module math_constants
+
+
+!
+!=====================================================================
+!
+
+! this module coatins math routines
+! REVISION
+!   HNG, Jul 12,2011; HNG, Apr 09,2010
+
+module math_library
+
+  use constants_solver, only: CUSTOM_REAL
+  integer,parameter :: kreal = CUSTOM_REAL
+
+contains
+
+  function get_normal(x1,x2,x3) result(nx)
+  real(kind=kreal),dimension(3),intent(in) :: x1,x2,x3
+  real(kind=kreal),dimension(3) :: nx
+  real(kind=kreal),dimension(3) :: v1,v2
+  real(kind=kreal) :: norm
+
+  ! two vectors
+  v1 = x2-x1
+  v2 = x3-x1
+
+  ! cross product
+  nx(1)=v1(2)*v2(3)-v2(2)*v1(3)
+  nx(2)=v2(1)*v1(3)-v1(1)*v2(3)
+  nx(3)=v1(1)*v2(2)-v2(1)*v1(2)
+  norm = sqrt(sum(nx**2))
+  if (norm <= 0.0_kreal) then
+    write(*,*) 'ERROR: undefined normal!'
+    stop
+  endif
+  ! unit normal
+  nx = nx/norm
+  return
+  end function get_normal
+  !=======================================================
+
+  function norm(x) result(l2n)
+  !
+  ! this function calculates the l2 norm of vector x
+  !
+  implicit none
+  real(kind=kreal),intent(in) :: x(:)
+  real(kind=kreal)::l2n
+  l2n = sqrt(sum(x**2))
+  return
+  end function norm
+  !=======================================================
+
+  recursive function factorial(n) result(nfact)
+  implicit none
+  integer, intent(in) :: n
+  integer :: nfact
+  if (n > 0) then
+    nfact = n * factorial(n-1)
+    return
+  else if (n == 0) then
+    nfact = 1
+  else
+    write(*,*) 'ERROR: undefined factorial!'
+    stop
+  endif
+  end function factorial
+  !=======================================================
+
+  ! this function returns the determinant of a 1x1, 2x2 or 3x3
+  ! jacobian matrix.
+  ! this routine was copied and modified from
+  ! Smith and Griffiths (2004): Programming the finite element method
+  function determinant(jac)result(det)
+  implicit none
+  real(kind=kreal),intent(in)::jac(:,:)
+  real(kind=kreal)::det
+  integer::it
+  it=ubound(jac,1)
+  select case(it)
+  case(1)
+    det = 1.0_kreal
+  case(2)
+    det = jac(1,1)*jac(2,2)-jac(1,2)*jac(2,1)
+  case(3)
+    det = jac(1,1)*(jac(2,2)*jac(3,3)-jac(3,2)*jac(2,3))
+    det = det-jac(1,2)*(jac(2,1)*jac(3,3)-jac(3,1)*jac(2,3))
+    det = det+jac(1,3)*(jac(2,1)*jac(3,2)-jac(3,1)*jac(2,2))
+  case default
+    write(*,*) 'ERROR: wrong dimension for jacobian matrix!'
+  end select
+  return
+  end function determinant
+  !=======================================================
+
+  ! this subroutine inverts a small square matrix onto itself.
+  ! this routine was copied and modified from
+  ! Smith and Griffiths (2004): Programming the finite element method
+  subroutine invert(matrix)
+  implicit none
+  real(kind=kreal),intent(inout)::matrix(:,:)
+  real(kind=kreal)::det,j11,j12,j13,j21,j22,j23,j31,j32,j33,con
+  integer::ndim,i,k
+  ndim=ubound(matrix,1)
+  if (ndim == 2) then
+    det = matrix(1,1)*matrix(2,2)-matrix(1,2)*matrix(2,1)
+    j11=matrix(1,1)
+    matrix(1,1)=matrix(2,2)
+    matrix(2,2)=j11
+    matrix(1,2)=-matrix(1,2)
+    matrix(2,1)=-matrix(2,1)
+    matrix = matrix/det
+  else if (ndim == 3) then
+    det = matrix(1,1)*(matrix(2,2)*matrix(3,3)-matrix(3,2)*matrix(2,3))
+    det = det-matrix(1,2)*(matrix(2,1)*matrix(3,3)-matrix(3,1)*matrix(2,3))
+    det = det+matrix(1,3)*(matrix(2,1)*matrix(3,2)-matrix(3,1)*matrix(2,2))
+    j11 = matrix(2,2)*matrix(3,3)-matrix(3,2)*matrix(2,3)
+    j21=-matrix(2,1)*matrix(3,3)+matrix(3,1)*matrix(2,3)
+    j31 = matrix(2,1)*matrix(3,2)-matrix(3,1)*matrix(2,2)
+    j12=-matrix(1,2)*matrix(3,3)+matrix(3,2)*matrix(1,3)
+    j22 = matrix(1,1)*matrix(3,3)-matrix(3,1)*matrix(1,3)
+    j32=-matrix(1,1)*matrix(3,2)+matrix(3,1)*matrix(1,2)
+    j13 = matrix(1,2)*matrix(2,3)-matrix(2,2)*matrix(1,3)
+    j23=-matrix(1,1)*matrix(2,3)+matrix(2,1)*matrix(1,3)
+    j33 = matrix(1,1)*matrix(2,2)-matrix(2,1)*matrix(1,2)
+    matrix(1,1)=j11
+    matrix(1,2)=j12
+    matrix(1,3)=j13
+    matrix(2,1)=j21
+    matrix(2,2)=j22
+    matrix(2,3)=j23
+    matrix(3,1)=j31
+    matrix(3,2)=j32
+    matrix(3,3)=j33
+    matrix = matrix/det
+  else
+    do k = 1,ndim
+      con=matrix(k,k)
+      matrix(k,k)=1.0_kreal
+      matrix(k,:)=matrix(k,:)/con
+      do i = 1,ndim
+        if (i /= k) then
+          con=matrix(i,k)
+          matrix(i,k)=0.0_kreal
+          matrix(i,:)=matrix(i,:)-matrix(k,:)*con
+        endif
+      enddo
+    enddo
+  endif
+  return
+  end subroutine invert
+  !=======================================================
+
+  function dotmat(m,n,x1,x2) result(dotm)
+  implicit none
+  integer,intent(in) :: m,n
+  real(kind=kreal),intent(in) :: x1(m,n),x2(m,n)
+  real(kind=kreal) :: dotm(n)
+
+  integer :: i
+
+  do i = 1,n
+    dotm(i)=dot_product(x1(:,i),x2(:,i))
+  enddo
+  return
+  end function dotmat
+  !=======================================================
+
+  ! quick sort of integer list
+  function quick_sort(x,n) result(xnew)
+  integer,intent(in) :: n ! size of the vector data x
+  integer, dimension(n) :: x ! data vector to sort
+  integer :: temp
+  integer :: i,j
+  integer,dimension(n) :: xnew
+
+  do i = 2, n
+    j = i - 1
+    temp = x(i)
+    do while (j >= 1 .and. x(j) > temp)
+      x(j+1) = x(j)
+      j = j - 1
+    enddo
+    x(j+1) = temp
+  enddo
+  xnew = x
+  end function quick_sort
+  !=======================================================
+
+  ! quick sort of real list
+  function rquick_sort(x,n) result(xnew)
+  integer,intent(in) :: n ! size of the vector data x
+  real(kind=kreal), dimension(n) :: x ! data vector to sort
+  real(kind=kreal) :: temp
+  integer :: i,j
+  real(kind=kreal),dimension(n) :: xnew
+
+  do i = 2, n
+    j = i - 1
+    temp = x(i)
+    do while (j >= 1 .and. x(j) > temp)
+      x(j+1) = x(j)
+      j = j - 1
+    enddo
+    x(j+1) = temp
+  enddo
+  xnew = x
+  end function rquick_sort
+  !=======================================================
+
+  ! insertion sort of integer list
+  subroutine insertion_sort(x,n)
+  integer,intent(in) :: n ! size of the vector data x
+  real, intent(inout), dimension(n) :: x ! data vector to sort
+  real :: temp
+  integer :: i, j
+
+  do i = 2, n
+    j = i - 1
+    temp = x(i)
+    do while (j >= 1 .and. x(j) > temp)
+      x(j+1) = x(j)
+      j = j - 1
+    enddo
+    x(j+1) = temp
+  enddo
+  end subroutine insertion_sort
+  !=======================================================
+
+  ! compute distance between two points in a n-dimensional space
+  function distance(x1,x2,n) result(r)
+  implicit none
+  integer,intent(in) :: n
+  real(kind=kreal),intent(in) :: x1(n),x2(n)
+  real(kind=kreal) :: dx(n),r
+
+  dx = x1-x2
+  r = sqrt(sum(dx*dx))
+  return
+  end function distance
+  !=======================================================
+
+  ! Author: Michel Olagnon
+  ! orderpack 2.0
+  ! source: http://www.Fortran-2000.com/rank/
+  subroutine i_uniinv (XDONT, IGOEST)
+  ! UNIINV = Merge-sort inverse ranking of an array, with removal of
+  ! duplicate entries.
+  ! this routine is similar to pure merge-sort ranking, but on
+  ! the last pass, it sets indices in IGOEST to the rank
+  ! of the value in the ordered set with duplicates removed.
+  ! for performance reasons, the first 2 passes are taken
+  ! out of the standard loop, and use dedicated coding.
+  implicit none
+  integer,intent(in)  :: XDONT(:)
+  integer,intent(out) :: IGOEST(:)
+
+  integer :: XTST, XDONA, XDONB
+  integer, dimension (SIZE(IGOEST)) :: JWRKT, IRNGT
+  integer :: LMTNA, LMTNC, IRNG, IRNG1, IRNG2, NUNI
+  integer :: NVAL, IIND, IWRKD, IWRK, IWRKF, JINDA, IINDA, IINDB
+
+  NVAL = Min (SIZE(XDONT), SIZE(IGOEST))
+  select case (NVAL)
+  case (:0)
+    return
+  case (1)
+    IGOEST (1) = 1
+    return
+  case default
+    continue
+  end select
+
+  ! fill-in the index array, creating ordered couples
+  do IIND = 2, NVAL, 2
+    if (XDONT(IIND-1) < XDONT(IIND)) then
+      IRNGT (IIND-1) = IIND - 1
+      IRNGT (IIND) = IIND
+    else
+      IRNGT (IIND-1) = IIND
+      IRNGT (IIND) = IIND - 1
+    endif
+  enddo
+  if (modulo(NVAL,2) /= 0) then
+    IRNGT (NVAL) = NVAL
+  endif
+
+  ! we will now have ordered subsets A - B - A - B - ...
+  ! and merge A and B couples into     C   -   C   - ...
+  LMTNA = 2
+  LMTNC = 4
+
+  ! first iteration. The length of the ordered subsets goes from 2 to 4
+  do
+    if (NVAL <= 4) Exit
+    ! loop on merges of A and B into C
+    do IWRKD = 0, NVAL - 1, 4
+      if ((IWRKD+4) > NVAL) then
+        if ((IWRKD+2) >= NVAL) Exit
+        !   1 2 3
+        if (XDONT(IRNGT(IWRKD+2)) <= XDONT(IRNGT(IWRKD+3))) Exit
+        !   1 3 2
+        if (XDONT(IRNGT(IWRKD+1)) <= XDONT(IRNGT(IWRKD+3))) then
+          IRNG2 = IRNGT (IWRKD+2)
+          IRNGT (IWRKD+2) = IRNGT (IWRKD+3)
+          IRNGT (IWRKD+3) = IRNG2
+          !   3 1 2
+        else
+          IRNG1 = IRNGT (IWRKD+1)
+          IRNGT (IWRKD+1) = IRNGT (IWRKD+3)
+          IRNGT (IWRKD+3) = IRNGT (IWRKD+2)
+          IRNGT (IWRKD+2) = IRNG1
+        endif
+        exit
+      endif
+      !   1 2 3 4
+      if (XDONT(IRNGT(IWRKD+2)) <= XDONT(IRNGT(IWRKD+3))) Cycle
+      !   1 3 x x
+      if (XDONT(IRNGT(IWRKD+1)) <= XDONT(IRNGT(IWRKD+3))) then
+        IRNG2 = IRNGT (IWRKD+2)
+        IRNGT (IWRKD+2) = IRNGT (IWRKD+3)
+        if (XDONT(IRNG2) <= XDONT(IRNGT(IWRKD+4))) then
+          !   1 3 2 4
+          IRNGT (IWRKD+3) = IRNG2
+        else
+          !   1 3 4 2
+          IRNGT (IWRKD+3) = IRNGT (IWRKD+4)
+          IRNGT (IWRKD+4) = IRNG2
+        endif
+        !   3 x x x
+      else
+        IRNG1 = IRNGT (IWRKD+1)
+        IRNG2 = IRNGT (IWRKD+2)
+        IRNGT (IWRKD+1) = IRNGT (IWRKD+3)
+        if (XDONT(IRNG1) <= XDONT(IRNGT(IWRKD+4))) then
+          IRNGT (IWRKD+2) = IRNG1
+          if (XDONT(IRNG2) <= XDONT(IRNGT(IWRKD+4))) then
+            !   3 1 2 4
+            IRNGT (IWRKD+3) = IRNG2
+          else
+            !   3 1 4 2
+            IRNGT (IWRKD+3) = IRNGT (IWRKD+4)
+            IRNGT (IWRKD+4) = IRNG2
+          endif
+        else
+          !   3 4 1 2
+          IRNGT (IWRKD+2) = IRNGT (IWRKD+4)
+          IRNGT (IWRKD+3) = IRNG1
+          IRNGT (IWRKD+4) = IRNG2
+        endif
+      endif
+    enddo
+
+  ! the Cs become As and Bs
+    LMTNA = 4
+    Exit
+  enddo
+
+  ! iteration loop. Each time, the length of the ordered subsets
+  ! is doubled.
+  do
+    if (2*LMTNA >= NVAL) Exit
+    IWRKF = 0
+    LMTNC = 2 * LMTNC
+
+    ! loop on merges of A and B into C
+    do
+      IWRK = IWRKF
+      IWRKD = IWRKF + 1
+      JINDA = IWRKF + LMTNA
+      IWRKF = IWRKF + LMTNC
+      if (IWRKF >= NVAL) then
+        if (JINDA >= NVAL) Exit
+        IWRKF = NVAL
+      endif
+      IINDA = 1
+      IINDB = JINDA + 1
+
+      ! one steps in the C subset, that we create in the final rank array
+      ! make a copy of the rank array for the iteration
+      JWRKT (1:LMTNA) = IRNGT (IWRKD:JINDA)
+      XDONA = XDONT (JWRKT(IINDA))
+      XDONB = XDONT (IRNGT(IINDB))
+      do
+        IWRK = IWRK + 1
+        ! we still have unprocessed values in both A and B
+        if (XDONA > XDONB) then
+          IRNGT (IWRK) = IRNGT (IINDB)
+          IINDB = IINDB + 1
+          if (IINDB > IWRKF) then
+            ! only A still with unprocessed values
+            IRNGT (IWRK+1:IWRKF) = JWRKT (IINDA:LMTNA)
+            Exit
+          endif
+          XDONB = XDONT (IRNGT(IINDB))
+        else
+          IRNGT (IWRK) = JWRKT (IINDA)
+          IINDA = IINDA + 1
+          if (IINDA > LMTNA) Exit! Only B still with unprocessed values
+          XDONA = XDONT (JWRKT(IINDA))
+        endif
+
+      enddo
+    enddo
+
+    ! the Cs become As and Bs
+    LMTNA = 2 * LMTNA
+  enddo
+
+  ! last merge of A and B into C, with removal of duplicates.
+  IINDA = 1
+  IINDB = LMTNA + 1
+  NUNI = 0
+
+  ! one steps in the C subset, that we create in the final rank array
+  JWRKT (1:LMTNA) = IRNGT (1:LMTNA)
+  if (IINDB <= NVAL) then
+    XTST = i_nearless(Min(XDONT(JWRKT(1)), XDONT(IRNGT(IINDB))))
+  else
+    XTST = i_nearless(XDONT(JWRKT(1)))
+  endif
+
+  do IWRK = 1, NVAL
+    ! we still have unprocessed values in both A and B
+    if (IINDA <= LMTNA) then
+      if (IINDB <= NVAL) then
+        if (XDONT(JWRKT(IINDA)) > XDONT(IRNGT(IINDB))) then
+          IRNG = IRNGT (IINDB)
+          IINDB = IINDB + 1
+        else
+          IRNG = JWRKT (IINDA)
+          IINDA = IINDA + 1
+        endif
+      else
+        ! only A still with unprocessed values
+        IRNG = JWRKT (IINDA)
+        IINDA = IINDA + 1
+      endif
+    else
+      ! only B still with unprocessed values
+      IRNG = IRNGT (IWRK)
+    endif
+    if (XDONT(IRNG) > XTST) then
+      XTST = XDONT (IRNG)
+      NUNI = NUNI + 1
+    endif
+    IGOEST (IRNG) = NUNI
+  enddo
+  return
+  end subroutine i_uniinv
+  !=======================================================
+
+  function i_nearless (XVAL) result (I_nl)
+  ! nearest value less than given value
+  implicit none
+  integer,intent(in) :: XVAL
+  integer :: I_nl
+  I_nl = XVAL - 1
+  return
+  end function i_nearless
+  !=======================================================
+
+  ! this function computes unit vectors in spherical coordinates at a point
+  ! defined by (r,theta,phi)
+  subroutine dspherical_unitvect(theta,phi,unitr,unittheta,unitphi)
+  implicit none
+  double precision,intent(in) :: theta,phi
+  double precision,intent(out) :: unitr(3),unittheta(3),unitphi(3)
+  double precision :: ctheta,cphi,stheta,sphi
+  double precision,parameter :: zero = 0.0_kreal
+
+  ctheta = cos(theta); stheta = sin(theta)
+  cphi = cos(phi);     sphi = sin(phi)
+
+  !unitr(1)=ctheta*sphi;   unitr(2)=stheta*sphi;   unitr(3)=ctheta
+  !unittheta(1)=-stheta;   unittheta(2)=ctheta;    unittheta(3)=zero
+  !unitphi(1)=ctheta*cphi; unitphi(2)=stheta*cphi; unitphi(3)=-sphi
+
+  unitr(1)=stheta*cphi;   unitr(2)=stheta*sphi;   unitr(3)=ctheta
+  unittheta(1)=ctheta*cphi; unittheta(2)=ctheta*sphi; unittheta(3)=-stheta
+  unitphi(1)=-sphi;   unitphi(2)=cphi;    unitphi(3)=zero
+
+  return
+  end subroutine dspherical_unitvect
+  !=======================================================
+
+  ! this function computes unit vectors in spherical coordinates at a point
+  ! defined by (r,theta,phi)
+  subroutine spherical_unitvect(theta,phi,unitr,unittheta,unitphi)
+  implicit none
+  real(kind=kreal),intent(in) :: theta,phi
+  real(kind=kreal),intent(out) :: unitr(3),unittheta(3),unitphi(3)
+  real(kind=kreal) :: ctheta,cphi,stheta,sphi
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  ctheta = cos(theta); stheta = sin(theta)
+  cphi = cos(phi);     sphi = sin(phi)
+
+  !unitr(1)=ctheta*sphi;   unitr(2)=stheta*sphi;   unitr(3)=ctheta
+  !unittheta(1)=-stheta;   unittheta(2)=ctheta;    unittheta(3)=zero
+  !unitphi(1)=ctheta*cphi; unitphi(2)=stheta*cphi; unitphi(3)=-sphi
+
+  unitr(1)=stheta*cphi;   unitr(2)=stheta*sphi;   unitr(3)=ctheta
+  unittheta(1)=ctheta*cphi; unittheta(2)=ctheta*sphi; unittheta(3)=-stheta
+  unitphi(1)=-sphi;   unitphi(2)=cphi;    unitphi(3)=zero
+
+  return
+  end subroutine spherical_unitvect
+  !=======================================================
+
+  subroutine dlegendreP2_costheta(theta,P2,dP2,d2P2)
+  implicit none
+  double precision,intent(in) :: theta ! radian
+  double precision,intent(out) :: P2,dP2,d2P2
+  double precision :: ctheta
+  double precision,parameter :: half = 0.5_kreal,one = 1.0_kreal,two = 2.0_kreal,three = 3.0_kreal
+
+  ctheta=cos(theta)
+
+  P2=half*(three*ctheta*ctheta-one)
+  dP2=-three*sin(theta)*ctheta
+  d2P2=-3*cos(two*theta)
+  return
+  end subroutine dlegendreP2_costheta
+  !=======================================================
+
+  subroutine legendreP2_costheta(theta,P2,dP2,d2P2)
+  implicit none
+  real(kind=kreal),intent(in) :: theta ! radian
+  real(kind=kreal),intent(out) :: P2,dP2,d2P2
+  real(kind=kreal) :: ctheta
+  real(kind=kreal),parameter :: half=0.5_kreal,one=1.0_kreal,two=2.0_kreal,three=3.0_kreal
+
+  ctheta=cos(theta)
+
+  P2=half*(three*ctheta*ctheta-one)
+  dP2=-three*sin(theta)*ctheta
+  d2P2=-3*cos(two*theta)
+  return
+  end subroutine legendreP2_costheta
+  !=======================================================
+
+  ! from specfem3d_globe
+  ! convert x y z to r theta phi, real(kind=kreal) call
+  subroutine xyz2rthetaphi(x,y,z,r,theta,phi)
+  implicit none
+  real(kind=kreal),intent(in) :: x,y,z
+  real(kind=kreal),intent(out) :: r,theta,phi
+  real(kind=kreal) :: xmesh,ymesh,zmesh
+  ! small tolerance for conversion from x y z to r theta phi
+  real(kind=kreal),parameter :: SMALL_VAL_ANGLE = 1.0e-10_kreal,zero=0.0_kreal
+
+  xmesh = x
+  ymesh = y
+  zmesh = z
+
+  if (zmesh > -SMALL_VAL_ANGLE .and. zmesh <= ZERO) zmesh = -SMALL_VAL_ANGLE
+  if (zmesh < SMALL_VAL_ANGLE .and. zmesh >= ZERO) zmesh = SMALL_VAL_ANGLE
+
+  theta = atan2(sqrt(xmesh*xmesh+ymesh*ymesh),zmesh)
+  if (xmesh > -SMALL_VAL_ANGLE .and. xmesh <= ZERO) xmesh = -SMALL_VAL_ANGLE
+  if (xmesh < SMALL_VAL_ANGLE .and. xmesh >= ZERO) xmesh = SMALL_VAL_ANGLE
+
+  phi = atan2(ymesh,xmesh)
+
+  r = sqrt(xmesh*xmesh + ymesh*ymesh + zmesh*zmesh)
+  return
+  end subroutine xyz2rthetaphi
+
+  !=======================================================
+
+  ! from specfem3d_globe
+  ! convert r theta phi to x y z
+  subroutine rthetaphi2xyz(r,theta,phi,x,y,z)
+  implicit none
+  real(kind=kreal),intent(in) :: r,theta,phi
+  real(kind=kreal),intent(out) :: x,y,z
+
+  x = r*sin(theta)*cos(phi)
+  y = r*sin(theta)*sin(phi)
+  z = r*cos(theta)
+  return
+  end subroutine rthetaphi2xyz
+  !=======================================================
+
+  subroutine compute_g_gradg_elliptical(ndim,r,theta,phi,rho,dotrho,g0,eps,eta,twothirdOmega2,g,gradg)
+  implicit none
+  integer,intent(in) :: ndim
+  double precision,intent(in) :: r,theta,phi,rho,dotrho,g0,eps,eta,twothirdOmega2
+  double precision,intent(out) :: g(ndim)
+  double precision,optional,intent(out) :: gradg(6)
+
+  double precision,parameter :: two = 2.d0,four = 4.d0,two_third = two/3.d0
+  double precision :: hmat(ndim,ndim)
+  double precision :: lfac,rinv
+  double precision :: cotthetaXdP2,doteps,ddoteps,doteta,dotg
+  double precision :: facP2,P2,dP2,d2P2
+  double precision :: unitr(ndim,1),unittheta(ndim,1),unitphi(ndim,1)
+  double precision :: unitrT(1,ndim),unitthetaT(1,ndim),unitphiT(1,ndim)
+
+  ! compute unit vectors
+  call dspherical_unitvect(theta,phi,unitr,unittheta,unitphi)
+
+  unitrT(1,:)=unitr(:,1);
+  unitthetaT(1,:)=unittheta(:,1)
+  unitphiT(1,:)=unitphi(:,1)
+  call dlegendreP2_costheta(theta,P2,dP2,d2P2)
+  rinv = 1.d0/r
+
+  dotg = four*rho-two*rinv*g0 !dotg = four_pi_G*rho-two*rinv*g_ss
+  doteps = eta*eps*rinv !eta*eps/r
+  ddoteps=6.d0*rinv*rinv*eps-8.0d0*rho*(doteps+rinv*eps)/g0 !two*four
+  !doteta=doteps/eps-0.5_kreal*r0*rinv*rinv*doteps*doteps+r0*ddoteps/eps
+  !doteta=doteps/eps-r*doteps*doteps/(eps*eps)+r*ddoteps/eps
+  doteta = doteps/eps+r*(eps*ddoteps-doteps*doteps)/(eps*eps)
+  !cottheta=cos(theta)/sin(theta)
+  cotthetaXdp2=-3.d0*cos(theta)*cos(theta)
+
+  ! lfac=g_ss+two_third*(eta*eps*g_ss+four_pi_G*r0*rho*eps-eps*g_ss)*P2-two_third*Omega2*r0
+  lfac=g0+two_third*(eta*eps*g0+four*r*rho*eps-eps*g0)*P2-twothirdOmega2*r
+  ! compute g
+  g=-lfac*unitr(:,1)-two_third*eps*g0*dP2*unittheta(:,1)
+
+  if (.not.present(gradg))return
+
+  ! compute grad g
+  facP2=(doteta*eps*g0+eta*doteps*g0+eta*eps*dotg+four*(rho*eps+r*dotrho*eps+r*rho*doteps)-doteps*g0-eps*dotg)
+
+  !hmat=-(dotg+two_third*facP2*P2-two_third*Omega2)*matmul(unitr,unitrT)     &
+  !     -two_third*(doteps*g0+eps+dotg)*dP2*(matmul(unitr,unitthetaT)+matmul(unittheta,unitrT))     &
+  !     -rinv*(lfac-two_third*Omega2*r+two_third*rinv*eps*g0*d2p2)*matmul(unittheta,unitthetaT)  &
+  !     -rinv*(lfac-two_third*Omega2*r+two_third*rinv*eps*g0*cotthetaXdp2)*matmul(unitphi,unitphiT)
+  hmat=-(dotg+two_third*facP2*P2-twothirdOmega2)*matmul(unitr,unitrT)     &
+       -two_third*(doteps*g0+eps+dotg)*dP2*(matmul(unitr,unitthetaT)+matmul(unittheta,unitrT))     &
+       -rinv*(lfac+two_third*rinv*eps*g0*d2p2)*matmul(unittheta,unitthetaT)  &
+       -rinv*(lfac+two_third*rinv*eps*g0*cotthetaXdp2)*matmul(unitphi,unitphiT)
+
+  gradg=(/ hmat(1,1),hmat(2,2),hmat(3,3),hmat(1,2),hmat(1,3),hmat(2,3) /)
+  return
+  end subroutine compute_g_gradg_elliptical
+  !=======================================================
+
+  subroutine compute_g_gradg(ndim,r,theta,phi,rho,g0,g,gradg)
+  implicit none
+  integer,intent(in) :: ndim
+  double precision,intent(in) :: r,theta,phi,rho,g0
+  double precision,intent(out) :: g(ndim)
+  double precision,optional,intent(out) :: gradg(6)
+
+  double precision,parameter :: two = 2.d0,four = 4.d0
+  double precision :: hmat(ndim,ndim)
+  double precision :: lfac,rinv
+  double precision :: dotg
+  double precision :: ctheta,P2,dP2,d2P2
+  double precision :: unitr(ndim,1),unittheta(ndim,1),unitphi(ndim,1)
+  double precision :: unitrT(1,ndim),unitthetaT(1,ndim),unitphiT(1,ndim)
+
+  ! compute unit vectors
+  call dspherical_unitvect(theta,phi,unitr,unittheta,unitphi)
+
+  unitrT(1,:)=unitr(:,1);
+  unitthetaT(1,:)=unittheta(:,1)
+  unitphiT(1,:)=unitphi(:,1)
+
+  !call dlegendreP2_costheta(theta,P2,dP2,d2P2)
+
+  ctheta=cos(theta)
+  P2=0.5d0*(3.0d0*ctheta*ctheta-1.0d0)
+
+  rinv = 1.d0/r
+  dotg = four*rho-two*rinv*g0 !dotg = four_pi_G*rho-two*rinv*g_ss
+
+  lfac = g0
+  ! compute g
+  g=-lfac*unitr(:,1)
+
+  if (.not.present(gradg))return
+
+  ! compute grad g
+  hmat=-dotg*matmul(unitr,unitrT)-rinv*lfac*matmul(unittheta,unitthetaT)          &
+       -rinv*lfac*matmul(unitphi,unitphiT)
+
+  gradg=(/ hmat(1,1),hmat(2,2),hmat(3,3),hmat(1,2),hmat(1,3),hmat(2,3) /)
+  return
+  end subroutine compute_g_gradg
+  !=======================================================
+
+  ! computes the integral of f[r]*r[r]*r[r] from i=nir to i=ner for
+  ! radii values as in model PREM_an640
+  subroutine integrate_frr_consrho(intsum,ndis,kdis,nr,r,nir,ner,f)
+  implicit none
+  ! Argument variables
+  integer,intent(in) :: ndis,kdis(2*ndis),nr,ner,nir
+  real(kind=kreal),intent(in) :: f(nr),r(nr)
+  real(kind=kreal),intent(out) :: intsum
+  ! Local variables
+  real(kind=kreal),parameter :: half = 0.5_kreal
+  real(kind=kreal),parameter :: third = 1.0_kreal/3.0_kreal
+  real(kind=kreal),parameter :: fourth = 0.25_kreal
+  real(kind=kreal),parameter :: fifth = 0.2_kreal
+  real(kind=kreal),parameter :: sixth = 1.0_kreal/6.0_kreal
+  real(kind=kreal) :: rji,yprime(nr)
+  real(kind=kreal) :: s1l,s2l,s3l
+  integer :: i,i0,j !,kdis(28),n
+  integer :: nir1 !,ndis
+
+  nir1 = nir + 1
+  intsum = 0.0_kreal
+  do i = nir1,ner
+    i0 = i-1
+    intsum = intsum + third*f(i0)*(r(i)-r(i0))
+  enddo
+
+  end subroutine integrate_frr_consrho
+  !=======================================================
+
+  ! computes the integral of f[i]*r[i]*r[i] from i=nir to i=ner for
+  ! radii values as in model PREM_an640
+  subroutine integrate_frr(intsum,ndis,kdis,nr,r,nir,ner,f,s1,s2,s3)
+  implicit none
+  ! Argument variables
+  integer,intent(in) :: ndis,kdis(2*ndis),nr,ner,nir
+  real(kind=kreal),intent(in) :: f(nr),r(nr)
+  real(kind=kreal),intent(out) :: s1(nr),s2(nr),s3(nr),intsum
+  ! Local variables
+  real(kind=kreal),parameter :: half = 0.5_kreal
+  real(kind=kreal),parameter :: third = 1.0_kreal/3.0_kreal
+  real(kind=kreal),parameter :: fourth = 0.25_kreal
+  real(kind=kreal),parameter :: fifth = 0.2_kreal
+  real(kind=kreal),parameter :: sixth = 1.0_kreal/6.0_kreal
+  real(kind=kreal) :: rji,yprime(nr)
+  real(kind=kreal) :: s1l,s2l,s3l
+  integer :: i,j !,kdis(28),n
+  integer :: nir1 !,ndis
+  !data kdis /163,323,336,517,530,540,565,590,609,619,626,633,16*0/
+
+  !ndis = 12
+  !n = 640
+
+  call deriv(f,yprime,nr,r,ndis,kdis,s1,s2,s3)
+  nir1 = nir + 1
+  intsum = 0.0_kreal
+  do i = nir1,ner
+    j = i-1
+    rji = r(i) - r(j)
+    s1l = s1(j)
+    s2l = s2(j)
+    s3l = s3(j)
+    intsum = intsum + r(j)*r(j)*rji*(f(j) &
+             + rji*(half*s1l + rji*(third*s2l + rji*fourth*s3l))) &
+             + 2.0_kreal*r(j)*rji*rji*(half*f(j) + rji*(third*s1l + rji*(fourth*s2l + rji*fifth*s3l))) &
+             + rji*rji*rji*(third*f(j) + rji*(fourth*s1l + rji*(fifth*s2l + rji*sixth*s3l)))
+  enddo
+
+  end subroutine integrate_frr
+  !=======================================================
+
+  subroutine deriv(y,yprime,n,r,ndis,kdis,s1,s2,s3)
+  implicit none
+  ! Argument variables
+  integer,intent(in) :: n,ndis,kdis(2*ndis)
+  real(kind=kreal),intent(in) :: r(n),y(n)
+  real(kind=kreal),intent(out) :: s1(n),s2(n),s3(n)
+  real(kind=kreal),intent(out) :: yprime(n)
+  ! Local variables
+  integer i,j,j1,j2
+  integer k,nd,ndp
+  real(kind=kreal) a0,b0,b1
+  real(kind=kreal) f(3,1000),h,h2,h2a
+  real(kind=kreal) h2b,h3a,ha,s13
+  real(kind=kreal) s21,s32,yy(3)
+  real(kind=kreal),parameter :: two=2.0_kreal,three=3.0_kreal,zero=0.0_kreal
+  !print *,'N:',n,ndis,'k:',kdis
+  yy(1) = zero
+  yy(2) = zero
+  yy(3) = zero
+
+  ndp = ndis+1
+
+   do 3 nd = 1,ndp
+    if (nd == 1) goto 4
+    if (nd == ndp) goto 5
+    j1=kdis(nd-1)+1
+    j2=kdis(nd)-2
+    goto 6
+      4 j1 = 1
+    j2=kdis(1)-2
+    goto 6
+      5 j1=kdis(ndis)+1
+    j2 = n-2
+      6 if ((j2+1-j1) > 0) goto 11
+      !print *,'j1,j2:',j1,j2
+    j2 = j2+2
+    !print *,'nd:',nd,'j:',j1,j2
+    yy(1)=(y(j2)-y(j1))/(r(j2)-r(j1))
+    s1(j1)=yy(1)
+    s1(j2)=yy(1)
+    s2(j1)=yy(2)
+    s2(j2)=yy(2)
+    s3(j1)=yy(3)
+    s3(j2)=yy(3)
+    goto 3
+     11 a0 = 0.0d0
+    if (j1 == 1) goto 7
+    h = r(j1+1)-r(j1)
+    h2 = r(j1+2)-r(j1)
+    yy(1)=h*h2*(h2-h)
+    h = h*h
+    h2 = h2*h2
+    b0=(y(j1)*(h-h2)+y(j1+1)*h2-y(j1+2)*h)/yy(1)
+    goto 8
+   7 b0 = 0.0d0
+   8 b1 = b0
+
+    if (j2 > 1000) stop 'error in subroutine deriv for j2'
+
+    do i = j1,j2
+      h = r(i+1)-r(i)
+      yy(1)=y(i+1)-y(i)
+      h2 = h*h
+      ha = h-a0
+      h2a = h-2.0d0*a0
+      h3a = 2.0d0*h-3.0d0*a0
+      h2b = h2*b0
+      s1(i)=h2/ha
+      s2(i)=-ha/(h2a*h2)
+      s3(i)=-h*h2a/h3a
+      f(1,i)=(yy(1)-h*b0)/(h*ha)
+      f(2,i)=(h2b-yy(1)*(2.0d0*h-a0))/(h*h2*h2a)
+      f(3,i)=-(h2b-3.0d0*yy(1)*ha)/(h*h3a)
+      a0=s3(i)
+      b0=f(3,i)
+    enddo
+
+    i = j2+1
+    h = r(i+1)-r(i)
+    yy(1)=y(i+1)-y(i)
+    h2 = h*h
+    ha = h-a0
+    h2a = h*ha
+    h2b = h2*b0-yy(1)*(2.d0*h-a0)
+    s1(i)=h2/ha
+    f(1,i)=(yy(1)-h*b0)/h2a
+    ha = r(j2)-r(i+1)
+    yy(1)=-h*ha*(ha+h)
+    ha = ha*ha
+    yy(1)=(y(i+1)*(h2-ha)+y(i)*ha-y(j2)*h2)/yy(1)
+    s3(i)=(yy(1)*h2a+h2b)/(h*h2*(h-2.0d0*a0))
+    s13 = s1(i)*s3(i)
+    s2(i)=f(1,i)-s13
+
+    do j = j1,j2
+      k = i-1
+      s32 = s3(k)*s2(i)
+      s1(i)=f(3,k)-s32
+      s21 = s2(k)*s1(i)
+      s3(k)=f(2,k)-s21
+      s13 = s1(k)*s3(k)
+      s2(k)=f(1,k)-s13
+      i = k
+    enddo
+
+    s1(i)=b1
+    j2 = j2+2
+    s1(j2)=yy(1)
+    s2(j2)=yy(2)
+    s3(j2)=yy(3)
+   3 continue
+  !stop
+    do i = 1,n
+      yprime(i)=s1(i)
+    enddo
+  !print *,yprime; stop
+  !!ndloop: do nd=1,ndp
+  !!  if (nd == 1) then
+  !!    j1=1
+  !!    j2=kdis(1)-2
+  !!  else if (nd == ndp) then
+  !!    j1=kdis(ndis)+1
+  !!    j2=n-2
+  !!  else
+  !!    j1=kdis(nd-1)+1
+  !!    j2=kdis(nd)-2
+  !!  endif
+  !!  print *,'j1,j2:',j1,j2
+  !!  if ((j2+1-j1)>0) then
+  !!    a0=zero
+  !!  else
+  !!    j2=j2+2; print *,j1,j2; print *,(r(j2)-r(j1))
+  !!    yy(1)=(y(j2)-y(j1))/(r(j2)-r(j1))
+  !!    s1(j1)=yy(1)
+  !!    s1(j2)=yy(1)
+  !!    s2(j1)=yy(2)
+  !!    s2(j2)=yy(2)
+  !!    s3(j1)=yy(3)
+  !!    s3(j2)=yy(3)
+  !!    cycle ndloop
+  !!  endif
+
+  !  if (nd == 1) goto 4
+  !  if (nd == ndp) goto 5
+  !  j1=kdis(nd-1)+1
+  !  j2=kdis(nd)-2
+  !  goto 6
+  !    4 j1=1
+  !  j2=kdis(1)-2
+  !  goto 6
+  !    5 j1=kdis(ndis)+1
+  !  j2=n-2
+  !    6 if ((j2+1-j1)>0) goto 11
+  !  j2=j2+2
+  !  yy(1)=(y(j2)-y(j1))/(r(j2)-r(j1))
+  !  s1(j1)=yy(1)
+  !  s1(j2)=yy(1)
+  !  s2(j1)=yy(2)
+  !  s2(j2)=yy(2)
+  !  s3(j1)=yy(3)
+  !  s3(j2)=yy(3)
+  !  cycle ndloop !goto 3
+  !   11 a0=zero
+
+  !!  if (j1 == 1) then
+  !!    b0=zero
+  !!  else
+  !!    h=r(j1+1)-r(j1)
+  !!    h2=r(j1+2)-r(j1)
+  !!    yy(1)=h*h2*(h2-h)
+  !!    h=h*h
+  !!    h2=h2*h2
+  !!    b0=(y(j1)*(h-h2)+y(j1+1)*h2-y(j1+2)*h)/yy(1)
+  !!  endif
+  !!  b1=b0
+
+  !  if (j1 == 1) goto 7
+  !  h=r(j1+1)-r(j1)
+  !  h2=r(j1+2)-r(j1)
+  !  yy(1)=h*h2*(h2-h)
+  !  h=h*h
+  !  h2=h2*h2
+  !  b0=(y(j1)*(h-h2)+y(j1+1)*h2-y(j1+2)*h)/yy(1)
+  !  goto 8
+  ! 7 b0=zero
+  ! 8 b1=b0
+
+  !!  if (j2 > 1000) then
+  !!    write(*,*) 'ERROR: in subroutine deriv for j2!'
+  !!    stop
+  !!  endif
+
+  !!  do i=j1,j2
+  !!    h=r(i+1)-r(i)
+  !!    yy(1)=y(i+1)-y(i)
+  !!    h2=h*h
+  !!    ha=h-a0
+  !!    h2a=h-two*a0
+  !!    h3a=two*h-three*a0
+  !!    h2b=h2*b0
+  !!    s1(i)=h2/ha
+  !!    s2(i)=-ha/(h2a*h2)
+  !!    s3(i)=-h*h2a/h3a
+  !!    f(1,i)=(yy(1)-h*b0)/(h*ha)
+  !!    f(2,i)=(h2b-yy(1)*(two*h-a0))/(h*h2*h2a)
+  !!    f(3,i)=-(h2b-three*yy(1)*ha)/(h*h3a)
+  !!    a0=s3(i)
+  !!    b0=f(3,i)
+  !!  enddo
+
+  !! i=j2+1
+  !!  h=r(i+1)-r(i)
+  !!  yy(1)=y(i+1)-y(i)
+  !!  h2=h*h
+  !!  ha=h-a0
+  !!  h2a=h*ha
+  !!  h2b=h2*b0-yy(1)*(two*h-a0)
+  !!  s1(i)=h2/ha
+  !!  f(1,i)=(yy(1)-h*b0)/h2a
+  !!  ha=r(j2)-r(i+1)
+  !!  yy(1)=-h*ha*(ha+h)
+  !!  ha=ha*ha
+  !!  yy(1)=(y(i+1)*(h2-ha)+y(i)*ha-y(j2)*h2)/yy(1)
+  !!  s3(i)=(yy(1)*h2a+h2b)/(h*h2*(h-two*a0))
+  !!  s13=s1(i)*s3(i)
+  !!  s2(i)=f(1,i)-s13
+
+  !! do j=j1,j2
+  !!   k=i-1
+  !!   s32=s3(k)*s2(i)
+  !!   s1(i)=f(3,k)-s32
+  !!   s21=s2(k)*s1(i)
+  !!   s3(k)=f(2,k)-s21
+  !!   s13=s1(k)*s3(k)
+  !!   s2(k)=f(1,k)-s13
+  !!   i=k
+  !! enddo
+
+  !!  s1(i)=b1
+  !!  j2=j2+2
+  !!  s1(j2)=yy(1)
+  !!  s2(j2)=yy(2)
+  !!  s3(j2)=yy(3)
+  !!enddo ndloop
+  !!!3 continue
+  !!!stop
+  !!do i=1,n
+  !!  yprime(i)=s1(i)
+  !!enddo
+  return
+  end subroutine deriv
+  !=======================================================
+
+  ! compute spline coefficients
+  subroutine spline_construction(xp,yp,np,tp1,tpn,coef)
+  implicit none
+  ! tangent to the spline imposed at the first and last points
+  real(kind=kreal), intent(in) :: tp1,tpn
+  ! number of input points and coordinates of the input points
+  integer, intent(in) :: np
+  real(kind=kreal), dimension(np), intent(in) :: xp,yp
+  ! spline coefficients output by the routine
+  real(kind=kreal),dimension(np), intent(out) :: coef
+  integer :: i
+  real(kind=kreal),dimension(:), allocatable :: temp_array
+  real(kind=kreal),parameter :: one=1.0_kreal,two=2.0_kreal,three=3.0_kreal
+
+  allocate(temp_array(np))
+  coef(1) = - one/two
+  temp_array(1) = (three/(xp(2)-xp(1)))*((yp(2)-yp(1))/(xp(2)-xp(1))-tp1)
+
+  do i = 2,np-1
+    coef(i) = ((xp(i)-xp(i-1))/(xp(i+1)-xp(i-1))-one) &
+      / ((xp(i)-xp(i-1))/(xp(i+1)-xp(i-1))*coef(i-1)+two)
+
+    temp_array(i) = (6.0_kreal*((yp(i+1)-yp(i))/(xp(i+1)-xp(i)) &
+      - (yp(i)-yp(i-1))/(xp(i)-xp(i-1)))/(xp(i+1)-xp(i-1)) &
+      - (xp(i)-xp(i-1))/(xp(i+1)-xp(i-1))*temp_array(i-1)) &
+      / ((xp(i)-xp(i-1))/(xp(i+1)-xp(i-1))*coef(i-1)+two)
+
+  enddo
+  coef(np) = ((three/(xp(np)-xp(np-1))) &
+    * (tpn-(yp(np)-yp(np-1))/(xp(np)-xp(np-1))) &
+    - one/two*temp_array(np-1))/(one/two*coef(np-1)+one)
+
+  do i = np-1,1,-1
+    coef(i) = coef(i)*coef(i+1) + temp_array(i)
+  enddo
+  deallocate(temp_array)
+  return
+  end subroutine spline_construction
+  !=======================================================
+
+  ! evaluate a spline
+  subroutine spline_evaluation(xp,yp,coef,np,x,y)
+  implicit none
+  ! number of input points and coordinates of the input points
+  integer,intent(in) :: np
+  real(kind=kreal),dimension(np),intent(in) :: xp,yp
+  ! spline coefficients to use
+  real(kind=kreal),dimension(np),intent(in) :: coef
+  ! abscissa at which we need to evaluate the value of the spline
+  real(kind=kreal),intent(in) :: x
+  ! ordinate evaluated by the routine for the spline at this abscissa
+  real(kind=kreal),intent(out) :: y
+  integer :: iloop,ilower,ihigher
+  real(kind=kreal) :: coef1,coef2
+
+  ! initialize to the whole interval
+  ilower = 1
+  ihigher = np
+  ! determine the right interval to use, by dichotomy
+  do while(ihigher-ilower > 1)
+    ! compute the middle of the interval
+    iloop = (ihigher+ilower)/2
+    if (xp(iloop) > x) then
+      ihigher = iloop
+    else
+      ilower = iloop
+    endif
+  enddo
+
+  ! test that the interval obtained does not have a size of zero
+  ! (this could happen for instance in the case of duplicates in the input list of points)
+  if (xp(ihigher) == xp(ilower)) stop 'incorrect interval found in spline evaluation'
+
+  coef1 = (xp(ihigher)-x)/(xp(ihigher)-xp(ilower))
+  coef2 = (x-xp(ilower))/(xp(ihigher)-xp(ilower))
+
+  y = coef1*yp(ilower)+coef2*yp(ihigher)+ &
+    ((coef1**3-coef1)*coef(ilower)+ &
+    (coef2**3-coef2)*coef(ihigher))*((xp(ihigher)-xp(ilower))**2)/6.0_kreal
+
+  return
+  end subroutine spline_evaluation
+
+end module math_library
+
+
+!
+!=====================================================================
+!
+
+! MPI math library
+module math_library_mpi
+
+  use mpi
+  use constants_solver, only: CUSTOM_REAL
+  implicit none
+  include "precision.h"
+  integer,parameter :: kreal = CUSTOM_REAL
+  integer,parameter :: MPI_KREAL = CUSTOM_MPI_TYPE!MPI_DOUBLE_PRECISION
+
+  private :: iminscal,fminscal
+  private :: iminvec,fminvec
+  private :: isumscal,fsumscal
+  private :: imaxscal,fmaxscal
+  private :: imaxvec,fmaxvec
+
+  ! global sum of a scalar in all processors
+  interface sumscal
+    module procedure isumscal
+    module procedure fsumscal
+  end interface
+
+  ! global maximum of a scalar in all processors
+  interface minscal
+    module procedure iminscal
+    module procedure fminscal
+  end interface
+
+  ! global maximum of a scalar in all processors
+  interface maxscal
+    module procedure imaxscal
+    module procedure fmaxscal
+  end interface
+
+  ! global maximum of a vector in all processors
+  interface maxvec
+    module procedure imaxvec
+    module procedure fmaxvec
+  end interface
+
+  ! global minimum of a scalar in all processors
+  interface minvec
+    module procedure iminvec
+    module procedure fminvec
+  end interface
+
+contains
+
+  function iminscal(scal) result(gmin)
+  !
+  ! this finds a global minimum of a scalar across the processors
+  !
+  implicit none
+  integer,intent(in)::scal
+  integer :: gmin
+  integer :: ierr
+
+  call MPI_ALLREDUCE(scal,gmin,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ierr)
+
+  return
+  end function iminscal
+  !=======================================================
+
+  function fminscal(scal) result(gmin)
+  !
+  ! this finds a global minimum of a scalar across the processors
+  !
+  implicit none
+  real(kind=kreal),intent(in)::scal
+  real(kind=kreal) :: gmin
+  integer :: ierr
+
+  call MPI_ALLREDUCE(scal,gmin,1,MPI_KREAL,MPI_MIN,MPI_COMM_WORLD,ierr)
+
+  return
+  end function fminscal
+  !=======================================================
+
+  function imaxscal(scal) result(gmax)
+  !
+  ! this finds a global maximum of a scalar across the processors
+  !
+  implicit none
+  integer,intent(in)::scal
+  integer :: gmax
+  integer :: ierr
+
+  call MPI_ALLREDUCE(scal,gmax,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ierr)
+
+  return
+  end function imaxscal
+  !=======================================================
+
+  function fmaxscal(scal) result(gmax)
+  !
+  ! this finds a global maximum of a scalar across the processors
+  !
+  implicit none
+  real(kind=kreal),intent(in)::scal
+  real(kind=kreal) :: gmax
+  integer :: ierr
+
+  call MPI_ALLREDUCE(scal,gmax,1,MPI_KREAL,MPI_MAX,MPI_COMM_WORLD,ierr)
+
+  return
+  end function fmaxscal
+  !=======================================================
+
+  function imaxvec(vec) result(gmax)
+  implicit none
+  integer,intent(in)::vec(:)
+  integer :: lmax,gmax ! local and global
+  integer :: ierr
+
+  lmax=maxval(vec)
+
+  call MPI_ALLREDUCE(lmax,gmax,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ierr)
+
+  return
+  end function imaxvec
+  !=======================================================
+
+  function fmaxvec(vec) result(gmax)
+  implicit none
+  real(kind=kreal),intent(in)::vec(:)
+  real(kind=kreal) :: lmax,gmax ! local and global
+  integer :: ierr
+
+  lmax=maxval(vec)
+
+  call MPI_ALLREDUCE(lmax,gmax,1,MPI_KREAL,MPI_MAX,MPI_COMM_WORLD,ierr)
+
+  return
+  end function fmaxvec
+  !=======================================================
+
+  function iminvec(vec) result(gmin)
+  implicit none
+  integer,intent(in)::vec(:)
+  integer :: lmin,gmin ! local and global
+  integer :: ierr
+
+  lmin=minval(vec)
+
+  call MPI_ALLREDUCE(lmin,gmin,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ierr)
+
+  return
+  end function iminvec
+  !=======================================================
+
+  function fminvec(vec) result(gmin)
+  implicit none
+  real(kind=kreal),intent(in)::vec(:)
+  real(kind=kreal) :: lmin,gmin ! local and global
+  integer :: ierr
+
+  lmin=minval(vec)
+
+  call MPI_ALLREDUCE(lmin,gmin,1,MPI_KREAL,MPI_MIN,MPI_COMM_WORLD,ierr)
+
+  return
+  end function fminvec
+  !=======================================================
+
+  function isumscal(scal) result(gsum)
+  !
+  ! this finds a global summation of a scalar across the processors
+  !
+  implicit none
+  integer,intent(in)::scal
+  integer :: gsum
+  integer :: ierr
+
+  call MPI_ALLREDUCE(scal,gsum,1,MPI_INTEGER,MPI_SUM,MPI_COMM_WORLD,ierr)
+
+  return
+  end function isumscal
+  !=======================================================
+
+  function fsumscal(scal) result(gsum)
+  !
+  ! this finds a global summation of a scalar across the processors
+  !
+  implicit none
+  real(kind=kreal),intent(in)::scal
+  real(kind=kreal) :: gsum
+  integer :: ierr
+
+  call MPI_ALLREDUCE(scal,gsum,1,MPI_KREAL,MPI_SUM,MPI_COMM_WORLD,ierr)
+
+  return
+  end function fsumscal
+  !=======================================================
+
+  function dot_product_par(vec1,vec2) result(gdot)
+  !
+  ! this finds global dot product of two vectors across the processors
+  !
+  implicit none
+  real(kind=kreal),intent(in)::vec1(:),vec2(:)
+  real(kind=kreal) :: ldot,gdot
+  integer :: ierr
+
+  ! find local dot
+  ldot=dot_product(vec1,vec2)
+  call MPI_ALLREDUCE(ldot,gdot,1,MPI_KREAL,MPI_SUM,MPI_COMM_WORLD,ierr)
+
+  return
+  end function dot_product_par
+
+end module math_library_mpi
+
+
+
+! NOTES:
+!  - gll_points not needed can be removed
+!  - can be make faster using orthoginality of shape functions
+! this module contains routines to compute Gauss-Legendre-Lobatto quadrature
+! REVISION
+!   HNG, Jul 12,2011; HNG, Apr 09,2010
+
+module gll_library1
+
+  integer,parameter :: kdble=selected_real_kind(15) ! double precision
+
+  private :: endw1,endw2,gammaf,jacg
+
+contains
+
+! this subroutines computes derivatives of the shape fucntions at gll
+! points. the 8-noded hexahedra is conformed to the exodus/cubit numbering
+! convention
+
+  subroutine dshape_function_hex8(ndim,ngnod,ngllx,nglly,ngllz,ngll,xigll,etagll, &
+                                  zetagll,dshape_hex8)
+
+  implicit none
+  integer,intent(in) :: ndim,ngnod,ngllx,nglly,ngllz,ngll
+
+  ! gauss-lobatto-legendre points of integration
+  real(kind=kdble),intent(in) :: xigll(ngllx),etagll(nglly),zetagll(ngllz)
+
+  ! derivatives of the 3d shape functions
+  real(kind=kdble),intent(out) :: dshape_hex8(ndim,ngnod,ngll)
+
+  integer :: i,j,k,i_gnod,igll
+
+  ! location of the nodes of the 3d quadrilateral elements
+  !real(kind=kdble) :: xi,eta,gamma
+  real(kind=kdble) :: xip,xim,etap,etam,zetap,zetam
+
+  ! for checking the 3d shape functions
+  real(kind=kdble) :: sum_dshapexi,sum_dshapeeta,sum_dshapezeta
+
+  real(kind=kdble),parameter :: one=1.0_kdble,one_eighth = 0.125_kdble, &
+  zero = 0.0_kdble,zerotol = 1.0e-12_kdble !1.0e-12_kdble WARNING: please correct immediately
+
+  ! check that the parameter file is correct
+  if (ngnod /= 8) then
+    write(*,*) 'ERROR: elements must have 8 geometrical nodes!'
+    stop
+  endif
+  !print *,'after:',ndim,ngnod,ngll
+  ! compute the derivatives of 3d shape functions
+  !dshape_hex8=zero
+  igll = 0
+  do k = 1,ngllz
+    zetap = one + zetagll(k)
+    zetam = one - zetagll(k)
+    do j = 1,nglly
+      etap = one + etagll(j)
+      etam = one - etagll(j)
+      do i = 1,ngllx
+        igll = igll+1
+
+        !xi = xigll(i)
+        !eta = etagll(j)
+        !gamma = zetagll(k)
+
+        xip = one + xigll(i)
+        xim = one - xigll(i)
+
+        dshape_hex8(1,1,igll) = - one_eighth*etam*zetam
+        dshape_hex8(1,2,igll) = one_eighth*etam*zetam
+        dshape_hex8(1,3,igll) = one_eighth*etap*zetam
+        dshape_hex8(1,4,igll) = - one_eighth*etap*zetam
+        dshape_hex8(1,5,igll) = - one_eighth*etam*zetap
+        dshape_hex8(1,6,igll) = one_eighth*etam*zetap
+        dshape_hex8(1,7,igll) = one_eighth*etap*zetap
+        dshape_hex8(1,8,igll) = - one_eighth*etap*zetap
+
+        dshape_hex8(2,1,igll) = - one_eighth*xim*zetam
+        dshape_hex8(2,2,igll) = - one_eighth*xip*zetam
+        dshape_hex8(2,3,igll) = one_eighth*xip*zetam
+        dshape_hex8(2,4,igll) = one_eighth*xim*zetam
+        dshape_hex8(2,5,igll) = - one_eighth*xim*zetap
+        dshape_hex8(2,6,igll) = - one_eighth*xip*zetap
+        dshape_hex8(2,7,igll) = one_eighth*xip*zetap
+        dshape_hex8(2,8,igll) = one_eighth*xim*zetap
+
+        dshape_hex8(3,1,igll) = - one_eighth*xim*etam
+        dshape_hex8(3,2,igll) = - one_eighth*xip*etam
+        dshape_hex8(3,3,igll) = - one_eighth*xip*etap
+        dshape_hex8(3,4,igll) = - one_eighth*xim*etap
+        dshape_hex8(3,5,igll) = one_eighth*xim*etam
+        dshape_hex8(3,6,igll) = one_eighth*xip*etam
+        dshape_hex8(3,7,igll) = one_eighth*xip*etap
+        dshape_hex8(3,8,igll) = one_eighth*xim*etap
+
+      enddo
+    enddo
+  enddo
+
+  ! check the shape functions and their derivatives
+
+  do i = 1,ngll
+        sum_dshapexi = zero
+        sum_dshapeeta = zero
+        sum_dshapezeta = zero
+
+        do i_gnod = 1,ngnod
+          sum_dshapexi = sum_dshapexi + dshape_hex8(1,i_gnod,i)
+          sum_dshapeeta = sum_dshapeeta + dshape_hex8(2,i_gnod,i)
+          sum_dshapezeta = sum_dshapezeta + dshape_hex8(3,i_gnod,i)
+          !print *,sum_dshapexi,sum_dshapeeta,sum_dshapezeta
+        enddo
+
+        ! sum of derivative of shape functions should be zero
+        if (abs(sum_dshapexi) > zerotol) then
+          write(*,*) 'ERROR: derivative xi shape functions!'
+          stop
+        endif
+        if (abs(sum_dshapeeta) > zerotol) then
+          write(*,*) 'ERROR: derivative eta shape functions!'
+          stop
+        endif
+        if (abs(sum_dshapezeta) > zerotol) then
+          write(*,*) 'ERROR: derivative gamma shape functions!'
+          print *,ngllx,nglly,ngllz,sum_dshapexi,sum_dshapeeta,sum_dshapezeta,zerotol
+          print *,xigll
+          print *,etagll
+          print *,zetagll
+          if (all(xigll == etagll))print *,'yes0'
+          if (all(xigll == zetagll))print *,'yes1'
+          stop
+        endif
+  enddo
+
+  end subroutine dshape_function_hex8
+
+!
+!=======================================================
+!
+
+! This subroutines computes derivatives of the shape fucntions at given point.
+! the 8-noded hexahedra is conformed to the exodus/cubit numbering
+! convention
+
+!NOTE: dimension of dshape_hex8 is (ngnod,3) NOT (3,ngnode)
+
+  subroutine dshape_function_hex8_point(ngnod,xi,eta,zeta,dshape_hex8)
+
+  implicit none
+  integer,intent(in) :: ngnod
+
+  ! given point
+  double precision :: xi
+  double precision :: eta
+  double precision :: zeta
+
+  ! derivatives of the 3d shape functions
+  double precision :: dshape_hex8(ngnod,3)
+
+  integer :: i_gnod
+
+  double precision :: xip,xim,etap,etam,zetap,zetam
+
+  ! for checking the 3d shape functions
+  double precision :: sum_dshapexi,sum_dshapeeta,sum_dshapezeta
+
+  real(kind=kdble),parameter :: one=1.0_kdble,one_eighth = 0.125_kdble, &
+  zero = 0.0_kdble,zerotol = 1.0e-12_kdble !1.0e-12_kdble WARNING: please correct immediately
+
+  ! check that the parameter file is correct
+  if (ngnod /= 8) then
+    write(*,*) 'ERROR: elements must have 8 geometrical nodes!'
+    stop
+  endif
+
+  ! compute the derivatives of 3d shape functions
+  zetap = one + zeta
+  zetam = one - zeta
+  etap =  one + eta
+  etam =  one - eta
+  xip =   one + xi
+  xim =   one - xi
+
+  dshape_hex8 = zero
+
+  dshape_hex8(1,1) = - one_eighth*etam*zetam
+  dshape_hex8(2,1) = one_eighth*etam*zetam
+  dshape_hex8(3,1) = one_eighth*etap*zetam
+  dshape_hex8(4,1) = - one_eighth*etap*zetam
+  dshape_hex8(5,1) = - one_eighth*etam*zetap
+  dshape_hex8(6,1) = one_eighth*etam*zetap
+  dshape_hex8(7,1) = one_eighth*etap*zetap
+  dshape_hex8(8,1) = - one_eighth*etap*zetap
+
+  dshape_hex8(1,2) = - one_eighth*xim*zetam
+  dshape_hex8(2,2) = - one_eighth*xip*zetam
+  dshape_hex8(3,2) = one_eighth*xip*zetam
+  dshape_hex8(4,2) = one_eighth*xim*zetam
+  dshape_hex8(5,2) = - one_eighth*xim*zetap
+  dshape_hex8(6,2) = - one_eighth*xip*zetap
+  dshape_hex8(7,2) = one_eighth*xip*zetap
+  dshape_hex8(8,2) = one_eighth*xim*zetap
+
+  dshape_hex8(1,3) = - one_eighth*xim*etam
+  dshape_hex8(2,3) = - one_eighth*xip*etam
+  dshape_hex8(3,3) = - one_eighth*xip*etap
+  dshape_hex8(4,3) = - one_eighth*xim*etap
+  dshape_hex8(5,3) = one_eighth*xim*etam
+  dshape_hex8(6,3) = one_eighth*xip*etam
+  dshape_hex8(7,3) = one_eighth*xip*etap
+  dshape_hex8(8,3) = one_eighth*xim*etap
+
+  ! check the shape functions and their derivatives
+  sum_dshapexi = zero
+  sum_dshapeeta = zero
+  sum_dshapezeta = zero
+
+  do i_gnod = 1,ngnod
+    sum_dshapexi = sum_dshapexi + dshape_hex8(i_gnod,1)
+    sum_dshapeeta = sum_dshapeeta + dshape_hex8(i_gnod,2)
+    sum_dshapezeta = sum_dshapezeta + dshape_hex8(i_gnod,3)
+  enddo
+
+  ! sum of derivative of shape functions should be zero
+  if (abs(sum_dshapexi) > zerotol) then
+    write(*,*) 'ERROR: derivative xi shape functions!'
+    stop
+  endif
+  if (abs(sum_dshapeeta) > zerotol) then
+    write(*,*) 'ERROR: derivative eta shape functions!'
+    stop
+  endif
+  if (abs(sum_dshapezeta) > zerotol) then
+    write(*,*) 'ERROR: derivative gamma shape functions!'
+    stop
+  endif
+
+  end subroutine dshape_function_hex8_point
+
+!
+!===============================================================================
+!
+
+! this subroutine computes GLL quadrature points and weights for 3D
+
+  subroutine gll_quadrature(ndim,ngllx,nglly,ngllz,ngll,gll_points,gll_weights, &
+                            lagrange_gll,dlagrange_gll)
+
+  implicit none
+  integer,intent(in) :: ndim,ngllx,nglly,ngllz,ngll
+  real(kind=kdble),dimension(ngll),intent(out) :: gll_weights
+  real(kind=kdble),dimension(ndim,ngll),intent(out) :: gll_points
+  real(kind=kdble),dimension(ngll,ngll),intent(out) :: lagrange_gll
+  real(kind=kdble),dimension(ndim,ngll,ngll),intent(out) :: dlagrange_gll
+  !real(kind=kdble),dimension(ngll,ngll) :: flagrange_gll
+  !real(kind=kdble),dimension(ndim,ngll,ngll) ::fdlagrange_gll
+  real(kind=kdble),parameter :: zero=0.0_kdble,one=1.0_kdble
+  real(kind=kdble),parameter :: jacobi_alpha=zero,jacobi_beta=zero
+  integer :: i,j,k,n,ngllxy
+  integer :: ip,ipx,ipy,ipz ! integration points
+  integer :: inpy,inpz
+  integer :: np,npx,npy,npz ! interpolation function points
+  real(kind=kdble) :: xi,eta,zeta
+  real(kind=kdble),dimension(ngllx) :: gllpx,gllwx ! GLL points and weights
+  real(kind=kdble),dimension(nglly) :: gllpy,gllwy ! GLL points and weights
+  real(kind=kdble),dimension(ngllz) :: gllpz,gllwz ! GLL points and weights
+  real(kind=kdble),dimension(ngllx) :: lagrange_x,lagrange_dx
+  real(kind=kdble),dimension(nglly) :: lagrange_y,lagrange_dy
+  real(kind=kdble),dimension(ngllz) :: lagrange_z,lagrange_dz
+
+  ! compute everything in indexed order
+
+  ! get GLL points
+  ! for alpha=beta=0, jacobi polynomial is legendre polynomial
+  ! for ngllx=nglly=ngllz=ngll, need to call only once
+  call zwgljd(gllpx,gllwx,ngllx,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpy,gllwy,nglly,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpz,gllwz,ngllz,jacobi_alpha,jacobi_beta)
+
+  ip = 0
+  do ipz = 1,ngllz
+    do ipy = 1,nglly
+      do ipx = 1,ngllx
+        ip = ip+1
+        ! integration points
+        gll_points(1,ip)=gllpx(ipx)
+        gll_points(2,ip)=gllpy(ipy)
+        gll_points(3,ip)=gllpz(ipz)
+
+        ! integration weights
+        gll_weights(ip)=gllwx(ipx)*gllwy(ipy)*gllwz(ipz)
+      enddo
+    enddo
+  enddo
+
+  !! faster approach
+  !ngllxy=ngllx*nglly
+  !flagrange_gll=zero
+  !fdlagrange_gll=zero
+  !ip=0
+  !! do ii=1,ngll ! ngllx*nglly*ngllz
+  !do ipz=1,ngllz
+  !  do ipy=1,nglly
+  !    do ipx=1,ngllx
+  !      ip=ip+1
+  !      ! integration point
+  !      xi=gll_points(1,ip)
+  !      eta=gll_points(2,ip)
+  !      zeta=gll_points(3,ip)
+
+  !      ! interpolation function is orthogonal
+  !      flagrange_gll(ip,ip)=one
+
+  !      ! compute 1d lagrange polynomials on GLL points
+  !      ! this can also be computed in a simple manner due to the orthogonality
+  !      call lagrange1dGLL(ngllx,gllpx,xi,lagrange_x,lagrange_dx)
+  !      call lagrange1dGLL(nglly,gllpy,eta,lagrange_y,lagrange_dy)
+  !      call lagrange1dGLL(ngllz,gllpz,zeta,lagrange_z,lagrange_dz)
+
+  !      ! derivatives are nonzeros only on the lines intersecting the integration
+  !      ! point
+
+  !      ! x line
+  !      inpz=ngllxy*(ipz-1); inpy=ngllx*(ipy-1)
+  !      do npx=1,ngllx
+  !        np=inpz+inpy+npx
+  !        fdlagrange_gll(1,ip,np)=lagrange_dx(npx)
+  !      enddo
+
+  !      ! y line
+  !      !inpz=ngllxy*(ipz-1) ! this was computed just above
+  !      do npy=1,nglly
+  !        np=inpz+ngllx*(npy-1)+ipx
+  !        fdlagrange_gll(2,ip,np)=lagrange_dy(npy)
+  !      enddo
+
+  !      ! z line
+  !      inpy=ngllx*(ipy-1)
+  !      do npz=1,ngllz
+  !        np=ngllxy*(npz-1)+inpy+ipx
+  !        fdlagrange_gll(3,ip,np)=lagrange_dz(npz)
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! easier and general approach
+  do ip = 1,ngll ! ngllx*nglly*ngllz
+    xi=gll_points(1,ip)
+    eta=gll_points(2,ip)
+    zeta=gll_points(3,ip)
+
+    ! compute 1d lagrange polynomials on GLL points
+    ! this can also be computed in a simple manner due to the orthogonality as
+    ! above
+    call lagrange1dGLL(ngllx,gllpx,xi,lagrange_x,lagrange_dx)
+    call lagrange1dGLL(nglly,gllpy,eta,lagrange_y,lagrange_dy)
+    call lagrange1dGLL(ngllz,gllpz,zeta,lagrange_z,lagrange_dz)
+    n = 0
+    do k = 1,ngllz
+      do j = 1,nglly
+        do i = 1,ngllx
+          n = n+1
+          lagrange_gll(ip,n)=lagrange_x(i)*lagrange_y(j)*lagrange_z(k)
+          dlagrange_gll(1,ip,n)=lagrange_dx(i)*lagrange_y(j)*lagrange_z(k)
+          dlagrange_gll(2,ip,n)=lagrange_x(i)*lagrange_dy(j)*lagrange_z(k)
+          dlagrange_gll(3,ip,n)=lagrange_x(i)*lagrange_y(j)*lagrange_dz(k)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  !print *,maxval(abs(lagrange_gll-flagrange_gll))
+  !print *,maxval(abs(dlagrange_gll-fdlagrange_gll))
+  !stop
+  return
+  end subroutine gll_quadrature
+
+!
+!===========================================
+!
+
+! this subroutine computes lagrange polynomials and their derivatives defined on
+! GLL points at an arbitrary point
+
+  subroutine gll_lagrange3d_point(ndim,ngllx,nglly,ngllz,ngll,xi,eta,zeta, &
+                                  lagrange_gll,dlagrange_gll)
+
+  implicit none
+  integer,intent(in) :: ndim,ngllx,nglly,ngllz,ngll
+  real(kind=kdble),intent(in) :: xi,eta,zeta
+  real(kind=kdble),dimension(ngll),intent(out) :: lagrange_gll
+  real(kind=kdble),dimension(ndim,ngll),intent(out) :: dlagrange_gll
+
+  integer :: i,j,k,n
+  real(kind=kdble),dimension(ngllx) :: lagrange_x,lagrange_dx
+  real(kind=kdble),dimension(nglly) :: lagrange_y,lagrange_dy
+  real(kind=kdble),dimension(ngllz) :: lagrange_z,lagrange_dz
+
+  ! compute 1d lagrange polynomials
+  call lagrange1d(ngllx,xi,lagrange_x,lagrange_dx)
+  call lagrange1d(nglly,eta,lagrange_y,lagrange_dy)
+  call lagrange1d(ngllz,zeta,lagrange_z,lagrange_dz)
+
+  n = 0
+  do k = 1,ngllz
+    do j = 1,nglly
+      do i = 1,ngllx
+        n = n+1
+        lagrange_gll(n)=lagrange_x(i)*lagrange_y(j)*lagrange_z(k)
+        dlagrange_gll(1,n)=lagrange_dx(i)*lagrange_y(j)*lagrange_z(k)
+        dlagrange_gll(2,n)=lagrange_x(i)*lagrange_dy(j)*lagrange_z(k)
+        dlagrange_gll(3,n)=lagrange_x(i)*lagrange_y(j)*lagrange_dz(k)
+      enddo
+    enddo
+  enddo
+
+  return
+  end subroutine gll_lagrange3d_point
+
+!
+!===========================================
+!
+
+! subroutine below is only applicable ngllx=nglly=ngllz and NGLLX_INF=NGLLY_INF=NGLLZ_INF=3
+! this subroutine computes GLL quadrature points and weights for 3D
+
+  subroutine gll_quadrature3inNGLL(ndim,ngllx,ngll,gllpx,gllpx1,lagrange_gll)
+
+  implicit none
+  integer,intent(in) :: ndim,ngllx,ngll
+  real(kind=kdble),intent(in) :: gllpx(ngllx),gllpx1(3)
+  real(kind=kdble),dimension(ngll,27),intent(out) :: lagrange_gll
+
+  real(kind=kdble),parameter :: zero=0.0_kdble,one=1.0_kdble
+  integer :: i,j,k
+  integer :: ip,ipx,ipy,ipz ! integration points
+  integer :: np ! interpolation function points
+  real(kind=kdble) :: xi,eta,zeta
+  real(kind=kdble),dimension(ngllx) :: lagrange_x,lagrange_dx
+  real(kind=kdble),dimension(ngllx) :: lagrange_y,lagrange_dy
+  real(kind=kdble),dimension(ngllx) :: lagrange_z,lagrange_dz
+
+  real(kind=kdble),dimension(ndim,ngll) :: gll_points
+
+  ! compute everything in indexed order
+  ip = 0
+  do ipz = 1,ngllx
+    do ipy = 1,ngllx
+      do ipx = 1,ngllx
+        ip = ip+1;
+        ! integration points
+        gll_points(1,ip)=gllpx(ipx)
+        gll_points(2,ip)=gllpx(ipy)
+        gll_points(3,ip)=gllpx(ipz)
+      enddo
+    enddo
+  enddo
+
+  ! easier and general approach
+  do ip = 1,ngll ! ngllx*nglly*ngllz
+    xi=gll_points(1,ip)
+    eta=gll_points(2,ip)
+    zeta=gll_points(3,ip)
+
+    ! compute 1d lagrange polynomials on GLL points
+    ! this can also be computed in a simple manner due to the orthogonality as
+    ! above
+    call lagrange1dGLL(3,gllpx1,xi,lagrange_x,lagrange_dx)
+    call lagrange1dGLL(3,gllpx1,eta,lagrange_y,lagrange_dy)
+    call lagrange1dGLL(3,gllpx1,zeta,lagrange_z,lagrange_dz)
+    np = 0
+    do k = 1,3
+      do j = 1,3
+        do i = 1,3
+          np = np+1
+          lagrange_gll(ip,np)=lagrange_x(i)*lagrange_y(j)*lagrange_z(k)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  return
+  end subroutine gll_quadrature3inNGLL
+
+!
+!===========================================
+!
+
+! this subroutine computes GLL quadrature points and weights for 2D
+
+  subroutine gll_quadrature2d(ndim,ngllx,nglly,ngll,gll_points2d,gll_weights2d, &
+                              lagrange_gll2d,dlagrange_gll2d)
+  implicit none
+  integer,intent(in) :: ndim,ngllx,nglly,ngll
+  real(kind=kdble),dimension(ngll),intent(out) :: gll_weights2d
+  real(kind=kdble),dimension(2,ngll),intent(out) :: gll_points2d
+  real(kind=kdble),dimension(ngll,ngll),intent(out) :: lagrange_gll2d
+  real(kind=kdble),dimension(ndim,ngll,ngll),intent(out) :: dlagrange_gll2d
+
+  real(kind=kdble),parameter :: jacobi_alpha=0.0_kdble,jacobi_beta=0.0_kdble
+  integer :: i,ii,j,k,n
+  real(kind=kdble) :: xi,eta !,zeta
+  real(kind=kdble),dimension(ngllx) :: gllpx,gllwx ! GLL points and weights
+  real(kind=kdble),dimension(nglly) :: gllpy,gllwy ! GLL points and weights
+  real(kind=kdble),dimension(ngllx) :: lagrange_x,lagrange_dx
+  real(kind=kdble),dimension(nglly) :: lagrange_y,lagrange_dy
+
+  ! compute everything in indexed order
+
+  ! GLL points and weights (source: http://mathworld.wolfram.com/lobattoquadrature.html)
+  !gllp(1)=-1.0_kdble   ; gllw(1)=1.0_kdble/3.0_kdble
+  !gllp(2)= 0.0_kdble   ; gllw(2)=4.0_kdble/3.0_kdble
+  !gllp(3)= 1.0_kdble   ; gllw(3)=gllw(1)
+
+  ! get GLL points
+  ! for alpha=beta=0, jacobi polynomial is legendre polynomial
+  ! for ngllx=nglly=ngllz=ngll, need to call only once
+  call zwgljd(gllpx,gllwx,ngllx,jacobi_alpha,jacobi_beta)
+  call zwgljd(gllpy,gllwy,nglly,jacobi_alpha,jacobi_beta)
+
+  n = 0
+  do j = 1,nglly
+    do i = 1,ngllx
+      n = n+1
+      ! integration points
+      gll_points2d(1,n)=gllpx(i)
+      gll_points2d(2,n)=gllpy(j)
+
+      ! integration weights
+      gll_weights2d(n)=gllwx(i)*gllwy(j)
+    enddo
+  enddo
+
+  do ii = 1,ngll ! ngllx*nglly
+    xi=gll_points2d(1,ii)
+    eta=gll_points2d(2,ii)
+
+    ! compute 1d lagrange polynomials
+    ! this can also be computed in a simple manner due to the orthogonality
+    call lagrange1dGLL(ngllx,gllpx,xi,lagrange_x,lagrange_dx)
+    call lagrange1dGLL(nglly,gllpy,eta,lagrange_y,lagrange_dy)
+    n = 0
+    do j = 1,nglly
+      do i = 1,ngllx
+        n = n+1
+        lagrange_gll2d(ii,n)=lagrange_x(i)*lagrange_y(j)
+        dlagrange_gll2d(1,ii,n)=lagrange_dx(i)*lagrange_y(j)
+        dlagrange_gll2d(2,ii,n)=lagrange_x(i)*lagrange_dy(j)
+      enddo
+    enddo
+  enddo
+
+  return
+  end subroutine gll_quadrature2d
+
+!
+!===========================================
+!
+
+! this subroutine computes GLL quadrature points and weights for 1D
+
+  subroutine gll_quadrature1d(ndim,ngllx,ngll,gll_points1d,gll_weights1d, &
+                              lagrange_gll1d,dlagrange_gll1d)
+  implicit none
+  integer,intent(in) :: ndim,ngllx,ngll
+  real(kind=kdble),dimension(ngll),intent(out) :: gll_weights1d
+  real(kind=kdble),dimension(ndim,ngll),intent(out) :: gll_points1d
+  real(kind=kdble),dimension(ngll,ngll),intent(out) :: lagrange_gll1d
+  real(kind=kdble),dimension(ndim,ngll,ngll),intent(out) :: dlagrange_gll1d
+
+  real(kind=kdble),parameter :: jacobi_alpha=0.0_kdble,jacobi_beta=0.0_kdble
+  integer :: i,ii,j,k,n
+  real(kind=kdble) :: xi
+  real(kind=kdble),dimension(ngllx) :: gllpx,gllwx ! GLL points and weights
+  real(kind=kdble),dimension(ngllx) :: lagrange_x,lagrange_dx
+
+  ! compute everything in indexed order
+
+  ! GLL points and weights (source: http://mathworld.wolfram.com/lobattoquadrature.html)
+  !gllp(1)=-1.0_kdble   ; gllw(1)=1.0_kdble/3.0_kdble
+  !gllp(2)= 0.0_kdble   ; gllw(2)=4.0_kdble/3.0_kdble
+  !gllp(3)= 1.0_kdble   ; gllw(3)=gllw(1)
+
+  ! get GLL points
+  ! for alpha=beta=0, jacobi polynomial is legendre polynomial
+  ! for ngllx=nglly=ngllz=ngll, need to call only once
+  call zwgljd(gllpx,gllwx,ngllx,jacobi_alpha,jacobi_beta)
+
+  n = 0
+  do i = 1,ngllx
+    n = n+1
+    ! integration points
+    gll_points1d(1,n)=gllpx(i)
+
+    ! integration weights
+    gll_weights1d(n)=gllwx(i)
+  enddo
+
+  do ii = 1,ngll ! ngllx
+    xi=gll_points1d(1,ii)
+
+    ! compute 1d lagrange polynomials
+    ! this can also be computed in a simple manner due to the orthogonality
+    call lagrange1dGLL(ngllx,gllpx,xi,lagrange_x,lagrange_dx)
+
+    n = 0
+    do i = 1,ngllx
+      n = n+1
+      lagrange_gll1d(ii,n)=lagrange_x(i)
+      dlagrange_gll1d(1,ii,n)=lagrange_dx(i)
+    enddo
+  enddo
+
+  return
+  end subroutine gll_quadrature1d
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+
+  subroutine lagrange1d(nenode,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenode ! number of nodes in an 1d element
+  integer :: i,j,k
+  real(kind=kdble),intent(in) :: xi ! point where to calculate lagrange function and
+  !its derivative
+  real(kind=kdble),dimension(nenode),intent(out) :: phi,dphi_dxi
+  real(kind=kdble),dimension(nenode) :: xii,term,dterm,sum_term
+  real(kind=kdble) :: dx
+
+  ! compute natural coordnates
+  dx = 2.0_kdble/real((nenode-1),kdble)! length = 2.0 as xi is taken -1 to +1
+  do i = 1,nenode
+    ! coordinates when origin is in the left
+    xii(i)=real((i-1),kdble)*dx
+  enddo
+
+  ! origin is tranformed to mid point
+  xii = xii-1.0_kdble
+
+  do i = 1,nenode
+    k = 0
+    phi(i)=1.0_kdble
+    do j = 1,nenode
+      if (j /= i) then
+        k = k+1
+        term(k)=(xi-xii(j))/(xii(i)-xii(j))
+        dterm(k)=1.0_kdble/(xii(i)-xii(j)) ! derivative of the term wrt xi
+
+        phi(i)=phi(i)*(xi-xii(j))/(xii(i)-xii(j))
+      endif
+    enddo
+
+    sum_term = 1.0_kdble
+    do j = 1,nenode-1
+      do k = 1,nenode-1
+        if (k == j) then
+          sum_term(j)=sum_term(j)*dterm(k)
+        else
+          sum_term(j)=sum_term(j)*term(k)
+        endif
+      enddo
+    enddo
+    dphi_dxi(i)=0.0_kdble
+    do j = 1,nenode-1
+      dphi_dxi(i)=dphi_dxi(i)+sum_term(j)
+    enddo
+  enddo
+
+  return
+  end subroutine lagrange1d
+
+!
+!===============================================================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+
+  subroutine lagrange1dGEN(nenod,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  integer :: i,j,k
+  real(kind=kdble),intent(in) :: xi ! point where to calculate lagrange function and
+  !its derivative
+  real(kind=kdble),dimension(nenod),intent(out) :: phi,dphi_dxi
+  real(kind=kdble),dimension(nenod) :: xii
+  real(kind=kdble),dimension(nenod-1) :: term,dterm,sum_term
+  real(kind=kdble) :: dx
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  ! compute natural coordnates
+  dx = 2.0_kdble/real((nenod-1),kdble)! length = 2.0 as xi is taken -1 to +1
+  do i = 1,nenod
+    ! coordinates when origin is in the left
+    xii(i)=real((i-1),kdble)*dx
+  enddo
+
+  ! origin is tranformed to mid point
+  xii = xii-one
+
+  do i = 1,nenod
+    k = 0
+    phi(i)=one
+    do j = 1,nenod
+      if (j /= i) then
+        k = k+1
+        term(k)=(xi-xii(j))/(xii(i)-xii(j))
+        dterm(k)=one/(xii(i)-xii(j)) ! derivative of the term wrt xi
+
+        phi(i)=phi(i)*term(k) !(xi-xii(j))/(xii(i)-xii(j))
+      endif
+    enddo
+
+    ! derivative of the polynomial: product rule
+    sum_term = one
+    do j = 1,nenod-1
+      do k = 1,nenod-1
+        if (k == j) then
+          sum_term(j)=sum_term(j)*dterm(k)
+        else
+          sum_term(j)=sum_term(j)*term(k)
+        endif
+      enddo
+    enddo
+    dphi_dxi(i)=sum(sum_term)
+    !dphi_dxi(i)=0.0_kdble
+    !do j=1,nenod-1
+    !  dphi_dxi(i)=dphi_dxi(i)+sum_term(j)
+    !enddo
+  enddo
+
+  return
+  end subroutine lagrange1dGEN
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+! Assumed Shape array: pass pointer, subarray or allocatable array
+
+  subroutine lagrange1dGENAS(nenod,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  integer :: i,j,k
+  real(kind=kdble),intent(in) :: xi ! point where to calculate lagrange function and
+  !its derivative
+  real(kind=kdble),dimension(:),intent(out) :: phi,dphi_dxi !,dimension(nenod)
+  real(kind=kdble),dimension(nenod) :: xii
+  real(kind=kdble),dimension(nenod-1) :: term,dterm,sum_term
+  real(kind=kdble) :: dx
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  ! compute natural coordnates
+  dx = 2.0_kdble/real((nenod-1),kdble)! length = 2.0 as xi is taken -1 to +1
+  do i = 1,nenod
+    ! coordinates when origin is in the left
+    xii(i)=real((i-1),kdble)*dx
+  enddo
+
+  ! origin is tranformed to mid point
+  xii = xii-one
+
+  do i = 1,nenod
+    k = 0
+    phi(i)=one
+    do j = 1,nenod
+      if (j /= i) then
+        k = k+1
+        term(k)=(xi-xii(j))/(xii(i)-xii(j))
+        dterm(k)=one/(xii(i)-xii(j)) ! derivative of the term wrt xi
+
+        phi(i)=phi(i)*term(k) !(xi-xii(j))/(xii(i)-xii(j))
+      endif
+    enddo
+
+    ! derivative of the polynomial: product rule
+    sum_term = one
+    do j = 1,nenod-1
+      do k = 1,nenod-1
+        if (k == j) then
+          sum_term(j)=sum_term(j)*dterm(k)
+        else
+          sum_term(j)=sum_term(j)*term(k)
+        endif
+      enddo
+    enddo
+    dphi_dxi(i)=sum(sum_term)
+    !dphi_dxi(i)=0.0_kdble
+    !do j=1,nenod-1
+    !  dphi_dxi(i)=dphi_dxi(i)+sum_term(j)
+    !enddo
+  enddo
+
+  return
+  end subroutine lagrange1dGENAS
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+
+  subroutine lagrange1dGLL(nenod,xii,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  real(kind=kdble),dimension(nenod),intent(in) :: xii
+  real(kind=kdble),intent(in) :: xi ! point where to calculate lagrange function and
+  !its derivative
+  real(kind=kdble),dimension(nenod),intent(out) :: phi,dphi_dxi
+
+  integer :: i,j,k
+  real(kind=kdble),dimension(nenod-1) :: term,dterm,sum_term
+  real(kind=kdble) :: dx
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  do i = 1,nenod
+    k = 0
+    phi(i)=one
+    do j = 1,nenod
+      if (j /= i) then
+        k = k+1
+        term(k)=(xi-xii(j))/(xii(i)-xii(j))
+        dterm(k)=one/(xii(i)-xii(j)) ! derivative of the term wrt xi
+
+        phi(i)=phi(i)*term(k)!(xi-xii(j))/(xii(i)-xii(j))
+      endif
+    enddo
+
+    ! derivative of the polynomial: product rule
+    sum_term = one
+    do j = 1,nenod-1
+      do k = 1,nenod-1
+        if (k == j) then
+          sum_term(j)=sum_term(j)*dterm(k)
+        else
+          sum_term(j)=sum_term(j)*term(k)
+        endif
+      enddo
+    enddo
+    dphi_dxi(i)=sum(sum_term)
+    !dphi_dxi(i)=0.0_kdble
+    !do j=1,nenod-1
+    !  dphi_dxi(i)=dphi_dxi(i)+sum_term(j)
+    !enddo
+  enddo
+
+  return
+  end subroutine lagrange1dGLL
+
+!
+!===========================================
+!
+
+! this subroutine computes the 1d lagrange interpolation functions and their
+! derivatives at a given point xi.
+! Assumed Shape array: pass pointer, subarray or allocatable array
+
+  subroutine lagrange1dGLLAS(nenod,xii,xi,phi,dphi_dxi)
+
+  implicit none
+  integer,intent(in) :: nenod ! number of nodes in an 1d element
+  real(kind=kdble),dimension(nenod),intent(in) :: xii
+  real(kind=kdble),intent(in) :: xi ! point where to calculate lagrange function and
+  !its derivative
+  real(kind=kdble),dimension(:),intent(out) :: phi,dphi_dxi !,dimension(nenod)
+
+  integer :: i,j,k
+  real(kind=kdble),dimension(nenod-1) :: term,dterm,sum_term
+  real(kind=kdble) :: dx
+  real(kind=kdble),parameter :: one=1.0_kdble
+
+  do i = 1,nenod
+    k = 0
+    phi(i)=one
+    do j = 1,nenod
+      if (j /= i) then
+        k = k+1
+        term(k)=(xi-xii(j))/(xii(i)-xii(j))
+        dterm(k)=one/(xii(i)-xii(j)) ! derivative of the term wrt xi
+
+        phi(i)=phi(i)*term(k)!(xi-xii(j))/(xii(i)-xii(j))
+      endif
+    enddo
+
+    ! derivative of the polynomial: product rule
+    sum_term = one
+    do j = 1,nenod-1
+      do k = 1,nenod-1
+        if (k == j) then
+          sum_term(j)=sum_term(j)*dterm(k)
+        else
+          sum_term(j)=sum_term(j)*term(k)
+        endif
+      enddo
+    enddo
+    dphi_dxi(i)=sum(sum_term)
+    !dphi_dxi(i)=0.0_kdble
+    !do j=1,nenod-1
+    !  dphi_dxi(i)=dphi_dxi(i)+sum_term(j)
+    !enddo
+  enddo
+
+  return
+  end subroutine lagrange1dGLLAS
+
+!===========================================
+!
+!  Library to compute the Gauss-Lobatto-Legendre points and weights
+!  Based on Gauss-Lobatto routines from M.I.T.
+!  Department of Mechanical Engineering
+!
+!===========================================
+
+  real(kind=kdble) function endw1(n,alpha,beta) !double precision
+
+  implicit none
+
+  integer n
+  real(kind=kdble) alpha,beta !double precision
+
+  real(kind=kdble), parameter :: zero=0._kdble,one=1._kdble,two=2._kdble, &
+  three = 3._kdble,four = 4._kdble !double precision
+  real(kind=kdble) apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3 !double precision
+  !double precision, external :: gammaf
+  integer i
+
+  f3 = zero
+  apb   = alpha+beta
+  if (n == 0) then
+    endw1 = zero
+    return
+  endif
+  f1   = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
+  f1   = f1*(apb+two)*two**(apb+two)/two
+  if (n == 1) then
+    endw1 = f1
+    return
+  endif
+  fint1 = gammaf(alpha+two)*gammaf(beta+one)/gammaf(apb+three)
+  fint1 = fint1*two**(apb+two)
+  fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
+  fint2 = fint2*two**(apb+three)
+  f2    = (-two*(beta+two)*fint1 + (apb+four)*fint2) * (apb+three)/four
+  if (n == 2) then
+    endw1 = f2
+    return
+  endif
+  do i = 3,n
+    di   = dble(i-1)
+    abn  = alpha+beta+di
+    abnn = abn+di
+    a1   = -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
+    a2   =  (two*(alpha-beta))/(abnn*(abnn+two))
+    a3   =  (two*(abn+one))/((abnn+two)*(abnn+one))
+    f3   =  -(a2*f2+a1*f1)/a3
+    f1   = f2
+    f2   = f3
+  enddo
+  endw1  = f3
+
+  end function endw1
+  !=======================================================================
+
+  real(kind=kdble) function endw2(n,alpha,beta) !double precision
+
+  implicit none
+
+  integer n
+  real(kind=kdble) alpha,beta !double precision
+
+  real(kind=kdble), parameter :: zero=0._kdble,one=1._kdble,two=2._kdble, &
+  three = 3._kdble,four = 4._kdble !double precision
+  real(kind=kdble) apb,f1,fint1,fint2,f2,di,abn,abnn,a1,a2,a3,f3 !double precision
+  !real(kind=kdble), external :: gammaf
+  integer i
+
+  apb   = alpha+beta
+  f3 = zero
+  if (n == 0) then
+    endw2 = zero
+    return
+  endif
+  f1   = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
+  f1   = f1*(apb+two)*two**(apb+two)/two
+  if (n == 1) then
+    endw2 = f1
+    return
+  endif
+  fint1 = gammaf(alpha+one)*gammaf(beta+two)/gammaf(apb+three)
+  fint1 = fint1*two**(apb+two)
+  fint2 = gammaf(alpha+two)*gammaf(beta+two)/gammaf(apb+four)
+  fint2 = fint2*two**(apb+three)
+  f2    = (two*(alpha+two)*fint1 - (apb+four)*fint2) * (apb+three)/four
+  if (n == 2) then
+    endw2 = f2
+    return
+  endif
+  do i = 3,n
+    di   = dble(i-1)
+    abn  = alpha+beta+di
+    abnn = abn+di
+    a1   =  -(two*(di+alpha)*(di+beta))/(abn*abnn*(abnn+one))
+    a2   =  (two*(alpha-beta))/(abnn*(abnn+two))
+    a3   =  (two*(abn+one))/((abnn+two)*(abnn+one))
+    f3   =  -(a2*f2+a1*f1)/a3
+    f1   = f2
+    f2   = f3
+  enddo
+  endw2  = f3
+
+  end function endw2
+
+  !
+  !=======================================================================
+  !
+
+  real(kind=kdble) function gammaf (x) !double precision
+
+  implicit none
+
+  real(kind=kdble), parameter :: pi = 3.141592653589793_kdble !double precision
+
+  real(kind=kdble) x !double precision
+
+  real(kind=kdble), parameter :: half=0.5_kdble,one=1._kdble,two=2._kdble !double precision
+
+  gammaf = one
+
+  if (x == -half) gammaf = -two*sqrt(pi)
+  if (x == half) gammaf =  sqrt(pi)
+  if (x == one ) gammaf =  one
+  if (x == two ) gammaf =  one
+  if (x == 1.5_kdble) gammaf =  sqrt(pi)/2._kdble
+  if (x == 2.5_kdble) gammaf =  1.5_kdble*sqrt(pi)/2._kdble
+  if (x == 3.5_kdble) gammaf =  2.5_kdble*1.5_kdble*sqrt(pi)/2._kdble
+  if (x == 3._kdble ) gammaf =  2._kdble
+  if (x == 4._kdble ) gammaf = 6._kdble
+  if (x == 5._kdble ) gammaf = 24._kdble
+  if (x == 6._kdble ) gammaf = 120._kdble
+
+  end function gammaf
+
+  !
+  !=====================================================================
+  !
+
+  subroutine jacg (xjac,np,alpha,beta)
+
+  !=======================================================================
+  !
+  ! computes np Gauss points, which are the zeros of the
+  ! Jacobi polynomial with parameters alpha and beta
+  !
+  !                  .alpha = beta =  0.0  ->  Legendre points
+  !                  .alpha = beta = -0.5  ->  Chebyshev points
+  !
+  !=======================================================================
+
+  implicit none
+
+  integer np
+  real(kind=kdble) alpha,beta !double precision
+  real(kind=kdble) xjac(np) !double precision
+
+  integer k,j,i,jmin,jm,n
+  real(kind=kdble) xlast,dth,x,x1,x2,recsum,delx,xmin,swap !double precision
+  real(kind=kdble) p,pd,pm1,pdm1,pm2,pdm2 !double precision
+
+  integer, parameter :: K_MAX_ITER = 10
+  real(kind=kdble), parameter :: zero = 0._kdble, eps = 1.0e-12_kdble !double precision
+
+  pm1 = zero
+  pm2 = zero
+  pdm1 = zero
+  pdm2 = zero
+
+  xlast = 0._kdble
+  n   = np-1
+  dth = 4._kdble*atan(1._kdble)/(2._kdble*dble(n)+2._kdble)
+  p = 0._kdble
+  pd = 0._kdble
+  jmin = 0
+  do j = 1,np
+    if (j == 1) then
+      x = cos((2._kdble*(dble(j)-1._kdble)+1._kdble)*dth)
+    else
+      x1 = cos((2._kdble*(dble(j)-1._kdble)+1._kdble)*dth)
+      x2 = xlast
+      x  = (x1+x2)/2._kdble
+    endif
+    do k = 1,K_MAX_ITER
+      call jacobf (p,pd,pm1,pdm1,pm2,pdm2,np,alpha,beta,x)
+      recsum = 0._kdble
+      jm = j-1
+      do i = 1,jm
+          recsum = recsum+1._kdble/(x-xjac(np-i+1))
+      enddo
+      delx = -p/(pd-recsum*p)
+      x    = x+delx
+      if (abs(delx) < eps) goto 31
+    enddo
+  31  continue
+    xjac(np-j+1) = x
+    xlast        = x
+  enddo
+  do i = 1,np
+    xmin = 2._kdble
+    do j = i,np
+      if (xjac(j) < xmin) then
+          xmin = xjac(j)
+          jmin = j
+      endif
+    enddo
+    if (jmin /= i) then
+      swap = xjac(i)
+      xjac(i) = xjac(jmin)
+      xjac(jmin) = swap
+    endif
+  enddo
+
+  end subroutine jacg
+
+  !
+  !=====================================================================
+  !
+
+  subroutine jacobf (poly,pder,polym1,pderm1,polym2,pderm2,n,alp,bet,x)
+
+  !=======================================================================
+  !
+  ! Computes the Jacobi polynomial of degree n and its derivative at x
+  !
+  !=======================================================================
+
+  implicit none
+
+  real(kind=kdble) poly,pder,polym1,pderm1,polym2,pderm2,alp,bet,x !double precision
+  integer n
+
+  real(kind=kdble) apb,polyl,pderl,dk,a1,a2,b3,a3,a4,polyn,pdern,psave,pdsave !double precision
+  integer k
+
+  apb  = alp+bet
+  poly = 1._kdble
+  pder = 0._kdble
+  psave = 0._kdble
+  pdsave = 0._kdble
+
+  if (n == 0) return
+
+  polyl = poly
+  pderl = pder
+  poly  = (alp-bet+(apb+2._kdble)*x)/2._kdble
+  pder  = (apb+2._kdble)/2._kdble
+  if (n == 1) return
+
+  do k = 2,n
+    dk = dble(k)
+    a1 = 2._kdble*dk*(dk+apb)*(2._kdble*dk+apb-2._kdble)
+    a2 = (2._kdble*dk+apb-1._kdble)*(alp**2-bet**2)
+    b3 = (2._kdble*dk+apb-2._kdble)
+    a3 = b3*(b3+1._kdble)*(b3+2._kdble)
+    a4 = 2._kdble*(dk+alp-1._kdble)*(dk+bet-1._kdble)*(2._kdble*dk+apb)
+    polyn  = ((a2+a3*x)*poly-a4*polyl)/a1
+    pdern  = ((a2+a3*x)*pder-a4*pderl+a3*poly)/a1
+    psave  = polyl
+    pdsave = pderl
+    polyl  = poly
+    poly   = polyn
+    pderl  = pder
+    pder   = pdern
+  enddo
+
+  polym1 = polyl
+  pderm1 = pderl
+  polym2 = psave
+  pderm2 = pdsave
+
+  end subroutine jacobf
+
+  !
+  !------------------------------------------------------------------------
+  !
+
+  real(kind=kdble) function PNDLEG (Z,N) !double precision
+
+  !------------------------------------------------------------------------
+  !
+  !     Compute the derivative of the Nth order Legendre polynomial at Z.
+  !     Based on the recursion formula for the Legendre polynomials.
+  !
+  !------------------------------------------------------------------------
+  implicit none
+
+  real(kind=kdble) z !double precision
+  integer n
+
+  real(kind=kdble) P1,P2,P1D,P2D,P3D,FK,P3 !double precision
+  integer k
+
+  P1   = 1._kdble
+  P2   = Z
+  P1D  = 0._kdble
+  P2D  = 1._kdble
+  P3D  = 1._kdble
+
+  do K = 1, N-1
+    FK  = dble(K)
+    P3  = ((2._kdble*FK+1._kdble)*Z*P2-FK*P1)/(FK+1._kdble)
+    P3D = ((2._kdble*FK+1._kdble)*P2+(2._kdble*FK+1._kdble)*Z*P2D-FK*P1D)/(FK+1._kdble)
+    P1  = P2
+    P2  = P3
+    P1D = P2D
+    P2D = P3D
+  enddo
+
+  PNDLEG = P3D
+
+  end function pndleg
+
+  !
+  !------------------------------------------------------------------------
+  !
+
+  real(kind=kdble) function PNLEG (Z,N) !double precision
+
+  !------------------------------------------------------------------------
+  !
+  !     Compute the value of the Nth order Legendre polynomial at Z.
+  !     Based on the recursion formula for the Legendre polynomials.
+  !
+  !------------------------------------------------------------------------
+  implicit none
+
+  real(kind=kdble) z !double precision
+  integer n
+
+  real(kind=kdble) P1,P2,P3,FK !double precision
+  integer k
+
+  P1   = 1._kdble
+  P2   = Z
+  P3   = P2
+
+  do K = 1, N-1
+    FK  = dble(K)
+    P3  = ((2._kdble*FK+1._kdble)*Z*P2 - FK*P1)/(FK+1._kdble)
+    P1  = P2
+    P2  = P3
+  enddo
+
+  PNLEG = P3
+
+  end function pnleg
+
+  !
+  !------------------------------------------------------------------------
+  !
+
+  real(kind=kdble) function pnormj (n,alpha,beta) !double precision
+
+  implicit none
+
+  real(kind=kdble) alpha,beta !double precision
+  integer n
+
+  real(kind=kdble) one,two,dn,const,prod,dindx,frac !double precision
+  !real(kind=kdble), external :: gammaf
+  integer i
+
+  one   = 1._kdble
+  two   = 2._kdble
+  dn    = dble(n)
+  const = alpha+beta+one
+
+  if (n <= 1) then
+    prod   = gammaf(dn+alpha)*gammaf(dn+beta)
+    prod   = prod/(gammaf(dn)*gammaf(dn+alpha+beta))
+    pnormj = prod * two**const/(two*dn+const)
+    return
+  endif
+
+  prod  = gammaf(alpha+one)*gammaf(beta+one)
+  prod  = prod/(two*(one+const)*gammaf(const+one))
+  prod  = prod*(one+alpha)*(two+alpha)
+  prod  = prod*(one+beta)*(two+beta)
+
+  do i = 3,n
+    dindx = dble(i)
+    frac  = (dindx+alpha)*(dindx+beta)/(dindx*(dindx+alpha+beta))
+    prod  = prod*frac
+  enddo
+
+  pnormj = prod * two**const/(two*dn+const)
+
+  end function pnormj
+
+  !
+  !------------------------------------------------------------------------
+  !
+
+  subroutine zwgjd(z,w,np,alpha,beta)
+
+  !=======================================================================
+  !
+  !     Z w g j d : Generate np Gauss-Jacobi points and weights
+  !                 associated with Jacobi polynomial of degree n = np-1
+  !
+  !     Note : Coefficients alpha and beta must be greater than -1.
+  !     ----
+  !=======================================================================
+
+  implicit none
+
+  real(kind=kdble), parameter :: zero=0._kdble,one=1._kdble,two=2._kdble !double precision
+
+  integer np
+  real(kind=kdble) z(np),w(np) !double precision
+  real(kind=kdble) alpha,beta !double precision
+
+  integer n,np1,np2,i
+  real(kind=kdble) p,pd,pm1,pdm1,pm2,pdm2 !double precision
+  real(kind=kdble) apb,dnp1,dnp2,fac1,fac2,fac3,fnorm,rcoef !double precision
+  !real(kind=kdble), external :: gammaf,pnormj
+
+  pd = zero
+  pm1 = zero
+  pm2 = zero
+  pdm1 = zero
+  pdm2 = zero
+
+  n    = np-1
+  apb  = alpha+beta
+  p    = zero
+  pdm1 = zero
+
+  if (np <= 0) then
+    write(*,*) 'ERROR: number of Gauss points < 1!'
+    stop
+  endif
+
+  if ((alpha <= -one) .or. (beta <= -one)) then
+    write(*,*) 'ERROR: alpha and beta must be greater than -1!'
+    stop
+  endif
+
+  if (np == 1) then
+    z(1) = (beta-alpha)/(apb+two)
+    w(1) = gammaf(alpha+one)*gammaf(beta+one)/gammaf(apb+two) * two**(apb+one)
+    return
+  endif
+
+  call jacg(z,np,alpha,beta)
+
+  np1   = n+1
+  np2   = n+2
+  dnp1  = dble(np1)
+  dnp2  = dble(np2)
+  fac1  = dnp1+alpha+beta+one
+  fac2  = fac1+dnp1
+  fac3  = fac2+one
+  fnorm = pnormj(np1,alpha,beta)
+  rcoef = (fnorm*fac2*fac3)/(two*fac1*dnp2)
+  do i = 1,np
+    call jacobf(p,pd,pm1,pdm1,pm2,pdm2,np2,alpha,beta,z(i))
+    w(i) = -rcoef/(p*pdm1)
+  enddo
+
+  end subroutine zwgjd
+
+  !
+  !------------------------------------------------------------------------
+  !
+
+  subroutine zwgljd(z,w,np,alpha,beta)
+
+  !=======================================================================
+  !
+  !     Z w g l j d : Generate np Gauss-Lobatto-Jacobi points and the
+  !     -----------   weights associated with Jacobi polynomials of degree
+  !                   n = np-1.
+  !
+  !     Note : alpha and beta coefficients must be greater than -1.
+  !            Legendre polynomials are special case of Jacobi polynomials
+  !            just by setting alpha and beta to 0.
+  !
+  !=======================================================================
+
+  implicit none
+
+  real(kind=kdble), parameter :: zero=0._kdble,one=1._kdble,two=2._kdble !double precision
+
+  integer np
+  real(kind=kdble) alpha,beta !double precision
+  real(kind=kdble) z(np), w(np) !double precision
+
+  integer n,nm1,i
+  real(kind=kdble) p,pd,pm1,pdm1,pm2,pdm2 !double precision
+  real(kind=kdble) alpg,betg !double precision
+  !real(kind=kdble), external :: endw1,endw2
+
+  p = zero
+  pm1 = zero
+  pm2 = zero
+  pdm1 = zero
+  pdm2 = zero
+
+  n   = np-1
+  nm1 = n-1
+  pd  = zero
+
+  if (np <= 1) then
+    write(*,*) 'ERROR: number of Gauss-Lobatto points < 2!'
+    stop
+  endif
+
+  ! with spectral elements, use at least 3 points
+  if (np < 3) then
+    write(*,*) 'WARNING: number of Gauss-Lobatto points < 3!'
+    !stop
+  endif
+  !if (np <= 2) stop 'minimum number of Gauss-Lobatto points for the SEM is 3'
+
+  if ((alpha <= -one) .or. (beta <= -one)) then
+    write(*,*) 'ERROR: alpha and beta must be greater than -1!'
+    stop
+  endif
+
+  if (nm1 > 0) then
+    alpg  = alpha+one
+    betg  = beta+one
+    call zwgjd(z(2),w(2),nm1,alpg,betg)
+  endif
+
+  z(1)  = - one
+  z(np) =  one
+
+  do i = 2,np-1
+    w(i) = w(i)/(one-z(i)**2)
+  enddo
+
+  call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(1))
+  w(1)  = endw1(n,alpha,beta)/(two*pd)
+  call jacobf(p,pd,pm1,pdm1,pm2,pdm2,n,alpha,beta,z(np))
+  w(np) = endw2(n,alpha,beta)/(two*pd)
+
+  end subroutine zwgljd
+
+
+end module gll_library1
+
+#endif
diff --git a/src/specfem3D/SIEM_poisson.F90 b/src/specfem3D/SIEM_poisson.F90
new file mode 100644
index 000000000..a3a2cc0d4
--- /dev/null
+++ b/src/specfem3D/SIEM_poisson.F90
@@ -0,0 +1,2361 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+
+!TODO: replace ibool with inode_elmt
+! this module contains infinite-element routines
+! REVISION
+!   HNG, Apr 11,2012; HNG, Jul 12,2011; HNG, Apr 09,2010
+
+module poisson
+
+  use constants_solver, only: CUSTOM_REAL,SIZE_REAL
+  integer,parameter :: kreal = CUSTOM_REAL
+
+contains
+
+  subroutine poisson_stiffness(iregion,nelmt,nnode,ibool,xstore,ystore,zstore, &
+                               storekmat,dprecon)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE
+  use gll_library1
+  use math_library, only: determinant,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  !use specfem_par_crustmantle, only: rmassz_crust_mantle !TODO: remove this
+  implicit none
+  integer,intent(in) :: iregion,nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(out) :: storekmat(NGLLCUBE,NGLLCUBE,nelmt),dprecon(nnode)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,k,i_elmt
+  integer :: dnx,dny,dnz,egdof(NGLLCUBE),ignod(ngnod)
+  real(kind=kreal) :: detjac
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX),wxgll(NGLLX),etagll(NGLLY),wygll(NGLLY), &
+                      zetagll(NGLLZ),wzgll(NGLLZ)
+  real(kind=kdble) :: dshape_hex8(ndim,ngnod,NGLLCUBE),gll_weights(NGLLCUBE), &
+                      gll_points(ndim,NGLLCUBE), &
+                      lagrange_gll(NGLLCUBE,NGLLCUBE),dlagrange_gll(ndim,NGLLCUBE,NGLLCUBE)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLLCUBE),jac(ndim,ndim), &
+                      kmat(NGLLCUBE,NGLLCUBE)
+
+  call zwgljd(xigll,wxgll,NGLLX,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX,NGLLY,NGLLZ,NGLLCUBE,xigll,etagll, &
+                            zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX,NGLLY,NGLLZ,NGLLCUBE,gll_points,gll_weights, &
+                      lagrange_gll,dlagrange_gll)
+
+  dnx = NGLLX-1; dny = NGLLY-1; dnz = NGLLZ-1
+  storekmat = zero; dprecon = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong!!!!
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);               ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);       ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! top corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLLCUBE/))
+    kmat = zero
+    do i = 1,NGLLCUBE
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gll(:,i,:)) ! use der for gll
+      kmat = kmat+matmul(transpose(deriv),deriv)*detjac*gll_weights(i)
+    enddo
+    storekmat(:,:,i_elmt)=kmat
+    do k = 1,NGLLCUBE
+      dprecon(egdof(k))=dprecon(egdof(k))+kmat(k,k)
+    enddo
+  enddo
+  end subroutine poisson_stiffness
+
+!
+!===========================================
+!
+
+  subroutine poisson_stiffnessINF(nelmt,nnode,ibool,xstore,ystore,zstore, &
+                                  storekmat,dprecon)
+
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLLCUBE
+  use infinite_element
+  use math_library, only: determinant,invert
+  implicit none
+  integer,intent(in) :: nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(out) :: storekmat(NGLLCUBE,NGLLCUBE,nelmt),dprecon(nnode)
+
+  integer,parameter :: iface = 6,ndim = 3,ngnod = 8,nginf = 8
+  ! GLL-Radau quadrature
+  integer,parameter :: nipinf = NGLLCUBE,nipx = NGLLX !NGLLX = NGLLY = NGLLZ
+
+  !! Gauss quadrature
+  !integer,parameter :: nipinf=8,nipx=8
+
+  integer :: i,k,i_elmt
+  integer :: egdof(NGLLCUBE),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal),parameter :: one=1.0_kreal,zero=0.0_kreal
+  real(kind=kreal) :: detjac
+  real(kind=kreal) :: coordinf(nginf,ndim),deriv(ndim,NGLLCUBE)
+  real(kind=kreal) :: jac(ndim,ndim),kmat(NGLLCUBE,NGLLCUBE),x0(ndim)
+
+  real(kind=kdble),parameter :: done=1.0_kdble
+  real(kind=kdble) :: ainf,gaminf,GLw(nipinf)
+  real(kind=kdble) :: shape_infinite(nipinf,nginf),dshape_infinite(ndim,nipinf,nginf)
+  real(kind=kdble) :: lagrange_gl(nipinf,NGLLCUBE),dlagrange_gl(ndim,nipinf,NGLLCUBE)
+
+  ! ainf is irrevelant for the time being
+  ! nd,ainf,gaminf can be removed from argument list
+  gaminf = 1.0002_kdble !1.99_kdble
+
+  x0=(/ -0.6334289, 0.4764568, 0.6045561 /)!zero ! center of the Earth
+
+  ! GLL-Radau quadrature
+  call shape_function_infiniteGLHEX8ZW_GLLR(ndim,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nipinf, &
+                                            iface,gaminf,ainf,shape_infinite,dshape_infinite,lagrange_gl,dlagrange_gl, &
+                                            GLw)
+  !! Gauss quadrature
+  !call shape_function_infiniteGLHEX8ZW_GQ(ndim,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nipx,nipinf, &
+  !iface,done,gaminf,ainf,shape_infinite,dshape_infinite,lagrange_gl,dlagrange_gl,GLw)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-2
+  storekmat = zero; dprecon = zero
+  do i_elmt = 1,nelmt
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ-1:dnz,i_elmt),(/ngnod/))
+    ! indicial order NOT EXODUS order!!!
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);         ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(1,NGLLY,1,i_elmt);     ignod(4)=ibool(NGLLX,NGLLY,1,i_elmt)
+    ! top corner nodes - second last
+    ignod(5)=ibool(1,1,NGLLZ-1,i_elmt);     ignod(6)=ibool(NGLLX,1,NGLLZ-1,i_elmt)
+    ignod(7)=ibool(1,NGLLY,NGLLZ-1,i_elmt); ignod(8)=ibool(NGLLX,NGLLY,NGLLZ-1,i_elmt);
+
+    coordinf(:,1)=xstore(ignod)
+    coordinf(:,2)=ystore(ignod)
+    coordinf(:,3)=zstore(ignod)
+
+    ! Zienkiewicz infinite coordinates
+    coordinf(5,:)=x0+gaminf*(coordinf(1,:)-x0)
+    coordinf(6,:)=x0+gaminf*(coordinf(2,:)-x0)
+    coordinf(7,:)=x0+gaminf*(coordinf(3,:)-x0)
+    coordinf(8,:)=x0+gaminf*(coordinf(4,:)-x0)
+
+    ! Point X0 (Pole)
+    coordinf(1,:)=x0; coordinf(2,:)=x0
+    coordinf(3,:)=x0; coordinf(4,:)=x0
+
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLLCUBE/))
+    kmat = zero
+    do i = 1,nipinf
+      jac = matmul(dshape_infinite(:,i,:),coordinf) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gl(:,i,:))
+      kmat = kmat+matmul(transpose(deriv),deriv)*detjac*GLw(i)
+    enddo
+    storekmat(:,:,i_elmt)=kmat
+    do k = 1,NGLLCUBE
+      dprecon(egdof(k))=dprecon(egdof(k))+kmat(k,k)
+    enddo
+  enddo
+  !stop 'oh0'
+  end subroutine poisson_stiffnessINF
+
+!
+!===========================================
+!
+
+  subroutine poisson_stiffness3(iregion,nelmt,nnode,ibool,xstore,ystore,zstore, &
+                                nnode1,ibool1,storekmat,dprecon)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE, &
+                              NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use gll_library1
+  use math_library, only: determinant,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  !use specfem_par_crustmantle, only: rmassz_crust_mantle !TODO: remove this
+  implicit none
+  integer,intent(in) :: iregion,nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(out) :: storekmat(NGLLCUBE_INF,NGLLCUBE_INF,nelmt),dprecon(nnode1)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,k,i_elmt
+  integer :: dnx,dny,dnz,egdof(NGLLCUBE_INF),ignod(ngnod)
+  real(kind=kreal) :: detjac
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll1(NGLLX_INF),wxgll1(NGLLX_INF),etagll1(NGLLY_INF), &
+  wygll1(NGLLY_INF),zetagll1(NGLLZ_INF),wzgll1(NGLLZ_INF)
+  real(kind=kdble) :: dshape_hex8(ndim,ngnod,NGLLCUBE_INF),gll_weights(NGLLCUBE_INF), &
+                      gll_points(ndim,NGLLCUBE_INF), &
+                      lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF),dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLLCUBE_INF),jac(ndim,ndim), &
+                      kmat(NGLLCUBE_INF,NGLLCUBE_INF)
+
+  call zwgljd(xigll1,wxgll1,NGLLX_INF,jalpha,jbeta)
+  call zwgljd(etagll1,wygll1,NGLLY_INF,jalpha,jbeta)
+  call zwgljd(zetagll1,wzgll1,NGLLZ_INF,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,xigll1,etagll1, &
+                            zetagll1,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points,gll_weights, &
+                      lagrange_gll,dlagrange_gll)
+
+  dnx = NGLLX-1; dny = NGLLY-1; dnz = NGLLZ-1
+  storekmat = zero; dprecon = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong!!!!
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! top corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = ibool1(:,i_elmt)!reshape(ibool1(:,:,:,i_elmt),(/NGLLCUBE_INF/))
+    kmat = zero
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gll(:,i,:)) ! use der for gll
+      kmat = kmat+matmul(transpose(deriv),deriv)*detjac*gll_weights(i)
+    enddo
+    storekmat(:,:,i_elmt)=kmat
+    do k = 1,NGLLCUBE_INF
+      dprecon(egdof(k))=dprecon(egdof(k))+kmat(k,k)
+    enddo
+  enddo
+  end subroutine poisson_stiffness3
+
+!
+!===========================================
+!
+
+  subroutine poisson_stiffnessINF3(nelmt,nnode,ibool,xstore,ystore,zstore,nnode1, &
+                                   ibool1,storekmat,dprecon)
+
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLLCUBE,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use infinite_element
+  use math_library, only: determinant,invert
+  implicit none
+  integer,intent(in) :: nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(out) :: storekmat(NGLLCUBE_INF,NGLLCUBE_INF,nelmt),dprecon(nnode1)
+
+  integer,parameter :: iface = 6,ndim = 3,ngnod = 8,nginf = 8
+  ! GLL-Radau quadrature
+  integer,parameter :: nipinf = NGLLCUBE_INF,nipx = NGLLX_INF !NGLLX_INF = NGLLY_INF = NGLLZ_INF
+
+  !! Gauss quadrature
+  !integer,parameter :: nipinf=8,nipx=8
+
+  integer :: i,k,i_elmt
+  integer :: egdof(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal),parameter :: one=1.0_kreal,zero=0.0_kreal
+  real(kind=kreal) :: detjac
+  real(kind=kreal) :: coordinf(nginf,ndim),deriv(ndim,NGLLCUBE_INF)
+  real(kind=kreal) :: jac(ndim,ndim),kmat(NGLLCUBE_INF,NGLLCUBE_INF),x0(ndim)
+
+  real(kind=kdble),parameter :: done=1.0_kdble
+  real(kind=kdble) :: ainf,gaminf,GLw(nipinf)
+  real(kind=kdble) :: shape_infinite(nipinf,nginf),dshape_infinite(ndim,nipinf,nginf)
+  real(kind=kdble) :: lagrange_gl(nipinf,NGLLCUBE_INF),dlagrange_gl(ndim,nipinf,NGLLCUBE_INF)
+
+  ! ainf is irrevelant for the time being
+  ! nd,ainf,gaminf can be removed from argument list
+  gaminf = 1.0002_kdble !1.99_kdble
+
+  x0=(/ -0.6334289, 0.4764568, 0.6045561 /)!zero ! center of the Earth
+
+  ! GLL-Radau quadrature
+  call shape_function_infiniteGLHEX8ZW_GLLR(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF, &
+  nipinf,iface,gaminf,ainf,shape_infinite,dshape_infinite,lagrange_gl, &
+  dlagrange_gl,GLw)
+  !! Gauss quadrature
+  !call shape_function_infiniteGLHEX8ZW_GQ(ndim,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nipx,nipinf, &
+  !iface,done,gaminf,ainf,shape_infinite,dshape_infinite,lagrange_gl,dlagrange_gl,GLw)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-2
+  storekmat = zero; dprecon = zero
+  do i_elmt = 1,nelmt
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ-1:dnz,i_elmt),(/ngnod/))
+    ! indicial order NOT EXODUS order!!!
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);         ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(1,NGLLY,1,i_elmt);     ignod(4)=ibool(NGLLX,NGLLY,1,i_elmt)
+    ! top corner nodes - second last
+    ignod(5)=ibool(1,1,NGLLZ-1,i_elmt);     ignod(6)=ibool(NGLLX,1,NGLLZ-1,i_elmt)
+    ignod(7)=ibool(1,NGLLY,NGLLZ-1,i_elmt); ignod(8)=ibool(NGLLX,NGLLY,NGLLZ-1,i_elmt);
+
+    coordinf(:,1)=xstore(ignod)
+    coordinf(:,2)=ystore(ignod)
+    coordinf(:,3)=zstore(ignod)
+
+    ! Zienkiewicz infinite coordinates
+    coordinf(5,:)=x0+gaminf*(coordinf(1,:)-x0)
+    coordinf(6,:)=x0+gaminf*(coordinf(2,:)-x0)
+    coordinf(7,:)=x0+gaminf*(coordinf(3,:)-x0)
+    coordinf(8,:)=x0+gaminf*(coordinf(4,:)-x0)
+
+    ! Point X0 (Pole)
+    coordinf(1,:)=x0; coordinf(2,:)=x0
+    coordinf(3,:)=x0; coordinf(4,:)=x0
+
+    egdof = ibool1(:,i_elmt) !reshape(ibool1(:,:,:,i_elmt),(/NGLLCUBE_INF/))
+    kmat = zero
+    do i = 1,nipinf
+      jac = matmul(dshape_infinite(:,i,:),coordinf) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gl(:,i,:))
+      kmat = kmat+matmul(transpose(deriv),deriv)*detjac*GLw(i)
+    enddo
+    storekmat(:,:,i_elmt)=kmat
+    do k = 1,NGLLCUBE_INF
+      dprecon(egdof(k))=dprecon(egdof(k))+kmat(k,k)
+    enddo
+  enddo
+  end subroutine poisson_stiffnessINF3
+
+!
+!===========================================
+!
+
+  subroutine compute_poisson_rhoload()
+
+  use constants_solver, only: IREGION_INNER_CORE,IREGION_OUTER_CORE,IREGION_CRUST_MANTLE, &
+    NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_INNER_CORE, &
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NGLOB_INNER_CORE,NGLOB_OUTER_CORE, &
+    NGLOB_CRUST_MANTLE
+  use specfem_par, only: A_array_rotation,B_array_rotation,deltat,it,DT,t0, &
+    scale_t_inv,two_omega_earth,load
+  use specfem_par_crustmantle, only: displ_crust_mantle,ibool_crust_mantle, &
+    storerhojw_cm,gdof_cm
+  use specfem_par_outercore, only: displ_outer_core,ibool_outer_core, &
+    storerhojw_oc,gdof_oc
+  use specfem_par_innercore, only: displ_inner_core,ibool_inner_core, &
+    storerhojw_ic,gdof_ic
+
+  implicit none
+  !real(kind=CUSTOM_REAL) :: time
+  real(kind=CUSTOM_REAL) :: load_ic(NGLOB_INNER_CORE),load_oc(NGLOB_OUTER_CORE), &
+                            load_cm(NGLOB_CRUST_MANTLE)
+
+  !NGLLCUBE = NGLLX*NGLLY*NGLLZ
+
+  load = 0.0_CUSTOM_REAL
+
+  ! crust-mantle
+  call poisson_load_onlyrhoFAST(IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+                                nglob_crust_mantle,ibool_crust_mantle,storerhojw_cm,load_cm)
+
+  ! outer core
+  call poisson_load_onlyrhoFAST(IREGION_OUTER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_outer_core, &
+                                nglob_outer_core,ibool_outer_core,storerhojw_oc,load_oc)
+
+  ! inner core
+  call poisson_load_onlyrhoFAST(IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+                                nglob_inner_core,ibool_inner_core,storerhojw_ic,load_ic)
+
+  ! infinite
+  ! this region has no contrbution
+
+  ! assemble across the regions but not MPI
+  ! crust_mantle
+  load(gdof_cm)=load(gdof_cm)+load_cm
+
+  ! outer core
+  load(gdof_oc)=load(gdof_oc)+load_oc
+
+  ! inner core
+  load(gdof_ic)=load(gdof_ic)+load_ic
+
+  ! infinite
+  ! there no contribution from infinite region
+
+  load(0)=0.0_CUSTOM_REAL
+
+  return
+
+  end subroutine compute_poisson_rhoload
+
+!
+!===========================================
+!
+
+  subroutine compute_poisson_rhoload3()
+
+  use constants_solver, only: IREGION_INNER_CORE,IREGION_OUTER_CORE, &
+    IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_INNER_CORE, &
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NGLOB_INNER_CORE,NGLOB_OUTER_CORE, &
+    NGLOB_CRUST_MANTLE
+  use specfem_par, only: A_array_rotation,B_array_rotation,deltat,it,DT,t0, &
+    scale_t_inv,two_omega_earth,load1,nnode_ic1,nnode_oc1,nnode_cm1,nnode_inf1
+  use specfem_par_crustmantle, only: displ_crust_mantle,ibool_crust_mantle, &
+    storerhojw_cm1,gdof_cm1,inode_elmt_cm1
+  use specfem_par_outercore, only: displ_outer_core,ibool_outer_core, &
+    storerhojw_oc1,gdof_oc1,inode_elmt_oc1
+  use specfem_par_innercore, only: displ_inner_core,ibool_inner_core, &
+    storerhojw_ic1,gdof_ic1,inode_elmt_ic1
+
+  use specfem_par, only: NPROCTOT_VAL
+  use specfem_par, only: NUM_INTERFACES_CRUST_MANTLE1, &
+    MAX_NIBOOL_INTERFACES_CRUST_MANTLE1,NIBOOL_INTERFACES_CRUST_MANTLE1, &
+    IBOOL_INTERFACES_CRUST_MANTLE1,MY_neighborS_CRUST_MANTLE1
+  use specfem_par, only: NUM_INTERFACES_INNER_CORE1, &
+    MAX_NIBOOL_INTERFACES_INNER_CORE1,NIBOOL_INTERFACES_INNER_CORE1, &
+    IBOOL_INTERFACES_INNER_CORE1,MY_neighborS_INNER_CORE1
+  use specfem_par, only: NUM_INTERFACES_OUTER_CORE1, &
+    MAX_NIBOOL_INTERFACES_OUTER_CORE1,NIBOOL_INTERFACES_OUTER_CORE1, &
+    IBOOL_INTERFACES_OUTER_CORE1,MY_neighborS_OUTER_CORE1
+  implicit none
+  !real(kind=CUSTOM_REAL) :: time
+  real(kind=CUSTOM_REAL) :: load_ic(nnode_ic1),load_oc(nnode_oc1), &
+                            load_cm(nnode_cm1)
+
+  !NGLLCUBE = NGLLX*NGLLY*NGLLZ
+
+  load1 = 0.0_CUSTOM_REAL
+
+  !! crust-mantle
+  !call poisson_load_onlyrho3(1,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+  !nglob_crust_mantle,ibool_crust_mantle,xstore0_crust_mantle,ystore0_crust_mantle, &
+  !zstore0_crust_mantle,rhostore_crust_mantle,nnode_cm1,inode_elmt_cm1,load_cm)
+  !
+  !! outer core
+  !call poisson_load_onlyrho3(2,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_outer_core, &
+  !nglob_outer_core,ibool_outer_core,xstore0_outer_core,ystore0_outer_core, &
+  !zstore0_outer_core,rhostore_outer_core,nnode_oc1,inode_elmt_oc1,load_oc)
+  !
+  !! inner core
+  !call poisson_load_onlyrho3(3,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+  !nglob_inner_core,ibool_inner_core,xstore0_inner_core,ystore0_inner_core, &
+  !zstore0_inner_core,rhostore_inner_core,nnode_ic1,inode_elmt_ic1,load_ic)
+
+  ! crust-mantle
+  call poisson_load_onlyrhoFAST3(IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+                                 nglob_crust_mantle,ibool_crust_mantle,storerhojw_cm1,nnode_cm1,inode_elmt_cm1, &
+                                 load_cm)
+
+  ! outer core
+  call poisson_load_onlyrhoFAST3(IREGION_OUTER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_outer_core, &
+                                 nglob_outer_core,ibool_outer_core,storerhojw_oc1,nnode_oc1,inode_elmt_oc1, &
+                                 load_oc)
+
+  ! inner core
+  call poisson_load_onlyrhoFAST3(IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+                                 nglob_inner_core,ibool_inner_core,storerhojw_ic1,nnode_ic1,inode_elmt_ic1, &
+                                 load_ic)
+
+  ! infinite
+  ! this region has no contrbution
+
+  !!! assemble stiffness matrices
+  !!! assemble across the MPI processes in a region
+  !!! crust_mantle
+  !!call assemble_MPI_scalar(NPROCTOT_VAL,nnode_cm1, &
+  !!                      load_cm, &
+  !!                      num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
+  !!                      nibool_interfaces_crust_mantle1,ibool_interfaces_crust_mantle1, &
+  !!                      my_neighbors_crust_mantle1)
+  !!
+  !!! outer core
+  !!call assemble_MPI_scalar(NPROCTOT_VAL,nnode_oc1, &
+  !!                     load_oc, &
+  !!                      num_interfaces_outer_core1,max_nibool_interfaces_outer_core1, &
+  !!                      nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
+  !!                      my_neighbors_outer_core1)
+  !!
+  !!! inner core
+  !!call assemble_MPI_scalar(NPROCTOT_VAL,nnode_ic1, &
+  !!                      load_ic, &
+  !!                      num_interfaces_inner_core1,max_nibool_interfaces_inner_core1, &
+  !!                      nibool_interfaces_inner_core1,ibool_interfaces_inner_core1, &
+  !!                      my_neighbors_inner_core1)
+
+  ! assemble across the regions but not MPI
+  ! crust_mantle
+  load1(gdof_cm1)=load1(gdof_cm1)+load_cm
+
+  ! outer core
+  load1(gdof_oc1)=load1(gdof_oc1)+load_oc
+
+  ! inner core
+  load1(gdof_ic1)=load1(gdof_ic1)+load_ic
+
+  ! infinite
+  ! there is no contribution from infinite region
+
+  load1(0)=0.0_CUSTOM_REAL
+  return
+
+  end subroutine compute_poisson_rhoload3
+
+!
+!===========================================
+!
+
+  subroutine compute_grav_kl1_load(component)
+
+  ! Computes load for the first gravity kernel that must be solved using SIEM
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_CRUST_MANTLE, &
+                              NGLOB_CRUST_MANTLE,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use specfem_par, only: load1, nnode_cm1
+  use specfem_par_crustmantle, only: displ_crust_mantle,ibool_crust_mantle, &
+                                    storejw_cm1,gdof_cm1,inode_elmt_cm1, &
+                                    rho1siem_kl_crust_mantle
+  use gll_library1
+  implicit none
+
+  ! IO:
+  integer :: component ! the component of the kernel to be calculated
+
+  integer,parameter :: ndim = 3,ngnod = 8
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  real(kind=kdble) :: gll_weights(NGLLCUBE_INF),gll_points(ndim,NGLLCUBE_INF),lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF), &
+  dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF),dshape_hex8(ndim,ngnod,NGLLCUBE_INF)
+
+  real(kind=CUSTOM_REAL) :: load_cm(nnode_cm1), evalue(NGLLCUBE_INF), eload(NGLLCUBE_INF)
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac,eld(NGLLCUBE_INF),rho(NGLLCUBE_INF)
+
+  ! Based off of poisson_load_solid3FAST
+  load1 = 0.0_CUSTOM_REAL
+  load_cm = zero
+  do i_elmt = 1,nspec_crust_mantle
+    evalue = reshape(rho1siem_kl_crust_mantle(component,1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      eload = eload + storejw_cm1(i,i_elmt)*evalue(i)
+    enddo
+    egdof = inode_elmt_cm1(:,i_elmt)
+    load_cm(egdof)=load_cm(egdof)+eload
+  enddo
+
+  ! assemble across the regions but not MPI
+  ! crust_mantle -  multiply by 4*PI*G! or scaled
+  load1(gdof_cm1)=load1(gdof_cm1) + 4.0_kreal*load_cm
+  load1(0)=0.0_CUSTOM_REAL
+  return
+
+  end subroutine compute_grav_kl1_load
+
+!
+!===========================================
+!
+
+  subroutine compute_grav_kl2_load(icomp,jcomp)
+
+  ! Computes load for the first gravity kernel that must be solved using SIEM
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_CRUST_MANTLE, &
+    NGLOB_CRUST_MANTLE,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use specfem_par, only: load1, nnode_cm1
+  use specfem_par_crustmantle, only: displ_crust_mantle,ibool_crust_mantle, &
+          storejw_cm1,gdof_cm1,inode_elmt_cm1, &
+          rho2siem_kl_crust_mantle
+  use gll_library1
+
+  implicit none
+  ! IO variables
+  integer :: icomp,jcomp, i_elmt, i
+  !Local
+  integer,parameter :: ndim = 3,ngnod = 8
+  real(kind=kreal),parameter :: zero=0.0_kreal
+  integer :: egdof(NGLLCUBE_INF),ignod(ngnod)
+  real(kind=CUSTOM_REAL) :: load_cm(nnode_cm1), evalue(NGLLCUBE_INF), eload(NGLLCUBE_INF)
+
+
+  load1 = 0.0_CUSTOM_REAL
+
+  ! Based off of poisson_load_solid3FAST
+  load_cm = zero
+  do i_elmt = 1,nspec_crust_mantle
+    evalue = reshape(rho2siem_kl_crust_mantle(icomp,jcomp,1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      eload = eload + storejw_cm1(i,i_elmt)*evalue(i)
+    enddo
+    egdof = inode_elmt_cm1(:,i_elmt)
+    load_cm(egdof)=load_cm(egdof)+eload
+  enddo
+  ! assemble across the regions but not MPI
+  ! crust_mantle -  multiply by 4*PI*G! or scaled
+  load1(gdof_cm1)=load1(gdof_cm1) + 4.0_kreal*load_cm
+  load1(0)=0.0_CUSTOM_REAL
+  return
+
+  end subroutine compute_grav_kl2_load
+
+!
+!===========================================
+!
+
+  subroutine compute_poisson_load()
+
+  use constants_solver, only: IREGION_INNER_CORE,IREGION_CRUST_MANTLE, &
+    NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_INNER_CORE, &
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NGLOB_INNER_CORE,NGLOB_OUTER_CORE, &
+    NGLOB_CRUST_MANTLE
+  use specfem_par, only: A_array_rotationL,B_array_rotationL,deltat,it,DT,t0, &
+    scale_t_inv,two_omega_earth,load
+  use specfem_par_crustmantle, only: displ_crust_mantle,ibool_crust_mantle, &
+    storederiv_cm,storerhojw_cm,gdof_cm
+  use specfem_par_outercore, only: displ_outer_core,ibool_outer_core, &
+    storederiv_oc,storerhojw_oc,gdof_oc
+  use specfem_par_innercore, only: displ_inner_core,ibool_inner_core, &
+    storederiv_ic,storerhojw_ic,gdof_ic
+
+  implicit none
+  real(kind=CUSTOM_REAL) :: time
+  real(kind=CUSTOM_REAL) :: load_ic(NGLOB_INNER_CORE),load_oc(NGLOB_OUTER_CORE), &
+  load_cm(NGLOB_CRUST_MANTLE)
+
+  !NGLLCUBE = NGLLX*NGLLY*NGLLZ
+
+  load = 0.0_CUSTOM_REAL
+
+  ! inner core
+  !call poisson_load_solid(IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+  !nglob_inner_core,ibool_inner_core,xstore0_inner_core,ystore0_inner_core, &
+  !zstore0_inner_core,rhostore_inner_core,displ_inner_core,load_ic)
+
+  call poisson_load_solidFAST(IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+                              nglob_inner_core,ibool_inner_core,storederiv_ic,storerhojw_ic,displ_inner_core, &
+                              load_ic)
+
+  ! crust-mantle
+  !call poisson_load_solid(IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+  !nglob_crust_mantle,ibool_crust_mantle,xstore0_crust_mantle,ystore0_crust_mantle, &
+  !zstore0_crust_mantle,rhostore_crust_mantle,displ_crust_mantle,load_cm)
+
+  call poisson_load_solidFAST(IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+                              nglob_crust_mantle,ibool_crust_mantle,storederiv_cm,storerhojw_cm, &
+                              displ_crust_mantle,load_cm)
+
+  ! outer core
+  ! time
+  if (CUSTOM_REAL == SIZE_REAL) then
+    time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
+  else
+    time = (dble(it-1)*DT-t0)*scale_t_inv
+  endif
+  !call poisson_load_fluid(time,deltat,two_omega_earth,NSPEC_OUTER_CORE, &
+  !NGLOB_OUTER_CORE,A_array_rotation,B_array_rotation,rhostore_outer_core, &
+  !displ_outer_core,load_oc)
+
+  !call poisson_load_fluidNEW(nspec_outer_core,nglob_outer_core,ibool_outer_core, &
+  !xstore0_outer_core,ystore0_outer_core,zstore0_outer_core,rhostore_outer_core, &
+  !time,deltat,two_omega_earth,A_array_rotationL,B_array_rotationL, &
+  !displ_outer_core,load_oc)
+
+  call poisson_load_fluidNEWFAST(nspec_outer_core,nglob_outer_core,ibool_outer_core, &
+  storederiv_oc,storerhojw_oc,time,deltat,two_omega_earth,A_array_rotationL, &
+  B_array_rotationL,displ_outer_core,load_oc)
+
+  ! infinite
+  ! this region has no contrbution
+
+  ! assemble across the regions but not MPI
+  ! crust_mantle
+  load(gdof_cm)=load(gdof_cm)+load_cm
+
+  ! outer core
+  load(gdof_oc)=load(gdof_oc)+load_oc
+
+  ! inner core
+  load(gdof_ic)=load(gdof_ic)+load_ic
+
+  ! infinite
+  ! there no contribution from infinite region
+
+  load(0)=0.0_CUSTOM_REAL
+  return
+
+  end subroutine compute_poisson_load
+
+!
+!===========================================
+!
+
+  subroutine compute_poisson_load3()
+
+  use constants_solver, only: IREGION_INNER_CORE,IREGION_CRUST_MANTLE, &
+    NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_INNER_CORE, &
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NGLOB_INNER_CORE,NGLOB_OUTER_CORE, &
+    NGLOB_CRUST_MANTLE
+  use specfem_par, only: A_array_rotationL3,B_array_rotationL3,deltat,it,DT,t0, &
+  scale_t_inv,two_omega_earth,load1,nnode_ic1,nnode_oc1,nnode_cm1,nnode_inf1
+  use specfem_par_crustmantle, only: displ_crust_mantle,ibool_crust_mantle, &
+  storederiv_cm1,storerhojw_cm1,gdof_cm1,inode_elmt_cm1
+  use specfem_par_outercore, only: displ_outer_core,ibool_outer_core, &
+  storederiv_oc1,storerhojw_oc1,gdof_oc1,inode_elmt_oc1
+  use specfem_par_innercore, only: displ_inner_core,ibool_inner_core, &
+  storederiv_ic1,storerhojw_ic1,gdof_ic1,inode_elmt_ic1
+
+  implicit none
+  real(kind=CUSTOM_REAL) :: time
+  real(kind=CUSTOM_REAL) :: load_ic(nnode_ic1),load_oc(nnode_oc1), &
+  load_cm(nnode_cm1)
+
+  !NGLLCUBE = NGLLX*NGLLY*NGLLZ
+
+  load1 = 0.0_CUSTOM_REAL
+  ! inner core
+  call poisson_load_solid3FAST(IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+                               nglob_inner_core,ibool_inner_core,storederiv_ic1,storerhojw_ic1, &
+                               displ_inner_core,nnode_ic1,inode_elmt_ic1,load_ic)
+
+  ! crust-mantle
+  call poisson_load_solid3FAST(IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+                               nglob_crust_mantle,ibool_crust_mantle,storederiv_cm1,storerhojw_cm1, &
+                               displ_crust_mantle,nnode_cm1,inode_elmt_cm1,load_cm)
+
+  ! outer core
+  ! time
+  if (CUSTOM_REAL == SIZE_REAL) then
+    time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
+  else
+    time = (dble(it-1)*DT-t0)*scale_t_inv
+  endif
+  call poisson_load_fluidNEW3FAST(nspec_outer_core,nglob_outer_core, &
+  ibool_outer_core,storederiv_oc1,storerhojw_oc1,time,deltat,two_omega_earth, &
+  A_array_rotationL3,B_array_rotationL3,displ_outer_core,nnode_oc1,inode_elmt_oc1, &
+  load_oc)
+
+  ! infinite
+  ! this region has no contrbution
+
+
+  ! assemble across the regions but not MPI
+  ! crust_mantle
+  load1(gdof_cm1)=load1(gdof_cm1)+load_cm
+
+  ! outer core
+  load1(gdof_oc1)=load1(gdof_oc1)+load_oc
+
+  ! inner core
+  load1(gdof_ic1)=load1(gdof_ic1)+load_ic
+
+
+  load1(0)=0.0_CUSTOM_REAL
+  return
+
+  end subroutine compute_poisson_load3
+
+!
+!===========================================
+!
+
+  subroutine compute_backward_poisson_load3()
+
+  use constants_solver, only: IREGION_INNER_CORE,IREGION_CRUST_MANTLE, &
+    NGLLX,NGLLY,NGLLZ,NGLLCUBE,NSPEC_INNER_CORE, &
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NGLOB_INNER_CORE,NGLOB_OUTER_CORE, &
+    NGLOB_CRUST_MANTLE
+  use specfem_par, only: b_A_array_rotationL3,b_B_array_rotationL3,deltat,it,DT,t0, &
+  scale_t_inv,two_omega_earth,b_load1,nnode_ic1,nnode_oc1,nnode_cm1,nnode_inf1,b_deltat
+  use specfem_par_crustmantle, only: b_displ_crust_mantle,ibool_crust_mantle, &
+  storederiv_cm1,storerhojw_cm1,gdof_cm1,inode_elmt_cm1
+  use specfem_par_outercore, only: b_displ_outer_core,ibool_outer_core, &
+  storederiv_oc1,storerhojw_oc1,gdof_oc1,inode_elmt_oc1
+  use specfem_par_innercore, only: b_displ_inner_core,ibool_inner_core, &
+  storederiv_ic1,storerhojw_ic1,gdof_ic1,inode_elmt_ic1
+
+  implicit none
+  real(kind=CUSTOM_REAL) :: time
+  ! local loads
+  real(kind=CUSTOM_REAL) :: b_load_ic(nnode_ic1),b_load_oc(nnode_oc1), &
+  b_load_cm(nnode_cm1)
+
+  !NGLLCUBE = NGLLX*NGLLY*NGLLZ
+
+  b_load1 = 0.0_CUSTOM_REAL
+  ! inner core
+  call poisson_load_solid3FAST(IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_inner_core, &
+                               nglob_inner_core,ibool_inner_core,storederiv_ic1,storerhojw_ic1, &
+                               b_displ_inner_core,nnode_ic1,inode_elmt_ic1,b_load_ic)
+
+  ! crust-mantle
+  call poisson_load_solid3FAST(IREGION_CRUST_MANTLE,NGLLX,NGLLY,NGLLZ,NGLLCUBE,nspec_crust_mantle, &
+                               nglob_crust_mantle,ibool_crust_mantle,storederiv_cm1,storerhojw_cm1, &
+                               b_displ_crust_mantle,nnode_cm1,inode_elmt_cm1,b_load_cm)
+
+  ! outer core
+  ! time
+  if (CUSTOM_REAL == SIZE_REAL) then
+    time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
+  else
+    time = (dble(it-1)*DT-t0)*scale_t_inv
+  endif
+
+  ! WE - unsure on whether to use b_deltat or deltat
+  ! Note that two_omega_earth is already reversed
+  call poisson_load_fluidNEW3FAST(nspec_outer_core,nglob_outer_core, &
+  ibool_outer_core,storederiv_oc1,storerhojw_oc1,time,b_deltat,two_omega_earth, &
+  b_A_array_rotationL3, b_B_array_rotationL3,b_displ_outer_core,nnode_oc1,inode_elmt_oc1, &
+  b_load_oc)
+
+  ! infinite
+  ! this region has no contrbution
+
+
+  ! assemble across the regions but not MPI
+  ! crust_mantle
+  b_load1(gdof_cm1)=b_load1(gdof_cm1)+b_load_cm
+
+  ! outer core
+  b_load1(gdof_oc1)=b_load1(gdof_oc1)+b_load_oc
+
+  ! inner core
+  b_load1(gdof_ic1)=b_load1(gdof_ic1)+b_load_ic
+
+
+  b_load1(0)=0.0_CUSTOM_REAL
+  return
+
+  end subroutine compute_backward_poisson_load3
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_solid(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,ibool, &
+                                xstore,ystore,zstore,rhostore,disp,load)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NDIM
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(in) :: rhostore(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) :: disp(NDIM,nnode)
+  real(kind=kreal),intent(out) :: load(nnode)
+
+  integer,parameter :: ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLL),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac,rho(NGLL)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX),wxgll(NGLLX),etagll(NGLLY),wygll(NGLLY), &
+  zetagll(NGLLZ),wzgll(NGLLZ)
+
+  real(kind=kdble) :: gll_weights(NGLL),gll_points(ndim,NGLL), &
+  lagrange_gll(NGLL,NGLL),dlagrange_gll(ndim,NGLL,NGLL), &
+  dshape_hex8(ndim,ngnod,NGLL)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLL),jac(ndim,ndim)
+
+  real(kind=kreal) :: edisp(NDIM,NGLL),eload(NGLL)
+
+  call zwgljd(xigll,wxgll,NGLLX,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX,NGLLY,NGLLZ,NGLL,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX,NGLLY,NGLLZ,NGLL,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = 0.0_kreal
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    rho = reshape(rhostore(:,:,:,i_elmt),(/NGLL/))
+    edisp=disp(:,egdof)
+
+    eload = zero
+    do i = 1,NGLL
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gll(:,i,:))
+      eload = eload+rho(i)*dotmat(ndim,NGLL,deriv,edisp)*detjac*gll_weights(i) !matmul(transpose(deriv),edisp)*detjac**gll_weights(i)
+    enddo
+    load(egdof)=load(egdof)+eload
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  end subroutine poisson_load_solid
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_solid3(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,ibool, &
+                                 xstore,ystore,zstore,rhostore,disp,nnode1,ibool1,load)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NDIM,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(in) :: rhostore(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) :: disp(NDIM,nnode)
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),egdof1(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac,rho(NGLLCUBE_INF)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX_INF),wxgll(NGLLX_INF),etagll(NGLLY_INF),wygll(NGLLY_INF), &
+  zetagll(NGLLZ_INF),wzgll(NGLLZ_INF)
+
+  real(kind=kdble) :: gll_weights(NGLLCUBE_INF),gll_points(ndim,NGLLCUBE_INF),lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF), &
+  dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF),dshape_hex8(ndim,ngnod,NGLLCUBE_INF)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLLCUBE_INF),jac(ndim,ndim)
+
+  real(kind=kreal) :: edisp(NDIM,NGLLCUBE_INF),eload(NGLLCUBE_INF)
+
+  call zwgljd(xigll,wxgll,NGLLX_INF,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY_INF,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ_INF,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-1
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = 0.0_kreal
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof1 = reshape(ibool(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    egdof=ibool1(:,i_elmt)
+    rho = reshape(rhostore(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    edisp=disp(:,egdof1)
+
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gll(:,i,:))
+      eload = eload+rho(i)*dotmat(ndim,NGLLCUBE_INF,deriv,edisp)*detjac*gll_weights(i) !matmul(transpose(deriv),edisp)*detjac**gll_weights(i)
+    enddo
+    load(egdof)=load(egdof)+eload
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  end subroutine poisson_load_solid3
+
+!
+!===========================================
+!
+
+!TODO: transpose can be avoided here doing so in preintegrate
+
+  subroutine poisson_load_solidFAST(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,ibool, &
+                                    storederiv,storerhojw,disp,load)
+
+  use math_library, only: dotmat
+  use gll_library1
+  use specfem_par, only: NDIM
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) ::  storederiv(NDIM,NGLL,NGLL,nelmt),storerhojw(NGLL,nelmt)
+  real(kind=kreal),intent(in) :: disp(NDIM,nnode)
+  real(kind=kreal),intent(out) :: load(nnode)
+
+  integer,parameter :: ngnod = 8
+  integer :: i,i_elmt
+  integer :: egdof(NGLL)
+  real(kind=kreal) :: deriv(ndim,NGLL),edisp(NDIM,NGLL),eload(NGLL)
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    edisp=disp(:,egdof)
+
+    eload = zero
+    do i = 1,NGLL
+      deriv=storederiv(:,:,i,i_elmt)
+      eload = eload+storerhojw(i,i_elmt)*matmul(transpose(deriv),edisp(:,i))
+      !eload=eload+storerhojw(i,i_elmt)*dotmat(ndim,NGLLCUBE_INF,deriv,edisp)
+    enddo
+    load(egdof)=load(egdof)+eload
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  return
+  end subroutine poisson_load_solidFAST
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_solid3FAST1(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode, &
+                                      ibool,storederiv,storerhojw,disp,nnode1,ibool1,load)
+
+  use math_library, only: dotmat
+  use gll_library1
+  use specfem_par, only: NDIM,lagrange_gll1
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX_INF, &
+  NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  storederiv(NDIM,NGLLCUBE_INF,NGLLCUBE_INF,nelmt), &
+  storerhojw(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) :: disp(NDIM,nnode)
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ngnod = 8
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),egdof1(NGLLCUBE_INF)
+  real(kind=kreal) :: divs,deriv(ndim,NGLLCUBE_INF),edisp(NDIM,NGLLCUBE_INF),eload(NGLLCUBE_INF)
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+
+    egdof1 = reshape(ibool(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    egdof=ibool1(:,i_elmt)
+    edisp=disp(:,egdof1)
+
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      deriv=storederiv(:,:,i,i_elmt)
+      divs = dot_product(deriv(1,:),edisp(1,:))+dot_product(deriv(2,:),edisp(2,:))+&
+      dot_product(deriv(3,:),edisp(3,:)) ! rho should be included here
+      eload = eload+storerhojw(i,i_elmt)*divs*lagrange_gll1(i,:)
+      !eload=eload+storerhojw(i,i_elmt)*dotmat(ndim,NGLLCUBE_INF,deriv,edisp)
+    enddo
+    load(egdof)=load(egdof)+eload
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  return
+  end subroutine poisson_load_solid3FAST1
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_solid3FAST(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,ibool, &
+                                     storederiv,storerhojw,disp,nnode1,ibool1,load)
+
+  use math_library, only: dotmat
+  use gll_library1
+  use specfem_par, only: NDIM
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  storederiv(NDIM,NGLLCUBE_INF,NGLLCUBE_INF,nelmt),storerhojw(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) :: disp(NDIM,nnode)
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ngnod = 8
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),egdof1(NGLLCUBE_INF)
+  real(kind=kreal) :: deriv(ndim,NGLLCUBE_INF),edisp(NDIM,NGLLCUBE_INF),eload(NGLLCUBE_INF)
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+
+    egdof1 = reshape(ibool(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    egdof=ibool1(:,i_elmt)
+    edisp=disp(:,egdof1)
+
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      deriv=storederiv(:,:,i,i_elmt)
+      eload = eload+storerhojw(i,i_elmt)*matmul(transpose(deriv),edisp(:,i))
+      !eload=eload+storerhojw(i,i_elmt)*dotmat(ndim,NGLLCUBE_INF,deriv,edisp)
+    enddo
+    load(egdof)=load(egdof)+eload
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  return
+  end subroutine poisson_load_solid3FAST
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_fluid(time,deltat,two_omega_earth,NSPEC,NGLOB, &
+                                A_array_rotation,B_array_rotation,rhostore,displfluid,load)
+
+  use constants_solver
+
+  use specfem_par, only: hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy, &
+  hprimewgll_zz,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+  minus_rho_g_over_kappa_fluid,d_ln_density_dr_table,MOVIE_VOLUME
+
+  use specfem_par_outercore, only: xix => xix_outer_core,xiy => xiy_outer_core, &
+  xiz => xiz_outer_core,etax => etax_outer_core,etay => etay_outer_core, &
+  etaz => etaz_outer_core,gammax => gammax_outer_core,gammay => gammay_outer_core, &
+  gammaz => gammaz_outer_core,ibool => ibool_outer_core
+
+  implicit none
+
+  integer :: NSPEC,NGLOB
+
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) :: time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: A_array_rotation, &
+  B_array_rotation,rhostore
+
+  ! displacement and acceleration
+  real(kind=CUSTOM_REAL),dimension(NGLOB) :: displfluid,load
+
+
+  ! local parameters
+
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+  ! for gravity
+  !integer :: int_radius
+  !double precision :: radius,theta,phi,gxl,gyl,gzl
+  !double precision :: cos_theta,sin_theta,cos_phi,sin_phi
+
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t, &
+  A_rotation,B_rotation,ux_rotation,uy_rotation,dpotentialdx_with_rot, &
+  dpotentialdy_with_rot
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A, &
+  source_euler_B
+
+  integer :: ispec,iglob
+  integer :: i,j,k,l
+
+  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl, &
+  gammazl,jacobianl
+  real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
+  real(kind=CUSTOM_REAL) :: tempx1l,tempx2l,tempx3l,sum_terms
+
+  !double precision :: grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho
+
+  !  integer :: computed_elements
+  !integer :: num_elements,ispec_p
+  !integer :: iphase
+
+  load = 0.0_CUSTOM_REAL
+  do ispec = 1,NSPEC
+    ! only compute element which belong to current phase (inner or outer elements)
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          do l = 1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+            tempx1l = tempx1l + displfluid(ibool(l,j,k,ispec)) * hprime_xx(i,l)
+            tempx2l = tempx2l + displfluid(ibool(i,l,k,ispec)) * hprime_yy(j,l)
+            tempx3l = tempx3l + displfluid(ibool(i,j,l,ispec)) * hprime_zz(k,l)
+          enddo
+
+          ! get derivatives of velocity potential with respect to x, y and z
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+          ! compute the jacobian
+          jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl)       &
+                    - xiyl*(etaxl*gammazl-etazl*gammaxl)                          &
+                    + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          dpotentialdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          dpotentialdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          dpotentialdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          ! compute contribution of rotation and add to gradient of potential
+          ! this term has no Z component
+          if (ROTATION_VAL) then
+
+            ! store the source for the Euler scheme for A_rotation and B_rotation
+            two_omega_deltat = deltat * two_omega_earth
+
+            cos_two_omega_t = cos(two_omega_earth*time)
+            sin_two_omega_t = sin(two_omega_earth*time)
+
+            ! time step deltat of Euler scheme is included in the source
+            source_euler_A(i,j,k) = two_omega_deltat                              &
+                  *(cos_two_omega_t*dpotentialdyl+sin_two_omega_t*dpotentialdxl)
+            source_euler_B(i,j,k) = two_omega_deltat                              &
+                  *(sin_two_omega_t*dpotentialdyl-cos_two_omega_t*dpotentialdxl)
+
+            A_rotation = A_array_rotation(i,j,k,ispec)
+            B_rotation = B_array_rotation(i,j,k,ispec)
+
+            ux_rotation =   A_rotation*cos_two_omega_t+B_rotation*sin_two_omega_t
+            uy_rotation = - A_rotation*sin_two_omega_t+B_rotation*cos_two_omega_t
+
+            dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+            dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+
+          else
+            dpotentialdx_with_rot = dpotentialdxl
+            dpotentialdy_with_rot = dpotentialdyl
+
+          endif  ! end of section with rotation
+
+          tempx1(i,j,k)=jacobianl*(xixl*dpotentialdx_with_rot+xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+          tempx2(i,j,k)=jacobianl*(etaxl*dpotentialdx_with_rot+etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+          tempx3(i,j,k)=jacobianl*(gammaxl*dpotentialdx_with_rot+gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+
+        enddo
+      enddo
+    enddo
+
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+          tempx1l = 0.0_CUSTOM_REAL
+          tempx2l = 0.0_CUSTOM_REAL
+          tempx3l = 0.0_CUSTOM_REAL
+
+          do l = 1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+            tempx1l = tempx1l + tempx1(l,j,k) * hprimewgll_xx(l,i)
+            tempx2l = tempx2l + tempx2(i,l,k) * hprimewgll_yy(l,j)
+            tempx3l = tempx3l + tempx3(i,j,l) * hprimewgll_zz(l,k)
+          enddo
+
+          ! sum contributions from each element to the global mesh and add gravity term
+          sum_terms =-(wgllwgll_yz(j,k)*tempx1l+wgllwgll_xz(i,k)*tempx2l+wgllwgll_xy(i,j)*tempx3l)
+
+          load(ibool(i,j,k,ispec))=load(ibool(i,j,k,ispec))+rhostore(i,j,k,ispec)*sum_terms
+
+        enddo
+      enddo
+    enddo
+
+  enddo ! ispec = 1,NSPEC spectral element loop
+  load=-4.0_CUSTOM_REAL*load
+  return
+
+  end subroutine poisson_load_fluid
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_fluid3(time,deltat,two_omega_earth,NSPEC,NGLOB, &
+                                 A_array_rotation,B_array_rotation,rhostore,displfluid,nnode1,ibool1,load)
+
+  use constants_solver
+
+  use specfem_par, only: NGLLCUBE_INF,hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx, &
+  hprimewgll_yy,hprimewgll_zz,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+  minus_rho_g_over_kappa_fluid,d_ln_density_dr_table,MOVIE_VOLUME
+
+  use specfem_par_outercore, only: xix => xix_outer_core,xiy => xiy_outer_core, &
+  xiz => xiz_outer_core,etax => etax_outer_core,etay => etay_outer_core, &
+  etaz => etaz_outer_core,gammax => gammax_outer_core,gammay => gammay_outer_core, &
+  gammaz => gammaz_outer_core,ibool => ibool_outer_core
+
+  implicit none
+
+  integer,intent(in) :: NSPEC,NGLOB,nnode1
+  integer,intent(in) :: ibool1(NGLLCUBE_INF,NSPEC)
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) :: time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: A_array_rotation, &
+  B_array_rotation,rhostore
+
+  ! displacement and acceleration
+  real(kind=CUSTOM_REAL),dimension(NGLOB) :: displfluid,load
+
+
+  ! local parameters
+
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+  ! for gravity
+  !integer :: int_radius
+  !double precision :: radius,theta,phi,gxl,gyl,gzl
+  !double precision :: cos_theta,sin_theta,cos_phi,sin_phi
+
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) two_omega_deltat,cos_two_omega_t,sin_two_omega_t, &
+  A_rotation,B_rotation,ux_rotation,uy_rotation,dpotentialdx_with_rot, &
+  dpotentialdy_with_rot
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A, &
+  source_euler_B
+
+  integer :: ispec,iglob
+  integer :: i,j,k,l
+
+  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl, &
+  gammazl,jacobianl
+  real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
+  real(kind=CUSTOM_REAL) :: tempx1l,tempx2l,tempx3l,sum_terms
+
+  !double precision :: grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho
+
+  !  integer :: computed_elements
+  !integer :: num_elements,ispec_p
+  !integer :: iphase
+
+  load = 0.0_CUSTOM_REAL
+  do ispec = 1,NSPEC
+    ! only compute element which belong to current phase (inner or outer elements)
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          do l = 1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+            tempx1l = tempx1l + displfluid(ibool(l,j,k,ispec)) * hprime_xx(i,l)
+            tempx2l = tempx2l + displfluid(ibool(i,l,k,ispec)) * hprime_yy(j,l)
+            tempx3l = tempx3l + displfluid(ibool(i,j,l,ispec)) * hprime_zz(k,l)
+          enddo
+
+          ! get derivatives of velocity potential with respect to x, y and z
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+          ! compute the jacobian
+          jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl)       &
+                    - xiyl*(etaxl*gammazl-etazl*gammaxl)                          &
+                    + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          dpotentialdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          dpotentialdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          dpotentialdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          ! compute contribution of rotation and add to gradient of potential
+          ! this term has no Z component
+          if (ROTATION_VAL) then
+
+            ! store the source for the Euler scheme for A_rotation and B_rotation
+            two_omega_deltat = deltat * two_omega_earth
+
+            cos_two_omega_t = cos(two_omega_earth*time)
+            sin_two_omega_t = sin(two_omega_earth*time)
+
+            ! time step deltat of Euler scheme is included in the source
+            source_euler_A(i,j,k) = two_omega_deltat &
+                  * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
+            source_euler_B(i,j,k) = two_omega_deltat &
+                  * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
+
+            A_rotation = A_array_rotation(i,j,k,ispec)
+            B_rotation = B_array_rotation(i,j,k,ispec)
+
+            ux_rotation =   A_rotation*cos_two_omega_t+B_rotation*sin_two_omega_t
+            uy_rotation = - A_rotation*sin_two_omega_t+B_rotation*cos_two_omega_t
+
+            dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+            dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+
+          else
+            dpotentialdx_with_rot = dpotentialdxl
+            dpotentialdy_with_rot = dpotentialdyl
+
+          endif  ! end of section with rotation
+
+          tempx1(i,j,k) = jacobianl*(xixl*dpotentialdx_with_rot+xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+          tempx2(i,j,k) = jacobianl*(etaxl*dpotentialdx_with_rot+etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+          tempx3(i,j,k) = jacobianl*(gammaxl*dpotentialdx_with_rot+gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+
+        enddo
+      enddo
+    enddo
+
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+          tempx1l = 0.0_CUSTOM_REAL
+          tempx2l = 0.0_CUSTOM_REAL
+          tempx3l = 0.0_CUSTOM_REAL
+
+          do l = 1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+            tempx1l = tempx1l + tempx1(l,j,k) * hprimewgll_xx(l,i)
+            tempx2l = tempx2l + tempx2(i,l,k) * hprimewgll_yy(l,j)
+            tempx3l = tempx3l + tempx3(i,j,l) * hprimewgll_zz(l,k)
+          enddo
+
+          ! sum contributions from each element to the global mesh and add gravity term
+          sum_terms = - (wgllwgll_yz(j,k)*tempx1l + wgllwgll_xz(i,k)*tempx2l + wgllwgll_xy(i,j)*tempx3l)
+
+          load(ibool(i,j,k,ispec)) = load(ibool(i,j,k,ispec)) + rhostore(i,j,k,ispec)*sum_terms
+
+        enddo
+      enddo
+    enddo
+
+  enddo ! ispec = 1,NSPEC spectral element loop
+  load=-4.0_CUSTOM_REAL*load
+  return
+
+  end subroutine poisson_load_fluid3
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_fluidNEW(nelmt,nnode,ibool,xstore,ystore,zstore, &
+                                   rhostore,time,deltat,two_omega_earth,A_array_rot,B_array_rot,dispf,load)
+
+  use specfem_par, only: 
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLL,ROTATION_VAL
+
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+
+  implicit none
+
+  integer,intent(in) :: nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) :: xstore(nnode),ystore(nnode),zstore(nnode), &
+  rhostore(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=CUSTOM_REAL),intent(in) :: time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL),dimension(NGLL,nelmt),intent(inout) :: A_array_rot, &
+  B_array_rot
+  real(kind=kreal),intent(in) :: dispf(1,nnode) !\chi
+  real(kind=kreal),intent(out) :: load(nnode)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLL),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac(NGLL),rho(NGLL)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX),wxgll(NGLLX),etagll(NGLLY),wygll(NGLLY), &
+  zetagll(NGLLZ),wzgll(NGLLZ)
+
+  real(kind=kdble) :: gll_weights(NGLL),gll_points(ndim,NGLL), &
+  lagrange_gll(NGLL,NGLL),dlagrange_gll(ndim,NGLL,NGLL), &
+  dshape_hex8(ndim,ngnod,NGLL)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLL,NGLL),jac(ndim,ndim)
+
+  real(kind=kreal) :: echi(NGLL,1),edisp(ndim,NGLL),eload(NGLL),gradchi(ndim,1)
+  real(kind=kreal) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rot, &
+  B_rot,ux_rot,uy_rot
+  real(kind=kreal),dimension(NGLL) :: source_euler_A,source_euler_B
+
+  call zwgljd(xigll,wxgll,NGLLX,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX,NGLLY,NGLLZ,NGLL,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX,NGLLY,NGLLZ,NGLL,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-1
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = zero
+  do i_elmt = 1,nelmt
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    rho = reshape(rhostore(:,:,:,i_elmt),(/NGLL/))
+    echi(:,1)=dispf(1,egdof)
+
+    ! compute diaplacement
+    do i = 1,NGLL
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac(i)=determinant(jac)
+      call invert(jac)
+      deriv(:,:,i)=matmul(jac,dlagrange_gll(:,i,:))
+      gradchi = matmul(deriv(:,:,i),echi)
+
+      edisp(:,i)=gradchi(:,1)
+      ! compute contribution of rotation and add to gradient of potential
+      ! this term has no Z component
+      if (ROTATION_VAL) then
+        ! store the source for the Euler scheme for A_rotation and B_rotation
+        two_omega_deltat = deltat*two_omega_earth
+
+        cos_two_omega_t = cos(two_omega_earth*time)
+        sin_two_omega_t = sin(two_omega_earth*time)
+
+        ! time step deltat of Euler scheme is included in the source
+        source_euler_A(i)=two_omega_deltat*(cos_two_omega_t*gradchi(2,1)+         &
+                              sin_two_omega_t*gradchi(1,1))
+        source_euler_B(i)=two_omega_deltat*(sin_two_omega_t*gradchi(2,1)-         &
+                              cos_two_omega_t*gradchi(1,1))
+
+        A_rot=A_array_rot(i,i_elmt)
+        B_rot=B_array_rot(i,i_elmt)
+
+        ux_rot = A_rot*cos_two_omega_t+B_rot*sin_two_omega_t
+        uy_rot=-A_rot*sin_two_omega_t+B_rot*cos_two_omega_t
+
+        edisp(1,i)=edisp(1,i)+ux_rot
+        edisp(2,i)=edisp(2,i)+uy_rot
+      endif
+    enddo
+
+    ! integration
+    eload = zero
+    do i = 1,NGLL
+      eload = eload+rho(i)*dotmat(ndim,NGLL,deriv(:,:,i),edisp)*detjac(i)*gll_weights(i)
+    enddo
+    load(egdof)=load(egdof)+eload
+
+    ! update rotation term with Euler scheme
+    if (ROTATION_VAL) then
+      ! use the source saved above
+      A_array_rot(:,i_elmt) = A_array_rot(:,i_elmt) + source_euler_A
+      B_array_rot(:,i_elmt) = B_array_rot(:,i_elmt) + source_euler_B
+    endif
+
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  end subroutine poisson_load_fluidNEW
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_fluidNEW3(nelmt,nnode,ibool,xstore,ystore,zstore, &
+                                    rhostore,time,deltat,two_omega_earth,A_array_rot,B_array_rot,dispf,nnode1, &
+                                    ibool1,load)
+
+  use specfem_par, only: 
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLL,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF, &
+  ROTATION_VAL
+
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+
+  implicit none
+
+  integer,intent(in) :: nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) :: xstore(nnode),ystore(nnode),zstore(nnode), &
+  rhostore(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=CUSTOM_REAL),intent(in) :: time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL),dimension(NGLLCUBE_INF,nelmt),intent(inout) :: A_array_rot, &
+  B_array_rot
+  real(kind=kreal),intent(in) :: dispf(1,nnode) !\chi
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),egdof1(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac(NGLLCUBE_INF),rho(NGLLCUBE_INF)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX_INF),wxgll(NGLLX_INF),etagll(NGLLY_INF),wygll(NGLLY_INF), &
+  zetagll(NGLLZ_INF),wzgll(NGLLZ_INF)
+
+  real(kind=kdble) :: gll_weights(NGLLCUBE_INF),gll_points(ndim,NGLLCUBE_INF), &
+  lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF),dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF), &
+  dshape_hex8(ndim,ngnod,NGLLCUBE_INF)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLLCUBE_INF,NGLLCUBE_INF),jac(ndim,ndim)
+
+  real(kind=kreal) :: echi(NGLLCUBE_INF,1),edisp(ndim,NGLLCUBE_INF),eload(NGLLCUBE_INF),gradchi(ndim,1)
+  real(kind=kreal) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rot, &
+  B_rot,ux_rot,uy_rot
+  real(kind=kreal),dimension(NGLLCUBE_INF) :: source_euler_A,source_euler_B
+
+  call zwgljd(xigll,wxgll,NGLLX_INF,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY_INF,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ_INF,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-1
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = zero
+  do i_elmt = 1,nelmt
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/))
+    ! this is wrong
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof1 = reshape(ibool(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    egdof=ibool1(:,i_elmt)
+    rho = reshape(rhostore(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    echi(:,1)=dispf(1,egdof1)
+
+    ! compute diaplacement
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac(i)=determinant(jac)
+      call invert(jac)
+      deriv(:,:,i)=matmul(jac,dlagrange_gll(:,i,:))
+      gradchi = matmul(deriv(:,:,i),echi)
+
+      edisp(:,i)=gradchi(:,1)
+      ! compute contribution of rotation and add to gradient of potential
+      ! this term has no Z component
+      if (ROTATION_VAL) then
+        ! store the source for the Euler scheme for A_rotation and B_rotation
+        two_omega_deltat = deltat*two_omega_earth
+
+        cos_two_omega_t = cos(two_omega_earth*time)
+        sin_two_omega_t = sin(two_omega_earth*time)
+
+        ! time step deltat of Euler scheme is included in the source
+        source_euler_A(i)=two_omega_deltat*(cos_two_omega_t*gradchi(2,1)+         &
+                              sin_two_omega_t*gradchi(1,1))
+        source_euler_B(i)=two_omega_deltat*(sin_two_omega_t*gradchi(2,1)-         &
+                              cos_two_omega_t*gradchi(1,1))
+
+        A_rot=A_array_rot(i,i_elmt)
+        B_rot=B_array_rot(i,i_elmt)
+
+        ux_rot = A_rot*cos_two_omega_t+B_rot*sin_two_omega_t
+        uy_rot=-A_rot*sin_two_omega_t+B_rot*cos_two_omega_t
+
+        edisp(1,i)=edisp(1,i)+ux_rot
+        edisp(2,i)=edisp(2,i)+uy_rot
+      endif
+    enddo
+
+    ! integration
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      eload = eload+rho(i)*dotmat(ndim,NGLLCUBE_INF,deriv(:,:,i),edisp)*detjac(i)*gll_weights(i)
+    enddo
+    load(egdof)=load(egdof)+eload
+
+    ! update rotation term with Euler scheme
+    if (ROTATION_VAL) then
+      ! use the source saved above
+      A_array_rot(:,i_elmt) = A_array_rot(:,i_elmt) + source_euler_A
+      B_array_rot(:,i_elmt) = B_array_rot(:,i_elmt) + source_euler_B
+    endif
+
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  end subroutine poisson_load_fluidNEW3
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_fluidNEWFAST(nelmt,nnode,ibool,storederiv,storerhojw, &
+                                       time,deltat,two_omega_earth,A_array_rot,B_array_rot,dispf,load)
+
+  use math_library, only: dotmat
+  use gll_library1
+  use specfem_par, only: NDIM
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLL,ROTATION_VAL
+
+  implicit none
+
+  integer,intent(in) :: nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) :: storederiv(NDIM,NGLL,NGLL,nelmt), &
+  storerhojw(NGLL,nelmt)
+  real(kind=CUSTOM_REAL),intent(in) :: time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL),dimension(NGLL,nelmt),intent(inout) :: A_array_rot, &
+  B_array_rot
+  real(kind=kreal),intent(in) :: dispf(1,nnode) !\chi
+  real(kind=kreal),intent(out) :: load(nnode)
+
+  integer,parameter :: ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLL),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac(NGLL),rho(NGLL)
+  real(kind=kreal) :: deriv(ndim,NGLL,NGLL),echi(NGLL,1),edisp(ndim,NGLL), &
+  eload(NGLL),gradchi(ndim,1)
+  real(kind=kreal) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rot, &
+  B_rot,ux_rot,uy_rot
+  real(kind=kreal),dimension(NGLL) :: source_euler_A,source_euler_B
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  load = zero
+  do i_elmt = 1,nelmt
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    echi(:,1)=dispf(1,egdof)
+
+    ! compute diaplacement
+    do i = 1,NGLL
+      deriv(:,:,i)=storederiv(:,:,i,i_elmt)
+      gradchi = matmul(deriv(:,:,i),echi)
+
+      edisp(:,i)=gradchi(:,1)
+      ! compute contribution of rotation and add to gradient of potential
+      ! this term has no Z component
+      if (ROTATION_VAL) then
+        ! store the source for the Euler scheme for A_rotation and B_rotation
+        two_omega_deltat = deltat*two_omega_earth
+
+        cos_two_omega_t = cos(two_omega_earth*time)
+        sin_two_omega_t = sin(two_omega_earth*time)
+
+        ! time step deltat of Euler scheme is included in the source
+        source_euler_A(i)=two_omega_deltat*(cos_two_omega_t*gradchi(2,1)+         &
+                              sin_two_omega_t*gradchi(1,1))
+        source_euler_B(i)=two_omega_deltat*(sin_two_omega_t*gradchi(2,1)-         &
+                              cos_two_omega_t*gradchi(1,1))
+
+        A_rot=A_array_rot(i,i_elmt)
+        B_rot=B_array_rot(i,i_elmt)
+
+        ux_rot = A_rot*cos_two_omega_t+B_rot*sin_two_omega_t
+        uy_rot=-A_rot*sin_two_omega_t+B_rot*cos_two_omega_t
+
+        edisp(1,i)=edisp(1,i)+ux_rot
+        edisp(2,i)=edisp(2,i)+uy_rot
+      endif
+    enddo
+
+    ! integration
+    eload = zero
+    do i = 1,NGLL
+      eload = eload+storerhojw(i,i_elmt)*matmul(transpose(deriv(:,:,i)),edisp(:,i))
+      !eload=eload+storerhojw(i,i_elmt)*dotmat(ndim,NGLLCUBE_INF,deriv(:,:,i),edisp)
+    enddo
+    load(egdof)=load(egdof)+eload
+
+    ! update rotation term with Euler scheme
+    if (ROTATION_VAL) then
+      ! use the source saved above
+      A_array_rot(:,i_elmt) = A_array_rot(:,i_elmt) + source_euler_A
+      B_array_rot(:,i_elmt) = B_array_rot(:,i_elmt) + source_euler_B
+    endif
+
+  enddo !i_elmt = 1,nelmt
+
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  return
+  end subroutine poisson_load_fluidNEWFAST
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_fluidNEW3FAST(nelmt,nnode,ibool,storederiv,storerhojw, &
+                                        time,deltat,two_omega_earth,A_array_rot,B_array_rot,dispf,nnode1,ibool1,load)
+
+  use math_library, only: dotmat
+  use gll_library1
+  use specfem_par, only: NDIM
+  use constants_solver, only: NGLLX,NGLLY,NGLLZ,NGLL,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF, &
+  ROTATION_VAL
+
+  implicit none
+
+  integer,intent(in) :: nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) :: storederiv(NDIM,NGLLCUBE_INF,NGLLCUBE_INF,nelmt),storerhojw(NGLLCUBE_INF,nelmt)
+  real(kind=CUSTOM_REAL),intent(in) :: time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL),dimension(NGLLCUBE_INF,nelmt),intent(inout) :: A_array_rot, &
+  B_array_rot
+  real(kind=kreal),intent(in) :: dispf(1,nnode) !\chi
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),egdof1(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac(NGLLCUBE_INF),rho(NGLLCUBE_INF)
+  !real(kind=kdble) :: gll_weights(NGLLCUBE_INF),gll_points(ndim,NGLLCUBE_INF),lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF), &
+  !dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF),dshape_hex8(ndim,ngnod,NGLLCUBE_INF)
+  real(kind=kreal) :: deriv(ndim,NGLLCUBE_INF,NGLLCUBE_INF),echi(NGLLCUBE_INF,1),edisp(ndim,NGLLCUBE_INF), &
+  eload(NGLLCUBE_INF),gradchi(ndim,1)
+  real(kind=kreal) :: two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rot,B_rot, &
+  ux_rot,uy_rot
+  real(kind=kreal),dimension(NGLLCUBE_INF) :: source_euler_A,source_euler_B
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  !! compute gauss-lobatto-legendre quadrature information
+  !call gll_quadrature(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points,gll_weights, &
+  !lagrange_gll,dlagrange_gll)
+
+  load = zero
+  do i_elmt = 1,nelmt
+    egdof1 = reshape(ibool(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+    egdof=ibool1(:,i_elmt)
+    echi(:,1)=dispf(1,egdof1)
+
+    ! compute diaplacement
+    do i = 1,NGLLCUBE_INF
+      deriv(:,:,i)=storederiv(:,:,i,i_elmt)
+      gradchi = matmul(deriv(:,:,i),echi)
+
+      edisp(:,i)=gradchi(:,1)
+      ! compute contribution of rotation and add to gradient of potential
+      ! this term has no Z component
+      if (ROTATION_VAL) then
+        ! store the source for the Euler scheme for A_rotation and B_rotation
+        two_omega_deltat = deltat*two_omega_earth
+
+        cos_two_omega_t = cos(two_omega_earth*time)
+        sin_two_omega_t = sin(two_omega_earth*time)
+
+        ! time step deltat of Euler scheme is included in the source
+        source_euler_A(i)=two_omega_deltat*(cos_two_omega_t*gradchi(2,1)+         &
+                              sin_two_omega_t*gradchi(1,1))
+        source_euler_B(i)=two_omega_deltat*(sin_two_omega_t*gradchi(2,1)-         &
+                              cos_two_omega_t*gradchi(1,1))
+
+        A_rot=A_array_rot(i,i_elmt)
+        B_rot=B_array_rot(i,i_elmt)
+
+        ux_rot = A_rot*cos_two_omega_t+B_rot*sin_two_omega_t
+        uy_rot=-A_rot*sin_two_omega_t+B_rot*cos_two_omega_t
+
+        edisp(1,i)=edisp(1,i)+ux_rot
+        edisp(2,i)=edisp(2,i)+uy_rot
+      endif
+    enddo
+
+    ! integration
+    eload = zero
+    do i = 1,NGLLCUBE_INF
+      eload = eload+storerhojw(i,i_elmt)*matmul(transpose(deriv(:,:,i)),edisp(:,i))
+      !eload=eload+storerhojw(i,i_elmt)*dotmat(ndim,NGLLCUBE_INF,deriv(:,:,i),edisp)
+    enddo
+    load(egdof)=load(egdof)+eload
+
+    ! update rotation term with Euler scheme
+    if (ROTATION_VAL) then
+      ! use the source saved above
+      A_array_rot(:,i_elmt) = A_array_rot(:,i_elmt) + source_euler_A
+      B_array_rot(:,i_elmt) = B_array_rot(:,i_elmt) + source_euler_B
+    endif
+
+  enddo !i_elmt = 1,nelmt
+
+  ! multiply by 4*PI*G! or scaled
+  load=-4.0_kreal*load
+  return
+  end subroutine poisson_load_fluidNEW3FAST
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_onlyrho(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode, &
+                                  ibool,xstore,ystore,zstore,rhostore,load)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(in) :: rhostore(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(out) :: load(nnode)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLL),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac,eld(NGLL),rho(NGLL)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX),wxgll(NGLLX),etagll(NGLLY),wygll(NGLLY), &
+  zetagll(NGLLZ),wzgll(NGLLZ)
+
+  real(kind=kdble) :: gll_weights(NGLL),gll_points(ndim,NGLL),lagrange_gll(NGLL,NGLL), &
+  dlagrange_gll(ndim,NGLL,NGLL),dshape_hex8(ndim,ngnod,NGLL)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLL),jac(ndim,ndim)
+
+  call zwgljd(xigll,wxgll,NGLLX,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX,NGLLY,NGLLZ,NGLL,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX,NGLLY,NGLLZ,NGLL,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-1
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    rho = reshape(rhostore(:,:,:,i_elmt),(/NGLL/))
+
+    eld = zero
+    do i = 1,NGLL
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      eld = eld+rho(i)*lagrange_gll(i,:)*detjac*gll_weights(i)
+    enddo
+    load(egdof)=load(egdof)+eld
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load = 4.0_kreal*load
+  end subroutine poisson_load_onlyrho
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_onlyrho3(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode, &
+                                   ibool,xstore,ystore,zstore,rhostore,nnode1,ibool1,load)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(in) :: rhostore(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac,eld(NGLLCUBE_INF),rho(NGLLCUBE_INF)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX_INF),wxgll(NGLLX_INF),etagll(NGLLY_INF),wygll(NGLLY_INF), &
+  zetagll(NGLLZ_INF),wzgll(NGLLZ_INF)
+
+  real(kind=kdble) :: gll_weights(NGLLCUBE_INF),gll_points(ndim,NGLLCUBE_INF),lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF), &
+  dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF),dshape_hex8(ndim,ngnod,NGLLCUBE_INF)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLLCUBE_INF),jac(ndim,ndim)
+
+  call zwgljd(xigll,wxgll,NGLLX_INF,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY_INF,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ_INF,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !dnx=NGLLX-1; dny=NGLLY-1; dnz=NGLLZ-1
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);             ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);     ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = ibool1(:,i_elmt) !reshape(ibool1(:,:,:,i_elmt),(/NGLLCUBE_INF/))
+    rho = reshape(rhostore(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/)) !WARNING: ONLY for 5 to 3 GLL extraction
+
+    eld = zero
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      eld = eld+rho(i)*lagrange_gll(i,:)*detjac*gll_weights(i)
+    enddo
+    load(egdof)=load(egdof)+eld
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load = 4.0_kreal*load
+  end subroutine poisson_load_onlyrho3
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_onlyrhoFAST(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode, &
+                                      ibool,storerhojw,load)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) :: storerhojw(NGLL,nelmt)
+  real(kind=kreal),intent(out) :: load(nnode)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLL),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: eld(NGLL)
+
+  real(kind=kdble) :: gll_weights(NGLL),gll_points(ndim,NGLL),lagrange_gll(NGLL,NGLL), &
+  dlagrange_gll(ndim,NGLL,NGLL),dshape_hex8(ndim,ngnod,NGLL)
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX,NGLLY,NGLLZ,NGLL,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    eld = zero
+    do i = 1,NGLL
+      eld = eld+storerhojw(i,i_elmt)*lagrange_gll(i,:)
+    enddo
+    load(egdof)=load(egdof)+eld
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load = 4.0_kreal*load
+  end subroutine poisson_load_onlyrhoFAST
+
+!
+!===========================================
+!
+
+  subroutine poisson_load_onlyrhoFAST3(iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode, &
+                                       ibool,storerhojw,nnode1,ibool1,load)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+  use gll_library1
+  use math_library, only: determinant,dotmat,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  implicit none
+
+  integer,intent(in) :: iregion,NGLLX,NGLLY,NGLLZ,NGLL,nelmt,nnode,nnode1
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt),ibool1(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(in) ::  storerhojw(NGLLCUBE_INF,nelmt)
+  real(kind=kreal),intent(out) :: load(nnode1)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: egdof(NGLLCUBE_INF),ignod(ngnod) !dnx,dny,dnz,
+  real(kind=kreal) :: detjac,eld(NGLLCUBE_INF),rho(NGLLCUBE_INF)
+
+  real(kind=kdble) :: gll_weights(NGLLCUBE_INF),gll_points(ndim,NGLLCUBE_INF),lagrange_gll(NGLLCUBE_INF,NGLLCUBE_INF), &
+  dlagrange_gll(ndim,NGLLCUBE_INF,NGLLCUBE_INF),dshape_hex8(ndim,ngnod,NGLLCUBE_INF)
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  !TODO: can store deriv, and detjac*gll_weights(i) for speeding up
+  load = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    egdof = ibool1(:,i_elmt) !reshape(ibool1(:,:,:,i_elmt),(/NGLLCUBE_INF/))
+    eld = zero
+    do i = 1,NGLLCUBE_INF
+      eld = eld+storerhojw(i,i_elmt)*lagrange_gll(i,:)
+    enddo
+    load(egdof)=load(egdof)+eld
+  enddo
+  ! multiply by 4*PI*G! or scaled
+  load = 4.0_kreal*load
+  end subroutine poisson_load_onlyrhoFAST3
+
+!
+!===========================================
+!
+
+  subroutine poisson_gravity(iregion,nelmt,nnode,ibool,xstore,ystore,zstore,pgrav, &
+                             gradphi)
+
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE,IREGION_INNER_CORE,NGLLX,NGLLY,NGLLZ,NGLL
+  use gll_library1
+  use math_library, only: determinant,invert
+  use specfem_par_innercore, only: idoubling_inner_core
+  !use specfem_par_crustmantle, only: rmassz_crust_mantle !TODO: remove this
+  implicit none
+  integer,intent(in) :: iregion,nelmt,nnode
+  integer,intent(in) :: ibool(NGLLX,NGLLY,NGLLZ,nelmt)
+  real(kind=kreal),intent(in) ::  xstore(nnode),ystore(nnode),zstore(nnode)
+  real(kind=kreal),intent(in) :: pgrav(nnode)
+  real(kind=kreal),intent(out) :: gradphi(3,NGLL,nelmt)
+
+  integer,parameter :: ndim = 3,ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: dnx,dny,dnz,egdof(NGLL),ignod(ngnod)
+  real(kind=kreal) :: detjac
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+  real(kind=kdble) :: xigll(NGLLX),wxgll(NGLLX),etagll(NGLLY),wygll(NGLLY), &
+  zetagll(NGLLZ),wzgll(NGLLZ)
+  real(kind=kdble) :: dshape_hex8(ndim,ngnod,NGLL),gll_weights(NGLL), &
+  gll_points(ndim,NGLL),lagrange_gll(NGLL,NGLL),dlagrange_gll(ndim,NGLL,NGLL)
+
+  real(kind=kreal) :: coord(ngnod,ndim),deriv(ndim,NGLL),jac(ndim,ndim)
+
+  call zwgljd(xigll,wxgll,NGLLX,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ,jalpha,jbeta)
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(ndim,ngnod,NGLLX,NGLLY,NGLLZ,NGLL,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(ndim,NGLLX,NGLLY,NGLLZ,NGLL,gll_points,gll_weights, &
+  lagrange_gll,dlagrange_gll)
+
+  dnx = NGLLX-1; dny = NGLLY-1; dnz = NGLLZ-1
+  gradphi = zero
+  do i_elmt = 1,nelmt
+    ! suppress fictitious elements in central cube
+    if (iregion == IREGION_INNER_CORE) then
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    endif
+    !ignod=reshape(ibool(1:NGLLX:dnx,1:NGLLY:dny,1:NGLLZ:dnz,i_elmt),(/ngnod/)) ! this is wrong!!!!
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool(1,1,1,i_elmt);               ignod(2)=ibool(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool(NGLLX,NGLLY,1,i_elmt);       ignod(4)=ibool(1,NGLLY,1,i_elmt)
+    ! top corner nodes
+    ignod(5)=ibool(1,1,NGLLZ,i_elmt);         ignod(6)=ibool(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool(NGLLX,NGLLY,NGLLZ,i_elmt); ignod(8)=ibool(1,NGLLY,NGLLZ,i_elmt)
+    coord(:,1)=xstore(ignod)
+    coord(:,2)=ystore(ignod)
+    coord(:,3)=zstore(ignod)
+    egdof = reshape(ibool(:,:,:,i_elmt),(/NGLL/))
+    do i = 1,NGLL
+      jac = matmul(dshape_hex8(:,:,i),coord) !jac = matmul(der,coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      deriv = matmul(jac,dlagrange_gll(:,i,:)) ! use der for gll
+      gradphi(:,i,i_elmt)=matmul(deriv,pgrav(egdof))
+    enddo
+
+  enddo
+  end subroutine poisson_gravity
+
+
+end module poisson
+
+#endif
diff --git a/src/specfem3D/SIEM_prepare_solver.F90 b/src/specfem3D/SIEM_prepare_solver.F90
new file mode 100644
index 000000000..178072f0c
--- /dev/null
+++ b/src/specfem3D/SIEM_prepare_solver.F90
@@ -0,0 +1,1196 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine SIEM_prepare_solver()
+
+  use specfem_par
+  implicit none
+
+  ! check if anything to do
+  if (.not. FULL_GRAVITY) return
+
+  ! safety stop
+  stop 'FULL_GRAVITY not fully implemented yet'
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+  ! compute and store integration coefficients
+  call SIEM_prepare_solver_preintegrate3()
+  call SIEM_prepare_solver_preintegrate()
+
+  ! sets the stiffness matrices for Poisson's solver
+  ! calculate dprecon
+  ! allocates load, regional pgravs (e.g. pgrav_cm1)
+  call SIEM_prepare_solver_poisson()
+
+  ! create sparse matrix
+  if (SOLVER == PETSC) call SIEM_prepare_solver_sparse()
+
+#endif
+
+  end subroutine SIEM_prepare_solver
+
+!
+!-------------------------------------------------------------------------------
+!
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+  subroutine prepare_solver_preintegrate()
+
+  use specfem_par, only: CUSTOM_REAL,myrank,NDIM,NGLLX,NGLLY,NGLLZ,NGLLCUBE
+
+  use specfem_par_crustmantle, only: ibool_crust_mantle,NSPEC_CRUST_MANTLE, &
+    xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+    rhostore_crust_mantle
+
+  use specfem_par_innercore, only: idoubling_inner_core,IFLAG_IN_FICTITIOUS_CUBE, &
+    ibool_inner_core,NSPEC_INNER_CORE,xstore_inner_core,ystore_inner_core, &
+    zstore_inner_core,rhostore_inner_core
+
+  use specfem_par_outercore, only: ibool_outer_core,NSPEC_OUTER_CORE, &
+    xstore_outer_core,ystore_outer_core,zstore_outer_core,rhostore_outer_core
+
+  !use specfem_par_infinite
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE
+
+  use gll_library1, only: kdble,zwgljd,dshape_function_hex8,gll_quadrature
+  use math_library, only: determinant,invert
+
+  use specfem_par_innercore, only: idoubling_inner_core
+
+  use specfem_par_full_gravity, only: lagrange_gll, &
+    storederiv_cm,storerhojw_cm, storedetjac_cm, SAVE_JACOBIAN_ENSIGHT, &
+    storederiv_ic,storerhojw_ic, &
+    storederiv_oc,storerhojw_oc
+
+  implicit none
+
+  integer,parameter :: ngnod = 8
+
+  integer :: i,i_elmt, j, k
+  integer :: ignod(ngnod) !dnx,dny,dnz,
+  real(kind=CUSTOM_REAL) :: detjac,rho(NGLLCUBE)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+
+  real(kind=kdble) :: xigll(NGLLX),wxgll(NGLLX),etagll(NGLLY),wygll(NGLLY), &
+                      zetagll(NGLLZ),wzgll(NGLLZ), detjac_cm_tmp(NGLLCUBE), element_detjac(NGLLX,NGLLY,NGLLZ)
+
+  real(kind=kdble) :: gll_weights(NGLLCUBE),gll_points(NDIM,NGLLCUBE), &
+                      dlagrange_gll(NDIM,NGLLCUBE,NGLLCUBE),dshape_hex8(NDIM,ngnod,NGLLCUBE)
+
+  real(kind=CUSTOM_REAL) :: coord(ngnod,NDIM),deriv(NDIM,NGLLCUBE),jac(NDIM,NDIM)
+
+  call zwgljd(xigll,wxgll,NGLLX,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(NDIM,ngnod,NGLLX,NGLLY,NGLLZ,NGLLCUBE,xigll,etagll, &
+                            zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(NDIM,NGLLX,NGLLY,NGLLZ,NGLLCUBE,gll_points,gll_weights, &
+                      lagrange_gll,dlagrange_gll)
+
+  ! inner core
+  storederiv_ic = 0.0_CUSTOM_REAL
+  storerhojw_ic = 0.0_CUSTOM_REAL
+  do i_elmt = 1,NSPEC_INNER_CORE
+    ! suppress fictitious elements in central cube
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1) = ibool_inner_core(1,1,1,i_elmt)
+    ignod(2) = ibool_inner_core(NGLLX,1,1,i_elmt)
+    ignod(3) = ibool_inner_core(NGLLX,NGLLY,1,i_elmt)
+    ignod(4) = ibool_inner_core(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5) = ibool_inner_core(1,1,NGLLZ,i_elmt)
+    ignod(6) = ibool_inner_core(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7) = ibool_inner_core(NGLLX,NGLLY,NGLLZ,i_elmt)
+    ignod(8) = ibool_inner_core(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore_inner_core(ignod)
+    coord(:,2)=ystore_inner_core(ignod)
+    coord(:,3)=zstore_inner_core(ignod)
+    rho = reshape(rhostore_inner_core(:,:,:,i_elmt),(/NGLLCUBE/))
+
+    do i = 1,NGLLCUBE
+      jac = matmul(dshape_hex8(:,:,i),coord)
+      detjac = determinant(jac)
+      call invert(jac)
+      storederiv_ic(:,:,i,i_elmt) = matmul(jac,dlagrange_gll(:,i,:))
+      storerhojw_ic(i,i_elmt) = rho(i)*detjac*gll_weights(i)
+      enddo
+  enddo
+
+  ! outer core
+  storederiv_oc = 0.0_CUSTOM_REAL
+  storerhojw_oc = 0.0_CUSTOM_REAL
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1) = ibool_outer_core(1,1,1,i_elmt)
+    ignod(2) = ibool_outer_core(NGLLX,1,1,i_elmt)
+    ignod(3) = ibool_outer_core(NGLLX,NGLLY,1,i_elmt)
+    ignod(4) = ibool_outer_core(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5) = ibool_outer_core(1,1,NGLLZ,i_elmt)
+    ignod(6) = ibool_outer_core(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7) = ibool_outer_core(NGLLX,NGLLY,NGLLZ,i_elmt)
+    ignod(8) = ibool_outer_core(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1) = xstore_outer_core(ignod)
+    coord(:,2) = ystore_outer_core(ignod)
+    coord(:,3) = zstore_outer_core(ignod)
+    rho = reshape(rhostore_outer_core(:,:,:,i_elmt),(/NGLLCUBE/))
+
+    do i = 1,NGLLCUBE
+      jac = matmul(dshape_hex8(:,:,i),coord)
+      detjac = determinant(jac)
+      call invert(jac)
+      storederiv_oc(:,:,i,i_elmt) = matmul(jac,dlagrange_gll(:,i,:))
+      storerhojw_oc(i,i_elmt) = rho(i)*detjac*gll_weights(i)
+    enddo
+  enddo
+
+  ! crust mantle
+  storederiv_cm = 0.0_CUSTOM_REAL
+  storerhojw_cm = 0.0_CUSTOM_REAL
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool_crust_mantle(1,1,1,i_elmt)
+    ignod(2)=ibool_crust_mantle(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool_crust_mantle(NGLLX,NGLLY,1,i_elmt)
+    ignod(4)=ibool_crust_mantle(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool_crust_mantle(1,1,NGLLZ,i_elmt)
+    ignod(6)=ibool_crust_mantle(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool_crust_mantle(NGLLX,NGLLY,NGLLZ,i_elmt)
+    ignod(8)=ibool_crust_mantle(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore_crust_mantle(ignod)
+    coord(:,2)=ystore_crust_mantle(ignod)
+    coord(:,3)=zstore_crust_mantle(ignod)
+    rho = reshape(rhostore_crust_mantle(:,:,:,i_elmt),(/NGLLCUBE/))
+
+    do i = 1,NGLLCUBE
+      jac = matmul(dshape_hex8(:,:,i),coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      storederiv_cm(:,:,i,i_elmt)=matmul(jac,dlagrange_gll(:,i,:))
+      storerhojw_cm(i,i_elmt)=rho(i)*detjac*gll_weights(i)
+      detjac_cm_tmp(i) = detjac
+    enddo
+
+    if (SAVE_JACOBIAN_ENSIGHT) then
+      element_detjac(:,:,:) = reshape(detjac_cm_tmp(:), (/NGLLX,NGLLY,NGLLZ/) )
+      do k = 1, NGLLZ
+        do j = 1, NGLLY
+          do i = 1, NGLLX
+            storedetjac_cm(ibool_crust_mantle(i,j,k,i_elmt)) = element_detjac(i,j,k)
+          enddo
+        enddo
+      enddo
+    endif
+
+  enddo
+
+  end subroutine prepare_solver_preintegrate
+
+!
+!-------------------------------------------------------------------------------
+!
+
+  subroutine prepare_solver_preintegrate3()
+
+  use specfem_par, only: CUSTOM_REAL,NDIM,NGLLX,NGLLY,NGLLZ,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF
+
+  use specfem_par_crustmantle, only: ibool_crust_mantle,NSPEC_CRUST_MANTLE, &
+    xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+    rhostore_crust_mantle,storederiv_cm1,storerhojw_cm1,storejw_cm1
+
+  use specfem_par_innercore, only: idoubling_inner_core,IFLAG_IN_FICTITIOUS_CUBE, &
+    ibool_inner_core,NSPEC_INNER_CORE,xstore_inner_core,ystore_inner_core, &
+    zstore_inner_core,rhostore_inner_core,storederiv_ic1,storerhojw_ic1
+
+  use specfem_par_outercore, only: ibool_outer_core,NSPEC_OUTER_CORE, &
+    xstore_outer_core,ystore_outer_core,zstore_outer_core,rhostore_outer_core, &
+    storederiv_oc1,storerhojw_oc1
+
+  !use specfem_par_infinite
+  use constants_solver, only: IFLAG_IN_FICTITIOUS_CUBE
+
+  use gll_library1, only: kdble,zwgljd,dshape_function_hex8,gll_quadrature
+  use math_library, only: determinant,invert
+
+  use specfem_par_innercore, only: idoubling_inner_core
+
+  use specfem_par_full_gravity, only: lagrange_gll1
+
+  implicit none
+
+  integer,parameter :: ngnod = 8
+
+  integer :: i,i_elmt
+  integer :: ignod(ngnod) !dnx,dny,dnz,
+  real(kind=CUSTOM_REAL) :: detjac,rho(NGLLCUBE_INF)
+
+  real(kind=kdble),parameter :: jalpha=0.0_kdble,jbeta=0.0_kdble,zero=0.0_kdble
+
+  real(kind=kdble) :: xigll(NGLLX_INF),wxgll(NGLLX_INF),etagll(NGLLY_INF),wygll(NGLLY_INF), &
+                      zetagll(NGLLZ_INF),wzgll(NGLLZ_INF)
+
+  real(kind=kdble) :: gll_weights1(NGLLCUBE_INF),gll_points1(NDIM,NGLLCUBE_INF), &
+                      dlagrange_gll1(NDIM,NGLLCUBE_INF,NGLLCUBE_INF),dshape_hex8(NDIM,ngnod,NGLLCUBE_INF)
+
+  real(kind=CUSTOM_REAL) :: coord(ngnod,NDIM),deriv(NDIM,NGLLCUBE_INF),jac(NDIM,NDIM)
+
+  call zwgljd(xigll,wxgll,NGLLX_INF,jalpha,jbeta)
+  call zwgljd(etagll,wygll,NGLLY_INF,jalpha,jbeta)
+  call zwgljd(zetagll,wzgll,NGLLZ_INF,jalpha,jbeta)
+
+  ! get derivatives of shape functions for 8-noded hex
+  call dshape_function_hex8(NDIM,ngnod,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,xigll,etagll, &
+  zetagll,dshape_hex8)
+
+  ! compute gauss-lobatto-legendre quadrature information
+  call gll_quadrature(NDIM,NGLLX_INF,NGLLY_INF,NGLLZ_INF,NGLLCUBE_INF,gll_points1,gll_weights1, &
+  lagrange_gll1,dlagrange_gll1)
+
+  ! inner core
+  storederiv_ic1 = 0.0_CUSTOM_REAL
+  storerhojw_ic1 = 0.0_CUSTOM_REAL
+  do i_elmt = 1,NSPEC_INNER_CORE
+    ! suppress fictitious elements in central cube
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool_inner_core(1,1,1,i_elmt)
+    ignod(2)=ibool_inner_core(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool_inner_core(NGLLX,NGLLY,1,i_elmt)
+    ignod(4)=ibool_inner_core(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool_inner_core(1,1,NGLLZ,i_elmt)
+    ignod(6)=ibool_inner_core(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool_inner_core(NGLLX,NGLLY,NGLLZ,i_elmt)
+    ignod(8)=ibool_inner_core(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore_inner_core(ignod)
+    coord(:,2)=ystore_inner_core(ignod)
+    coord(:,3)=zstore_inner_core(ignod)
+    rho = reshape(rhostore_inner_core(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt),(/NGLLCUBE_INF/))
+
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      storederiv_ic1(:,:,i,i_elmt)=matmul(jac,dlagrange_gll1(:,i,:))
+      storerhojw_ic1(i,i_elmt)=rho(i)*detjac*gll_weights1(i)
+    enddo
+  enddo
+
+  ! outer core
+  storederiv_oc1 = 0.0_CUSTOM_REAL
+  storerhojw_oc1 = 0.0_CUSTOM_REAL
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool_outer_core(1,1,1,i_elmt)
+    ignod(2)=ibool_outer_core(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool_outer_core(NGLLX,NGLLY,1,i_elmt)
+    ignod(4)=ibool_outer_core(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool_outer_core(1,1,NGLLZ,i_elmt)
+    ignod(6)=ibool_outer_core(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool_outer_core(NGLLX,NGLLY,NGLLZ,i_elmt)
+    ignod(8)=ibool_outer_core(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore_outer_core(ignod)
+    coord(:,2)=ystore_outer_core(ignod)
+    coord(:,3)=zstore_outer_core(ignod)
+    rho = reshape(rhostore_outer_core(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt), (/NGLLCUBE_INF/))
+
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      storederiv_oc1(:,:,i,i_elmt)=matmul(jac,dlagrange_gll1(:,i,:))
+      storerhojw_oc1(i,i_elmt)=rho(i)*detjac*gll_weights1(i)
+    enddo
+  enddo
+
+  ! crust mantle
+  storederiv_cm1 = 0.0_CUSTOM_REAL
+  storerhojw_cm1 = 0.0_CUSTOM_REAL
+  storejw_cm1 = 0.0_CUSTOM_REAL
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    ! EXODUS order NOT indicial order
+    ! bottom corner nodes
+    ignod(1)=ibool_crust_mantle(1,1,1,i_elmt)
+    ignod(2)=ibool_crust_mantle(NGLLX,1,1,i_elmt)
+    ignod(3)=ibool_crust_mantle(NGLLX,NGLLY,1,i_elmt)
+    ignod(4)=ibool_crust_mantle(1,NGLLY,1,i_elmt)
+    ! second-last corner nodes
+    ignod(5)=ibool_crust_mantle(1,1,NGLLZ,i_elmt)
+    ignod(6)=ibool_crust_mantle(NGLLX,1,NGLLZ,i_elmt)
+    ignod(7)=ibool_crust_mantle(NGLLX,NGLLY,NGLLZ,i_elmt)
+    ignod(8)=ibool_crust_mantle(1,NGLLY,NGLLZ,i_elmt)
+
+    coord(:,1)=xstore_crust_mantle(ignod)
+    coord(:,2)=ystore_crust_mantle(ignod)
+    coord(:,3)=zstore_crust_mantle(ignod)
+    rho = reshape(rhostore_crust_mantle(1:NGLLX:2,1:NGLLY:2,1:NGLLZ:2,i_elmt), (/NGLLCUBE_INF/))
+
+    do i = 1,NGLLCUBE_INF
+      jac = matmul(dshape_hex8(:,:,i),coord)
+      detjac=determinant(jac)
+      call invert(jac)
+      storederiv_cm1(:,:,i,i_elmt)=matmul(jac,dlagrange_gll1(:,i,:))
+      storerhojw_cm1(i,i_elmt)=rho(i)*detjac*gll_weights1(i)
+      storejw_cm1(i,i_elmt)=detjac*gll_weights1(i)
+    enddo
+  enddo
+
+  end subroutine prepare_solver_preintegrate3
+
+!
+!-------------------------------------------------------------------------------
+!
+
+! TODO: check this why is it so slow poisson_stiffness is very slow due to loop
+! of jacobian and etc. If we store jacobian before hand it should be faster!
+
+  subroutine prepare_solver_poisson()
+
+  use math_library_mpi, only: maxscal,minscal
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+  use index_region
+  use poisson, only: poisson_stiffness,poisson_stiffnessINF,poisson_stiffness3, &
+  poisson_stiffnessINF3
+
+  implicit none
+
+  if (myrank == 0 ) then
+    write(IMAIN,*) "preparing mass matrices."
+    write(IMAIN,*)
+  endif
+
+  ! indexify regions
+  call get_index_region()
+
+  ! Level-1 solver-------------------
+  allocate(storekmat_crust_mantle1(NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_CRUST_MANTLE), &
+           dprecon_crust_mantle1(nnode_cm1))
+  allocate(storekmat_outer_core1(NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_OUTER_CORE), &
+           dprecon_outer_core1(nnode_oc1))
+  allocate(storekmat_inner_core1(NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_INNER_CORE), &
+           dprecon_inner_core1(nnode_ic1))
+  if (ADD_TRINF) then
+    allocate(storekmat_trinfinite1(NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_TRINFINITE), &
+             dprecon_trinfinite1(nnode_trinf1))
+  endif
+  allocate(storekmat_infinite1(NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_INFINITE), &
+           dprecon_infinite1(nnode_inf1))
+
+  allocate(dprecon1(0:neq1),load1(0:neq1),pgrav_ic1(nnode_ic1), &
+  pgrav_oc1(nnode_oc1),pgrav_cm1(nnode_cm1),pgrav_trinf1(nnode_trinf1), &
+  pgrav_inf1(nnode_inf1))
+
+  if (SIMULATION_TYPE == 3) then
+    allocate(b_load1(0:neq1), b_pgrav_ic1(nnode_ic1), b_pgrav_oc1(nnode_oc1), &
+    b_pgrav_cm1(nnode_cm1), b_pgrav_trinf1(nnode_trinf1), b_pgrav_inf1(nnode_inf1))
+  endif
+
+  ! crust mantle
+  call poisson_stiffness3(IREGION_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
+  NGLOB_CRUST_MANTLE,ibool_crust_mantle,xstore_crust_mantle,ystore_crust_mantle, &
+  zstore_crust_mantle,nnode_cm1,inode_elmt_cm1,storekmat_crust_mantle1, &
+  dprecon_crust_mantle1)
+
+  ! outer core
+  call poisson_stiffness3(IREGION_OUTER_CORE,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+  ibool_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+  nnode_oc1,inode_elmt_oc1,storekmat_outer_core1,dprecon_outer_core1)
+
+  ! inner core
+  call poisson_stiffness3(IREGION_INNER_CORE,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+  ibool_inner_core,xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+  nnode_ic1,inode_elmt_ic1,storekmat_inner_core1,dprecon_inner_core1)
+
+  if (ADD_TRINF) then
+    ! transition infinite
+    call poisson_stiffness3(IREGION_TRINFINITE,NSPEC_TRINFINITE,NGLOB_TRINFINITE, &
+    ibool_trinfinite,xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
+    nnode_trinf1,inode_elmt_trinf1,storekmat_trinfinite1,dprecon_trinfinite1)
+  endif
+
+  ! infinite layer
+  call poisson_stiffnessINF3(NSPEC_INFINITE,NGLOB_INFINITE,ibool_infinite, &
+  xstore_infinite,ystore_infinite,zstore_infinite,nnode_inf1,inode_elmt_inf1, &
+  storekmat_infinite1,dprecon_infinite1)
+
+  call sync_all
+
+
+  ! assemble stiffness matrices
+  ! assemble across the MPI processes in a region
+  ! crust_mantle
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_cm1,dprecon_crust_mantle1, &
+       num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
+       nibool_interfaces_crust_mantle1,ibool_interfaces_crust_mantle1, &
+       my_neighbors_crust_mantle1)
+
+  ! outer core
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_oc1,dprecon_outer_core1, &
+       num_interfaces_outer_core1,max_nibool_interfaces_outer_core1, &
+       nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
+       my_neighbors_outer_core1)
+
+  ! inner core
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_ic1,dprecon_inner_core1, &
+       num_interfaces_inner_core1,max_nibool_interfaces_inner_core1, &
+       nibool_interfaces_inner_core1,ibool_interfaces_inner_core1, &
+       my_neighbors_inner_core1)
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    call assemble_MPI_scalar(NPROCTOT_VAL,nnode_trinf1,dprecon_trinfinite1, &
+         num_interfaces_trinfinite1,max_nibool_interfaces_trinfinite1, &
+         nibool_interfaces_trinfinite1,ibool_interfaces_trinfinite1, &
+         my_neighbors_trinfinite1)
+  endif
+
+  ! infinite
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_inf1,dprecon_infinite1, &
+       num_interfaces_infinite1,max_nibool_interfaces_infinite1, &
+       nibool_interfaces_infinite1,ibool_interfaces_infinite1, &
+       my_neighbors_infinite1)
+
+  call sync_all()
+
+  ! assemble across the different regions in a process
+  dprecon1 = zero
+  ! crust_mantle
+  dprecon1(gdof_cm1)=dprecon1(gdof_cm1)+dprecon_crust_mantle1
+
+  ! outer core
+  dprecon1(gdof_oc1)=dprecon1(gdof_oc1)+dprecon_outer_core1
+
+  ! inner core
+  dprecon1(gdof_ic1)=dprecon1(gdof_ic1)+dprecon_inner_core1
+
+  ! transition infinite
+  if (ADD_TRINF) then
+    dprecon1(gdof_trinf1)=dprecon1(gdof_trinf1)+dprecon_trinfinite1
+  endif
+
+  ! infinite
+  dprecon1(gdof_inf1)=dprecon1(gdof_inf1)+dprecon_infinite1
+
+  dprecon1(0)=0.0_CUSTOM_REAL
+
+  call sync_all
+
+  ! invert preconditioner
+  !dprecon1(1:)=1.0_CUSTOM_REAL/dprecon1(1:)
+  !-----------------Level-1 solver
+
+
+  ! Level-2 solver------------------
+  if (SOLVER_5GLL) then
+  allocate(storekmat_crust_mantle(NGLLCUBE,NGLLCUBE,NSPEC_CRUST_MANTLE), &
+           dprecon_crust_mantle(NGLOB_CRUST_MANTLE))
+  allocate(storekmat_outer_core(NGLLCUBE,NGLLCUBE,NSPEC_OUTER_CORE), &
+           dprecon_outer_core(NGLOB_OUTER_CORE))
+  allocate(storekmat_inner_core(NGLLCUBE,NGLLCUBE,NSPEC_INNER_CORE), &
+           dprecon_inner_core(NGLOB_INNER_CORE))
+  if (ADD_TRINF) then
+    allocate(storekmat_trinfinite(NGLLCUBE,NGLLCUBE,NSPEC_TRINFINITE), &
+    dprecon_trinfinite(NGLOB_TRINFINITE))
+  endif
+  allocate(storekmat_infinite(NGLLCUBE,NGLLCUBE,NSPEC_INFINITE), &
+           dprecon_infinite(NGLOB_INFINITE))
+  allocate(dprecon(0:neq),load(0:neq))
+
+  ! better to make dprecon_* local rather than global
+
+  ! crust mantle
+  call poisson_stiffness(IREGION_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
+  NGLOB_CRUST_MANTLE,ibool_crust_mantle,xstore_crust_mantle,ystore_crust_mantle, &
+  zstore_crust_mantle,storekmat_crust_mantle,dprecon_crust_mantle)
+  ! outer core
+  call poisson_stiffness(IREGION_OUTER_CORE,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+  ibool_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+  storekmat_outer_core,dprecon_outer_core)
+  ! inner core
+  call poisson_stiffness(IREGION_INNER_CORE,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+  ibool_inner_core,xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+  storekmat_inner_core,dprecon_inner_core)
+
+  ! transition infinite
+
+  if (ADD_TRINF) then
+    call poisson_stiffness(IREGION_TRINFINITE,NSPEC_TRINFINITE,NGLOB_TRINFINITE, &
+    ibool_trinfinite,xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
+    storekmat_trinfinite,dprecon_trinfinite)
+  endif
+
+  ! infinite layer
+  call poisson_stiffnessINF(NSPEC_INFINITE,NGLOB_INFINITE, &
+  ibool_infinite,xstore_infinite,ystore_infinite,zstore_infinite, &
+  storekmat_infinite,dprecon_infinite)
+
+  call sync_all
+  ! assemble stiffness matrices
+  ! assemble across the MPI processes in a region
+  ! crust_mantle
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_CRUST_MANTLE,dprecon_crust_mantle, &
+  num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+  nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+  my_neighbors_crust_mantle)
+
+  ! outer core
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_OUTER_CORE,dprecon_outer_core, &
+  num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+  nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+  my_neighbors_outer_core)
+
+  ! inner core
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INNER_CORE,dprecon_inner_core, &
+  num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+  nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+  my_neighbors_inner_core)
+
+  ! transition infinite
+
+  if (ADD_TRINF) then
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_TRINFINITE,dprecon_trinfinite, &
+  num_interfaces_trinfinite,max_nibool_interfaces_trinfinite, &
+  nibool_interfaces_trinfinite,ibool_interfaces_trinfinite, &
+  my_neighbors_trinfinite)
+  endif
+
+  ! infinite
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INFINITE,dprecon_infinite, &
+  num_interfaces_infinite,max_nibool_interfaces_infinite, &
+  nibool_interfaces_infinite,ibool_interfaces_infinite, &
+  my_neighbors_infinite)
+
+  call sync_all()
+
+  ! assemble across the different regions in a process
+  dprecon = zero
+  ! crust_mantle
+  dprecon(gdof_cm)=dprecon(gdof_cm)+dprecon_crust_mantle
+
+  ! outer core
+  dprecon(gdof_oc)=dprecon(gdof_oc)+dprecon_outer_core
+
+  ! inner core
+  dprecon(gdof_ic)=dprecon(gdof_ic)+dprecon_inner_core
+
+  ! transition infinite
+
+  if (ADD_TRINF) then
+    dprecon(gdof_trinf)=dprecon(gdof_trinf)+dprecon_trinfinite
+  endif
+
+  ! infinite
+  dprecon(gdof_inf)=dprecon(gdof_inf)+dprecon_infinite
+
+  dprecon(0)=0.0_CUSTOM_REAL
+
+  call sync_all
+  !--------------------Level-2 solver
+  endif ! if (SOLVER_5GLL) then
+  return
+
+  end subroutine prepare_solver_poisson
+
+!
+!-------------------------------------------------------------------------------
+!
+
+! TODO: check this why is it so slow poisson_stiffness is very slow due to loop
+! of jacobian and etc. If we store jacobian before hand it may be faster!
+! DEVELOPER
+!   Hom N Gharti
+! HISTORY
+!   Sep 30,2013
+
+  subroutine prepare_solver_sparse()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  use specfem_par_trinfinite
+  use specfem_par_infinite
+  use index_region
+  use math_library, only: i_uniinv
+  use math_library_mpi, only: maxscal,minvec,maxvec
+  implicit none
+  logical :: ismpi
+  integer :: errcode
+  character(len=250) :: errtag
+
+  ! sparse stage 0
+  integer :: i,j,i_elmt,i_count,n,ncount
+  integer :: igdof,jgdof
+  integer :: nmax !,nsparse
+  integer :: nedof_ic,nedof_oc,nedof_cm,nedof_trinf,nedof_inf
+  integer :: gdof_elmt(NEDOF),ggdof_elmt(NEDOF)
+  integer :: nmax1 !,nsparse1
+  integer :: nedof_ic1,nedof_oc1,nedof_cm1,nedof_trinf1,nedof_inf1
+  integer :: gdof_elmt1(NEDOF1),ggdof_elmt1(NEDOF1)
+  integer,allocatable :: imap_ic(:),imap_oc(:),imap_cm(:),imap_trinf(:), &
+  imap_inf(:)
+  integer,allocatable :: ind0(:),iorder(:),row0(:),col0(:),grow0(:),gcol0(:)
+  real(kind=CUSTOM_REAL),allocatable :: kmat0(:)
+  integer,allocatable :: ind1(:),row1(:),col1(:)
+  real(kind=CUSTOM_REAL),allocatable :: kmat1(:)
+
+  integer :: nglob_ic,nglob_oc,nglob_cm,nglob_trinf,nglob_inf
+  integer :: nglob_ic1,nglob_oc1,nglob_cm1,nglob_trinf1,nglob_inf1
+
+  character(len=12) :: spm
+  character(len=60) :: fname
+
+  integer :: nx,ny,nz,nbyte,off0,gmin,gmax
+  integer :: i_bool,ibool,i_s,i0,i1,ier,j_proc
+  logical :: isbig
+
+  ! counting nonzero elements in offdiagonal portion
+  integer :: grow,ig0,ig1,ind,neq_part1
+  integer,allocatable :: nzero_rowoff1(:)
+  integer,allocatable :: igorder(:)
+
+  ismpi = .true.
+
+  if (myrank == 0 ) then
+    write(IMAIN,*) '-------------------- Preparing sparse matrix: --------------------'
+    write(IMAIN,*)
+  endif
+
+  !===============================================================================
+  ! Level-1 solver
+  !===============================================================================
+  ! Number of DOFs per element in each region
+  nedof_ic1 = NEDOFU1+NEDOFPHI1
+  nedof_oc1 = NEDOFCHI1+NEDOFP1+NEDOFPHI1
+  nedof_cm1 = NEDOFU1+NEDOFPHI1
+  nedof_trinf1 = NEDOFPHI1
+  nedof_inf1 = NEDOFPHI1
+
+  ! Maximum DOF in array - number of elements * Element_dof^2
+  nmax1=NSPEC_INNER_CORE*(nedof_ic1*nedof_ic1)+                                &
+       NSPEC_OUTER_CORE*(nedof_oc1*nedof_oc1)+                                 &
+       NSPEC_CRUST_MANTLE*(nedof_cm1*nedof_cm1)+                               &
+       NSPEC_TRINFINITE*(nedof_trinf1*nedof_trinf1)+                           &
+       NSPEC_INFINITE*(nedof_inf1*nedof_inf1)
+
+  allocate(col0(nmax1),row0(nmax1),gcol0(nmax1),grow0(nmax1),kmat0(nmax1))
+  if (myrank == 0) then
+    print *,' -- Elemental DOFs for IC : ', nedof_ic1
+    print *,' -- Maximum DOFs (nmax1)  : ', nmax1
+  endif
+
+  ! Allocate map for each region
+  allocate(imap_ic(nedof_ic1),imap_oc(nedof_oc1),imap_cm(nedof_cm1), &
+  imap_trinf(nedof_trinf1),imap_inf(nedof_inf1))
+
+  ! I THINK THIS SYNTAX MEANS CREATE A RANGE?
+  imap_ic=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapu1, imapphi1 /)
+  imap_oc=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapchi1, imapp1, imapphi1 /)
+  imap_cm=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapu1, imapphi1 /)
+  imap_trinf=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapphi1 /)
+  imap_inf=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapphi1 /)
+
+  ! read global degrees of freedoms from DATABASE files
+  write(spm,*)myrank
+  fname='DATABASES_MPI/gdof1_proc'//trim(adjustl(spm))
+  open(10,file=fname,action='read',status='old')
+  ! inner core
+  read(10,*)nglob_ic1                   ! Global DOF in inner core
+  allocate(ggdof_ic1(NNDOF,nglob_ic1))
+  read(10,*)ggdof_ic1
+  ! outer core
+  read(10,*)nglob_oc1                   ! Global DOF in outer core
+  allocate(ggdof_oc1(NNDOF,nglob_oc1))
+  read(10,*)ggdof_oc1
+  ! crust mantle
+  read(10,*)nglob_cm1                   ! Global DOF in crust mantle
+  allocate(ggdof_cm1(NNDOF,nglob_cm1))
+  read(10,*)ggdof_cm1
+  ! transition
+  read(10,*)nglob_trinf1                ! Global DOF in transition
+  allocate(ggdof_trinf1(NNDOF,nglob_trinf1))
+  read(10,*)ggdof_trinf1
+  ! infinite elements
+  read(10,*)nglob_inf1                  ! Global DOF in infinite
+  allocate(ggdof_inf1(NNDOF,nglob_inf1))
+  read(10,*)ggdof_inf1
+  close(10)
+
+  ! Find maximum ID (dof value) for any of the regions
+  ngdof1=maxscal(maxval( (/ maxval(ggdof_ic1),maxval(ggdof_oc1), &
+         maxval(ggdof_cm1),maxval(ggdof_trinf1),maxval(ggdof_inf1) /) ))
+  if (myrank == 0) write(*,'(a,i12)')' -- Total global degrees of freedom1: ',ngdof1
+
+
+  ! stage 0: store all elements
+  ncount = 0
+  ! inner core
+  do i_elmt = 1,NSPEC_INNER_CORE
+    ! Skip fictitious inner core cube
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+
+
+    ! Note: gdof_ic1 defined in specfem_par_innercore
+    ! Fetch gdof for IC element only on NGLLCUBE_INF points
+    gdof_elmt1 = reshape(gdof_ic1(inode_elmt_ic1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt1 = reshape(ggdof_ic1(:,inode_elmt_ic1(:,i_elmt)),(/NEDOF1/))
+    !if (myrank==0) print *,'ICkmat zeros1:',count(storekmat_inner_core1(:,:,i_elmt)==0.0_CUSTOM_REAL)
+    !if (myrank==0.and.i_elmt==1)print *,'kmat1:',storekmat_inner_core1(1,:,i_elmt)
+    do i = 1,nedof_ic1
+      do j = 1,nedof_ic1
+        igdof = gdof_elmt1(imap_ic(i))
+        jgdof = gdof_elmt1(imap_ic(j))
+        ! If a degree of freedom and a non-zero kmat:
+        if (igdof > 0.and.jgdof > 0.and.storekmat_inner_core1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+          ncount = ncount+1
+          ! Local (MPI?) map?
+          row0(ncount)=igdof
+          col0(ncount)=jgdof
+          ! Global map?
+          grow0(ncount)=ggdof_elmt1(imap_ic(i))
+          gcol0(ncount)=ggdof_elmt1(imap_ic(j))
+        endif
+      enddo
+    enddo
+  enddo
+  call sync_all
+
+  ! outer core
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    gdof_elmt1 = reshape(gdof_oc1(inode_elmt_oc1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt1 = reshape(ggdof_oc1(:,inode_elmt_oc1(:,i_elmt)),(/NEDOF1/))
+    do i = 1,nedof_oc1
+      do j = 1,nedof_oc1
+        igdof = gdof_elmt1(imap_oc(i))
+        jgdof = gdof_elmt1(imap_oc(j))
+        if (igdof > 0.and.jgdof > 0.and.storekmat_outer_core1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+          !if (myrank==0) write(1111,*) igdof,jgdof,storekmat_outer_core1(i,j,i_elmt)
+          ncount = ncount+1
+          row0(ncount)=igdof
+          col0(ncount)=jgdof
+          grow0(ncount)=ggdof_elmt1(imap_oc(i))
+          gcol0(ncount)=ggdof_elmt1(imap_oc(j))
+        endif
+      enddo
+    enddo
+  enddo
+
+
+  ! crust mantle
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    gdof_elmt1 = reshape(gdof_cm1(inode_elmt_cm1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt1 = reshape(ggdof_cm1(:,inode_elmt_cm1(:,i_elmt)),(/NEDOF1/))
+    !if (myrank==0) print *,'CMkmat zeros1:',count(storekmat_crust_mantle1(:,:,i_elmt)==0.0_CUSTOM_REAL)
+    do i = 1,nedof_cm1
+      do j = 1,nedof_cm1
+        igdof = gdof_elmt1(imap_cm(i))
+        jgdof = gdof_elmt1(imap_cm(j))
+        if (igdof > 0.and.jgdof > 0.and.storekmat_crust_mantle1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+          ncount = ncount+1
+          row0(ncount)=igdof
+          col0(ncount)=jgdof
+          grow0(ncount)=ggdof_elmt1(imap_cm(i))
+          gcol0(ncount)=ggdof_elmt1(imap_cm(j))
+        endif
+      enddo
+    enddo
+  enddo
+
+  ! transition infinite
+  do i_elmt = 1,NSPEC_TRINFINITE
+    gdof_elmt1 = reshape(gdof_trinf1(inode_elmt_trinf1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt1 = reshape(ggdof_trinf1(:,inode_elmt_trinf1(:,i_elmt)),(/NEDOF1/))
+    !if (myrank==0) print *,'TRINFkmat zeros1:',count(storekmat_trinfinite1(:,:,i_elmt)==0.0_CUSTOM_REAL)
+    do i = 1,nedof_trinf1
+      do j = 1,nedof_trinf1
+        igdof = gdof_elmt1(imap_trinf(i))
+        jgdof = gdof_elmt1(imap_trinf(j))
+        if (igdof > 0.and.jgdof > 0.and.storekmat_trinfinite1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+          ncount = ncount+1
+          row0(ncount)=igdof
+          col0(ncount)=jgdof
+          grow0(ncount)=ggdof_elmt1(imap_trinf(i))
+          gcol0(ncount)=ggdof_elmt1(imap_trinf(j))
+        endif
+      enddo
+    enddo
+  enddo
+
+  ! infinite
+  do i_elmt = 1,NSPEC_INFINITE
+    gdof_elmt1 = reshape(gdof_inf1(inode_elmt_inf1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt1 = reshape(ggdof_inf1(:,inode_elmt_inf1(:,i_elmt)),(/NEDOF1/))
+    !if (myrank==0) print *,'INFkmat zeros1:',count(storekmat_infinite1(:,:,i_elmt)==0.0_CUSTOM_REAL)
+    do i = 1,nedof_inf1
+      do j = 1,nedof_inf1
+        igdof = gdof_elmt1(imap_inf(i))
+        jgdof = gdof_elmt1(imap_inf(j))
+        if (igdof > 0.and.jgdof > 0) then
+          ncount = ncount+1
+          row0(ncount)=igdof
+          col0(ncount)=jgdof
+          grow0(ncount)=ggdof_elmt1(imap_inf(i))
+          gcol0(ncount)=ggdof_elmt1(imap_inf(j))
+        endif
+      enddo
+    enddo
+  enddo
+
+
+  ! stage 1: assemble duplicates
+  ! sort global indices
+  allocate(ind0(ncount),iorder(ncount))
+  ind0 = neq1*(row0(1:ncount)-1)+col0(1:ncount)
+  call i_uniinv(ind0,iorder)
+  nsparse1=maxval(iorder)
+  if (myrank == 0) write(*,'(a,1x,i0,1x,a,1x,i0)')'  neq1:',neq1,' Nsparse1:',nsparse1
+  call sync_all
+  allocate(krow_sparse1(nsparse1),kcol_sparse1(nsparse1))
+  allocate(kgrow_sparse1(nsparse1),kgcol_sparse1(nsparse1))
+
+  !kmat_sparse1=0.0_CUSTOM_REAL
+  krow_sparse1=-1
+  kcol_sparse1=-1
+  kgrow_sparse1=-1
+  kgcol_sparse1=-1
+  do i_count = 1,ncount!nmax
+    krow_sparse1(iorder(i_count))=row0(i_count)
+    kcol_sparse1(iorder(i_count))=col0(i_count)
+    kgrow_sparse1(iorder(i_count))=grow0(i_count)
+    kgcol_sparse1(iorder(i_count))=gcol0(i_count)
+  enddo
+  if (minval(krow_sparse1) < 1.or.minval(kcol_sparse1) < 1.or.                  &
+  minval(kgrow_sparse1) < 1.or.minval(kgcol_sparse1) < 1) then
+    write(*,*) 'ERROR: local and global indices are less than 1!'
+    stop
+  endif
+
+  deallocate(row0,col0,grow0,gcol0,kmat0,ind0,iorder)
+  deallocate(imap_ic,imap_oc,imap_cm,imap_trinf,imap_inf)
+
+
+
+  ! stage 2: assemble across processors
+
+  ! local DOF to global DOF mapping
+  allocate(l2gdof1(0:neq1))
+  l2gdof1=-1
+  l2gdof1(gdof_ic1)=ggdof_ic1(1,:)
+  l2gdof1(gdof_oc1)=ggdof_oc1(1,:)
+  l2gdof1(gdof_cm1)=ggdof_cm1(1,:)
+  l2gdof1(gdof_trinf1)=ggdof_trinf1(1,:)
+  l2gdof1(gdof_inf1)=ggdof_inf1(1,:)
+
+  do i = 1,nsparse1
+    if (kgrow_sparse1(i) /= l2gdof1(krow_sparse1(i)).or.kgcol_sparse1(i) /= l2gdof1(kcol_sparse1(i))) then
+      print *,'VERY STRANGE!!!!!'
+      stop
+    endif
+  enddo
+
+  l2gdof1 = l2gdof1-1 ! PETSC uses 0 indexing
+  gmin = minvec(l2gdof1(1:))
+  gmax = maxvec(l2gdof1(1:))
+  if (myrank == 0) write(*,'(a,1x,i0,1x,i0)')'  l2gdof1 range:',gmin,gmax
+  call sync_all
+  if (minval(l2gdof1(1:)) < 0) then
+    write(*,*) 'ERROR: local-to-global indices are less than 1!'
+    stop
+  endif
+
+  !===============================================================================
+  ! Level-2 solver
+  !===============================================================================
+  if (SOLVER_5GLL) then
+    nedof_ic = NEDOFU+NEDOFPHI
+    nedof_oc = NEDOFCHI+NEDOFP+NEDOFPHI
+    nedof_cm = NEDOFU+NEDOFPHI
+    nedof_trinf = NEDOFPHI
+    nedof_inf = NEDOFPHI
+
+    nmax=NSPEC_INNER_CORE*(nedof_ic*nedof_ic)+                           &
+        NSPEC_OUTER_CORE*(nedof_oc*nedof_oc)+                            &
+        NSPEC_CRUST_MANTLE*(nedof_cm*nedof_cm)+                          &
+        NSPEC_TRINFINITE*(nedof_trinf*nedof_trinf)+                      &
+        NSPEC_INFINITE*(nedof_inf*nedof_inf)
+    allocate(col0(nmax),row0(nmax),gcol0(nmax),grow0(nmax),kmat0(nmax))
+    !allocate(col0(nmax),row0(nmax),gcol0(nmax),grow0(nmax))
+    if (myrank == 0) print *,nedof_ic,nmax
+    allocate(imap_ic(nedof_ic),imap_oc(nedof_oc),imap_cm(nedof_cm), &
+    imap_trinf(nedof_trinf),imap_inf(nedof_inf))
+
+    imap_ic=(/ (i,i = 1,NGLLCUBE) /) !(/ imapu, imapphi /)
+    imap_oc=(/ (i,i = 1,NGLLCUBE) /) !(/ imapchi, imapp, imapphi /)
+    imap_cm=(/ (i,i = 1,NGLLCUBE) /) !(/ imapu, imapphi /)
+    imap_trinf=(/ (i,i = 1,NGLLCUBE) /) !(/ imapphi /)
+    imap_inf=(/ (i,i = 1,NGLLCUBE) /) !(/ imapphi /)
+
+    ! read global degrees of freedoms from DATABASE files
+    ! inner core
+    write(spm,*)myrank
+    fname='DATABASES_MPI/gdof_proc'//trim(adjustl(spm))
+    open(10,file=fname,action='read',status='old')
+    read(10,*)nglob_ic !NGLOB_INNER_CORE
+    allocate(ggdof_ic(NNDOF,nglob_ic))
+    read(10,*)ggdof_ic
+    read(10,*)nglob_oc !NGLOB_OUTER_CORE
+    allocate(ggdof_oc(NNDOF,nglob_oc))
+    read(10,*)ggdof_oc
+    read(10,*)nglob_cm !NGLOB_CRUST_MANTLE
+    allocate(ggdof_cm(NNDOF,nglob_cm))
+    read(10,*)ggdof_cm
+    read(10,*)nglob_trinf !NGLOB_TRINFINITE
+    allocate(ggdof_trinf(NNDOF,nglob_trinf))
+    read(10,*)ggdof_trinf
+    read(10,*)nglob_inf !NGLOB_INFINITE
+    allocate(ggdof_inf(NNDOF,nglob_inf))
+    read(10,*)ggdof_inf
+    close(10)
+
+    ngdof=maxscal(maxval( (/ maxval(ggdof_ic),maxval(ggdof_oc),maxval(ggdof_cm), &
+    maxval(ggdof_trinf),maxval(ggdof_inf) /) ))
+    if (myrank == 0) write(*,'(a,i12)')'  Total global degrees of freedom:',ngdof
+
+    ! stage 0: store all elements
+    ncount = 0
+    ! inner core
+    do i_elmt = 1,NSPEC_INNER_CORE
+      if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+      gdof_elmt = reshape(gdof_ic(inode_elmt_ic(:,i_elmt)),(/NEDOF/))
+      ggdof_elmt = reshape(ggdof_ic(:,inode_elmt_ic(:,i_elmt)),(/NEDOF/))
+      !if (myrank==0) print *,'ICkmat zeros:',count(storekmat_inner_core(:,:,i_elmt)==0.0_CUSTOM_REAL)
+      !if (myrank==0.and.i_elmt==1)print *,'kmat:',storekmat_inner_core(1,:,i_elmt)
+      do i = 1,nedof_ic
+        do j = 1,nedof_ic
+          igdof = gdof_elmt(imap_ic(i))
+          jgdof = gdof_elmt(imap_ic(j))
+          if (igdof > 0.and.jgdof > 0.and.storekmat_inner_core(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+            ncount = ncount+1
+            row0(ncount)=igdof
+            col0(ncount)=jgdof
+            grow0(ncount)=ggdof_elmt(imap_ic(i))
+            gcol0(ncount)=ggdof_elmt(imap_ic(j))
+          endif
+        enddo
+      enddo
+    enddo
+    call sync_all
+    ! outer core
+    do i_elmt = 1,NSPEC_OUTER_CORE
+      gdof_elmt = reshape(gdof_oc(inode_elmt_oc(:,i_elmt)),(/NEDOF/))
+      ggdof_elmt = reshape(ggdof_oc(:,inode_elmt_oc(:,i_elmt)),(/NEDOF/))
+      do i = 1,nedof_oc
+        do j = 1,nedof_oc
+          igdof = gdof_elmt(imap_oc(i))
+          jgdof = gdof_elmt(imap_oc(j))
+          if (igdof > 0.and.jgdof > 0.and.storekmat_outer_core(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+            ncount = ncount+1
+            row0(ncount)=igdof
+            col0(ncount)=jgdof
+            grow0(ncount)=ggdof_elmt(imap_oc(i))
+            gcol0(ncount)=ggdof_elmt(imap_oc(j))
+          endif
+        enddo
+      enddo
+    enddo
+    ! crust mantle
+    do i_elmt = 1,NSPEC_CRUST_MANTLE
+      gdof_elmt = reshape(gdof_cm(inode_elmt_cm(:,i_elmt)),(/NEDOF/))
+      ggdof_elmt = reshape(ggdof_cm(:,inode_elmt_cm(:,i_elmt)),(/NEDOF/))
+      !if (myrank==0) print *,'CMkmat zeros:',count(storekmat_crust_mantle(:,:,i_elmt)==0.0_CUSTOM_REAL)
+      do i = 1,nedof_cm
+        do j = 1,nedof_cm
+          igdof = gdof_elmt(imap_cm(i))
+          jgdof = gdof_elmt(imap_cm(j))
+          if (igdof > 0.and.jgdof > 0.and.storekmat_crust_mantle(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+            ncount = ncount+1
+            row0(ncount)=igdof
+            col0(ncount)=jgdof
+            grow0(ncount)=ggdof_elmt(imap_cm(i))
+            gcol0(ncount)=ggdof_elmt(imap_cm(j))
+          endif
+        enddo
+      enddo
+    enddo
+    ! transition infinite
+    do i_elmt = 1,NSPEC_TRINFINITE
+      gdof_elmt = reshape(gdof_trinf(inode_elmt_trinf(:,i_elmt)),(/NEDOF/))
+      ggdof_elmt = reshape(ggdof_trinf(:,inode_elmt_trinf(:,i_elmt)),(/NEDOF/))
+      !if (myrank==0) print *,'TRINFkmat zeros:',count(storekmat_trinfinite(:,:,i_elmt)==0.0_CUSTOM_REAL)
+      do i = 1,nedof_trinf
+        do j = 1,nedof_trinf
+          igdof = gdof_elmt(imap_trinf(i))
+          jgdof = gdof_elmt(imap_trinf(j))
+          if (igdof > 0.and.jgdof > 0.and.storekmat_trinfinite(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
+            ncount = ncount+1
+            row0(ncount)=igdof
+            col0(ncount)=jgdof
+            grow0(ncount)=ggdof_elmt(imap_trinf(i))
+            gcol0(ncount)=ggdof_elmt(imap_trinf(j))
+          endif
+        enddo
+      enddo
+    enddo
+    ! infinite
+    do i_elmt = 1,NSPEC_INFINITE
+      gdof_elmt = reshape(gdof_inf(inode_elmt_inf(:,i_elmt)),(/NEDOF/))
+      ggdof_elmt = reshape(ggdof_inf(:,inode_elmt_inf(:,i_elmt)),(/NEDOF/))
+      !if (myrank==0) print *,'INFkmat zeros:',count(storekmat_infinite(:,:,i_elmt)==0.0_CUSTOM_REAL)
+      do i = 1,nedof_inf
+        do j = 1,nedof_inf
+          igdof = gdof_elmt(imap_inf(i))
+          jgdof = gdof_elmt(imap_inf(j))
+          if (igdof > 0.and.jgdof > 0) then
+            ncount = ncount+1
+            row0(ncount)=igdof
+            col0(ncount)=jgdof
+            grow0(ncount)=ggdof_elmt(imap_inf(i))
+            gcol0(ncount)=ggdof_elmt(imap_inf(j))
+          endif
+        enddo
+      enddo
+    enddo
+
+    ! stage 1: assemble duplicates
+    ! sort global indices
+    allocate(ind0(ncount),iorder(ncount))
+    ind0 = neq*(row0(1:ncount)-1)+col0(1:ncount)
+    call i_uniinv(ind0,iorder)
+    nsparse=maxval(iorder)
+    if (myrank == 0) write(*,'(a,1x,i0,1x,a,1x,i0)')'  neq:',neq,' Nsparse:',nsparse
+
+    allocate(krow_sparse(nsparse),kcol_sparse(nsparse))
+    allocate(kgrow_sparse(nsparse),kgcol_sparse(nsparse))
+
+    krow_sparse=-1
+    kcol_sparse=-1
+    kgrow_sparse=-1
+    kgcol_sparse=-1
+    do i_count = 1,ncount
+      krow_sparse(iorder(i_count))=row0(i_count)
+      kcol_sparse(iorder(i_count))=col0(i_count)
+      kgrow_sparse(iorder(i_count))=grow0(i_count)
+      kgcol_sparse(iorder(i_count))=gcol0(i_count)
+    enddo
+    if (minval(krow_sparse) < 1.or.minval(kcol_sparse) < 1.or.                  &
+    minval(kgrow_sparse) < 1.or.minval(kgcol_sparse) < 1) then
+      write(*,*) 'ERROR: local and global indices are less than 1!'
+      stop
+    endif
+
+
+    deallocate(row0,col0,grow0,gcol0,kmat0,ind0,iorder)
+    deallocate(imap_ic,imap_oc,imap_cm,imap_trinf,imap_inf)
+
+
+    ! stage 2: assemble across processors
+
+    ! local DOF to global DOF mapping
+    allocate(l2gdof(0:neq))
+    l2gdof=-1
+    l2gdof(gdof_ic)=ggdof_ic(1,:)
+    l2gdof(gdof_oc)=ggdof_oc(1,:)
+    l2gdof(gdof_cm)=ggdof_cm(1,:)
+    l2gdof(gdof_trinf)=ggdof_trinf(1,:)
+    l2gdof(gdof_inf)=ggdof_inf(1,:)
+
+    l2gdof = l2gdof-1 ! PETSC uses 0 indexing
+
+    if (myrank == 0) write(*,'(a,1x,i0,1x,i0)')'  l2gdof range:',minval(l2gdof(1:)),maxval(l2gdof(1:))
+    call sync_all
+    if (minval(l2gdof(1:)) < 1) then
+      write(*,*) 'ERROR: local-to-global indices are less than 1!'
+      stop
+    endif
+  endif !if (SOLVER_5GLL) then
+  if (myrank == 0) write(*,'(a)')'--------------------------------------------------'
+
+  return
+
+! not used yet...
+!contains
+!
+!  ! subroutine within the prepare_solver_sparse subroutine
+!  subroutine is_symmetric(myrank,mat)
+!
+!  implicit none
+!  integer,parameter :: kreal = selected_real_kind(15)
+!  integer,intent(in) :: myrank
+!  real(kind=kreal),intent(in) :: mat(:,:)
+!  real(kind=kreal) :: dx
+!  integer :: i,j,n,nc
+!
+!  n=ubound(mat,1)
+!  nc=ubound(mat,2)
+!  if (nc /= n) then
+!    write(*,*) 'ERROR: non-square matrix!',n,nc
+!    stop
+!  endif
+!
+!  do i = 1,n-1
+!    do j = i+1,n
+!      dx = mat(i,j)-mat(j,i)
+!      if (abs(dx) > 1.0e-20_kreal) then
+!        if (myrank == 0) write(*,*) 'ERROR: non-symmetric matrix!',mat(i,j),mat(j,i),dx
+!        stop
+!      endif
+!    enddo
+!  enddo
+!  end subroutine is_symmetric
+
+  end subroutine prepare_solver_sparse
+
+#endif
diff --git a/src/specfem3D/SIEM_solver_mpi.F90 b/src/specfem3D/SIEM_solver_mpi.F90
new file mode 100644
index 000000000..98c9bacd5
--- /dev/null
+++ b/src/specfem3D/SIEM_solver_mpi.F90
@@ -0,0 +1,876 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+! TODO: r0 not necessary
+! collection of solvers
+! REVISION
+!   HNG, Jul 12,2011; HNG, Apr 09,2010
+
+module solver_mpi
+  use constants_solver, only: CUSTOM_REAL,CG_MAXITER,CG_TOL,CG_TOL1
+  use math_library_mpi
+
+contains
+
+!-------------------------------------------------------------------------------
+! petsc SP solver preconditioned conjugate-gradient solver
+
+  subroutine pcpetsc_cg_solver(myid,neq,u_g,f,dprecon_g,cg_iter)
+
+  !use math_library
+  implicit none
+  integer,intent(in) :: myid,neq
+  real(kind=kreal),dimension(0:neq),intent(inout) :: u_g
+  real(kind=kreal),dimension(0:neq),intent(in) :: f,dprecon_g
+  integer,intent(out) :: cg_iter
+
+  real(kind=kreal) :: alpha,beta,rz,maxp,maxu
+  real(kind=kreal),dimension(0:neq) :: kp,p,p_g,r,r0,z,z_g!,r_g
+
+  real(kind=kreal),parameter :: zero=0.0_kreal,zerotol=1.0e-12_kreal
+
+  ! all global array variables are both MPI and regionally assembled.
+  ! local array variables are regionally assembled.
+  ! for MPI assembly of such array, we have to scatter again to region with
+  ! regionally assembled values.
+
+  ! PCG solver
+
+  ! check if RHS is 0
+  if (maxvec(abs(f)) <= zerotol) then
+    u_g = zero
+    return
+  endif
+  kp = zero
+  if (maxval(abs(u_g)) > zero) then
+    call product_stiffness_vector(neq,u_g,kp)
+  endif
+  ! assemble kp across the regions
+  r = f-kp
+  z = dprecon_g*r
+
+  call scatter_and_assemble(neq,z,z_g)
+  p = z
+  ! pcg iteration
+  pcg: do cg_iter = 1,CG_MAXITER
+    call scatter_and_assemble(neq,p,p_g)
+
+    call product_stiffness_vector(neq,p_g,kp)
+
+    rz=dot_product_par(r,z_g)
+    alpha=rz/dot_product_par(p_g,kp)
+    u_g = u_g+alpha*p_g
+
+    maxp = maxvec(abs(p_g)); maxu = maxvec(abs(u_g))
+    !if (abs(alpha)*maxvec(abs(p_g))/maxvec(abs(u_g)) <= CG_TOL) then
+    if (abs(alpha)*maxp/maxu <= CG_TOL) then
+      return
+    endif
+    r0 = r
+    r = r-alpha*kp
+
+    ! solve using single precision petsc solver
+    !z=dprecon_g*r
+    z = dprecon_g*r
+
+
+    call scatter_and_assemble(neq,z,z_g)
+    !beta=dot_product_par(r,z_g)/rz ! Fletcher–Reeves
+    beta=dot_product_par(r-r0,z_g)/rz !  Polak–Ribière
+    p = z+beta*p
+  enddo pcg
+  if (myid == 0) write(*,'(a)')'ERROR: PCG solver doesn''t converge!'
+  call sync_all
+  call close_process
+
+  return
+
+  end subroutine pcpetsc_cg_solver
+
+!
+!===============================================================================
+!
+
+! conjugate-gradient solver
+
+  subroutine cg_solver(myid,neq,u_g,f,cg_iter)
+
+  !use math_library
+  implicit none
+  integer,intent(in) :: myid,neq
+  real(kind=kreal),dimension(0:neq),intent(inout) :: u_g
+  real(kind=kreal),dimension(0:neq),intent(in) :: f
+  integer,intent(out) :: cg_iter
+
+  real(kind=kreal) :: alpha,beta,pkp,rz,maxp,maxu
+  real(kind=kreal),dimension(0:neq) :: kp,p,p_g,r,r0,r_g
+
+  real(kind=kreal),parameter :: zero=0.0_kreal,zerotol=1.0e-30_kreal
+
+  ! all global array variables are both MPI and regionally assembled.
+  ! local array variables are regionally assembled.
+  ! for MPI assembly of such array, we have to scatter again to region with
+  ! regionally assembled values.
+
+  ! PCG solver
+
+  ! check if RHS is 0
+  if (maxvec(abs(f)) <= zerotol) then
+    u_g = zero
+    return
+  endif
+  kp = zero
+  if (maxval(abs(u_g)) > zero) then
+    call product_stiffness_vector(neq,u_g,kp)
+  endif
+  ! assemble kp across the regions
+  r = f-kp
+
+  call scatter_and_assemble(neq,r,r_g)
+
+  p = r
+  ! pcg iteration
+  pcg: do cg_iter = 1,CG_MAXITER
+    call scatter_and_assemble(neq,p,p_g)
+
+    call product_stiffness_vector(neq,p_g,kp)
+
+    rz=dot_product_par(r,r_g)
+    pkp=dot_product_par(p_g,kp)
+    alpha=rz/pkp !rz/dot_product_par(p_g,kp)
+    u_g = u_g+alpha*p_g
+
+    maxp = maxvec(abs(p_g)); maxu = maxvec(abs(u_g))
+    !if (abs(alpha)*maxvec(abs(p_g))/maxvec(abs(u_g)) <= CG_TOL) then
+    if (abs(alpha)*maxp/maxu <= CG_TOL) then
+      return
+    endif
+    r0 = r
+    r = r-alpha*kp
+
+    call scatter_and_assemble(neq,r,r_g)
+    beta=dot_product_par(r,r_g)/rz ! Fletcher–Reeves
+    !beta=dot_product_par(r-r0,r_g)/rz !  Polak–Ribière
+    p = r+beta*p
+    !if (myid==1) write(*,'(i3,f25.18,f25.18,f25.18)') cg_iter,alpha,beta,rz
+  enddo pcg
+  if (myid == 0) write(*,'(a,e14.6)')'ERROR: CG solver doesn''t converge! &
+  &Tolerance:',abs(alpha)*maxp/maxu
+  call sync_all
+  call close_process
+  !close(1111)
+
+  return
+
+  end subroutine cg_solver
+
+!
+!============================================
+!
+
+! conjugate-gradient solver
+
+  subroutine cg_solver3(myid,neq,u_g,f,cg_iter)
+
+  !use math_library
+  implicit none
+  integer,intent(in) :: myid,neq
+  real(kind=kreal),dimension(0:neq),intent(inout) :: u_g
+  real(kind=kreal),dimension(0:neq),intent(in) :: f
+  integer,intent(out) :: cg_iter
+
+  real(kind=kreal) :: alpha,beta,pkp,rz,maxf,maxp,maxu
+  real(kind=kreal),dimension(0:neq) :: kp,p,p_g,r0,r,r_g
+
+  real(kind=kreal),parameter :: zero=0.0_kreal,zerotol=1.0e-30_kreal
+
+
+  ! all global array variables are both MPI and regionally assembled.
+  ! local array variables are regionally assembled.
+  ! for MPI assembly of such array, we have to scatter again to region with
+  ! regionally assembled values.
+
+  ! PCG solver
+  maxf = maxvec(abs(f))
+  if (myid == 0) print *,'load:',maxf !maxvec(abs(f))
+  ! check if RHS is 0
+  if (maxf <= zerotol) then
+    u_g = zero
+    return
+  endif
+  kp = zero
+  if (maxval(abs(u_g)) > zero) then
+    call product_stiffness_vector3(neq,u_g,kp)
+  endif
+  ! assemble kp across the regions
+  r = f-kp
+
+  call scatter_and_assemble3(neq,r,r_g)
+  p = r
+  ! pcg iteration
+  pcg: do cg_iter = 1,CG_MAXITER
+    call scatter_and_assemble3(neq,p,p_g)
+
+    call product_stiffness_vector3(neq,p_g,kp)
+
+    rz=dot_product_par(r,r_g)
+    pkp=dot_product_par(p_g,kp)
+    !if (abs(pkp)==zero)return
+    alpha = rz/pkp
+    u_g = u_g+alpha*p_g
+
+    maxp = maxvec(abs(p_g)); maxu = maxvec(abs(u_g))
+    !if (abs(alpha)*maxvec(abs(p_g))/maxvec(abs(u_g)) <= CG_TOL) then
+    if (abs(alpha)*maxp/maxu <= CG_TOL1) then
+      if (myid == 0) print *,'tol:',abs(alpha)*maxp/maxu
+      return
+    endif
+    r0 = r !PR: Polak–Ribière
+    r = r-alpha*kp
+
+    call scatter_and_assemble3(neq,r,r_g)
+    beta=dot_product_par(r,r_g)/rz ! Fletcher–Reeves
+    !beta=dot_product_par(r-r0,r_g)/rz !PR: Polak–Ribière
+    p = r+beta*p
+  enddo pcg
+  if (myid == 0) write(*,'(a,e14.6)')'ERROR: CG solver doesn''t converge! &
+  &Tolerance:',abs(alpha)*maxp/maxu
+  call sync_all
+  call close_process
+
+  return
+
+  end subroutine cg_solver3
+
+!
+!===============================================================================
+!
+
+! diagonally preconditioned conjugate-gradient solver
+
+  subroutine diagpcg_solver(myid,neq,u_g,f,dprecon_g,cg_iter)
+
+  !use math_library
+  implicit none
+  integer,intent(in) :: myid,neq
+  real(kind=kreal),dimension(0:neq),intent(inout) :: u_g
+  real(kind=kreal),dimension(0:neq),intent(in) :: f,dprecon_g
+  integer,intent(out) :: cg_iter
+
+  real(kind=kreal) :: alpha,beta,rz,maxp,maxu
+  real(kind=kreal),dimension(0:neq) :: kp,p,p_g,r,r0,z,z_g!,r_g
+
+  real(kind=kreal),parameter :: zero=0.0_kreal,zerotol=1.0e-12_kreal
+
+
+  ! all global array variables are both MPI and regionally assembled.
+  ! local array variables are regionally assembled.
+  ! for MPI assembly of such array, we have to scatter again to region with
+  ! regionally assembled values.
+
+  ! PCG solver
+
+  ! check if RHS is 0
+  if (maxvec(abs(f)) <= zerotol) then
+    u_g = zero
+    return
+  endif
+  kp = zero
+  if (maxval(abs(u_g)) > zero) then
+    call product_stiffness_vector(neq,u_g,kp)
+  endif
+  ! assemble kp across the regions
+  r = f-kp
+  z = dprecon_g*r
+
+  call scatter_and_assemble(neq,z,z_g)
+  p = z
+  ! pcg iteration
+  pcg: do cg_iter = 1,CG_MAXITER
+    call scatter_and_assemble(neq,p,p_g)
+
+    call product_stiffness_vector(neq,p_g,kp)
+
+    rz=dot_product_par(r,z_g)
+    alpha=rz/dot_product_par(p_g,kp)
+    u_g = u_g+alpha*p_g
+
+    maxp = maxvec(abs(p_g)); maxu = maxvec(abs(u_g))
+    !if (abs(alpha)*maxvec(abs(p_g))/maxvec(abs(u_g)) <= CG_TOL) then
+    if (abs(alpha)*maxp/maxu <= CG_TOL) then
+      return
+    endif
+    r0 = r
+    r = r-alpha*kp
+    z = dprecon_g*r
+    call scatter_and_assemble(neq,z,z_g)
+    !beta=dot_product_par(r,z_g)/rz ! Fletcher–Reeves
+    beta=dot_product_par(r-r0,z_g)/rz !  Polak–Ribière
+    p = z+beta*p
+  enddo pcg
+  if (myid == 0) write(*,'(a)')'ERROR: PCG solver doesn''t converge!'
+  call sync_all
+  call close_process
+
+  return
+
+  end subroutine diagpcg_solver
+
+!
+!============================================
+!
+
+! diagonally preconditioned conjugate-gradient solver
+
+  subroutine diagpcg_solver3(myid,neq,u_g,f,dprecon_g,cg_iter)
+
+  !use math_library
+  implicit none
+  integer,intent(in) :: myid,neq
+  real(kind=kreal),dimension(0:neq),intent(inout) :: u_g
+  real(kind=kreal),dimension(0:neq),intent(in) :: f,dprecon_g
+  integer,intent(out) :: cg_iter
+
+  real(kind=kreal) :: alpha,beta,rz,maxp,maxu
+  real(kind=kreal),dimension(0:neq) :: kp,p,p_g,r0,r,z,z_g!,r_g
+
+  real(kind=kreal),parameter :: zero=0.0_kreal,zerotol=1.0e-12_kreal
+
+  ! all global array variables are both MPI and regionally assembled.
+  ! local array variables are regionally assembled.
+  ! for MPI assembly of such array, we have to scatter again to region with
+  ! regionally assembled values.
+
+  ! PCG solver
+
+  ! check if RHS is 0
+  if (maxvec(abs(f)) <= zerotol) then
+    u_g = zero
+    print *,'max load:',maxval(abs(f))
+    return
+  endif
+  kp = zero
+  if (maxval(abs(u_g)) > zero) then
+    call product_stiffness_vector3(neq,u_g,kp)
+  endif
+  ! assemble kp across the regions
+  r = f-kp
+  z = dprecon_g*r
+
+  call scatter_and_assemble3(neq,z,z_g)
+
+  p = z
+  ! pcg iteration
+  pcg: do cg_iter = 1,CG_MAXITER
+    call scatter_and_assemble3(neq,p,p_g)
+    !call assemble_ghosts(myid,ngpart,maxngnode,nndof,neq,p,p_g) !,gdof)
+    !print *,'pcg_bp4'
+
+    call product_stiffness_vector3(neq,p_g,kp)
+
+    rz=dot_product_par(r,z_g)
+    alpha=rz/dot_product_par(p_g,kp)
+    u_g = u_g+alpha*p_g
+
+    maxp = maxvec(abs(p_g)); maxu = maxvec(abs(u_g))
+    !if (abs(alpha)*maxvec(abs(p_g))/maxvec(abs(u_g)) <= CG_TOL) then
+    if (abs(alpha)*maxp/maxu <= CG_TOL) then
+      return
+    endif
+    r0 = r !PR: Polak–Ribière
+    r = r-alpha*kp
+    z = dprecon_g*r
+    !print *,'pcg_bp8'
+    call scatter_and_assemble3(neq,z,z_g)
+    beta=dot_product_par(r-r0,z_g)/rz !PR: Polak–Ribière
+    p = z+beta*p
+  enddo pcg
+  if (myid == 0) write(*,'(a)')'ERROR: PCG solver doesn''t converge!'
+  call sync_all
+  call close_process
+
+  return
+
+  end subroutine diagpcg_solver3
+
+!
+!============================================
+!
+
+!TODO: this subroutine can be made non-blocking decomposing inner and outer element
+
+  subroutine product_stiffness_vector(neq,p_g,kp)
+
+  use specfem_par, only: ADD_TRINF,NGLLCUBE,NSPEC_INNER_CORE,NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE, &
+    NSPEC_TRINFINITE,NSPEC_INFINITE,NGLOB_INNER_CORE,NGLOB_OUTER_CORE,NGLOB_CRUST_MANTLE, &
+    NGLOB_TRINFINITE,NGLOB_INFINITE
+  use specfem_par_crustmantle, only: gdof_cm,inode_elmt_cm,storekmat_crust_mantle
+  use specfem_par_outercore, only: gdof_oc,inode_elmt_oc,storekmat_outer_core
+  use specfem_par_innercore, only: gdof_ic,inode_elmt_ic,storekmat_inner_core
+  use specfem_par_trinfinite, only: gdof_trinf,inode_elmt_trinf,storekmat_trinfinite
+  use specfem_par_infinite, only: gdof_inf,inode_elmt_inf,storekmat_infinite
+
+  implicit none
+  integer,intent(in) :: neq
+  real(kind=kreal),intent(in) :: p_g(0:neq)
+  real(kind=kreal),intent(out) :: kp(0:neq)
+
+  real(kind=kreal) :: km(NGLLCUBE,NGLLCUBE),km_trinf(NGLLCUBE,NGLLCUBE),km_inf(NGLLCUBE,NGLLCUBE)
+  real(kind=kreal) :: kp_ic(NGLOB_INNER_CORE),kp_oc(NGLOB_OUTER_CORE), &
+  kp_cm(NGLOB_CRUST_MANTLE),kp_trinf(NGLOB_TRINFINITE),kp_inf(NGLOB_INFINITE)
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+  integer :: i_elmt,inode(NGLLCUBE),igdof(NGLLCUBE),inode_trinf(NGLLCUBE),igdof_trinf(NGLLCUBE), &
+  inode_inf(NGLLCUBE),igdof_inf(NGLLCUBE)
+
+  ! inner core
+  kp_ic = zero
+  do i_elmt = 1,NSPEC_INNER_CORE
+    inode=inode_elmt_ic(:,i_elmt)
+    igdof=gdof_ic(inode)
+    km=storekmat_inner_core(:,:,i_elmt)
+    kp_ic(inode)=kp_ic(inode)+matmul(km,p_g(igdof))
+  enddo
+
+  ! outer core
+  kp_oc = zero
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    inode=inode_elmt_oc(:,i_elmt)
+    igdof=gdof_oc(inode)
+    km=storekmat_outer_core(:,:,i_elmt)
+    kp_oc(inode)=kp_oc(inode)+matmul(km,p_g(igdof))
+  enddo
+
+  ! crust mantle
+  kp_cm = zero
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    inode=inode_elmt_cm(:,i_elmt)
+    igdof=gdof_cm(inode)
+    km=storekmat_crust_mantle(:,:,i_elmt)
+    kp_cm(inode)=kp_cm(inode)+matmul(km,p_g(igdof))
+  enddo
+
+  ! transition infinite
+  if (ADD_TRINF) then
+  kp_trinf = zero
+  do i_elmt = 1,NSPEC_TRINFINITE
+    inode_trinf=inode_elmt_trinf(:,i_elmt)
+    igdof_trinf=gdof_trinf(inode_trinf)
+    km_trinf=storekmat_trinfinite(:,:,i_elmt)
+    kp_trinf(inode_trinf)=kp_trinf(inode_trinf)+matmul(km_trinf,p_g(igdof_trinf))
+  enddo
+  endif
+
+  ! infinite
+  kp_inf = zero
+  do i_elmt = 1,NSPEC_INFINITE
+    inode_inf=inode_elmt_inf(:,i_elmt)
+    igdof_inf=gdof_inf(inode_inf)
+    km_inf=storekmat_infinite(:,:,i_elmt)
+    kp_inf(inode_inf)=kp_inf(inode_inf)+matmul(km_inf,p_g(igdof_inf))
+  enddo
+
+  ! assemble acroos the regions but not across the MPIs
+  kp = zero
+  ! crust_mantle
+  kp(gdof_cm)=kp(gdof_cm)+kp_cm
+
+  ! outer core
+  kp(gdof_oc)=kp(gdof_oc)+kp_oc
+
+  ! inner core
+  kp(gdof_ic)=kp(gdof_ic)+kp_ic
+
+  ! transition infinite
+  if (ADD_TRINF)kp(gdof_trinf)=kp(gdof_trinf)+kp_trinf
+
+  ! infinite
+  kp(gdof_inf)=kp(gdof_inf)+kp_inf
+
+  kp(0)=zero
+
+  return
+
+  end subroutine product_stiffness_vector
+
+!
+!============================================
+!
+
+  subroutine scatter_and_assemble(neq,array,array_g)
+
+  use specfem_par, only: ADD_TRINF,NPROCTOT_VAL,NGLOB_INNER_CORE,NGLOB_OUTER_CORE, &
+  NGLOB_CRUST_MANTLE,NGLOB_TRINFINITE,NGLOB_INFINITE,NGLOB_CRUST_MANTLE, &
+  num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+  nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+  my_neighbors_crust_mantle, &
+  num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+  nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+  my_neighbors_outer_core, &
+  num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+  nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+  my_neighbors_inner_core, &
+  num_interfaces_trinfinite,max_nibool_interfaces_trinfinite, &
+  nibool_interfaces_trinfinite,ibool_interfaces_trinfinite,my_neighbors_trinfinite, &
+  num_interfaces_infinite,max_nibool_interfaces_infinite, &
+  nibool_interfaces_infinite,ibool_interfaces_infinite,my_neighbors_infinite
+  use specfem_par_crustmantle, only: gdof_cm
+  use specfem_par_outercore, only: gdof_oc
+  use specfem_par_innercore, only: gdof_ic
+  use specfem_par_trinfinite, only: gdof_trinf
+  use specfem_par_infinite, only: gdof_inf
+
+  implicit none
+  integer,intent(in) :: neq
+  real(kind=kreal),intent(in) :: array(0:neq)
+  real(kind=kreal),intent(out) :: array_g(0:neq)
+
+  real(kind=kreal) :: array_ic(NGLOB_INNER_CORE),array_oc(NGLOB_OUTER_CORE), &
+  array_cm(NGLOB_CRUST_MANTLE),array_trinf(NGLOB_TRINFINITE),array_inf(NGLOB_INFINITE)
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+
+  ! scatter array
+  array_ic=array(gdof_ic)
+  array_oc=array(gdof_oc)
+  array_cm=array(gdof_cm)
+  if (ADD_TRINF)array_trinf = array(gdof_trinf)
+  array_inf=array(gdof_inf)
+
+  ! assemble across the MPI processes in a region
+  ! crust_mantle
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_CRUST_MANTLE,array_cm, &
+  num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+  nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+  my_neighbors_crust_mantle)
+
+  ! outer core
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_OUTER_CORE,array_oc, &
+  num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+  nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+  my_neighbors_outer_core)
+
+  ! inner core
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INNER_CORE,array_ic, &
+  num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+  nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+  my_neighbors_inner_core)
+
+  ! transition infinite
+  if (ADD_TRINF) then
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_TRINFINITE,array_trinf, &
+  num_interfaces_trinfinite,max_nibool_interfaces_trinfinite, &
+  nibool_interfaces_trinfinite,ibool_interfaces_trinfinite,my_neighbors_trinfinite)
+  endif
+
+  ! infinite
+  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INFINITE,array_inf, &
+  num_interfaces_infinite,max_nibool_interfaces_infinite, &
+  nibool_interfaces_infinite,ibool_interfaces_infinite,my_neighbors_infinite)
+
+  ! gather from all regions but not assemble since it is already assembled across
+  ! the regions assemble across the different regions in a process
+  array_g = zero
+  ! crust_mantle
+  array_g(gdof_cm)=array_cm
+
+  ! outer core
+  array_g(gdof_oc)=array_oc
+
+  ! inner core
+  array_g(gdof_ic)=array_ic
+
+  ! transition infinite
+  if (ADD_TRINF) then
+  array_g(gdof_trinf)=array_trinf
+  endif
+
+  ! infinite
+  array_g(gdof_inf)=array_inf
+
+  array_g(0)=zero
+
+  return
+
+  end subroutine scatter_and_assemble
+
+!
+!============================================
+!
+
+! TODO: this subroutine can be made non-blocking decomposing inner and outer
+! element
+
+  subroutine product_stiffness_vector3(neq,p_g,kp)
+
+  use specfem_par, only: ADD_TRINF,NGLLCUBE_INF,NSPEC_INNER_CORE,NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE, &
+  NSPEC_TRINFINITE,NSPEC_INFINITE,nnode_ic1,nnode_oc1,nnode_cm1,nnode_trinf1,nnode_inf1
+  use specfem_par_crustmantle, only: gdof_cm1,inode_elmt_cm1,storekmat_crust_mantle1
+  use specfem_par_outercore, only: gdof_oc1,inode_elmt_oc1,storekmat_outer_core1
+  use specfem_par_innercore, only: gdof_ic1,inode_elmt_ic1,storekmat_inner_core1
+  use specfem_par_trinfinite, only: gdof_trinf1,inode_elmt_trinf1,storekmat_trinfinite1
+  use specfem_par_infinite, only: gdof_inf1,inode_elmt_inf1,storekmat_infinite1
+
+  implicit none
+  integer,intent(in) :: neq
+  real(kind=kreal),intent(in) :: p_g(0:neq)
+  real(kind=kreal),intent(out) :: kp(0:neq)
+
+  real(kind=kreal) :: km(NGLLCUBE_INF,NGLLCUBE_INF),km_trinf(NGLLCUBE_INF,NGLLCUBE_INF),km_inf(NGLLCUBE_INF,NGLLCUBE_INF)
+  real(kind=kreal) :: kp_ic(nnode_ic1),kp_oc(nnode_oc1),kp_cm(nnode_cm1), &
+  kp_trinf(nnode_trinf1),kp_inf(nnode_inf1)
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+  integer :: i_elmt,inode(NGLLCUBE_INF),igdof(NGLLCUBE_INF),inode_trinf(NGLLCUBE_INF),igdof_trinf(NGLLCUBE_INF),inode_inf(NGLLCUBE_INF),igdof_inf(NGLLCUBE_INF)
+
+  ! inner core
+  kp_ic = zero
+  do i_elmt = 1,NSPEC_INNER_CORE
+    inode=inode_elmt_ic1(:,i_elmt)
+    igdof=gdof_ic1(inode)
+    km=storekmat_inner_core1(:,:,i_elmt)
+    kp_ic(inode)=kp_ic(inode)+matmul(km,p_g(igdof))
+  enddo
+
+  ! outer core
+  kp_oc = zero
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    inode=inode_elmt_oc1(:,i_elmt)
+    igdof=gdof_oc1(inode)
+    km=storekmat_outer_core1(:,:,i_elmt)
+    kp_oc(inode)=kp_oc(inode)+matmul(km,p_g(igdof))
+  enddo
+
+  ! crust mantle
+  kp_cm = zero
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    inode=inode_elmt_cm1(:,i_elmt)
+    igdof=gdof_cm1(inode)
+    km=storekmat_crust_mantle1(:,:,i_elmt)
+    kp_cm(inode)=kp_cm(inode)+matmul(km,p_g(igdof))
+  enddo
+
+  ! transition infinite
+  if (ADD_TRINF) then
+  kp_trinf = zero
+  do i_elmt = 1,NSPEC_TRINFINITE
+    inode_trinf=inode_elmt_trinf1(:,i_elmt)
+    igdof_trinf=gdof_trinf1(inode_trinf)
+    km_trinf=storekmat_trinfinite1(:,:,i_elmt)
+    kp_trinf(inode_trinf)=kp_trinf(inode_trinf)+matmul(km_trinf,p_g(igdof_trinf))
+  enddo
+  endif
+
+  ! infinite
+  kp_inf = zero
+  do i_elmt = 1,NSPEC_INFINITE
+    inode_inf=inode_elmt_inf1(:,i_elmt)
+    igdof_inf=gdof_inf1(inode_inf)
+    km_inf=storekmat_infinite1(:,:,i_elmt)
+    kp_inf(inode_inf)=kp_inf(inode_inf)+matmul(km_inf,p_g(igdof_inf))
+  enddo
+
+  ! assemble acroos the regions but not across the MPIs
+  ! assemble across the different regions in a process
+  kp = zero
+  ! crust_mantle
+  kp(gdof_cm1)=kp(gdof_cm1)+kp_cm
+
+  ! outer core
+  kp(gdof_oc1)=kp(gdof_oc1)+kp_oc
+
+  ! inner core
+  kp(gdof_ic1)=kp(gdof_ic1)+kp_ic
+
+  ! transitio infinite
+  if (ADD_TRINF) then
+  kp(gdof_trinf1)=kp(gdof_trinf1)+kp_trinf
+  endif
+
+  ! infinite
+  kp(gdof_inf1)=kp(gdof_inf1)+kp_inf
+
+  kp(0)=zero
+
+  return
+
+  end subroutine product_stiffness_vector3
+
+!
+!============================================
+!
+
+  subroutine scatter_and_assemble3(neq,array,array_g)
+
+  use specfem_par, only: ADD_TRINF,num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
+  nibool_interfaces_crust_mantle1,ibool_interfaces_crust_mantle1, &
+  my_neighbors_crust_mantle1, &
+  num_interfaces_outer_core1,max_nibool_interfaces_outer_core1, &
+  nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
+  my_neighbors_outer_core1, &
+  num_interfaces_inner_core1,max_nibool_interfaces_inner_core1, &
+  nibool_interfaces_inner_core1,ibool_interfaces_inner_core1, &
+  my_neighbors_inner_core1, &
+  num_interfaces_trinfinite1,max_nibool_interfaces_trinfinite1, &
+  nibool_interfaces_trinfinite1,ibool_interfaces_trinfinite1, &
+  my_neighbors_trinfinite1, &
+  num_interfaces_infinite1,max_nibool_interfaces_infinite1, &
+  nibool_interfaces_infinite1,ibool_interfaces_infinite1, &
+  my_neighbors_infinite1, &
+  NPROCTOT_VAL,nnode_ic1,nnode_oc1,nnode_cm1,nnode_trinf1,nnode_inf1
+
+  use specfem_par_crustmantle, only: gdof_cm1
+  use specfem_par_outercore, only: gdof_oc1
+  use specfem_par_innercore, only: gdof_ic1
+  use specfem_par_trinfinite, only: gdof_trinf1
+  use specfem_par_infinite, only: gdof_inf1
+
+  implicit none
+  integer,intent(in) :: neq
+  real(kind=kreal),intent(in) :: array(0:neq)
+  real(kind=kreal),intent(out) :: array_g(0:neq)
+
+  real(kind=kreal) :: array_ic(nnode_ic1),array_oc(nnode_oc1),array_cm(nnode_cm1), &
+  array_trinf(nnode_trinf1),array_inf(nnode_inf1)
+
+  real(kind=kreal),parameter :: zero=0.0_kreal
+
+  ! scatter array
+  array_ic=array(gdof_ic1)
+  array_oc=array(gdof_oc1)
+  array_cm=array(gdof_cm1)
+  if (ADD_TRINF)array_trinf = array(gdof_trinf1)
+  array_inf=array(gdof_inf1)
+
+  ! assemble across the MPI processes in a region
+  ! crust_mantle
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_cm1,array_cm, &
+  num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
+  nibool_interfaces_crust_mantle1,ibool_interfaces_crust_mantle1, &
+  my_neighbors_crust_mantle1)
+
+  ! outer core
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_oc1,array_oc, &
+  num_interfaces_outer_core1,max_nibool_interfaces_outer_core1, &
+  nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
+  my_neighbors_outer_core1)
+
+  ! inner core
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_ic1,array_ic, &
+  num_interfaces_inner_core1,max_nibool_interfaces_inner_core1, &
+  nibool_interfaces_inner_core1,ibool_interfaces_inner_core1, &
+  my_neighbors_inner_core1)
+
+  ! transition infinite
+  if (ADD_TRINF) then
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_trinf1,array_trinf, &
+  num_interfaces_trinfinite1,max_nibool_interfaces_trinfinite1, &
+  nibool_interfaces_trinfinite1,ibool_interfaces_trinfinite1,my_neighbors_trinfinite1)
+  endif
+
+  ! infinite
+  call assemble_MPI_scalar(NPROCTOT_VAL,nnode_inf1,array_inf, &
+  num_interfaces_infinite1,max_nibool_interfaces_infinite1, &
+  nibool_interfaces_infinite1,ibool_interfaces_infinite1,my_neighbors_infinite1)
+
+  ! gather from all regions but not assemble since it is already assembled across
+  ! the regions assemble across the different regions in a process
+  array_g = zero
+  ! crust_mantle
+  array_g(gdof_cm1)=array_cm
+
+  ! outer core
+  array_g(gdof_oc1)=array_oc
+
+  ! inner core
+  array_g(gdof_ic1)=array_ic
+
+  ! transition infinite
+  if (ADD_TRINF)array_g(gdof_trinf1)=array_trinf
+  ! infinite
+  array_g(gdof_inf1)=array_inf
+
+  array_g(0)=zero
+
+  return
+
+  end subroutine scatter_and_assemble3
+
+!
+!============================================
+!
+
+! interpolate solution to original mesh to compute initial guess
+
+  subroutine interpolate3to5(nelmt,nnode,nnode1,inode_elmt,nmir,inode_map,isgll, &
+                             igll_on,x3,x5)
+
+  use gll_library1, only: gll_quadrature3inNGLL,zwgljd
+  use constants_solver, only: ndim,NGLLX,NGLLX_INF,NGLLCUBE,NGLLCUBE_INF
+  implicit none
+  integer,intent(in) :: nelmt,nnode,nnode1,inode_elmt(NGLLCUBE,nelmt),nmir(nnode), &
+  inode_map(2,nnode),igll_on(NGLLCUBE_INF)
+  logical,intent(in) :: isgll(NGLLCUBE)
+  real(kind=kreal),intent(in) :: x3(nnode1) ! array for 3 GLLX points
+  real(kind=kreal),intent(out) :: x5(nnode) ! aray for 5 GLLX points
+  double precision :: lagrange_gll3inNGLL(NGLLCUBE,27),xigll(NGLLX),wxgll(NGLLX), &
+  xigll1(NGLLX_INF),wxgll1(NGLLX_INF)
+
+  integer :: i_node,ielmt,igll,inode1,inodes1(NGLLCUBE_INF)
+
+  call zwgljd(xigll1,wxgll1,NGLLX_INF,0.d0,0.d0)
+  call zwgljd(xigll,wxgll,NGLLX,0.d0,0.d0)
+  call gll_quadrature3inNGLL(ndim,NGLLX,NGLLCUBE,xigll,xigll1,lagrange_gll3inNGLL)
+
+  ! inner core
+  x5 = 0.0_kreal
+  do i_node = 1,nnode!NGLOB_INNER_CORE
+    inode1=nmir(i_node)
+    ielmt=inode_map(1,i_node)
+    if (ielmt <= 0) then
+      cycle ! skip fictitious nodes
+    endif
+    igll=inode_map(2,i_node)
+    if (isgll(igll)) then
+      x5(i_node)=x3(inode1)
+    else
+      ! interpolate values
+      inodes1 = nmir(inode_elmt(igll_on,ielmt))
+      x5(i_node)=sum(lagrange_gll3inNGLL(igll,:)*x3(inodes1))
+    endif
+  enddo
+  end subroutine interpolate3to5
+
+
+end module solver_mpi
+
+#endif
+
diff --git a/src/specfem3D/SIEM_solver_petsc.F90 b/src/specfem3D/SIEM_solver_petsc.F90
new file mode 100644
index 000000000..7f6093aab
--- /dev/null
+++ b/src/specfem3D/SIEM_solver_petsc.F90
@@ -0,0 +1,1896 @@
+!=====================================================================
+!
+!                       S p e c f e m 3 D  G l o b e
+!                       ----------------------------
+!
+!     Main historical authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!                and CNRS / University of Marseille, France
+!                 (there are currently many more authors!)
+! (c) Princeton University and CNRS / University of Marseille, April 2014
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! TODO: full gravity is not working yet, needs to fully implement solver...
+#ifdef USE_PETSC_NOT_WORKING_YET
+
+!AUTHORS:
+!Hom Nath Gharti
+!Stefano Zhampini
+!REFERENCE:
+!PETSC documentation
+
+!-------------------------------------------------------------------------------
+module solver_petsc
+
+  !#include "petsc/finclude/petscsys.h"
+  !#include "petsc/finclude/petscvec.h"
+  !#include "petsc/finclude/petscvec.h90"
+  !#include "petsc/finclude/petscmat.h"
+  #include "petsc/finclude/petscksp.h"
+  !#include "petsc/finclude/petscpc.h"
+
+  use constants_solver, only: kreal => CUSTOM_REAL,nproc => NPROCTOT_VAL, &
+    KSP_ATOL,KSP_DTOL,KSP_RTOL,KSP_MAXITER, &
+    KSP_ATOL1,KSP_DTOL1,KSP_RTOL1,KSP_MAXITER1
+  use specfem_par, only: myrank
+  use specfem_par, only: neq,ngdof,nsparse,kmat_sparse,krow_sparse,kcol_sparse, &
+    kgrow_sparse,kgcol_sparse
+  use specfem_par, only: neq1,ngdof1,nsparse1,kmat_sparse1,krow_sparse1, &
+    kcol_sparse1,kgrow_sparse1,kgcol_sparse1,l2gdof1
+
+  use math_library_mpi, only: maxvec,minvec
+
+  use specfem_par, only: num_interfaces_inner_core1, &
+    max_nibool_interfaces_inner_core1, &
+    my_neighbors_inner_core1,nibool_interfaces_inner_core1, &
+    ibool_interfaces_inner_core1,num_interfaces_outer_core1, &
+    max_nibool_interfaces_outer_core1,my_neighbors_outer_core1, &
+    nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
+    num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
+    my_neighbors_crust_mantle1,nibool_interfaces_crust_mantle1, &
+    ibool_interfaces_crust_mantle1,num_interfaces_trinfinite1, &
+    max_nibool_interfaces_trinfinite1,my_neighbors_trinfinite1, &
+    nibool_interfaces_trinfinite1,ibool_interfaces_trinfinite1, &
+    num_interfaces_infinite1, &
+    max_nibool_interfaces_infinite1,my_neighbors_infinite1, &
+    nibool_interfaces_infinite1,ibool_interfaces_infinite1
+
+  use petscksp
+
+  implicit none
+
+  PetscInt,parameter :: COMMAND = 0,CG = 1,SUPERLU = 2,MUMPS = 3 ! solver type
+  PetscBool      flg,flg_ch,flg_lu,flg_ilu
+  PetscInt       ival,icntl
+  PetscReal      val
+  ! Level-1 solver
+  Vec              xvec1,bvec1,uvec1,local_vec1
+  Mat              Amat1,Fmat1
+  KSP              ksp1
+  PC               pc1
+  PetscInt         iter1
+  PetscInt         solver_type1 ! solver type
+  ! For communications from local to global
+  VecScatter             pscat1,vscat1
+  ! Stores l2g map info
+  ISLocalToGlobalMapping l2gmap
+  !PetscBool        flg
+
+  ! ADJOINT SIMULATIONS
+  ! FOR NOW WE ASSUME THAT THE FORWARD AND ADJOINT SIMULATIONS ARE SOLVED
+  ! WITH A CG METHOD, AND THAT ONLY LEVEL 1 SOLVER IS USED
+  ! Adjoint Level-1 solver
+  ! notes: no b_uvec1 since it seems to just be created and destroyed
+  !        no bAmat1 since we can use the same stiffness matrix
+  !        I think we can probably use b_pc1 = pc1 but unsure
+  Vec              b_xvec1, b_bvec1, b_local_vec1
+  KSP              b_ksp1
+  Mat              b_Fmat1 ! not used unless MUMPS chosen (not implemented)
+  PC               b_pc1
+  PetscInt         b_iter1
+  PetscInt         b_solver_type1 ! solver type
+  ! For communications from local to global
+  VecScatter             b_pscat1, b_vscat1
+
+
+  ! Level-2 solver
+  Vec              xvec,bvec,uvec
+  Mat              Amat
+  KSP              ksp
+  PC               pc
+  PetscErrorCode   ierr
+  PetscInt         iter
+  PetscInt :: nzeros_max,nzeros_min,nzerosoff_max
+  PetscInt :: ngdof_part1
+  PetscInt :: ig0,ig1
+
+contains
+
+!===============================================================================
+! Level-1 solver
+!===============================================================================
+  subroutine petsc_initialize1()
+  use specfem_par, only: ADD_TRINF,NNDOF, SIMULATION_TYPE
+  use specfem_par_innercore, only: ggdof_ic1
+  use specfem_par_outercore, only: ggdof_oc1
+  use specfem_par_crustmantle, only: ggdof_cm1
+  use specfem_par_trinfinite, only: ggdof_trinf1
+  use specfem_par_infinite, only: ggdof_inf1
+  implicit none
+  Vec         nnzv,nzeror_gvec1,nzeror_dvec1,nzeror_ovec1,iproc_gvec1, &
+              interface_gvec1,ninterface_dvec1,ninterface_ovec1,nself_gvec1
+  PetscInt :: i,istart,iend,n,n1,ncol_part1,nrow_part1
+  PetscInt :: nnzmax,lsize,idxinsert(neq1),ldof1(neq1)
+  PetscInt,allocatable :: nzeros(:),ig_array1(:)
+  PetscScalar,allocatable :: rproc_array1(:)
+  PetscScalar rval,valinsert(neq1),nnzv_v(1)
+  PetscOffset nnzv_i
+  PetscInt, allocatable :: nnz(:)
+  IS global_is,local_is, b_global_is, b_local_is
+
+  PetscInt :: icount,igdof,ind,maxrank0,ng,ng0,ng1,np0
+  PetscInt,allocatable :: inzeror_array1(:),iproc_array1(:),nzeros_row(:)
+  PetscInt,allocatable :: nnzero_diag1(:),nnzero_offdiag1(:)
+  PetscInt,allocatable :: nnzero_diag1r(:),nnzero_offdiag1r(:)
+  PetscScalar,pointer :: nzeror_array1(:),rproc_array(:)
+  PetscScalar,pointer :: nzeror_darray1(:),nzeror_oarray1(:),rnself_array1(:)
+  PetscReal :: fac_ni,max_ni,pmax,pmin,rnid,rnioffd,rnd,rnoffd,rproc,zero
+
+  PetscInt :: ir,ic,igr,igc,ir0,ic0,igr0,igc0
+  PetscInt :: nd,noffd,nid,nioffd
+  PetscInt :: i_bool,i_ndof
+
+  PetscInt :: nibool,ng_interface
+  PetscInt,allocatable :: ibool_interface(:),ig_interface(:),isg_interface(:), &
+                          nself_array1(:)
+  PetscInt, allocatable :: ninterface_darray1(:),ninterface_oarray1(:)
+  PetscScalar,allocatable :: rg_interface(:),rnself_lgarray1(:)
+  PetscScalar,pointer :: rninterface_darray1(:),rninterface_oarray1(:)
+
+  character(len=10) :: char_myrank
+  character(len=60) :: outf_name
+
+
+  if (myrank == 0) write(*,*)
+  if (myrank == 0) write(*,*) ' ---------- Initialise PETSC: ---------- '
+  if (myrank == 0) write(*,*)
+
+
+  call PetscInitialize(PETSC_NULL_CHARACTER,ierr)
+
+  ! count number of nonzeros per row
+  allocate(nzeros(neq1))
+  nzeros = 0
+  do i = 1,nsparse1
+    nzeros(krow_sparse1(i))=nzeros(krow_sparse1(i))+1
+  enddo
+  nzeros_max=maxvec(nzeros)
+  nzeros_min=minvec(nzeros)
+  nzerosoff_max = nzeros_max
+  !nzeros_max=4*nzeros_max
+  !nzeros=nzeros
+  !nzeros=5*nzeros
+  if (myrank == 0) write(*,*) 'nzeros in 1th index:',nzeros(1)
+  if (myrank == 0) write(*,*) 'ngdof1:',ngdof1,' nzeros_max:',nzeros_max,' nzeros_min:', &
+  nzeros_min,count(krow_sparse1 == 1)
+
+  ! precompute ownership range OR partion layout
+  !
+  ng1 = ngdof1/nproc
+  ng0 = ceiling(real(ngdof1)/real(nproc))
+
+  np0 = ngdof1-nproc*ng1
+
+  if (np0 == 0) then
+  ! ng0=ng1
+  ! all processors have equal gdofs
+    ng = ng0
+    ig0 = myrank*ng0 ! 0-based index
+    ig1 = ig0+ng0-1
+  else if (np0 > 0) then
+  ! first np0 processors have ng0 gdofs each and remainging processors have ng1
+  ! gdofs each
+    maxrank0 = np0-1 ! myrank is 0-based
+    if (myrank <= maxrank0) then
+      ng = ng0
+      ig0 = myrank*ng0 ! 0-based index
+      ig1 = ig0+ng0-1
+    else !myrank > maxrank0
+      ng = ng1
+      ig0=np0*ng0+(myrank-np0)*ng1 ! 0-based index
+      ig1 = ig0+ng1-1
+    endif
+  else
+  ! Error
+    write(*,*) 'ERROR: illegal value of "np0"!'
+    stop
+  endif
+  !if (myrank==0) write(*,*) 'OK0:',ng0,ng1,ng,ig0,ig1
+  !call sync_all
+  allocate(nzeros_row(ng))
+  nzeros_row = 0
+  !if (myrank==0) then
+  !  open(1,file='test_file_proc1',action='write',status='replace')
+  !  write(1,*)ng,ig0,ig1
+  !endif
+  do i = 1,nsparse1
+    if (kgrow_sparse1(i)-1 >= ig0 .and. kgrow_sparse1(i)-1 <= ig1) then
+      ind=kgrow_sparse1(i)-ig0 ! Fortran indexing
+      !if (myrank==0) write(1,*) ind,kgrow_sparse1(i)
+      nzeros_row(ind)=nzeros_row(ind)+1
+    endif
+  enddo
+  !if (myrank==0) close(1)
+  !nzeros_row=2*nzeros_row
+  !if (myrank==0) write(*,*) 'OK1:',nzeros_row(1),minval(nzeros_row),maxval(nzeros_row)
+  !call sync_all
+  write(char_myrank,'(i4)')myrank
+  !outf_name='precomp_nonzeros'//trim(adjustl(char_myrank))
+  !open(1,file=outf_name,action='write',status='replace')
+  !write(1,'(i4)')nzeros_row
+  !close(1)
+
+  call VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,ngdof1,xvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,bvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,uvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,nzeror_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,nzeror_dvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,nzeror_ovec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,iproc_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,interface_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,nself_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,ninterface_dvec1,ierr)
+  CHKERRA(ierr)
+  call VecDuplicate(xvec1,ninterface_ovec1,ierr)
+  CHKERRA(ierr)
+
+  if (SIMULATION_TYPE == 3) then
+    ! Create backward xvector and associated b vec:
+    call VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,ngdof1,b_xvec1,ierr)
+    CHKERRA(ierr)
+    call VecDuplicate(b_xvec1,b_bvec1,ierr)
+    CHKERRA(ierr)
+  endif
+
+
+  ! loval vector
+  call VecCreateSeq(PETSC_COMM_SELF,neq1,local_vec1,ierr)
+  if (SIMULATION_TYPE == 3) then
+    call VecCreateSeq(PETSC_COMM_SELF,neq1,b_local_vec1,ierr)
+  endif
+
+  ! objects needed for global vector scattering to local vector
+  ! create local and global IS (index set) objects from the array of local and
+  ! global indices
+  call ISCreateGeneral(PETSC_COMM_WORLD,neq1,l2gdof1(1:),PETSC_COPY_VALUES, &
+  global_is,ierr)
+  CHKERRA(ierr)
+  call ISCreateStride(PETSC_COMM_SELF,neq1,0,1,local_is,ierr);
+  CHKERRA(ierr)
+
+  if (SIMULATION_TYPE == 3) then
+    call ISCreateGeneral(PETSC_COMM_WORLD,neq1,l2gdof1(1:),PETSC_COPY_VALUES, &
+    b_global_is,ierr)
+    CHKERRA(ierr)
+    call ISCreateStride(PETSC_COMM_SELF,neq1,0,1,b_local_is,ierr);
+    CHKERRA(ierr)
+  endif
+
+
+
+  ! create VecScatter object which is needed to scatter PETSc parallel vectors
+  call VecScatterCreate(bvec1,global_is,local_vec1,local_is,vscat1,ierr)
+  CHKERRA(ierr)
+
+  if (SIMULATION_TYPE == 3) then
+    call VecScatterCreate(b_bvec1, b_global_is, b_local_vec1, b_local_is, b_vscat1, ierr)
+    CHKERRA(ierr)
+  endif
+  call ISDestroy(global_is,ierr) ! no longer necessary
+  call ISDestroy(local_is,ierr)  ! no longer necessary
+  call ISDestroy(b_global_is,ierr) ! no longer necessary
+  call ISDestroy(b_local_is,ierr)  ! no longer necessary
+
+
+  ! assign owner processor ID to each gdof (or row)
+  allocate(ig_array1(ng),rproc_array1(ng))
+  ig_array1=(/ (i,i = ig0,ig1) /)
+  !rproc=real(myrank)
+  rproc_array1=real(myrank)
+  call VecSetValues(iproc_gvec1,ng,ig_array1,rproc_array1,INSERT_VALUES,ierr);
+  CHKERRA(ierr)
+  deallocate(ig_array1,rproc_array1)
+  call VecAssemblyBegin(iproc_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecAssemblyEnd(iproc_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecMin(iproc_gvec1,PETSC_NULL_INTEGER,pmin,ierr)
+  call VecMax(iproc_gvec1,PETSC_NULL_INTEGER,pmax,ierr)
+  if (myrank == 0) write(*,*) 'iproc range',pmin,pmax; call sync_all
+  !call VecGetArrayF90(iproc_gvec1,rproc_array,ierr)
+  !CHKERRA(ierr)
+  !allocate(iproc_array(ng))
+  !iproc_array=int(rproc_array(1:n))
+  !call VecRestoreArrayF90(iproc_gvec1,rproc_array,ierr)
+  !CHKERRA(ierr)
+  ! copy solution to local array
+  allocate(iproc_array1(neq1),rproc_array1(neq1))
+  call scatter_globalvec1(iproc_gvec1, rproc_array1)
+  iproc_array1=int(rproc_array1)
+  if (myrank == 0) write(*,*) 'vector3 iproc',minval(iproc_array1),maxval(iproc_array1)
+  call sync_all
+  !!TODO: use local scatter
+  !call VecScatterCreateToAll(iproc_gvec1,pscat1,iproc_garray,ierr);
+  !call VecScatterBegin(pscat1,iproc_gvec1,iproc_garray,INSERT_VALUES,SCATTER_FORWARD,ierr);
+  !call VecScatterEnd(pscat1,iproc_gvec1,iproc_garray,INSERT_VALUES,SCATTER_FORWARD,ierr);
+  !call VecScatterDestroy(pscat1);
+
+  ! assign interface ID to each gdofs
+  rval = 1.0
+  ! inner core
+  do i = 1,num_interfaces_inner_core1
+      nibool=nibool_interfaces_inner_core1(i)
+      allocate(ibool_interface(nibool))
+      ibool_interface=ibool_interfaces_inner_core1(1:nibool,i)
+      !ng_interface=nibool*NNDOF
+      !allocate(ig_interface(ng_interface),rg_interface(ng_interface))
+      !ig_interface=reshape(ggdof_ic1(1,ibool_interface), (/ ng_interface /) )
+      !ig_interface=ig_interface-1
+      !rg_interface=1.0
+      !call VecSetValues(interface_gvec1,ng_interface,ig_interface,rg_interface,INSERT_VALUES,ierr);
+      !deallocate(ibool_interface,ig_interface,rg_interface)
+      do i_bool = 1,nibool
+        do i_ndof = 1,NNDOF
+          igdof = ggdof_ic1(i_ndof,ibool_interface(i_bool))-1
+          if (igdof >= 0) call VecSetValues(interface_gvec1,1,igdof,rval, &
+          INSERT_VALUES,ierr);
+        enddo
+      enddo
+      deallocate(ibool_interface)
+  enddo
+  !if (myrank==0) write(*,*) 'OK'
+  !call sync_all
+  !! stop all the MPI processes, and exit
+  !call MPI_FINALIZE(ierr)
+
+  ! outer core
+  do i = 1,num_interfaces_outer_core1
+      nibool=nibool_interfaces_outer_core1(i)
+      allocate(ibool_interface(nibool))
+      ibool_interface=ibool_interfaces_outer_core1(1:nibool,i)
+      !ng_interface=nibool*NNDOF
+      !allocate(ig_interface(ng_interface),rg_interface(ng_interface))
+      !ig_interface=reshape(ggdof_ic1(1,ibool_interface), (/ ng_interface /) )
+      !ig_interface=ig_interface-1
+      !rg_interface=1.0
+      !call VecSetValues(interface_gvec1,ng_interface,ig_interface,rg_interface,INSERT_VALUES,ierr);
+      !deallocate(ibool_interface,ig_interface,rg_interface)
+      do i_bool = 1,nibool
+        do i_ndof = 1,NNDOF
+          igdof = ggdof_oc1(i_ndof,ibool_interface(i_bool))-1
+          if (igdof >= 0) call VecSetValues(interface_gvec1,1,igdof,rval, &
+          INSERT_VALUES,ierr);
+        enddo
+      enddo
+      deallocate(ibool_interface)
+  enddo
+
+  ! crust mantle
+  do i = 1,num_interfaces_crust_mantle1
+      nibool=nibool_interfaces_crust_mantle1(i)
+      allocate(ibool_interface(nibool))
+      ibool_interface=ibool_interfaces_crust_mantle1(1:nibool,i)
+      !ng_interface=nibool*NNDOF
+      !allocate(ig_interface(ng_interface),rg_interface(ng_interface))
+      !ig_interface=reshape(ggdof_ic1(1,ibool_interface), (/ ng_interface /) )
+      !ig_interface=ig_interface-1
+      !rg_interface=1.0
+      !call VecSetValues(interface_gvec1,ng_interface,ig_interface,rg_interface,INSERT_VALUES,ierr);
+      !deallocate(ibool_interface,ig_interface,rg_interface)
+      do i_bool = 1,nibool
+        do i_ndof = 1,NNDOF
+          igdof = ggdof_cm1(i_ndof,ibool_interface(i_bool))-1
+          if (igdof >= 0) call VecSetValues(interface_gvec1,1,igdof,rval, &
+          INSERT_VALUES,ierr);
+        enddo
+      enddo
+      deallocate(ibool_interface)
+  enddo
+
+  ! transition infinite
+  if (ADD_TRINF) then
+  do i = 1,num_interfaces_trinfinite1
+      nibool=nibool_interfaces_trinfinite1(i)
+      allocate(ibool_interface(nibool))
+      ibool_interface=ibool_interfaces_trinfinite1(1:nibool,i)
+      !ng_interface=nibool*NNDOF
+      !allocate(ig_interface(ng_interface),rg_interface(ng_interface))
+      !ig_interface=reshape(ggdof_ic1(1,ibool_interface), (/ ng_interface /) )
+      !ig_interface=ig_interface-1
+      !rg_interface=1.0
+      !call VecSetValues(interface_gvec1,ng_interface,ig_interface,rg_interface,INSERT_VALUES,ierr);
+      !deallocate(ibool_interface,ig_interface,rg_interface)
+      do i_bool = 1,nibool
+        do i_ndof = 1,NNDOF
+          igdof = ggdof_trinf1(i_ndof,ibool_interface(i_bool))-1
+          if (igdof >= 0) call VecSetValues(interface_gvec1,1,igdof,rval, &
+          INSERT_VALUES,ierr);
+        enddo
+      enddo
+      deallocate(ibool_interface)
+  enddo
+  endif
+
+  ! infinite
+  do i = 1,num_interfaces_infinite1
+      nibool=nibool_interfaces_infinite1(i)
+      allocate(ibool_interface(nibool))
+      ibool_interface=ibool_interfaces_infinite1(1:nibool,i)
+      !ng_interface=nibool*NNDOF
+      !allocate(ig_interface(ng_interface),rg_interface(ng_interface))
+      !ig_interface=reshape(ggdof_ic1(1,ibool_interface), (/ ng_interface /) )
+      !ig_interface=ig_interface-1
+      !rg_interface=1.0
+      !call VecSetValues(interface_gvec1,ng_interface,ig_interface,rg_interface,INSERT_VALUES,ierr);
+      !deallocate(ibool_interface,ig_interface,rg_interface)
+      do i_bool = 1,nibool
+        do i_ndof = 1,NNDOF
+          igdof = ggdof_inf1(i_ndof,ibool_interface(i_bool))-1
+          if (igdof >= 0) call VecSetValues(interface_gvec1,1,igdof,rval, &
+          INSERT_VALUES,ierr);
+        enddo
+      enddo
+      deallocate(ibool_interface)
+  enddo
+
+  call VecAssemblyBegin(interface_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecAssemblyEnd(interface_gvec1,ierr)
+  CHKERRA(ierr)
+
+  !call sync_all
+  !! stop all the MPI processes, and exit
+  !call MPI_FINALIZE(ierr)
+
+  ! copy solution to local array
+  allocate(isg_interface(neq1),rg_interface(neq1))
+  call scatter_globalvec1(interface_gvec1,rg_interface)
+  isg_interface=int(rg_interface)
+
+  ! estimate correction for the number of nonzero entries in the diagonal and
+  ! nondiagonal portion
+  ! self interface
+  !rval=-1.0
+  !call VecSet(nself_gvec1,rval,ierr) ! subtract self
+  rval = 1.0
+  do i = 1,neq1
+    if (isg_interface(i) == 1) then
+      call VecSetValues(nself_gvec1,1,l2gdof1(i),rval,ADD_VALUES,ierr);
+    endif
+  enddo
+  call VecAssemblyBegin(nself_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecAssemblyEnd(nself_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecGetLocalSize(nself_gvec1,n,ierr)
+
+  allocate(rnself_lgarray1(neq1))
+  call scatter_globalvec1(nself_gvec1, rnself_lgarray1)
+  call VecGetArrayF90(nself_gvec1,rnself_array1,ierr)
+  allocate(nself_array1(n))
+  nself_array1 = int(rnself_array1(1:n))
+  where(nself_array1 > 0)nself_array1=nself_array1-1 ! subtract self
+  call VecRestoreArrayF90(nself_gvec1,rnself_array1,ierr)
+  call VecDestroy(nself_gvec1,ierr)
+
+  if (myrank == 0) write(*,*) 'maximum value of nself:',maxval(nself_array1)
+  !call sync_all
+  !! stop all the MPI processes, and exit
+  !call MPI_FINALIZE(ierr)
+  !stop
+  !! count nonzero entries in the diagonal and nondiagonal portion
+  !zero=0.
+  !call VecSet(nzeror_dvec1,zero,ierr)
+  !call VecSet(nzeror_ovec1,zero,ierr)
+
+  !outf_name='isg_interface'//trim(adjustl(char_myrank))
+  !open(1,file=outf_name,action='write',status='replace')
+  !write(1,'(i4)')isg_interface
+  !close(1)
+
+  !if (myrank==0) open(11,file='test_interface',action='write',status='replace')
+  ! factor for maximum number of interfaces for each nondiagonal entry of the
+  ! stiffness matrix
+  ! the factor below is valid ONLY for rectagular partitioning of the global model
+  max_ni = 8.0
+  fac_ni = 0.0
+
+  ! first element
+  igr0=kgrow_sparse1(1)-1
+  igc0=kgcol_sparse1(1)-1
+  ir0=krow_sparse1(1)
+  ic0=kcol_sparse1(1)
+  nd = 0; noffd = 0
+  rnid = 0.; rnioffd = 0.
+  if (iproc_array1(ir0) == iproc_array1(ic0)) then
+    nd = 1;
+    if (igr0 /= igc0 .and. rnself_lgarray1(ir0) > 1.0 .and. rnself_lgarray1(ic0) > 1.0) then
+      fac_ni = min(max_ni,min(rnself_lgarray1(ir0),rnself_lgarray1(ic0)))
+      rnid = 1.0/fac_ni
+    endif
+  else
+    noffd = 1
+    if (igr0 /= igc0 .and. rnself_lgarray1(ir0) > 1.0 .and. rnself_lgarray1(ic0) > 1.0) then
+      fac_ni = min(max_ni,min(rnself_lgarray1(ir0),rnself_lgarray1(ic0)))
+      rnioffd = 1.0/fac_ni
+    endif
+  endif
+  do i = 2,nsparse1
+    igr=kgrow_sparse1(i)-1
+    igc=kgcol_sparse1(i)-1
+    ir=krow_sparse1(i)
+    ic=kcol_sparse1(i)
+    if (l2gdof1(ir) /= igr.or.l2gdof1(ic) /= igc) then
+      write(*,*) 'strange:',l2gdof1(ir),igr,l2gdof1(ic),igc
+      stop
+    endif
+    if (igr /= igr0) then
+      ! new row starts
+      ! set values computed so far
+      rnd=real(nd)
+      rnoffd=real(noffd)
+      call VecSetValues(nzeror_dvec1,1,igr0,rnd,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+      call VecSetValues(nzeror_ovec1,1,igr0,rnoffd,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+
+      call VecSetValues(ninterface_dvec1,1,igr0,rnid,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+      call VecSetValues(ninterface_ovec1,1,igr0,rnioffd,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+
+      ! reset
+      nd = 0; noffd = 0
+      rnid = 0.; rnioffd = 0.
+      igr0=igr !kgrow_sparse1(i)-1
+      igc0=igc !kgcol_sparse1(i)-1
+      ir0=ir !krow_sparse1(i)
+      ic0=ic !kcol_sparse1(i)
+
+      if (iproc_array1(ir0) == iproc_array1(ic0)) then
+        nd = 1;
+        if (igr0 /= igc0 .and. rnself_lgarray1(ir0) > 0.0 .and. rnself_lgarray1(ic0) > 0.0) then
+          fac_ni = min(max_ni,min(rnself_lgarray1(ir0),rnself_lgarray1(ic0)))
+          rnid = 1.0/fac_ni
+        endif
+      else
+        noffd = 1
+        if (igr0 /= igc0 .and. rnself_lgarray1(ir0) > 0.0 .and. rnself_lgarray1(ic0) > 0.0) then
+          fac_ni = min(max_ni,min(rnself_lgarray1(ir0),rnself_lgarray1(ic0)))
+          rnioffd = 1.0/fac_ni
+        endif
+      endif
+    else
+      !if (myrank==0) write(11,*) ir,ic,isg_interface(ir),isg_interface(ic)
+      ! count
+      if (iproc_array1(ir) == iproc_array1(ic)) then
+        nd = nd+1;
+        if (igr /= igc .and. rnself_lgarray1(ir) > 0.0 .and. rnself_lgarray1(ic) > 0.0) then
+          fac_ni = min(max_ni,min(rnself_lgarray1(ir),rnself_lgarray1(ic)))
+          rnid=rnid+(1.0/fac_ni)
+        endif
+      else
+        noffd = noffd+1
+        if (igr /= igc .and. rnself_lgarray1(ir) > 0.0 .and. rnself_lgarray1(ic) > 0.0) then
+          fac_ni = min(max_ni,min(rnself_lgarray1(ir),rnself_lgarray1(ic)))
+          rnioffd=rnioffd+(1.0/fac_ni)
+        endif
+      endif
+    endif
+    if (i == nsparse1) then
+      ! for last
+      rnd=real(nd)
+      rnoffd=real(noffd)
+      call VecSetValues(nzeror_dvec1,1,igr0,rnd,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+      call VecSetValues(nzeror_ovec1,1,igr0,rnoffd,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+
+      call VecSetValues(ninterface_dvec1,1,igr0,rnid,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+      call VecSetValues(ninterface_ovec1,1,igr0,rnioffd,ADD_VALUES,ierr)
+      CHKERRA(ierr)
+    endif
+  enddo
+  deallocate(krow_sparse1,kcol_sparse1)
+  ! Assemble vectors globally
+  call VecAssemblyBegin(nzeror_dvec1,ierr)
+  call VecAssemblyEnd(nzeror_dvec1,ierr)
+  call VecAssemblyBegin(nzeror_ovec1,ierr)
+  call VecAssemblyEnd(nzeror_ovec1,ierr)
+
+  call VecAssemblyBegin(ninterface_dvec1,ierr)
+  call VecAssemblyEnd(ninterface_dvec1,ierr)
+  call VecAssemblyBegin(ninterface_ovec1,ierr)
+  call VecAssemblyEnd(ninterface_ovec1,ierr)
+
+  ! apply correction for repeatition due to interfaces
+  call VecGetLocalSize(nzeror_dvec1,n,ierr)
+  call VecGetArrayF90(nzeror_dvec1,nzeror_darray1,ierr)
+  allocate(nnzero_diag1(n))
+  nnzero_diag1 = int(nzeror_darray1(1:n))
+  nnzero_diag1 = nnzero_diag1-nself_array1
+
+  if (myrank == 0) write(*,*) n,minval(nzeror_darray1),maxval(nzeror_darray1), &
+  minval(nnzero_diag1),maxval(nnzero_diag1)
+  call sync_all
+
+  call VecRestoreArrayF90(nzeror_dvec1,nzeror_darray1,ierr)
+  call VecDestroy(nzeror_dvec1,ierr)
+
+  call VecGetArrayF90(nzeror_ovec1,nzeror_oarray1,ierr)
+  allocate(nnzero_offdiag1(n))
+  nnzero_offdiag1 = int(nzeror_oarray1(1:n))
+
+  call VecRestoreArrayF90(nzeror_ovec1,nzeror_oarray1,ierr)
+  call VecDestroy(nzeror_ovec1,ierr)
+
+  ! correction
+  ! I do not know why but there are some DOFs where the correction exceeds by 4 or
+  ! 8 therefore to be safe we need to subtract this from all
+  call VecGetArrayF90(ninterface_dvec1,rninterface_darray1,ierr)
+  !where(rninterface_darray1>0.0 .and. rninterface_darray1 < 1.0)rninterface_darray1=1.0
+  allocate(ninterface_darray1(n))
+  ninterface_darray1 = int(rninterface_darray1(1:n))
+  call VecRestoreArrayF90(ninterface_dvec1,rninterface_darray1,ierr)
+  call VecDestroy(ninterface_dvec1,ierr)
+  where(ninterface_darray1 > 0)ninterface_darray1=ninterface_darray1-4
+  where(ninterface_darray1 < 0)ninterface_darray1=0
+
+
+  call VecGetArrayF90(ninterface_ovec1,rninterface_oarray1,ierr)
+  !where(rninterface_oarray1>0.0 .and. rninterface_oarray1 < 1.0)rninterface_oarray1=1.0
+  allocate(ninterface_oarray1(n))
+  ninterface_oarray1 = int(rninterface_oarray1(1:n))
+  call VecRestoreArrayF90(ninterface_ovec1,rninterface_oarray1,ierr)
+  call VecDestroy(ninterface_ovec1,ierr)
+  where(ninterface_oarray1 > 0)ninterface_oarray1=ninterface_oarray1-8
+  where(ninterface_oarray1 < 0)ninterface_oarray1=0
+
+
+  nnzero_diag1 = nnzero_diag1-ninterface_darray1
+  nnzero_offdiag1 = nnzero_offdiag1-ninterface_oarray1
+
+  do i = 1,nsparse1
+    rval = 1.
+    igdof=kgrow_sparse1(i)-1 ! Fortran index
+    call VecSetValues(nzeror_gvec1,1,igdof,rval,ADD_VALUES,ierr);
+    CHKERRA(ierr)
+  enddo
+  call VecAssemblyBegin(nzeror_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecAssemblyEnd(nzeror_gvec1,ierr)
+  CHKERRA(ierr)
+  call VecGetLocalSize(nzeror_gvec1,n,ierr)
+  CHKERRA(ierr)
+  if (myrank == 0) write(*,*) 'size of vector:',ng,n,minval(kgrow_sparse1),ig0
+  call VecGetArrayF90(nzeror_gvec1,nzeror_array1,ierr)
+  CHKERRA(ierr)
+  allocate(inzeror_array1(n))
+  inzeror_array1 = int(nzeror_array1(1:n))
+  call VecRestoreArrayF90(nzeror_gvec1,nzeror_array1,ierr)
+  CHKERRA(ierr)
+  call VecDestroy(nzeror_gvec1,ierr)
+  CHKERRA(ierr)
+
+
+  ! Create the stiffness matrix (same for forward/adjoint simulations)
+  call MatCreate(PETSC_COMM_WORLD,Amat1,ierr)
+  call MatSetType(Amat1,MATMPIAIJ,ierr)
+  CHKERRA(ierr)
+  call MatSetSizes(Amat1,PETSC_DECIDE,PETSC_DECIDE,ngdof1,ngdof1,ierr)
+  CHKERRA(ierr)
+
+  call MatMPIAIJSetPreallocation(Amat1,nzeros_max,inzeror_array1,nzeros_max, &
+  inzeror_array1,ierr)
+  CHKERRA(ierr)
+
+  call MatSetFromOptions(Amat1,ierr)
+  CHKERRA(ierr)
+
+  call MatGetOwnershipRange(Amat1,istart,iend,ierr)
+  CHKERRA(ierr)
+  call sync_all
+  if (istart /= ig0 .or. iend-1 /= ig1) then
+    write(*,*) 'ERROR: ownership range mismatch!'
+    write(*,*) 'ownership range:',myrank,istart,ig0,iend-1,ig1,nzeros_row(1)
+    stop
+  endif
+  deallocate(nzeros)
+
+  ! Create forward solver
+  !solver_type1=CG
+  !call create_linear_solver(solver_type1, ksp1, Amat1, pc1, Fmat1)
+
+  !call KSPSetTolerances(ksp1,KSP_RTOL1,KSP_ATOL1,KSP_DTOL1,KSP_MAXITER1,ierr)
+  !CHKERRA(ierr)
+
+  !  Set runtime options, e.g.,
+  !    -ksp_type < type> -pc_type < type> -ksp_monitor -ksp_KSP_RTOL < KSP_RTOL>
+  !  These options will override those specified above as long as
+  !  KSPSetFromOptions() is called _after_ any other customization
+  !  routines.
+  !call KSPSetFromOptions(ksp1,ierr)
+
+
+  ! Create adjoint solver:
+  !if (SIMULATION_TYPE==3) then
+  !  call create_linear_solver(solver_type1, b_ksp1, Amat1, pc1, Fmat1)
+  !  call KSPSetTolerances(b_ksp1,KSP_RTOL1,KSP_ATOL1,KSP_DTOL1,KSP_MAXITER1,ierr)
+  !  CHKERRA(ierr)
+  !  call KSPSetFromOptions(b_ksp1,ierr)
+  !  if (myrank==0) then
+  !    write(*,*) ' Created adjoint linear KSP solver...'
+  !  endif
+  !endif
+
+
+
+
+  !-------------------------------------------------------------------------------
+  ! Create the linear solver and set various options
+  !-------------------------------------------------------------------------------
+  ! define solver type
+  ! COMMAND: define from the command
+  ! SUPERLU: SuperLU solver
+  ! MUMPS: MUMPS solver
+  !solver_type1=CG
+  ! Create linear solver context
+
+  call KSPCreate(PETSC_COMM_WORLD,ksp1,ierr)
+  call KSPSetOperators(ksp1,Amat1,Amat1,ierr) ! version >= 3.5
+
+  if (SIMULATION_TYPE == 3) then
+    call KSPSetInitialGuessNonzero(ksp1,PETSC_FALSE,ierr)
+  else
+    call KSPSetInitialGuessNonzero(ksp1,PETSC_TRUE,ierr)
+  endif
+
+  CHKERRA(ierr)
+  call KSPSetDiagonalScale(ksp1,PETSC_TRUE,ierr)
+  CHKERRA(ierr)
+  call KSPSetReusePreconditioner(ksp1,PETSC_TRUE,ierr)
+  call KSPSetType(ksp1,KSPCG,ierr);
+  CHKERRA(ierr)
+  call KSPGetPC(ksp1,pc1,ierr)
+  CHKERRA(ierr)
+  call PCFactorSetShiftType(pc1,MAT_SHIFT_POSITIVE_DEFINITE,ierr)
+  CHKERRA(ierr)
+  call KSPSetTolerances(ksp1,KSP_RTOL1,KSP_ATOL1,KSP_DTOL1,KSP_MAXITER1,ierr)
+  CHKERRA(ierr)
+  call KSPSetFromOptions(ksp1,ierr)
+
+  ! BACKWARD SOLVER
+  call KSPCreate(PETSC_COMM_WORLD,b_ksp1,ierr)
+  call KSPSetOperators(b_ksp1,Amat1,Amat1,ierr) ! version >= 3.5
+  call KSPSetInitialGuessNonzero(b_ksp1,PETSC_FALSE,ierr)
+  CHKERRA(ierr)
+  call KSPSetDiagonalScale(b_ksp1,PETSC_TRUE,ierr)
+  CHKERRA(ierr)
+  call KSPSetReusePreconditioner(b_ksp1,PETSC_TRUE,ierr)
+  call KSPSetType(b_ksp1,KSPCG,ierr);
+  CHKERRA(ierr)
+  call KSPGetPC(b_ksp1,b_pc1,ierr)
+  CHKERRA(ierr)
+  call PCFactorSetShiftType(b_pc1,MAT_SHIFT_POSITIVE_DEFINITE,ierr)
+  CHKERRA(ierr)
+  call KSPSetTolerances(b_ksp1,KSP_RTOL1,KSP_ATOL1,KSP_DTOL1,KSP_MAXITER1,ierr)
+  CHKERRA(ierr)
+  call KSPSetFromOptions(b_ksp1,ierr)
+
+  if (myrank == 0) then
+    write(*,*) ' ---------- Finished PETSC initialisation ---------- '
+  endif
+
+  end subroutine petsc_initialize1
+
+!
+!===============================================================================
+!
+
+  subroutine create_linear_solver(stype, l_ksp, l_Amat, l_pc, l_fmat)
+
+  ! Create the linear solver and set various options
+  ! stype: Solver type - options available are
+  !   COMMAND   define from the command
+  !   SUPERLU   SuperLU solver
+  !   MUMPS     MUMPS solver
+  ! l_ksp: the local KSP (ksp1 or b_ksp1 etc)
+  ! l_Amat: local A matrix - I think always Amat1
+  ! l_pc:   local preconditioner e.g pc1
+  use specfem_par, only: myrank, SIMULATION_TYPE
+  implicit none
+
+
+
+  PetscInt     stype
+  KSP          l_ksp
+  Mat          l_Amat, l_fmat
+  PC           l_pc
+
+
+  ! Create linear solver context
+  call KSPCreate(PETSC_COMM_WORLD,l_ksp,ierr)
+  ! Set operators. Here the matrix that defines the linear system
+  ! also serves as the preconditioning matrix.
+  !call KSPSetOperators(ksp1,Amat1,Amat1,SAME_PRECONDITIONER,ierr) ! version < 3.5
+  call KSPSetOperators(l_ksp,l_Amat,l_Amat,ierr) ! version >= 3.5
+
+  call KSPSetInitialGuessNonzero(l_ksp,PETSC_TRUE,ierr)
+  CHKERRA(ierr)
+  !since the euqutions are nondimensionalized, the scaling is unnecessary?
+  call KSPSetDiagonalScale(l_ksp,PETSC_TRUE,ierr)
+  CHKERRA(ierr)
+  call KSPSetReusePreconditioner(l_ksp,PETSC_TRUE,ierr)
+
+  if (stype == COMMAND) then
+    if (myrank == 0) write(*,*) 'Solver type: provided via command'
+  else if (stype == CG) then
+    ! CONJUGATE GRADIENT
+    if (myrank == 0) write(*,*) 'Solver type: CG'
+    call KSPSetType(l_ksp,KSPCG,ierr);
+    CHKERRA(ierr)
+    ! Fetch preconditioner
+    call KSPGetPC(l_ksp,l_pc,ierr)
+    CHKERRA(ierr)
+    call PCFactorSetShiftType(l_pc,MAT_SHIFT_POSITIVE_DEFINITE,ierr)
+    CHKERRA(ierr)
+  else if (stype == SUPERLU) then
+    ! SUPER LU
+    if (myrank == 0) write(*,*) 'Solver type: SUPERLU'
+    if (SIMULATION_TYPE == 3) then
+      write(*,*) ' ERROR: SUPERLU not implemented for adjoint sims yet.'
+      stop
+    endif
+    flg_ilu    = PETSC_FALSE;
+    flg_lu     = PETSC_FALSE;
+    ! version < 3.8.0
+      ! call
+      ! PetscOptionsGetBool(PETSC_NULL_CHARACTER,"-use_superlu_lu",flg_lu,flg,ierr);
+        call PetscOptionsGetBool(PETSC_NULL_OPTIONS,PETSC_NULL_CHARACTER, &
+          "-use_superlu_lu",flg_lu,flg,ierr);
+    CHKERRA(ierr)
+    !PetscOptionsGetBool(PETSC_NULL_CHARACTER,"-use_superlu_ilu",flg_ilu,flg,ierr);
+    if (flg_lu .or. flg_ilu) then
+      call KSPSetType(l_ksp,KSPPREONLY,ierr);
+      CHKERRA(ierr)
+      call KSPGetPC(l_ksp,l_pc,ierr);
+      CHKERRA(ierr)
+      if (flg_lu) then
+        call PCSetType(l_pc,PCLU,ierr);
+        CHKERRA(ierr)
+      else if (flg_ilu) then
+        call PCSetType(l_pc,PCILU,ierr);
+        CHKERRA(ierr)
+      endif
+      call PCFactorSetShiftType(l_pc,MAT_SHIFT_POSITIVE_DEFINITE,ierr)
+      CHKERRA(ierr)
+      ! version < 3.9
+      !call PCFactorSetMatSolverPackage(l_pc,MATSOLVERSUPERLU,ierr);
+      call PCFactorSetMatSolverType(l_pc,MATSOLVERSUPERLU,ierr);
+      CHKERRA(ierr)
+      ! version < 3.9
+      !call PCFactorSetUpMatSolverPackage(l_pc,ierr); ! call MatGetFactor() to create F
+      call PCFactorSetUpMatSolverType(l_pc,ierr); ! call MatGetFactor() to create F
+      CHKERRA(ierr)
+      call PCFactorGetMatrix(l_pc,l_fmat,ierr);
+      CHKERRA(ierr)
+      !call MatSuperluSetILUDropTol(l_fmat,1.e-8,ierr);
+      !CHKERRA(ierr)
+    endif
+  else if (stype == MUMPS) then
+    if (myrank == 0) write(*,*) 'Solver type: MUMPS'
+    write(*,*) 'ERROR - WE commented out MUMPS stuff due to syntax error'
+    stop
+    flg_lu    = PETSC_FALSE;
+    flg_ch    = PETSC_FALSE;
+    ! version < 3.8.0
+    ! call PetscOptionsGetBool(PETSC_NULL_CHARACTER,"-use_mumps_ch",flg_ch,flg,ierr);
+     call PetscOptionsGetBool(PETSC_NULL_OPTIONS,PETSC_NULL_CHARACTER, &
+          "-use_mumps_ch",flg_ch,flg,ierr);
+    if (flg_lu .or. flg_ch) then
+      call KSPSetType(l_ksp,KSPPREONLY,ierr);
+      call KSPGetPC(l_ksp,l_pc,ierr);
+      if (flg_lu) then
+        call PCSetType(l_pc,PCLU,ierr);
+      else if (flg_ch) then
+        call MatSetOption(l_Amat,MAT_SPD,PETSC_TRUE,ierr); ! set MUMPS id%SYM=1
+        call PCSetType(l_pc,PCCHOLESKY,ierr);
+      endif
+      call PCFactorSetShiftType(l_pc,MAT_SHIFT_POSITIVE_DEFINITE,ierr)
+      CHKERRA(ierr)
+      ! version < 3.9
+      !call PCFactorSetMatSolverPackage(l_pc,MATSOLVERMUMPS,ierr);
+      !call PCFactorSetUpMatSolverPackage(l_pc,ierr); ! call MatGetFactor() to create F
+      !call PCFactorSetMatSolverType(l_pc,MATSOLVERMUMPS,ierr);
+      !call PCFactorSetUpMatSolverType(l_pc,ierr); ! call MatGetFactor() to create F
+      !call PCFactorGetMatrix(l_pc,l_fmat,ierr);
+      icntl = 7; ival = 2;
+      !call MatMumpsSetIcntl(l_fmat,icntl,ival,ierr);
+      icntl = 1; val = 0.0;
+      !call MatMumpsSetCntl(l_fmat,icntl,val,ierr);
+    endif
+  endif
+
+  end subroutine create_linear_solver
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_set_matrix1()
+
+  use math_library_mpi, only: maxscal,minscal
+  use specfem_par, only: IFLAG_IN_FICTITIOUS_CUBE,NSPEC_INNER_CORE, &
+  NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
+  use specfem_par, only: NEDOF
+  use specfem_par, only: NGLLCUBE_INF,NEDOF1
+  use specfem_par_innercore, only: ggdof_ic1,storekmat_inner_core1, &
+  idoubling_inner_core,inode_elmt_ic1
+  use specfem_par_outercore, only: ggdof_oc1,storekmat_outer_core1,inode_elmt_oc1
+  use specfem_par_crustmantle, only: ggdof_cm1,storekmat_crust_mantle1, &
+  inode_elmt_cm1
+  use specfem_par_trinfinite, only: ggdof_trinf1,storekmat_trinfinite1, &
+  inode_elmt_trinf1
+  use specfem_par_infinite, only: ggdof_inf1,storekmat_infinite1,inode_elmt_inf1
+
+  implicit none
+  integer :: i,i_elmt,j,ncount
+  integer :: ggdof_elmt(NEDOF1),idof(NEDOF1),igdof(NEDOF1)
+
+  PetscInt irow,istart,iend,ndiag,noffdiag
+  integer :: ncols,ncols_val
+  integer,allocatable :: cols(:)
+  real(kind=8),allocatable :: vals(:)
+
+  character(len=10) :: char_myrank
+  character(len=60) :: outf_name
+  Vec Adiag1,lAdiag1
+  PetscScalar,pointer :: arrayAdiag1(:)
+  PetscReal :: maxdiag1,mindiag1
+  PetscInt n
+
+  call MatZeroEntries(Amat1,ierr)
+  CHKERRA(ierr)
+
+  ! inner core
+  do i_elmt = 1,NSPEC_INNER_CORE
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    ggdof_elmt = reshape(ggdof_ic1(:,inode_elmt_ic1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF1
+      do j = 1,NEDOF1
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_inner_core1(i,j,i_elmt) /= 0.0_kreal) then
+        !ncount=ncount+1
+        !idof(ncount)=i
+        !igdof(ncount)=ggdof_elmt(i)
+        call MatSetValues(Amat1,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_inner_core1(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+      endif
+      enddo
+    enddo
+    !call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+    !storekmat_inner_core(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+    !CHKERRA(ierr)
+  enddo
+  !! inner core
+  !do i_elmt=1,NSPEC_INNER_CORE
+  !  if (idoubling_inner_core(i_elmt)==IFLAG_IN_FICTITIOUS_CUBE)cycle
+  !  ggdof_elmt=reshape(ggdof_ic(:,inode_elmt_ic(:,i_elmt)),(/NEDOF/))
+  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  ncount=0; idof=-1; igdof=-1
+  !  do i=1,NEDOF
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_inner_core(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  !enddo
+  deallocate(storekmat_inner_core1)
+  !if (myrank==0) write(*,*) 'IC kmat done1!'; call sync_all
+  !if (myrank==0) then
+  !  open(1111,file='debug.log',action='write')
+  !  write(1111,*)ggdof_oc1
+  !  !do i_elmt=1,NSPEC_OUTER_CORE
+  !  !  ggdof_elmt=reshape(ggdof_oc1(:,inode_elmt_oc1(:,i_elmt)),(/NEDOF1/))
+  !  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  !  ncount=0; idof=-1; igdof=-1
+  !  !  do i=1,NEDOF1
+  !  !    do j=1,NEDOF1
+  !  !    if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+  !  !    storekmat_outer_core1(i,j,i_elmt) /= 0.0_kreal) then
+  !  !      write(1111,*)ggdof_elmt(i),ggdof_elmt(j)
+  !  !    endif
+  !  !    enddo
+  !  !  enddo
+  !  !enddo
+  !  close(1111)
+  !endif
+  !call sync_all
+
+  ! outer core
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    ggdof_elmt = reshape(ggdof_oc1(:,inode_elmt_oc1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF1
+      do j = 1,NEDOF1
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_outer_core1(i,j,i_elmt) /= 0.0_kreal) then
+        call MatSetValues(Amat1,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_outer_core1(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+      endif
+      enddo
+    enddo
+  enddo
+  !! outer core
+  !do i_elmt=1,NSPEC_OUTER_CORE
+  !  ggdof_elmt=reshape(ggdof_oc(:,inode_elmt_oc(:,i_elmt)),(/NEDOF/))
+  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  ncount=0; idof=-1; igdof=-1
+  !  do i=1,NEDOF
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_outer_core(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  !enddo
+  deallocate(storekmat_outer_core1)
+  !if (myrank==0) write(*,*) 'OC kmat done1!'; call sync_all
+  ! crust mantle
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    ggdof_elmt = reshape(ggdof_cm1(:,inode_elmt_cm1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF1
+      do j = 1,NEDOF1
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_crust_mantle1(i,j,i_elmt) /= 0.0_kreal) then
+        call MatSetValues(Amat1,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_crust_mantle1(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+      endif
+      enddo
+    enddo
+  enddo
+  !do i_elmt=1,NSPEC_CRUST_MANTLE
+  !  ggdof_elmt=reshape(ggdof_cm1(:,inode_elmt_cm1(:,i_elmt)),(/NEDOF1/))
+  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  ncount=0; idof=-1; igdof=-1
+  !  do i=1,NEDOF1
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  !if (myrank==0) write(*,*) 'hi homnath3in!',i_elmt,minval(igdof(1:ncount)), &
+  !  !maxval(igdof(1:ncount)) !,storekmat_crust_mantle(idof(1:ncount),idof(1:ncount),i_elmt)
+  !  call MatSetValues(Amat1,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_crust_mantle1(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  !enddo
+  deallocate(storekmat_crust_mantle1)
+  !if (myrank==0) write(*,*) 'CM kmat done1!'; call sync_all
+  ! trinfinite
+  do i_elmt = 1,NSPEC_TRINFINITE
+    ggdof_elmt = reshape(ggdof_trinf1(:,inode_elmt_trinf1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF1
+      do j = 1,NEDOF1
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_trinfinite1(i,j,i_elmt) /= 0.0_kreal) then
+        call MatSetValues(Amat1,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_trinfinite1(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+      endif
+      enddo
+    enddo
+  enddo
+  !do i_elmt=1,NSPEC_TRINFINITE
+  !  ggdof_elmt=reshape(ggdof_trinf1(:,inode_elmt_trinf1(:,i_elmt)),(/NEDOF1/))
+  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  ncount=0; idof=-1; igdof=-1
+  !  do i=1,NEDOF1
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  call MatSetValues(Amat1,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_trinfinite1(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  !enddo
+  deallocate(storekmat_trinfinite1)
+  !if (myrank==0) write(*,*) 'TRINF kmat done1!'; call sync_all
+  ! infinite
+  do i_elmt = 1,NSPEC_INFINITE
+    ggdof_elmt = reshape(ggdof_inf1(:,inode_elmt_inf1(:,i_elmt)),(/NEDOF1/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF1
+      do j = 1,NEDOF1
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_infinite1(i,j,i_elmt) /= 0.0_kreal) then
+        call MatSetValues(Amat1,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_infinite1(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+        !if (myrank==0) write(*,*) 'hello in INF1:',i_elmt,'/',NSPEC_INFINITE,i,j,NGLLCUBE_INF,NEDOF1!ggdof_elmt(i),ggdof_elmt(j)
+      endif
+      enddo
+    enddo
+  !  do i=1,NEDOF1
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  call MatSetValues(Amat1,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_infinite1(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  enddo
+  call sync_all
+  !if (myrank==0) write(*,*) 'INF kmat done1:0!'; call sync_all
+  deallocate(storekmat_infinite1)
+  !if (myrank==0) write(*,*) 'INF kmat done1!'; call sync_all
+
+  call MatAssemblyBegin(Amat1,MAT_FINAL_ASSEMBLY,ierr)
+  CHKERRA(ierr)
+  call MatAssemblyEnd(Amat1,MAT_FINAL_ASSEMBLY,ierr)
+  CHKERRA(ierr)
+  call MatSetOption(Amat1,MAT_SYMMETRIC,PETSC_TRUE,ierr);
+  CHKERRA(ierr)
+  !if (myrank==0) write(*,*) 'matrix setting & assembly complete11!'
+  call sync_all
+  call MatGetOwnershipRange(Amat1,istart,iend,ierr)
+  CHKERRA(ierr)
+
+  !! check diagonal of the matrix
+  !call MatCreateVecs(Amat1,Adiag1,PETSC_NULL_OBJECT,ierr)
+  !call MatGetDiagonal(Amat1,Adiag1,ierr)
+  !
+  !call VecCreateSeq(PETSC_COMM_SELF,neq1,lAdiag1,ierr)
+  !
+  !call VecScatterBegin(vscat1,Adiag1,lAdiag1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+  !CHKERRA(ierr)
+  !call VecScatterEnd(vscat1,Adiag1,lAdiag1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+  !CHKERRA(ierr)
+  !call VecGetSize(lAdiag1,n,ierr)
+  !call VecGetArrayF90(lAdiag1,arrayAdiag1,ierr)
+  !CHKERRA(ierr)
+  !maxdiag1=maxscal(maxval(arrayAdiag1))
+  !mindiag1=minscal(minval(arrayAdiag1))
+  !write(*,*) 'maxdiag1 in petsc:',mindiag1,maxdiag1
+  !call VecRestoreArrayF90(lAdiag1,arrayAdiag1,ierr)
+  !CHKERRA(ierr)
+
+
+  !allocate(cols(nzeros_max),vals(nzeros_max))
+  allocate(cols(nzeros_max))
+  !call MatGetRow(Amat1,istart,ncols,cols,vals,ierr);
+  write(char_myrank,'(i4)')myrank
+  outf_name='tmp/nonzeros'//trim(adjustl(char_myrank))
+  open(1,file=outf_name,action='write',status='replace')
+  do i = istart,iend-1
+    cols=-1
+    !call MatGetRow(Amat1,i,ncols,PETSC_NULL_INTEGER,PETSC_NULL_SCALAR,ierr);
+    call MatGetRow(Amat1,i,ncols,cols,PETSC_NULL_SCALAR,ierr);
+    CHKERRA(ierr)
+    !ncols_val=ncols
+    !if (myrank==0) then
+    !write(*,*) 'nzeros in 0th row:',myrank,istart,ncols
+    ndiag=count(cols >= ig0.and.cols <= ig1)
+    noffdiag = ncols-ndiag
+    write(1,*)ndiag,noffdiag,ncols
+    !endif
+    !call MatRestoreRow(Amat1,istart,ncols,cols,vals,ierr);
+    !call sync_all
+    call MatRestoreRow(Amat1,i,ncols,PETSC_NULL_INTEGER,PETSC_NULL_SCALAR,ierr);
+    CHKERRA(ierr)
+  enddo
+  close(1)
+  call sync_all()
+
+  end subroutine petsc_set_matrix1
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_set_vector1(rload1)
+
+  use specfem_par, only: l2gdof1
+  use specfem_par, only: IFLAG_IN_FICTITIOUS_CUBE,NSPEC_INNER_CORE, &
+  NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
+  use specfem_par, only: NEDOF
+  use specfem_par, only: NGLLCUBE_INF,NEDOF1
+  use specfem_par_innercore, only: ggdof_ic1,storekmat_inner_core1, &
+  idoubling_inner_core,inode_elmt_ic1
+  use specfem_par_outercore, only: ggdof_oc1,storekmat_outer_core1, &
+  inode_elmt_oc1
+  use specfem_par_crustmantle, only: ggdof_cm1,storekmat_crust_mantle1, &
+  inode_elmt_cm1
+  use specfem_par_trinfinite, only: ggdof_trinf1,storekmat_trinfinite1, &
+  inode_elmt_trinf1
+  use specfem_par_infinite, only: ggdof_inf1,storekmat_infinite1,inode_elmt_inf1
+  implicit none
+  PetscScalar,intent(in) :: rload1(0:)
+  PetscScalar      zero
+
+  zero = 0.0
+  call VecSet(bvec1,zero,ierr)
+  call VecSetValues(bvec1,neq1,l2gdof1(1:),rload1(1:),ADD_VALUES,ierr);
+
+  ! assemble vector
+  call VecAssemblyBegin(bvec1,ierr)
+  call VecAssemblyEnd(bvec1,ierr)
+  !if (myrank==0) write(*,*) 'vector setting & assembly complete!'
+
+  end subroutine petsc_set_vector1
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_set_backward_vector1(b_rload1)
+
+  use specfem_par, only: l2gdof1, neq1
+  implicit none
+  PetscScalar,intent(in) :: b_rload1(0:)
+  PetscScalar      zero
+
+  zero = 0.0
+  call VecSet(b_bvec1,zero,ierr)
+  call VecSetValues(b_bvec1,neq1,l2gdof1(1:),b_rload1(1:),ADD_VALUES,ierr);
+
+  ! assemble vector
+  call VecAssemblyBegin(b_bvec1,ierr)
+  call VecAssemblyEnd(b_bvec1,ierr)
+
+  end subroutine petsc_set_backward_vector1
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_solve1(sdata1,iter,ireason)
+
+  implicit none
+  PetscInt iter
+  PetscScalar sdata1(:)
+
+  PetscInt ireason
+
+
+  call KSPSolve(ksp1,bvec1,xvec1,ierr)
+  call KSPGetConvergedReason(ksp1,ireason,ierr)
+  call KSPGetIterationNumber(ksp1,iter,ierr)
+
+  call scatter_globalvec1(xvec1, sdata1)
+
+  end subroutine petsc_solve1
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_backward_solve1(b_sdata1,b_iter,b_ireason)
+
+  implicit none
+  PetscInt b_iter
+  PetscScalar b_sdata1(:)
+  PetscInt b_ireason
+
+  call KSPSolve(b_ksp1,b_bvec1,b_xvec1,ierr)
+  call KSPGetConvergedReason(b_ksp1,b_ireason,ierr)
+  call KSPGetIterationNumber(b_ksp1,b_iter,ierr)
+
+  call scatter_globalvec1_backward(b_xvec1, b_sdata1)
+
+  end subroutine petsc_backward_solve1
+
+!
+!===============================================================================
+!
+
+!  subroutine scatter_globalvec1(global_vec, larray, l_vec1, l_vscat1)
+!  ! l_vec1 is local_vec1 or b_local_vec1 for forward/adjoint solver
+!  ! l_vscat1 is vscat1 or b_vscat1
+!  implicit none
+!  ! I/O variables
+!  VecScatter l_vscat1
+!  Vec l_vec1
+!  Vec global_vec
+!  PetscScalar larray(:)
+!  ! Local variables
+!  PetscInt n
+!  PetscScalar,pointer :: array_data(:)
+!
+!  call VecScatterBegin(l_vscat1,global_vec,l_vec1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+!  CHKERRA(ierr)
+!  call VecScatterEnd(l_vscat1,global_vec,l_vec1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+!  CHKERRA(ierr)
+!  call VecGetSize(l_vec1,n,ierr)
+!  call VecGetArrayF90(l_vec1,array_data,ierr)
+!  CHKERRA(ierr)
+!  larray(1:n)=array_data(1:n)
+!  call VecRestoreArrayF90(l_vec1,array_data,ierr)
+!  CHKERRA(ierr)
+!  return
+!  end subroutine scatter_globalvec1
+
+!
+!===============================================================================
+!
+
+  subroutine scatter_globalvec1(global_vec,larray)
+
+  implicit none
+
+  Vec global_vec
+  PetscScalar larray(:)
+  PetscInt n
+
+  PetscScalar,pointer :: array_data(:)
+
+  call VecScatterBegin(vscat1,global_vec,local_vec1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+  CHKERRA(ierr)
+  call VecScatterEnd(vscat1,global_vec,local_vec1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+  CHKERRA(ierr)
+  call VecGetSize(local_vec1,n,ierr)
+  call VecGetArrayF90(local_vec1,array_data,ierr)
+  CHKERRA(ierr)
+  larray(1:n)=array_data(1:n)
+  call VecRestoreArrayF90(local_vec1,array_data,ierr)
+  CHKERRA(ierr)
+  return
+
+  end subroutine scatter_globalvec1
+
+!
+!===============================================================================
+!
+
+  subroutine scatter_globalvec1_backward(b_global_vec,b_larray)
+
+  implicit none
+
+  Vec b_global_vec
+  PetscScalar b_larray(:)
+  PetscInt b_n
+
+  PetscScalar,pointer :: b_array_data(:)
+
+  call VecScatterBegin(b_vscat1, b_global_vec, b_local_vec1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+  CHKERRA(ierr)
+  call VecScatterEnd(b_vscat1, b_global_vec, b_local_vec1,INSERT_VALUES,SCATTER_FORWARD,ierr)
+  CHKERRA(ierr)
+  call VecGetSize(b_local_vec1, b_n,ierr)
+  call VecGetArrayF90(b_local_vec1, b_array_data,ierr)
+  CHKERRA(ierr)
+  b_larray(1:b_n)=b_array_data(1:b_n)
+  call VecRestoreArrayF90(b_local_vec1, b_array_data,ierr)
+  CHKERRA(ierr)
+  return
+
+  end subroutine scatter_globalvec1_backward
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_zero_initialguess1()
+
+  implicit none
+  PetscScalar      zero
+
+  zero = 0.0
+  call VecSet(xvec1,zero,ierr)
+
+  ! assemble vector
+  call VecAssemblyBegin(xvec1,ierr)
+  call VecAssemblyEnd(xvec1,ierr)
+
+  end subroutine petsc_zero_initialguess1
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_zero_backwards_initialguess1()
+
+  implicit none
+  PetscScalar      zero
+
+  zero = 0.0
+  call VecSet(b_xvec1,zero,ierr)
+
+  ! assemble vector
+  call VecAssemblyBegin(b_xvec1,ierr)
+  call VecAssemblyEnd(b_xvec1,ierr)
+
+  end subroutine petsc_zero_backwards_initialguess1
+
+!
+!===============================================================================
+!
+
+!  subroutine petsc_set_initialguess1(loc_pgrav1)
+!  implicit none
+!  PetscScalar,intent(in) :: loc_pgrav1(0:)
+!  PetscScalar      zero
+!
+!  zero=0.0
+!  call VecSet(bvec1,zero,ierr)
+!  call VecSetValues(bvec1,neq1,l2gdof1(1:),loc_pgrav1(1:),INSERT_VALUES,ierr);
+!
+!  ! assemble vector
+!  call VecAssemblyBegin(bvec1,ierr)
+!  call VecAssemblyEnd(bvec1,ierr)
+!
+!  end subroutine petsc_set_initialguess1
+
+!
+!===============================================================================
+!
+
+!  subroutine petsc_set_backward_initialguess1(loc_pgrav1)
+!    implicit none
+!    PetscScalar,intent(in) :: loc_pgrav1(0:)
+!    PetscScalar      zero
+!
+!    zero=0.0
+!    call VecSet(b_bvec1,zero,ierr)
+!    call VecSetValues(b_bvec1, neq1, l2gdof1(1:), loc_pgrav1(1:), INSERT_VALUES, ierr);
+!
+!    ! assemble vector
+!    call VecAssemblyBegin(b_bvec1,ierr)
+!    call VecAssemblyEnd(b_bvec1,ierr)
+!
+!  end subroutine petsc_set_backward_initialguess1
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_finalize1()
+
+  use specfem_par, only: SIMULATION_TYPE
+
+  implicit none
+
+  ! Free work space.  All PETSc objects should be destroyed when they
+  ! are no longer needed.
+
+  call VecDestroy(xvec1,ierr)
+  call VecDestroy(uvec1,ierr)
+  call VecDestroy(bvec1,ierr)
+  call MatDestroy(Amat1,ierr)
+  call KSPDestroy(ksp1,ierr)
+  call VecScatterDestroy(vscat1,ierr)
+  call PetscFinalize(ierr)
+
+  if (SIMULATION_TYPE == 3) then
+    call VecDestroy(b_xvec1,ierr)
+    call VecDestroy(b_bvec1,ierr)
+    call KSPDestroy(b_ksp1,ierr)
+  call VecScatterDestroy(b_vscat1,ierr)
+  endif
+
+  end subroutine petsc_finalize1
+
+
+!===============================================================================
+! Level-2 solver
+!===============================================================================
+
+  subroutine petsc_initialize()
+
+  implicit none
+  PetscInt :: istart,iend
+  PetscInt :: nzeros_max,nzeros_min
+  PetscInt, allocatable :: nzeros(:)
+  ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  ! Beginning of program
+  ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+  call PetscInitialize(PETSC_NULL_CHARACTER,ierr)
+  !call PetscOptionsGetInt(PETSC_NULL_CHARACTER,'-n',ngdof,flg,ierr)
+  !if (myrank==0) write(*,*) 'hi0!'
+  !call sync_all
+  ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  ! Compute the matrix and right-hand-side vector that define
+  ! the linear system, Ax = b.
+  ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+  ! Create matrix. When using MatCreate(), the matrix format can
+  ! be specified at runtime.
+
+  ! count number of nonzeros per row
+  allocate(nzeros(neq))
+  nzeros = 0
+  nzeros(krow_sparse)=nzeros(krow_sparse)+1
+  nzeros_max=maxvec(nzeros)
+  nzeros_min=minvec(nzeros)
+  !nzeros_max=2*nzeros_max
+  !nzeros=nzeros
+  if (myrank == 0) write(*,*) 'ngdof:',ngdof,' nzeros_max:',nzeros_max,' nzeros_min:',nzeros_min
+  call MatCreate(PETSC_COMM_WORLD,Amat,ierr)
+  call MatSetType(Amat,MATMPIAIJ,ierr)
+  CHKERRA(ierr)
+  call MatSetSizes(Amat,PETSC_DECIDE,PETSC_DECIDE,ngdof,ngdof,ierr)
+  CHKERRA(ierr)
+  ! preallocation
+  !call MatMPIAIJSetPreallocation(Amat,nzeros_max,PETSC_NULL_INTEGER,nzeros_max, &
+  !PETSC_NULL_INTEGER,ierr)
+  call MatMPIAIJSetPreallocation(Amat,nzeros_max,nzeros,nzeros_max, &
+  20*nzeros,ierr)
+  CHKERRA(ierr)
+  !call MatSeqAIJSetPreallocation(Amat,nzeros_max,nzeros,ierr)
+  !CHKERRA(ierr)
+
+  !call MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,ngdof,ngdof, &
+  !nzeros_max,PETSC_NULL_INTEGER,nzeros_max,PETSC_NULL_INTEGER,Amat,ierr)
+
+  !if (myrank==0) write(*,*) 'Matrix size:',size(Amat,1),size(Amat,2)
+  call MatSetFromOptions(Amat,ierr)
+  CHKERRA(ierr)
+  !call MatSetUp(Amat,ierr)
+  !if (myrank==0) write(*,*) 'ierr1:',ierr
+
+  call MatGetOwnershipRange(Amat,istart,iend,ierr)
+  CHKERRA(ierr)
+
+  !if (myrank==0) write(*,*) 'ierr2:',ierr,istart,iend,iend-istart,minval(nzeros),maxval(nzeros)
+  if (myrank == 0) write(*,*) 'actual global index range:',minval(kgrow_sparse),maxval(kgrow_sparse)
+  write(*,*) 'global index:',myrank,istart,iend,iend-istart
+  call sync_all
+  !if (myrank==0) write(*,*) 'ierr3:',(iend-istart)*nzeros
+  !if (myrank==0) write(*,*) 'ierr4:',iend,sum(nzeros),sum((iend-istart)*nzeros)
+  deallocate(nzeros)
+  !call sync_all
+  call sync_all
+  if (myrank == 0) write(*,*) 'matrix'
+  !call sync_all
+
+
+  ! Create vectors.  Note that we form 1 vector from scratch and
+  ! then duplicate as needed.
+
+  !call VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,ngdof,xvec,ierr)
+  call VecCreate(PETSC_COMM_WORLD,xvec,ierr)
+  call VecSetSizes(xvec,PETSC_DECIDE,ngdof,ierr)
+  call VecSetFromOptions(xvec,ierr)
+  call VecDuplicate(xvec,bvec,ierr)
+  call VecDuplicate(xvec,uvec,ierr)
+  if (myrank == 0) write(*,*) 'vector'
+
+  ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  ! Create the linear solver and set various options
+  ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+  ! Create linear solver context
+
+  call KSPCreate(PETSC_COMM_WORLD,ksp,ierr)
+  ! Set operators. Here the matrix that defines the linear system
+  ! also serves as the preconditioning matrix.
+  !call KSPSetOperators(ksp,Amat,Amat,SAME_PRECONDITIONER,ierr) ! version < 3.5
+  call KSPSetOperators(ksp,Amat,Amat,ierr) ! version >= 3.5
+
+  call KSPSetType(ksp,KSPCG,ierr);
+  if (myrank == 0) write(*,*) 'ksp0'
+  call KSPGetPC(ksp,pc,ierr)
+  call PCSetType(pc,PCHYPRE,ierr)
+  if (myrank == 0) write(*,*) 'ksp1'
+  call KSPSetTolerances(ksp,KSP_RTOL,KSP_ATOL,KSP_DTOL,KSP_MAXITER,ierr)
+  CHKERRA(ierr)
+  if (myrank == 0) write(*,*) 'ksp2'
+
+  !  Set runtime options, e.g.,
+  !    -ksp_type < type> -pc_type < type> -ksp_monitor -ksp_KSP_RTOL < KSP_RTOL>
+  !  These options will override those specified above as long as
+  !  KSPSetFromOptions() is called _after_ any other customization
+  !  routines.
+  call KSPSetFromOptions(ksp,ierr)
+
+  end subroutine petsc_initialize
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_set_matrix()
+
+  use specfem_par, only: NEDOF,IFLAG_IN_FICTITIOUS_CUBE,NSPEC_INNER_CORE, &
+  NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
+  use specfem_par_innercore, only: ggdof_ic,storekmat_inner_core, &
+  idoubling_inner_core,inode_elmt_ic
+  use specfem_par_outercore, only: ggdof_oc,storekmat_outer_core,inode_elmt_oc
+  use specfem_par_crustmantle, only: ggdof_cm,storekmat_crust_mantle,inode_elmt_cm
+  use specfem_par_trinfinite, only: ggdof_trinf,storekmat_trinfinite,inode_elmt_trinf
+  use specfem_par_infinite, only: ggdof_inf,storekmat_infinite,inode_elmt_inf
+
+  implicit none
+  integer :: i,i_elmt,j,ncount
+  integer :: ggdof_elmt(NEDOF),idof(NEDOF),igdof(NEDOF)
+  ! Set and assemble matrix.
+  !  - Note that MatSetValues() uses 0-based row and column numbers
+  !  in Fortran as well as in C (as set here in the array "col").
+    ! stage 0: store all elements
+
+  call MatZeroEntries(Amat,ierr)
+  CHKERRA(ierr)
+
+  ! inner core
+  do i_elmt = 1,NSPEC_INNER_CORE
+    if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
+    ggdof_elmt = reshape(ggdof_ic(:,inode_elmt_ic(:,i_elmt)),(/NEDOF/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF
+      do j = 1,NEDOF
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_inner_core(i,j,i_elmt) /= 0.0_kreal) then
+        !ncount=ncount+1
+        !idof(ncount)=i
+        !igdof(ncount)=ggdof_elmt(i)
+        call MatSetValues(Amat,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_inner_core(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+      endif
+      enddo
+    enddo
+    !call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+    !storekmat_inner_core(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+    !CHKERRA(ierr)
+  enddo
+  !! inner core
+  !do i_elmt=1,NSPEC_INNER_CORE
+  !  if (idoubling_inner_core(i_elmt)==IFLAG_IN_FICTITIOUS_CUBE)cycle
+  !  ggdof_elmt=reshape(ggdof_ic(:,inode_elmt_ic(:,i_elmt)),(/NEDOF/))
+  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  ncount=0; idof=-1; igdof=-1
+  !  do i=1,NEDOF
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_inner_core(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  !enddo
+  deallocate(storekmat_inner_core)
+  !if (myrank==0) write(*,*) 'IC kmat done!'; call sync_all
+  ! outer core
+  do i_elmt = 1,NSPEC_OUTER_CORE
+    ggdof_elmt = reshape(ggdof_oc(:,inode_elmt_oc(:,i_elmt)),(/NEDOF/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF
+      do j = 1,NEDOF
+      if (ggdof_elmt(i) >= 0.and.ggdof_elmt(j) >= 0.and.                          &
+      storekmat_outer_core(i,j,i_elmt) /= 0.0_kreal) then
+        if (myrank == 0) write(*,*) 'hello in OC:',i_elmt,ggdof_elmt(i),ggdof_elmt(j)
+        call sync_all
+        call MatSetValues(Amat,1,ggdof_elmt(i),1,ggdof_elmt(j), &
+        storekmat_outer_core(i,j,i_elmt),ADD_VALUES,ierr)
+        CHKERRA(ierr)
+      endif
+      enddo
+    enddo
+  enddo
+  !! outer core
+  !do i_elmt=1,NSPEC_OUTER_CORE
+  !  ggdof_elmt=reshape(ggdof_oc(:,inode_elmt_oc(:,i_elmt)),(/NEDOF/))
+  !  ggdof_elmt=ggdof_elmt-1 ! petsc index starts from 0
+  !  ncount=0; idof=-1; igdof=-1
+  !  do i=1,NEDOF
+  !    if (ggdof_elmt(i) >= 0) then
+  !      ncount=ncount+1
+  !      idof(ncount)=i
+  !      igdof(ncount)=ggdof_elmt(i)
+  !    endif
+  !  enddo
+  !  call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+  !  storekmat_outer_core(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+  !  CHKERRA(ierr)
+  !enddo
+  deallocate(storekmat_outer_core)
+  !if (myrank==0) write(*,*) 'OC kmat done!'; call sync_all
+  ! crust mantle
+  do i_elmt = 1,NSPEC_CRUST_MANTLE
+    ggdof_elmt = reshape(ggdof_cm(:,inode_elmt_cm(:,i_elmt)),(/NEDOF/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF
+      if (ggdof_elmt(i) >= 0) then
+        ncount = ncount+1
+        idof(ncount)=i
+        igdof(ncount)=ggdof_elmt(i)
+      endif
+    enddo
+    !if (myrank==0) write(*,*) 'hi homnath3in!',i_elmt,minval(igdof(1:ncount)), &
+    !maxval(igdof(1:ncount)) !,storekmat_crust_mantle(idof(1:ncount),idof(1:ncount),i_elmt)
+    call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+    storekmat_crust_mantle(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+    CHKERRA(ierr)
+  enddo
+  deallocate(storekmat_crust_mantle)
+  !if (myrank==0) write(*,*) 'CM kmat done!'; call sync_all
+  ! trinfinite
+  do i_elmt = 1,NSPEC_TRINFINITE
+    ggdof_elmt = reshape(ggdof_trinf(:,inode_elmt_trinf(:,i_elmt)),(/NEDOF/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF
+      if (ggdof_elmt(i) >= 0) then
+        ncount = ncount+1
+        idof(ncount)=i
+        igdof(ncount)=ggdof_elmt(i)
+      endif
+    enddo
+    call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+    storekmat_trinfinite(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+    CHKERRA(ierr)
+  enddo
+  deallocate(storekmat_trinfinite)
+  !if (myrank==0) write(*,*) 'TRINF kmat done!'; call sync_all
+  ! infinite
+  do i_elmt = 1,NSPEC_INFINITE
+    ggdof_elmt = reshape(ggdof_inf(:,inode_elmt_inf(:,i_elmt)),(/NEDOF/))
+    ggdof_elmt = ggdof_elmt-1 ! petsc index starts from 0
+    ncount = 0; idof=-1; igdof=-1
+    do i = 1,NEDOF
+      if (ggdof_elmt(i) >= 0) then
+        ncount = ncount+1
+        idof(ncount)=i
+        igdof(ncount)=ggdof_elmt(i)
+      endif
+    enddo
+    call MatSetValues(Amat,ncount,igdof(1:ncount),ncount,igdof(1:ncount), &
+    storekmat_infinite(idof(1:ncount),idof(1:ncount),i_elmt),ADD_VALUES,ierr)
+    CHKERRA(ierr)
+  enddo
+  deallocate(storekmat_infinite)
+  !if (myrank==0) write(*,*) 'INF kmat done!'; call sync_all
+
+  call MatAssemblyBegin(Amat,MAT_FINAL_ASSEMBLY,ierr)
+  call MatAssemblyEnd(Amat,MAT_FINAL_ASSEMBLY,ierr)
+  !if (myrank==0) write(*,*) 'matrix setting & assembly complete!'; call sync_all
+
+  end subroutine petsc_set_matrix
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_set_vector()
+
+  use specfem_par, only: l2gdof,load
+  implicit none
+  PetscScalar      zero !,none,one
+  ! Set exact solution; then compute right-hand-side vector.
+  !none=-1.0
+  !one=1.0
+  zero = 0.0
+  call VecSet(bvec,zero,ierr)
+  call VecSetValues(bvec,neq,l2gdof(1:),load(1:),ADD_VALUES,ierr);
+
+  ! assemble vector
+  call VecAssemblyBegin(bvec,ierr)
+  call VecAssemblyEnd(bvec,ierr)
+  !if (myrank==0) write(*,*) 'vector setting & assembly complete!'
+
+  end subroutine petsc_set_vector
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_solve(sdata,cg_iter)
+
+  implicit none
+  PetscScalar sdata(:)
+  PetscInt    cg_iter
+  PetscInt    ireason
+
+  call KSPSolve(ksp,bvec,xvec,ierr)
+
+  ! View solver info; we could instead use the option -ksp_view
+  call KSPView(ksp,PETSC_VIEWER_STDOUT_WORLD,ierr)
+
+  !-------------------------------------------------------------------------------
+  ! Check solution and clean up
+  !-------------------------------------------------------------------------------
+  ! Check the error
+  !call VecAXPY(xvec,none,uvec,ierr)
+  !call VecNorm(xvec,NORM_2,norm,ierr)
+  call KSPGetConvergedReason(ksp,ireason,ierr)
+  call KSPGetIterationNumber(ksp,cg_iter,ierr)
+  if (myrank < 1) then
+    write(*,*) 'converged reason',ireason
+    write(*,*) 'Iterations:',cg_iter
+  endif
+  !if (norm > 1.e-12) then
+  !  write(*,'(a,e11.4,a,i5)')'Norm of error:',norm,', Iterations:',its
+  !else
+  !  write(*,'(a,i5,a)')'Norm of error < 1.e-12, Iterations:',its
+  !endif
+
+  end subroutine petsc_solve
+
+!
+!===============================================================================
+!
+
+  subroutine petsc_finalize()
+
+  implicit none
+
+  ! Free work space.  All PETSc objects should be destroyed when they
+  ! are no longer needed.
+
+  call VecDestroy(xvec,ierr)
+  call VecDestroy(uvec,ierr)
+  call VecDestroy(bvec,ierr)
+  call MatDestroy(Amat,ierr)
+  call KSPDestroy(ksp,ierr)
+  call PetscFinalize(ierr)
+
+  end subroutine petsc_finalize
+
+end module solver_petsc
+
+#endif
+

From 23f46163d74b8d2c80df3137c298029fbb9e357b Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 12:52:00 +0200
Subject: [PATCH 06/11] moving routine band_instrument_code() to shared source
 file

---
 src/shared/get_timestep_and_layers.f90 | 30 ++++++++++++++++++++++++++
 src/specfem3D/write_seismograms.f90    | 30 --------------------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/shared/get_timestep_and_layers.f90 b/src/shared/get_timestep_and_layers.f90
index 8a7b8aae2..133b29e29 100644
--- a/src/shared/get_timestep_and_layers.f90
+++ b/src/shared/get_timestep_and_layers.f90
@@ -1083,3 +1083,33 @@ subroutine get_minimum_period_estimate()
   estimated_min_wavelength = T_min_period * S_VELOCITY_MIN
 
   end subroutine get_minimum_period_estimate
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine band_instrument_code(DT,bic)
+
+! This subroutine is to choose the appropriate band and instrument codes for channel names of seismograms
+! based on the IRIS convention (first two letters of channel codes which were LH(Z/E/N) previously).
+! For consistency with observed data, we now use the IRIS convention for band codes (first letter in channel codes)of
+! SEM seismograms governed by their sampling rate.
+! Instrument code (second letter in channel codes) is fixed to "X" which is assigned by IRIS for synthetic seismograms.
+! See the manual for further explanations!
+! Ebru, November 2010
+
+  implicit none
+
+  double precision,intent(in) :: DT
+  character(len=2),intent(out) :: bic
+
+  bic = ''
+
+  if (1.0d0 <= DT)  bic = 'LX'
+  if (0.1d0 < DT .and. DT < 1.0d0) bic = 'MX'
+  if (0.0125d0 < DT .and. DT <= 0.1d0) bic = 'BX'
+  if (0.004d0 < DT .and. DT <= 0.0125d0) bic = 'HX'
+  if (0.001d0 < DT .and. DT <= 0.004d0) bic = 'CX'
+  if (DT <= 0.001d0) bic = 'FX'
+
+  end subroutine band_instrument_code
diff --git a/src/specfem3D/write_seismograms.f90 b/src/specfem3D/write_seismograms.f90
index 94e3e3454..8e549e302 100644
--- a/src/specfem3D/write_seismograms.f90
+++ b/src/specfem3D/write_seismograms.f90
@@ -860,33 +860,3 @@ subroutine write_seismograms_strain()
 
   end subroutine write_seismograms_strain
 
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine band_instrument_code(DT,bic)
-
-! This subroutine is to choose the appropriate band and instrument codes for channel names of seismograms
-! based on the IRIS convention (first two letters of channel codes which were LH(Z/E/N) previously).
-! For consistency with observed data, we now use the IRIS convention for band codes (first letter in channel codes)of
-! SEM seismograms governed by their sampling rate.
-! Instrument code (second letter in channel codes) is fixed to "X" which is assigned by IRIS for synthetic seismograms.
-! See the manual for further explanations!
-! Ebru, November 2010
-
-  implicit none
-
-  double precision,intent(in) :: DT
-  character(len=2),intent(out) :: bic
-
-  bic = ''
-
-  if (1.0d0 <= DT)  bic = 'LX'
-  if (0.1d0 < DT .and. DT < 1.0d0) bic = 'MX'
-  if (0.0125d0 < DT .and. DT <= 0.1d0) bic = 'BX'
-  if (0.004d0 < DT .and. DT <= 0.0125d0) bic = 'HX'
-  if (0.001d0 < DT .and. DT <= 0.004d0) bic = 'CX'
-  if (DT <= 0.001d0) bic = 'FX'
-
-  end subroutine band_instrument_code
-

From cd36f24be1c2a57b10257d056124c888314aa6a1 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 12:52:38 +0200
Subject: [PATCH 07/11] updates workflow (for macOS)

---
 .github/workflows/CI.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 5a4e42773..3606650be 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -119,8 +119,12 @@ jobs:
         echo "OMPI_MCA_rmaps_base_inherit=1" >> $GITHUB_ENV
         ## avoids MPI issue when running in MacOS
         echo "OMPI_MCA_btl=self,tcp" >> $GITHUB_ENV
+        # newer OpenMPI version (5.x)
+        echo "PRTE_MCA_rmaps_default_mapping_policy=:oversubscribe" >> $GITHUB_ENV
         # exports for xterm output (for make tests)
         echo "TERM=xterm" >> $GITHUB_ENV
+        # warning: (arm64)  could not find object file symbol for symbol ___mulsc3
+        echo "LDFLAGS=-lgcc" >> $GITHUB_ENV
         echo ""
         echo "exports:"
         export

From fe0923120fb6ab40a3a47e2ad39569acca237e64 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 12:53:29 +0200
Subject: [PATCH 08/11] more code formatting

---
 src/specfem3D/SIEM_prepare_solver.F90 | 514 ++++++++++++++------------
 src/specfem3D/prepare_gravity.f90     |   1 +
 2 files changed, 273 insertions(+), 242 deletions(-)

diff --git a/src/specfem3D/SIEM_prepare_solver.F90 b/src/specfem3D/SIEM_prepare_solver.F90
index 178072f0c..7d2e1a453 100644
--- a/src/specfem3D/SIEM_prepare_solver.F90
+++ b/src/specfem3D/SIEM_prepare_solver.F90
@@ -33,6 +33,12 @@ subroutine SIEM_prepare_solver()
   ! check if anything to do
   if (.not. FULL_GRAVITY) return
 
+  ! user output
+  if (myrank == 0) then
+    write(IMAIN,*) "preparing full gravity solver"
+    call flush_IMAIN()
+  endif
+
   ! safety stop
   stop 'FULL_GRAVITY not fully implemented yet'
 
@@ -407,9 +413,10 @@ subroutine prepare_solver_poisson()
 
   implicit none
 
-  if (myrank == 0 ) then
-    write(IMAIN,*) "preparing mass matrices."
-    write(IMAIN,*)
+  ! user output
+  if (myrank == 0) then
+    write(IMAIN,*) "  allocating poisson level-1 solver arrays"
+    call flush_IMAIN()
   endif
 
   ! indexify regions
@@ -430,103 +437,102 @@ subroutine prepare_solver_poisson()
            dprecon_infinite1(nnode_inf1))
 
   allocate(dprecon1(0:neq1),load1(0:neq1),pgrav_ic1(nnode_ic1), &
-  pgrav_oc1(nnode_oc1),pgrav_cm1(nnode_cm1),pgrav_trinf1(nnode_trinf1), &
-  pgrav_inf1(nnode_inf1))
+           pgrav_oc1(nnode_oc1),pgrav_cm1(nnode_cm1),pgrav_trinf1(nnode_trinf1), &
+           pgrav_inf1(nnode_inf1))
 
   if (SIMULATION_TYPE == 3) then
     allocate(b_load1(0:neq1), b_pgrav_ic1(nnode_ic1), b_pgrav_oc1(nnode_oc1), &
-    b_pgrav_cm1(nnode_cm1), b_pgrav_trinf1(nnode_trinf1), b_pgrav_inf1(nnode_inf1))
+             b_pgrav_cm1(nnode_cm1), b_pgrav_trinf1(nnode_trinf1), b_pgrav_inf1(nnode_inf1))
   endif
 
   ! crust mantle
   call poisson_stiffness3(IREGION_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
-  NGLOB_CRUST_MANTLE,ibool_crust_mantle,xstore_crust_mantle,ystore_crust_mantle, &
-  zstore_crust_mantle,nnode_cm1,inode_elmt_cm1,storekmat_crust_mantle1, &
-  dprecon_crust_mantle1)
+                          NGLOB_CRUST_MANTLE,ibool_crust_mantle,xstore_crust_mantle,ystore_crust_mantle, &
+                          zstore_crust_mantle,nnode_cm1,inode_elmt_cm1,storekmat_crust_mantle1, &
+                          dprecon_crust_mantle1)
 
   ! outer core
   call poisson_stiffness3(IREGION_OUTER_CORE,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
-  ibool_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
-  nnode_oc1,inode_elmt_oc1,storekmat_outer_core1,dprecon_outer_core1)
+                          ibool_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+                          nnode_oc1,inode_elmt_oc1,storekmat_outer_core1,dprecon_outer_core1)
 
   ! inner core
   call poisson_stiffness3(IREGION_INNER_CORE,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-  ibool_inner_core,xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  nnode_ic1,inode_elmt_ic1,storekmat_inner_core1,dprecon_inner_core1)
+                          ibool_inner_core,xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                          nnode_ic1,inode_elmt_ic1,storekmat_inner_core1,dprecon_inner_core1)
 
   if (ADD_TRINF) then
     ! transition infinite
     call poisson_stiffness3(IREGION_TRINFINITE,NSPEC_TRINFINITE,NGLOB_TRINFINITE, &
-    ibool_trinfinite,xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
-    nnode_trinf1,inode_elmt_trinf1,storekmat_trinfinite1,dprecon_trinfinite1)
+                            ibool_trinfinite,xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
+                            nnode_trinf1,inode_elmt_trinf1,storekmat_trinfinite1,dprecon_trinfinite1)
   endif
 
   ! infinite layer
   call poisson_stiffnessINF3(NSPEC_INFINITE,NGLOB_INFINITE,ibool_infinite, &
-  xstore_infinite,ystore_infinite,zstore_infinite,nnode_inf1,inode_elmt_inf1, &
-  storekmat_infinite1,dprecon_infinite1)
-
-  call sync_all
+                             xstore_infinite,ystore_infinite,zstore_infinite,nnode_inf1,inode_elmt_inf1, &
+                             storekmat_infinite1,dprecon_infinite1)
 
+  call sync_all()
 
   ! assemble stiffness matrices
   ! assemble across the MPI processes in a region
   ! crust_mantle
   call assemble_MPI_scalar(NPROCTOT_VAL,nnode_cm1,dprecon_crust_mantle1, &
-       num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
-       nibool_interfaces_crust_mantle1,ibool_interfaces_crust_mantle1, &
-       my_neighbors_crust_mantle1)
+                           num_interfaces_crust_mantle1,max_nibool_interfaces_crust_mantle1, &
+                           nibool_interfaces_crust_mantle1,ibool_interfaces_crust_mantle1, &
+                           my_neighbors_crust_mantle1)
 
   ! outer core
   call assemble_MPI_scalar(NPROCTOT_VAL,nnode_oc1,dprecon_outer_core1, &
-       num_interfaces_outer_core1,max_nibool_interfaces_outer_core1, &
-       nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
-       my_neighbors_outer_core1)
+                           num_interfaces_outer_core1,max_nibool_interfaces_outer_core1, &
+                           nibool_interfaces_outer_core1,ibool_interfaces_outer_core1, &
+                           my_neighbors_outer_core1)
 
   ! inner core
   call assemble_MPI_scalar(NPROCTOT_VAL,nnode_ic1,dprecon_inner_core1, &
-       num_interfaces_inner_core1,max_nibool_interfaces_inner_core1, &
-       nibool_interfaces_inner_core1,ibool_interfaces_inner_core1, &
-       my_neighbors_inner_core1)
+                           num_interfaces_inner_core1,max_nibool_interfaces_inner_core1, &
+                           nibool_interfaces_inner_core1,ibool_interfaces_inner_core1, &
+                           my_neighbors_inner_core1)
 
   ! transition infinite
   if (ADD_TRINF) then
     call assemble_MPI_scalar(NPROCTOT_VAL,nnode_trinf1,dprecon_trinfinite1, &
-         num_interfaces_trinfinite1,max_nibool_interfaces_trinfinite1, &
-         nibool_interfaces_trinfinite1,ibool_interfaces_trinfinite1, &
-         my_neighbors_trinfinite1)
+                             num_interfaces_trinfinite1,max_nibool_interfaces_trinfinite1, &
+                             nibool_interfaces_trinfinite1,ibool_interfaces_trinfinite1, &
+                             my_neighbors_trinfinite1)
   endif
 
   ! infinite
   call assemble_MPI_scalar(NPROCTOT_VAL,nnode_inf1,dprecon_infinite1, &
-       num_interfaces_infinite1,max_nibool_interfaces_infinite1, &
-       nibool_interfaces_infinite1,ibool_interfaces_infinite1, &
-       my_neighbors_infinite1)
+                           num_interfaces_infinite1,max_nibool_interfaces_infinite1, &
+                           nibool_interfaces_infinite1,ibool_interfaces_infinite1, &
+                           my_neighbors_infinite1)
 
   call sync_all()
 
   ! assemble across the different regions in a process
   dprecon1 = zero
   ! crust_mantle
-  dprecon1(gdof_cm1)=dprecon1(gdof_cm1)+dprecon_crust_mantle1
+  dprecon1(gdof_cm1) = dprecon1(gdof_cm1)+dprecon_crust_mantle1
 
   ! outer core
-  dprecon1(gdof_oc1)=dprecon1(gdof_oc1)+dprecon_outer_core1
+  dprecon1(gdof_oc1) = dprecon1(gdof_oc1)+dprecon_outer_core1
 
   ! inner core
-  dprecon1(gdof_ic1)=dprecon1(gdof_ic1)+dprecon_inner_core1
+  dprecon1(gdof_ic1) = dprecon1(gdof_ic1)+dprecon_inner_core1
 
   ! transition infinite
   if (ADD_TRINF) then
-    dprecon1(gdof_trinf1)=dprecon1(gdof_trinf1)+dprecon_trinfinite1
+    dprecon1(gdof_trinf1) = dprecon1(gdof_trinf1)+dprecon_trinfinite1
   endif
 
   ! infinite
-  dprecon1(gdof_inf1)=dprecon1(gdof_inf1)+dprecon_infinite1
+  dprecon1(gdof_inf1) = dprecon1(gdof_inf1)+dprecon_infinite1
 
-  dprecon1(0)=0.0_CUSTOM_REAL
+  dprecon1(0) = 0.0_CUSTOM_REAL
 
-  call sync_all
+  call sync_all()
 
   ! invert preconditioner
   !dprecon1(1:)=1.0_CUSTOM_REAL/dprecon1(1:)
@@ -535,111 +541,119 @@ subroutine prepare_solver_poisson()
 
   ! Level-2 solver------------------
   if (SOLVER_5GLL) then
-  allocate(storekmat_crust_mantle(NGLLCUBE,NGLLCUBE,NSPEC_CRUST_MANTLE), &
-           dprecon_crust_mantle(NGLOB_CRUST_MANTLE))
-  allocate(storekmat_outer_core(NGLLCUBE,NGLLCUBE,NSPEC_OUTER_CORE), &
-           dprecon_outer_core(NGLOB_OUTER_CORE))
-  allocate(storekmat_inner_core(NGLLCUBE,NGLLCUBE,NSPEC_INNER_CORE), &
-           dprecon_inner_core(NGLOB_INNER_CORE))
-  if (ADD_TRINF) then
-    allocate(storekmat_trinfinite(NGLLCUBE,NGLLCUBE,NSPEC_TRINFINITE), &
-    dprecon_trinfinite(NGLOB_TRINFINITE))
-  endif
-  allocate(storekmat_infinite(NGLLCUBE,NGLLCUBE,NSPEC_INFINITE), &
-           dprecon_infinite(NGLOB_INFINITE))
-  allocate(dprecon(0:neq),load(0:neq))
 
-  ! better to make dprecon_* local rather than global
+    ! user output
+    if (myrank == 0) then
+      write(IMAIN,*) "  allocating poisson level-2 solver arrays"
+      call flush_IMAIN()
+    endif
 
-  ! crust mantle
-  call poisson_stiffness(IREGION_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
-  NGLOB_CRUST_MANTLE,ibool_crust_mantle,xstore_crust_mantle,ystore_crust_mantle, &
-  zstore_crust_mantle,storekmat_crust_mantle,dprecon_crust_mantle)
-  ! outer core
-  call poisson_stiffness(IREGION_OUTER_CORE,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
-  ibool_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
-  storekmat_outer_core,dprecon_outer_core)
-  ! inner core
-  call poisson_stiffness(IREGION_INNER_CORE,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-  ibool_inner_core,xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  storekmat_inner_core,dprecon_inner_core)
+    allocate(storekmat_crust_mantle(NGLLCUBE,NGLLCUBE,NSPEC_CRUST_MANTLE), &
+             dprecon_crust_mantle(NGLOB_CRUST_MANTLE))
+    allocate(storekmat_outer_core(NGLLCUBE,NGLLCUBE,NSPEC_OUTER_CORE), &
+             dprecon_outer_core(NGLOB_OUTER_CORE))
+    allocate(storekmat_inner_core(NGLLCUBE,NGLLCUBE,NSPEC_INNER_CORE), &
+             dprecon_inner_core(NGLOB_INNER_CORE))
+    if (ADD_TRINF) then
+      allocate(storekmat_trinfinite(NGLLCUBE,NGLLCUBE,NSPEC_TRINFINITE), &
+      dprecon_trinfinite(NGLOB_TRINFINITE))
+    endif
+    allocate(storekmat_infinite(NGLLCUBE,NGLLCUBE,NSPEC_INFINITE), &
+             dprecon_infinite(NGLOB_INFINITE))
+    allocate(dprecon(0:neq),load(0:neq))
 
-  ! transition infinite
+    ! better to make dprecon_* local rather than global
 
-  if (ADD_TRINF) then
-    call poisson_stiffness(IREGION_TRINFINITE,NSPEC_TRINFINITE,NGLOB_TRINFINITE, &
-    ibool_trinfinite,xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
-    storekmat_trinfinite,dprecon_trinfinite)
-  endif
+    ! crust mantle
+    call poisson_stiffness(IREGION_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
+                           NGLOB_CRUST_MANTLE,ibool_crust_mantle,xstore_crust_mantle,ystore_crust_mantle, &
+                           zstore_crust_mantle,storekmat_crust_mantle,dprecon_crust_mantle)
+    ! outer core
+    call poisson_stiffness(IREGION_OUTER_CORE,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+                           ibool_outer_core,xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+                           storekmat_outer_core,dprecon_outer_core)
+    ! inner core
+    call poisson_stiffness(IREGION_INNER_CORE,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+                           ibool_inner_core,xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                           storekmat_inner_core,dprecon_inner_core)
 
-  ! infinite layer
-  call poisson_stiffnessINF(NSPEC_INFINITE,NGLOB_INFINITE, &
-  ibool_infinite,xstore_infinite,ystore_infinite,zstore_infinite, &
-  storekmat_infinite,dprecon_infinite)
+    ! transition infinite
+    if (ADD_TRINF) then
+      call poisson_stiffness(IREGION_TRINFINITE,NSPEC_TRINFINITE,NGLOB_TRINFINITE, &
+                             ibool_trinfinite,xstore_trinfinite,ystore_trinfinite,zstore_trinfinite, &
+                             storekmat_trinfinite,dprecon_trinfinite)
+    endif
 
-  call sync_all
-  ! assemble stiffness matrices
-  ! assemble across the MPI processes in a region
-  ! crust_mantle
-  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_CRUST_MANTLE,dprecon_crust_mantle, &
-  num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
-  nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
-  my_neighbors_crust_mantle)
+    ! infinite layer
+    call poisson_stiffnessINF(NSPEC_INFINITE,NGLOB_INFINITE, &
+                              ibool_infinite,xstore_infinite,ystore_infinite,zstore_infinite, &
+                              storekmat_infinite,dprecon_infinite)
 
-  ! outer core
-  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_OUTER_CORE,dprecon_outer_core, &
-  num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
-  nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
-  my_neighbors_outer_core)
+    call sync_all()
 
-  ! inner core
-  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INNER_CORE,dprecon_inner_core, &
-  num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
-  nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
-  my_neighbors_inner_core)
+    ! assemble stiffness matrices
+    ! assemble across the MPI processes in a region
+    ! crust_mantle
+    call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_CRUST_MANTLE,dprecon_crust_mantle, &
+                             num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                             nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+                             my_neighbors_crust_mantle)
 
-  ! transition infinite
+    ! outer core
+    call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_OUTER_CORE,dprecon_outer_core, &
+                             num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+                             nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+                             my_neighbors_outer_core)
 
-  if (ADD_TRINF) then
-  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_TRINFINITE,dprecon_trinfinite, &
-  num_interfaces_trinfinite,max_nibool_interfaces_trinfinite, &
-  nibool_interfaces_trinfinite,ibool_interfaces_trinfinite, &
-  my_neighbors_trinfinite)
-  endif
+    ! inner core
+    call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INNER_CORE,dprecon_inner_core, &
+                             num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+                             nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+                             my_neighbors_inner_core)
 
-  ! infinite
-  call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INFINITE,dprecon_infinite, &
-  num_interfaces_infinite,max_nibool_interfaces_infinite, &
-  nibool_interfaces_infinite,ibool_interfaces_infinite, &
-  my_neighbors_infinite)
+    ! transition infinite
 
-  call sync_all()
+    if (ADD_TRINF) then
+      call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_TRINFINITE,dprecon_trinfinite, &
+                               num_interfaces_trinfinite,max_nibool_interfaces_trinfinite, &
+                               nibool_interfaces_trinfinite,ibool_interfaces_trinfinite, &
+                               my_neighbors_trinfinite)
+    endif
 
-  ! assemble across the different regions in a process
-  dprecon = zero
-  ! crust_mantle
-  dprecon(gdof_cm)=dprecon(gdof_cm)+dprecon_crust_mantle
+    ! infinite
+    call assemble_MPI_scalar(NPROCTOT_VAL,NGLOB_INFINITE,dprecon_infinite, &
+                             num_interfaces_infinite,max_nibool_interfaces_infinite, &
+                             nibool_interfaces_infinite,ibool_interfaces_infinite, &
+                             my_neighbors_infinite)
 
-  ! outer core
-  dprecon(gdof_oc)=dprecon(gdof_oc)+dprecon_outer_core
+    call sync_all()
 
-  ! inner core
-  dprecon(gdof_ic)=dprecon(gdof_ic)+dprecon_inner_core
+    ! assemble across the different regions in a process
+    dprecon = zero
+    ! crust_mantle
+    dprecon(gdof_cm) = dprecon(gdof_cm)+dprecon_crust_mantle
 
-  ! transition infinite
+    ! outer core
+    dprecon(gdof_oc) = dprecon(gdof_oc)+dprecon_outer_core
 
-  if (ADD_TRINF) then
-    dprecon(gdof_trinf)=dprecon(gdof_trinf)+dprecon_trinfinite
-  endif
+    ! inner core
+    dprecon(gdof_ic) = dprecon(gdof_ic)+dprecon_inner_core
 
-  ! infinite
-  dprecon(gdof_inf)=dprecon(gdof_inf)+dprecon_infinite
+    ! transition infinite
 
-  dprecon(0)=0.0_CUSTOM_REAL
+    if (ADD_TRINF) then
+      dprecon(gdof_trinf) = dprecon(gdof_trinf)+dprecon_trinfinite
+    endif
+
+    ! infinite
+    dprecon(gdof_inf) = dprecon(gdof_inf)+dprecon_infinite
 
-  call sync_all
-  !--------------------Level-2 solver
+    dprecon(0) = 0.0_CUSTOM_REAL
+
+    call sync_all()
+    !--------------------Level-2 solver
   endif ! if (SOLVER_5GLL) then
+
   return
 
   end subroutine prepare_solver_poisson
@@ -704,9 +718,10 @@ subroutine prepare_solver_sparse()
 
   ismpi = .true.
 
-  if (myrank == 0 ) then
-    write(IMAIN,*) '-------------------- Preparing sparse matrix: --------------------'
-    write(IMAIN,*)
+  ! user output
+  if (myrank == 0) then
+    write(IMAIN,*) "  preparing sparse matrix solver"
+    call flush_IMAIN()
   endif
 
   !===============================================================================
@@ -720,13 +735,15 @@ subroutine prepare_solver_sparse()
   nedof_inf1 = NEDOFPHI1
 
   ! Maximum DOF in array - number of elements * Element_dof^2
-  nmax1=NSPEC_INNER_CORE*(nedof_ic1*nedof_ic1)+                                &
-       NSPEC_OUTER_CORE*(nedof_oc1*nedof_oc1)+                                 &
-       NSPEC_CRUST_MANTLE*(nedof_cm1*nedof_cm1)+                               &
-       NSPEC_TRINFINITE*(nedof_trinf1*nedof_trinf1)+                           &
-       NSPEC_INFINITE*(nedof_inf1*nedof_inf1)
+  nmax1 = NSPEC_INNER_CORE*(nedof_ic1*nedof_ic1)+                                &
+          NSPEC_OUTER_CORE*(nedof_oc1*nedof_oc1)+                                 &
+          NSPEC_CRUST_MANTLE*(nedof_cm1*nedof_cm1)+                               &
+          NSPEC_TRINFINITE*(nedof_trinf1*nedof_trinf1)+                           &
+          NSPEC_INFINITE*(nedof_inf1*nedof_inf1)
 
   allocate(col0(nmax1),row0(nmax1),gcol0(nmax1),grow0(nmax1),kmat0(nmax1))
+
+  !debug
   if (myrank == 0) then
     print *,' -- Elemental DOFs for IC : ', nedof_ic1
     print *,' -- Maximum DOFs (nmax1)  : ', nmax1
@@ -734,14 +751,14 @@ subroutine prepare_solver_sparse()
 
   ! Allocate map for each region
   allocate(imap_ic(nedof_ic1),imap_oc(nedof_oc1),imap_cm(nedof_cm1), &
-  imap_trinf(nedof_trinf1),imap_inf(nedof_inf1))
+           imap_trinf(nedof_trinf1),imap_inf(nedof_inf1))
 
   ! I THINK THIS SYNTAX MEANS CREATE A RANGE?
-  imap_ic=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapu1, imapphi1 /)
-  imap_oc=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapchi1, imapp1, imapphi1 /)
-  imap_cm=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapu1, imapphi1 /)
-  imap_trinf=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapphi1 /)
-  imap_inf=(/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapphi1 /)
+  imap_ic = (/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapu1, imapphi1 /)
+  imap_oc = (/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapchi1, imapp1, imapphi1 /)
+  imap_cm = (/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapu1, imapphi1 /)
+  imap_trinf = (/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapphi1 /)
+  imap_inf = (/ (i,i = 1,NGLLCUBE_INF) /) !(/ imapphi1 /)
 
   ! read global degrees of freedoms from DATABASE files
   write(spm,*)myrank
@@ -770,10 +787,11 @@ subroutine prepare_solver_sparse()
   close(10)
 
   ! Find maximum ID (dof value) for any of the regions
-  ngdof1=maxscal(maxval( (/ maxval(ggdof_ic1),maxval(ggdof_oc1), &
-         maxval(ggdof_cm1),maxval(ggdof_trinf1),maxval(ggdof_inf1) /) ))
-  if (myrank == 0) write(*,'(a,i12)')' -- Total global degrees of freedom1: ',ngdof1
+  ngdof1 = maxscal(maxval( (/ maxval(ggdof_ic1),maxval(ggdof_oc1), &
+           maxval(ggdof_cm1),maxval(ggdof_trinf1),maxval(ggdof_inf1) /) ))
 
+  !debug
+  if (myrank == 0) write(*,'(a,i12)')' -- Total global degrees of freedom1: ',ngdof1
 
   ! stage 0: store all elements
   ncount = 0
@@ -782,7 +800,6 @@ subroutine prepare_solver_sparse()
     ! Skip fictitious inner core cube
     if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE)cycle
 
-
     ! Note: gdof_ic1 defined in specfem_par_innercore
     ! Fetch gdof for IC element only on NGLLCUBE_INF points
     gdof_elmt1 = reshape(gdof_ic1(inode_elmt_ic1(:,i_elmt)),(/NEDOF1/))
@@ -797,16 +814,16 @@ subroutine prepare_solver_sparse()
         if (igdof > 0.and.jgdof > 0.and.storekmat_inner_core1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
           ncount = ncount+1
           ! Local (MPI?) map?
-          row0(ncount)=igdof
-          col0(ncount)=jgdof
+          row0(ncount) = igdof
+          col0(ncount) = jgdof
           ! Global map?
-          grow0(ncount)=ggdof_elmt1(imap_ic(i))
-          gcol0(ncount)=ggdof_elmt1(imap_ic(j))
+          grow0(ncount) = ggdof_elmt1(imap_ic(i))
+          gcol0(ncount) = ggdof_elmt1(imap_ic(j))
         endif
       enddo
     enddo
   enddo
-  call sync_all
+  call sync_all()
 
   ! outer core
   do i_elmt = 1,NSPEC_OUTER_CORE
@@ -819,10 +836,10 @@ subroutine prepare_solver_sparse()
         if (igdof > 0.and.jgdof > 0.and.storekmat_outer_core1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
           !if (myrank==0) write(1111,*) igdof,jgdof,storekmat_outer_core1(i,j,i_elmt)
           ncount = ncount+1
-          row0(ncount)=igdof
-          col0(ncount)=jgdof
-          grow0(ncount)=ggdof_elmt1(imap_oc(i))
-          gcol0(ncount)=ggdof_elmt1(imap_oc(j))
+          row0(ncount) = igdof
+          col0(ncount) = jgdof
+          grow0(ncount) = ggdof_elmt1(imap_oc(i))
+          gcol0(ncount) = ggdof_elmt1(imap_oc(j))
         endif
       enddo
     enddo
@@ -840,10 +857,10 @@ subroutine prepare_solver_sparse()
         jgdof = gdof_elmt1(imap_cm(j))
         if (igdof > 0.and.jgdof > 0.and.storekmat_crust_mantle1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
           ncount = ncount+1
-          row0(ncount)=igdof
-          col0(ncount)=jgdof
-          grow0(ncount)=ggdof_elmt1(imap_cm(i))
-          gcol0(ncount)=ggdof_elmt1(imap_cm(j))
+          row0(ncount) = igdof
+          col0(ncount) = jgdof
+          grow0(ncount) = ggdof_elmt1(imap_cm(i))
+          gcol0(ncount) = ggdof_elmt1(imap_cm(j))
         endif
       enddo
     enddo
@@ -860,10 +877,10 @@ subroutine prepare_solver_sparse()
         jgdof = gdof_elmt1(imap_trinf(j))
         if (igdof > 0.and.jgdof > 0.and.storekmat_trinfinite1(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
           ncount = ncount+1
-          row0(ncount)=igdof
-          col0(ncount)=jgdof
-          grow0(ncount)=ggdof_elmt1(imap_trinf(i))
-          gcol0(ncount)=ggdof_elmt1(imap_trinf(j))
+          row0(ncount) = igdof
+          col0(ncount) = jgdof
+          grow0(ncount) = ggdof_elmt1(imap_trinf(i))
+          gcol0(ncount) = ggdof_elmt1(imap_trinf(j))
         endif
       enddo
     enddo
@@ -880,10 +897,10 @@ subroutine prepare_solver_sparse()
         jgdof = gdof_elmt1(imap_inf(j))
         if (igdof > 0.and.jgdof > 0) then
           ncount = ncount+1
-          row0(ncount)=igdof
-          col0(ncount)=jgdof
-          grow0(ncount)=ggdof_elmt1(imap_inf(i))
-          gcol0(ncount)=ggdof_elmt1(imap_inf(j))
+          row0(ncount) = igdof
+          col0(ncount) = jgdof
+          grow0(ncount) = ggdof_elmt1(imap_inf(i))
+          gcol0(ncount) = ggdof_elmt1(imap_inf(j))
         endif
       enddo
     enddo
@@ -895,60 +912,64 @@ subroutine prepare_solver_sparse()
   allocate(ind0(ncount),iorder(ncount))
   ind0 = neq1*(row0(1:ncount)-1)+col0(1:ncount)
   call i_uniinv(ind0,iorder)
-  nsparse1=maxval(iorder)
+  nsparse1 = maxval(iorder)
+
+  !debug
   if (myrank == 0) write(*,'(a,1x,i0,1x,a,1x,i0)')'  neq1:',neq1,' Nsparse1:',nsparse1
-  call sync_all
+  call sync_all()
+
   allocate(krow_sparse1(nsparse1),kcol_sparse1(nsparse1))
   allocate(kgrow_sparse1(nsparse1),kgcol_sparse1(nsparse1))
 
   !kmat_sparse1=0.0_CUSTOM_REAL
-  krow_sparse1=-1
-  kcol_sparse1=-1
-  kgrow_sparse1=-1
-  kgcol_sparse1=-1
+  krow_sparse1 = -1
+  kcol_sparse1 = -1
+  kgrow_sparse1 = -1
+  kgcol_sparse1 = -1
   do i_count = 1,ncount!nmax
-    krow_sparse1(iorder(i_count))=row0(i_count)
-    kcol_sparse1(iorder(i_count))=col0(i_count)
-    kgrow_sparse1(iorder(i_count))=grow0(i_count)
-    kgcol_sparse1(iorder(i_count))=gcol0(i_count)
+    krow_sparse1(iorder(i_count)) = row0(i_count)
+    kcol_sparse1(iorder(i_count)) = col0(i_count)
+    kgrow_sparse1(iorder(i_count)) = grow0(i_count)
+    kgcol_sparse1(iorder(i_count)) = gcol0(i_count)
   enddo
   if (minval(krow_sparse1) < 1.or.minval(kcol_sparse1) < 1.or.                  &
-  minval(kgrow_sparse1) < 1.or.minval(kgcol_sparse1) < 1) then
+      minval(kgrow_sparse1) < 1.or.minval(kgcol_sparse1) < 1) then
     write(*,*) 'ERROR: local and global indices are less than 1!'
-    stop
+    stop 'Error local and global indices are less than 1'
   endif
 
   deallocate(row0,col0,grow0,gcol0,kmat0,ind0,iorder)
   deallocate(imap_ic,imap_oc,imap_cm,imap_trinf,imap_inf)
 
-
-
   ! stage 2: assemble across processors
 
   ! local DOF to global DOF mapping
   allocate(l2gdof1(0:neq1))
-  l2gdof1=-1
-  l2gdof1(gdof_ic1)=ggdof_ic1(1,:)
-  l2gdof1(gdof_oc1)=ggdof_oc1(1,:)
-  l2gdof1(gdof_cm1)=ggdof_cm1(1,:)
-  l2gdof1(gdof_trinf1)=ggdof_trinf1(1,:)
-  l2gdof1(gdof_inf1)=ggdof_inf1(1,:)
+  l2gdof1 = -1
+  l2gdof1(gdof_ic1) = ggdof_ic1(1,:)
+  l2gdof1(gdof_oc1) = ggdof_oc1(1,:)
+  l2gdof1(gdof_cm1) = ggdof_cm1(1,:)
+  l2gdof1(gdof_trinf1) = ggdof_trinf1(1,:)
+  l2gdof1(gdof_inf1) = ggdof_inf1(1,:)
 
   do i = 1,nsparse1
     if (kgrow_sparse1(i) /= l2gdof1(krow_sparse1(i)).or.kgcol_sparse1(i) /= l2gdof1(kcol_sparse1(i))) then
       print *,'VERY STRANGE!!!!!'
-      stop
+      stop 'Error very strange sparse dof numbers should not occur'
     endif
   enddo
 
   l2gdof1 = l2gdof1-1 ! PETSC uses 0 indexing
   gmin = minvec(l2gdof1(1:))
   gmax = maxvec(l2gdof1(1:))
+
+  !debug
   if (myrank == 0) write(*,'(a,1x,i0,1x,i0)')'  l2gdof1 range:',gmin,gmax
-  call sync_all
+  call sync_all()
+
   if (minval(l2gdof1(1:)) < 0) then
     write(*,*) 'ERROR: local-to-global indices are less than 1!'
-    stop
+    stop 'Error local-to-global indices are less than 1'
   endif
 
   !===============================================================================
@@ -961,22 +982,25 @@ subroutine prepare_solver_sparse()
     nedof_trinf = NEDOFPHI
     nedof_inf = NEDOFPHI
 
-    nmax=NSPEC_INNER_CORE*(nedof_ic*nedof_ic)+                           &
-        NSPEC_OUTER_CORE*(nedof_oc*nedof_oc)+                            &
-        NSPEC_CRUST_MANTLE*(nedof_cm*nedof_cm)+                          &
-        NSPEC_TRINFINITE*(nedof_trinf*nedof_trinf)+                      &
-        NSPEC_INFINITE*(nedof_inf*nedof_inf)
+    nmax = NSPEC_INNER_CORE*(nedof_ic*nedof_ic)+                           &
+           NSPEC_OUTER_CORE*(nedof_oc*nedof_oc)+                            &
+           NSPEC_CRUST_MANTLE*(nedof_cm*nedof_cm)+                          &
+           NSPEC_TRINFINITE*(nedof_trinf*nedof_trinf)+                      &
+           NSPEC_INFINITE*(nedof_inf*nedof_inf)
     allocate(col0(nmax),row0(nmax),gcol0(nmax),grow0(nmax),kmat0(nmax))
     !allocate(col0(nmax),row0(nmax),gcol0(nmax),grow0(nmax))
-    if (myrank == 0) print *,nedof_ic,nmax
+
+    !debug
+    if (myrank == 0) print *,'nedof_ic = ',nedof_ic,nmax
+
     allocate(imap_ic(nedof_ic),imap_oc(nedof_oc),imap_cm(nedof_cm), &
-    imap_trinf(nedof_trinf),imap_inf(nedof_inf))
+             imap_trinf(nedof_trinf),imap_inf(nedof_inf))
 
-    imap_ic=(/ (i,i = 1,NGLLCUBE) /) !(/ imapu, imapphi /)
-    imap_oc=(/ (i,i = 1,NGLLCUBE) /) !(/ imapchi, imapp, imapphi /)
-    imap_cm=(/ (i,i = 1,NGLLCUBE) /) !(/ imapu, imapphi /)
-    imap_trinf=(/ (i,i = 1,NGLLCUBE) /) !(/ imapphi /)
-    imap_inf=(/ (i,i = 1,NGLLCUBE) /) !(/ imapphi /)
+    imap_ic = (/ (i,i = 1,NGLLCUBE) /) !(/ imapu, imapphi /)
+    imap_oc = (/ (i,i = 1,NGLLCUBE) /) !(/ imapchi, imapp, imapphi /)
+    imap_cm = (/ (i,i = 1,NGLLCUBE) /) !(/ imapu, imapphi /)
+    imap_trinf = (/ (i,i = 1,NGLLCUBE) /) !(/ imapphi /)
+    imap_inf = (/ (i,i = 1,NGLLCUBE) /) !(/ imapphi /)
 
     ! read global degrees of freedoms from DATABASE files
     ! inner core
@@ -1000,8 +1024,10 @@ subroutine prepare_solver_sparse()
     read(10,*)ggdof_inf
     close(10)
 
-    ngdof=maxscal(maxval( (/ maxval(ggdof_ic),maxval(ggdof_oc),maxval(ggdof_cm), &
-    maxval(ggdof_trinf),maxval(ggdof_inf) /) ))
+    ngdof = maxscal(maxval( (/ maxval(ggdof_ic),maxval(ggdof_oc),maxval(ggdof_cm), &
+                               maxval(ggdof_trinf),maxval(ggdof_inf) /) ))
+
+    !debug
     if (myrank == 0) write(*,'(a,i12)')'  Total global degrees of freedom:',ngdof
 
     ! stage 0: store all elements
@@ -1019,10 +1045,10 @@ subroutine prepare_solver_sparse()
           jgdof = gdof_elmt(imap_ic(j))
           if (igdof > 0.and.jgdof > 0.and.storekmat_inner_core(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
             ncount = ncount+1
-            row0(ncount)=igdof
-            col0(ncount)=jgdof
-            grow0(ncount)=ggdof_elmt(imap_ic(i))
-            gcol0(ncount)=ggdof_elmt(imap_ic(j))
+            row0(ncount) = igdof
+            col0(ncount) = jgdof
+            grow0(ncount) = ggdof_elmt(imap_ic(i))
+            gcol0(ncount) = ggdof_elmt(imap_ic(j))
           endif
         enddo
       enddo
@@ -1038,10 +1064,10 @@ subroutine prepare_solver_sparse()
           jgdof = gdof_elmt(imap_oc(j))
           if (igdof > 0.and.jgdof > 0.and.storekmat_outer_core(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
             ncount = ncount+1
-            row0(ncount)=igdof
-            col0(ncount)=jgdof
-            grow0(ncount)=ggdof_elmt(imap_oc(i))
-            gcol0(ncount)=ggdof_elmt(imap_oc(j))
+            row0(ncount) = igdof
+            col0(ncount) = jgdof
+            grow0(ncount) = ggdof_elmt(imap_oc(i))
+            gcol0(ncount) = ggdof_elmt(imap_oc(j))
           endif
         enddo
       enddo
@@ -1057,10 +1083,10 @@ subroutine prepare_solver_sparse()
           jgdof = gdof_elmt(imap_cm(j))
           if (igdof > 0.and.jgdof > 0.and.storekmat_crust_mantle(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
             ncount = ncount+1
-            row0(ncount)=igdof
-            col0(ncount)=jgdof
-            grow0(ncount)=ggdof_elmt(imap_cm(i))
-            gcol0(ncount)=ggdof_elmt(imap_cm(j))
+            row0(ncount) = igdof
+            col0(ncount) = jgdof
+            grow0(ncount) = ggdof_elmt(imap_cm(i))
+            gcol0(ncount) = ggdof_elmt(imap_cm(j))
           endif
         enddo
       enddo
@@ -1076,10 +1102,10 @@ subroutine prepare_solver_sparse()
           jgdof = gdof_elmt(imap_trinf(j))
           if (igdof > 0.and.jgdof > 0.and.storekmat_trinfinite(i,j,i_elmt) /= 0.0_CUSTOM_REAL) then
             ncount = ncount+1
-            row0(ncount)=igdof
-            col0(ncount)=jgdof
-            grow0(ncount)=ggdof_elmt(imap_trinf(i))
-            gcol0(ncount)=ggdof_elmt(imap_trinf(j))
+            row0(ncount) = igdof
+            col0(ncount) = jgdof
+            grow0(ncount) = ggdof_elmt(imap_trinf(i))
+            gcol0(ncount) = ggdof_elmt(imap_trinf(j))
           endif
         enddo
       enddo
@@ -1095,10 +1121,10 @@ subroutine prepare_solver_sparse()
           jgdof = gdof_elmt(imap_inf(j))
           if (igdof > 0.and.jgdof > 0) then
             ncount = ncount+1
-            row0(ncount)=igdof
-            col0(ncount)=jgdof
-            grow0(ncount)=ggdof_elmt(imap_inf(i))
-            gcol0(ncount)=ggdof_elmt(imap_inf(j))
+            row0(ncount) = igdof
+            col0(ncount) = jgdof
+            grow0(ncount) = ggdof_elmt(imap_inf(i))
+            gcol0(ncount) = ggdof_elmt(imap_inf(j))
           endif
         enddo
       enddo
@@ -1109,53 +1135,57 @@ subroutine prepare_solver_sparse()
     allocate(ind0(ncount),iorder(ncount))
     ind0 = neq*(row0(1:ncount)-1)+col0(1:ncount)
     call i_uniinv(ind0,iorder)
-    nsparse=maxval(iorder)
+    nsparse = maxval(iorder)
+
+    !debug
     if (myrank == 0) write(*,'(a,1x,i0,1x,a,1x,i0)')'  neq:',neq,' Nsparse:',nsparse
 
     allocate(krow_sparse(nsparse),kcol_sparse(nsparse))
     allocate(kgrow_sparse(nsparse),kgcol_sparse(nsparse))
 
-    krow_sparse=-1
-    kcol_sparse=-1
-    kgrow_sparse=-1
-    kgcol_sparse=-1
+    krow_sparse = -1
+    kcol_sparse = -1
+    kgrow_sparse = -1
+    kgcol_sparse = -1
     do i_count = 1,ncount
-      krow_sparse(iorder(i_count))=row0(i_count)
-      kcol_sparse(iorder(i_count))=col0(i_count)
-      kgrow_sparse(iorder(i_count))=grow0(i_count)
-      kgcol_sparse(iorder(i_count))=gcol0(i_count)
+      krow_sparse(iorder(i_count)) = row0(i_count)
+      kcol_sparse(iorder(i_count)) = col0(i_count)
+      kgrow_sparse(iorder(i_count)) = grow0(i_count)
+      kgcol_sparse(iorder(i_count)) = gcol0(i_count)
     enddo
     if (minval(krow_sparse) < 1.or.minval(kcol_sparse) < 1.or.                  &
-    minval(kgrow_sparse) < 1.or.minval(kgcol_sparse) < 1) then
+        minval(kgrow_sparse) < 1.or.minval(kgcol_sparse) < 1) then
       write(*,*) 'ERROR: local and global indices are less than 1!'
-      stop
+      stop 'Error local and global indices are less than 1'
     endif
 
-
     deallocate(row0,col0,grow0,gcol0,kmat0,ind0,iorder)
     deallocate(imap_ic,imap_oc,imap_cm,imap_trinf,imap_inf)
 
-
     ! stage 2: assemble across processors
 
     ! local DOF to global DOF mapping
     allocate(l2gdof(0:neq))
-    l2gdof=-1
-    l2gdof(gdof_ic)=ggdof_ic(1,:)
-    l2gdof(gdof_oc)=ggdof_oc(1,:)
-    l2gdof(gdof_cm)=ggdof_cm(1,:)
-    l2gdof(gdof_trinf)=ggdof_trinf(1,:)
-    l2gdof(gdof_inf)=ggdof_inf(1,:)
+    l2gdof = -1
+    l2gdof(gdof_ic) = ggdof_ic(1,:)
+    l2gdof(gdof_oc) = ggdof_oc(1,:)
+    l2gdof(gdof_cm) = ggdof_cm(1,:)
+    l2gdof(gdof_trinf) = ggdof_trinf(1,:)
+    l2gdof(gdof_inf) = ggdof_inf(1,:)
 
     l2gdof = l2gdof-1 ! PETSC uses 0 indexing
 
+    !debug
     if (myrank == 0) write(*,'(a,1x,i0,1x,i0)')'  l2gdof range:',minval(l2gdof(1:)),maxval(l2gdof(1:))
-    call sync_all
+    call sync_all()
+
     if (minval(l2gdof(1:)) < 1) then
       write(*,*) 'ERROR: local-to-global indices are less than 1!'
-      stop
+      stop 'Error local-to-global indices are less than 1'
     endif
   endif !if (SOLVER_5GLL) then
+
+  !debug
   if (myrank == 0) write(*,'(a)')'--------------------------------------------------'
 
   return
diff --git a/src/specfem3D/prepare_gravity.f90 b/src/specfem3D/prepare_gravity.f90
index 990ae20ba..0da494ae9 100644
--- a/src/specfem3D/prepare_gravity.f90
+++ b/src/specfem3D/prepare_gravity.f90
@@ -62,6 +62,7 @@ subroutine prepare_gravity()
   ! minimum radius in inner core (to avoid zero radius)
   double precision, parameter :: MINIMUM_RADIUS_INNER_CORE = 100.d0 ! in m
 
+  ! user output
   if (myrank == 0) then
     write(IMAIN,*) "preparing gravity arrays"
     call flush_IMAIN()

From f9a76d6100c5d08e7cbef563936eb0a9f9e9bd5a Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 12:54:41 +0200
Subject: [PATCH 09/11] updates make rules (putting all linker flags into one
 main MPILIBS)

---
 Makefile.in                                   |  4 ++++
 src/auxiliaries/rules.mk                      | 24 +++++++++----------
 src/gindex3D/rules.mk                         | 16 ++++++++++---
 src/meshfem3D/rules.mk                        |  2 +-
 src/specfem3D/rules.mk                        |  2 +-
 .../postprocess_sensitivity_kernels/rules.mk  | 20 ++++++++--------
 src/tomography/rules.mk                       | 14 +++++------
 tests/meshfem3D/test_models.makefile          |  2 +-
 tests/meshfem3D/test_save.makefile            |  2 +-
 tests/specfem3D/test_locate.makefile          |  2 +-
 tests/specfem3D/test_read.makefile            |  2 +-
 11 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 3e46eb730..734c2e42b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -54,6 +54,10 @@ CXXFLAGS = -I${SETUP} @CXXFLAGS@
 
 FCLINK = $(MPIFCCOMPILE_CHECK)
 
+# all linker flags
+LDFLAGS = @LDFLAGS@
+MPILIBS += $(LDFLAGS) @LIBS@
+
 #######################################
 ####
 #### MPI
diff --git a/src/auxiliaries/rules.mk b/src/auxiliaries/rules.mk
index 580c552fc..0cbdb1150 100644
--- a/src/auxiliaries/rules.mk
+++ b/src/auxiliaries/rules.mk
@@ -216,7 +216,7 @@ xcombine_paraview_strain_data_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_paraview_strain_data: $(xcombine_paraview_strain_data_OBJECTS) $(xcombine_paraview_strain_data_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_paraview_strain_data.auxsolver.o: $O/specfem3D_par.solverstatic_module.o
@@ -240,7 +240,7 @@ xcombine_surf_data_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_surf_data: $(xcombine_surf_data_OBJECTS) $(xcombine_surf_data_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_surf_data.auxsolver.o: $O/specfem3D_par.solverstatic_module.o
@@ -267,7 +267,7 @@ xcombine_surf_data_vtk_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_surf_data_vtk: $(xcombine_surf_data_vtk_OBJECTS) $(xcombine_surf_data_vtk_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_surf_data.auxsolver_vtk.o: $O/specfem3D_par.solverstatic_module.o
@@ -282,7 +282,7 @@ xcombine_surf_data_vtu_OBJECTS = \
 xcombine_surf_data_vtu_SHARED_OBJECTS = $(xcombine_surf_data_vtk_SHARED_OBJECTS)
 
 ${E}/xcombine_surf_data_vtu: $(xcombine_surf_data_vtu_OBJECTS) $(xcombine_surf_data_vtu_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_surf_data.auxsolver_vtu.o: $O/specfem3D_par.solverstatic_module.o
@@ -317,7 +317,7 @@ xcombine_vol_data_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_vol_data: $(xcombine_vol_data_OBJECTS) $(xcombine_vol_data_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_vol_data.auxsolver.o: $O/specfem3D_par.solverstatic_module.o
@@ -360,7 +360,7 @@ xcombine_vol_data_adios_SHARED_OBJECTS = \
 $O/combine_vol_data.auxadios.o: $O/combine_vol_data_adios_impl.auxadios.o
 
 ${E}/xcombine_vol_data_adios: $(xcombine_vol_data_adios_OBJECTS) $(xcombine_vol_data_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_vol_data.auxadios.o: $O/specfem3D_par.solverstatic_module.o
@@ -396,7 +396,7 @@ xcombine_vol_data_vtk_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_vol_data_vtk: $(xcombine_vol_data_vtk_OBJECTS) $(xcombine_vol_data_vtk_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_vol_data.auxsolver_vtk.o: $O/specfem3D_par.solverstatic_module.o
@@ -438,7 +438,7 @@ xcombine_vol_data_vtk_adios_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_vol_data_vtk_adios: $(xcombine_vol_data_vtk_adios_OBJECTS) $(xcombine_vol_data_vtk_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_vol_data.auxadios_vtk.o: $O/combine_vol_data_adios_impl.auxadios.o
@@ -454,7 +454,7 @@ xcombine_vol_data_vtu_OBJECTS = \
 xcombine_vol_data_vtu_SHARED_OBJECTS = $(xcombine_vol_data_vtk_SHARED_OBJECTS)
 
 ${E}/xcombine_vol_data_vtu: $(xcombine_vol_data_vtu_OBJECTS) $(xcombine_vol_data_vtu_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_vol_data.auxsolver_vtu.o: $O/specfem3D_par.solverstatic_module.o
@@ -469,7 +469,7 @@ xcombine_vol_data_vtu_adios_OBJECTS = \
 xcombine_vol_data_vtu_adios_SHARED_OBJECTS = $(xcombine_vol_data_vtk_adios_SHARED_OBJECTS)
 
 ${E}/xcombine_vol_data_vtu_adios: $(xcombine_vol_data_vtu_adios_OBJECTS) $(xcombine_vol_data_vtu_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/combine_vol_data.auxadios_vtu.o: $O/combine_vol_data_adios_impl.auxadios.o
@@ -561,7 +561,7 @@ xextract_database_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xextract_database: $(xextract_database_OBJECTS) $(xextract_database_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ### additional dependencies
 $O/extract_database.aux.o: $O/specfem3D_par.solverstatic_module.o
@@ -704,7 +704,7 @@ endif
 
 
 ${E}/xwrite_profile: $(xwrite_profile_OBJECTS) $(xwrite_profile_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS) $(LIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ## additional module dependencies
 $O/write_profile.aux.o: $O/meshfem3D_models.check.o
diff --git a/src/gindex3D/rules.mk b/src/gindex3D/rules.mk
index f2de77d91..778ae6f35 100644
--- a/src/gindex3D/rules.mk
+++ b/src/gindex3D/rules.mk
@@ -32,9 +32,9 @@ gindex3D_TARGETS = \
 	$(EMPTY_MACRO)
 
 gindex3D_OBJECTS = \
+	$O/create_gindex.gindex.o \
 	$O/gindex3D.gindex.o \
 	$O/gindex3D_par.gindex_module.o \
-	$O/create_gindex.gindex.o \
 	$O/initialize_gindex.gindex.o \
 	$(EMPTY_MACRO)
 
@@ -44,18 +44,18 @@ gindex3D_MODULES = \
 
 # These files come from the specfem3D directory
 gindex3D_SHARED_OBJECTS = \
-	$O/specfem3D_par.solverstatic_module.o \
+	$O/asdf_data.solverstatic_module.o \
 	$O/bcast_mesh_databases.solverstatic.o \
 	$O/locate_regular_points.solverstatic.o \
 	$O/read_arrays_solver.solverstatic.o \
 	$O/read_mesh_parameters.solverstatic.o \
 	$O/read_mesh_databases.solverstatic.o \
+	$O/specfem3D_par.solverstatic_module.o \
 	$(EMPTY_MACRO)
 
 # These files come from the shared directory
 gindex3D_SHARED_OBJECTS += \
 	$O/adios_manager.shared_adios_module.o \
-	$O/shared_par.shared_module.o \
 	$O/auto_ner.shared.o \
 	$O/broadcast_computed_parameters.shared.o \
 	$O/count_elements.shared.o \
@@ -79,6 +79,7 @@ gindex3D_SHARED_OBJECTS += \
 	$O/rotate_tensor.shared.o \
 	$O/rthetaphi_xyz.shared.o \
 	$O/save_header_file.shared.o \
+	$O/shared_par.shared_module.o \
 	$O/ylm.shared.o \
 	$(EMPTY_MACRO)
 
@@ -124,7 +125,16 @@ else ifeq ($(NETCDF),yes)
 	gindex3D_SHARED_OBJECTS += $O/read_write_netcdf.checknetcdf.o
 endif
 
+###
+### VTK
+###
 
+# conditional vtk linking
+ifeq ($(VTK),yes)
+	gindex3D_SHARED_OBJECTS += $(vtk_specfem3D_OBJECTS)
+else
+	gindex3D_SHARED_OBJECTS += $(vtk_specfem3D_STUBS)
+endif
 
 #######################################
 
diff --git a/src/meshfem3D/rules.mk b/src/meshfem3D/rules.mk
index 2b38a222a..ed0bb90eb 100644
--- a/src/meshfem3D/rules.mk
+++ b/src/meshfem3D/rules.mk
@@ -314,7 +314,7 @@ endif
 ####
 
 ${E}/xmeshfem3D: $(meshfem3D_SHARED_OBJECTS) $(meshfem3D_OBJECTS)
-	${FCLINK} -o $@ $+ $(LDFLAGS) $(MPILIBS) $(LIBS)
+	${FCLINK} -o $@ $+ $(MPILIBS)
 
 #######################################
 
diff --git a/src/specfem3D/rules.mk b/src/specfem3D/rules.mk
index ebd2dbb53..20b3c690a 100644
--- a/src/specfem3D/rules.mk
+++ b/src/specfem3D/rules.mk
@@ -338,7 +338,7 @@ endif
 #### rules for executables
 ####
 
-SPECFEM_LINK_FLAGS = $(LDFLAGS) $(MPILIBS) $(LIBS)
+SPECFEM_LINK_FLAGS = $(MPILIBS)
 
 # cuda/opencl/hip
 SPECFEM_LINK_FLAGS += $(GPU_LINK)
diff --git a/src/tomography/postprocess_sensitivity_kernels/rules.mk b/src/tomography/postprocess_sensitivity_kernels/rules.mk
index 4563b4d9e..ba02fe553 100644
--- a/src/tomography/postprocess_sensitivity_kernels/rules.mk
+++ b/src/tomography/postprocess_sensitivity_kernels/rules.mk
@@ -141,7 +141,7 @@ xconvert_model_file_adios_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xconvert_model_file_adios: $(xconvert_model_file_adios_OBJECTS) $(xconvert_model_file_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ##
 ## xaddition_sem
@@ -186,7 +186,7 @@ xclip_sem_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xclip_sem: $(xclip_sem_OBJECTS) $(xclip_sem_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -210,7 +210,7 @@ xcombine_sem_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xcombine_sem: $(xcombine_sem_OBJECTS) $(xcombine_sem_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -266,7 +266,7 @@ $O/interpolate_model.postprocess.o: $O/search_kdtree.shared.o
 
 
 ${E}/xinterpolate_model: $(xinterpolate_model_OBJECTS) $(xinterpolate_model_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ##
 ## xinterpolate_model_adios
@@ -292,7 +292,7 @@ xinterpolate_model_adios_SHARED_OBJECTS += \
 $O/interpolate_model.postprocess_adios.o: $O/search_kdtree.shared.o $O/adios_manager.shared_adios_module.o
 
 ${E}/xinterpolate_model_adios: $(xinterpolate_model_adios_OBJECTS) $(xinterpolate_model_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -407,7 +407,7 @@ gpu_smooth_OBJECTS += $(cuda_smooth_DEVICE_OBJ) $(cuda_smooth_kernels_OBJS)
 ## compilation
 ##
 xsmooth_sem_SHARED_OBJECTS += $(gpu_xs_OBJECTS)
-xsmooth_sem_LIBS = $(LDFLAGS) $(MPILIBS)  # $(LDFLAGS) $(MPILIBS) $(LIBS)
+xsmooth_sem_LIBS = $(MPILIBS)  # $(LDFLAGS) $(MPILIBS) $(LIBS)
 xsmooth_sem_LIBS += $(GPU_LINK)
 
 INFO_SMOOTH="building xsmooth_sem $(BUILD_VERSION_TXT)"
@@ -450,7 +450,7 @@ xsmooth_sem_adios_SHARED_OBJECTS += \
 ##
 ## compilation
 ##
-xsmooth_sem_adios_LIBS = $(LDFLAGS) $(MPILIBS)  # $(LDFLAGS) $(MPILIBS) $(LIBS)
+xsmooth_sem_adios_LIBS = $(MPILIBS)  # $(LDFLAGS) $(MPILIBS) $(LIBS)
 xsmooth_sem_adios_LIBS += $(GPU_LINK)
 
 INFO_SMOOTH_ADIOS="building xsmooth_sem_adios $(BUILD_VERSION_TXT)"
@@ -504,7 +504,7 @@ xsmooth_laplacian_sem_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xsmooth_laplacian_sem: $(xsmooth_laplacian_sem_OBJECTS) $(xsmooth_laplacian_sem_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -529,7 +529,7 @@ xsmooth_laplacian_sem_adios_SHARED_OBJECTS += \
 	$(EMPTY_MACRO)
 
 ${E}/xsmooth_laplacian_sem_adios: $(xsmooth_laplacian_sem_adios_OBJECTS) $(xsmooth_laplacian_sem_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -583,7 +583,7 @@ xcreate_cross_section_SHARED_OBJECTS = \
 $O/create_cross_section.postprocess.o: $O/search_kdtree.shared.o
 
 ${E}/xcreate_cross_section: $(xcreate_cross_section_OBJECTS) $(xcreate_cross_section_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 #######################################
diff --git a/src/tomography/rules.mk b/src/tomography/rules.mk
index ddb2974e8..e6eb85b2b 100644
--- a/src/tomography/rules.mk
+++ b/src/tomography/rules.mk
@@ -142,7 +142,7 @@ xadd_model_iso_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xadd_model_iso: $(xadd_model_iso_OBJECTS) $(xadd_model_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -154,7 +154,7 @@ xadd_model_tiso_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xadd_model_tiso: $(xadd_model_tiso_OBJECTS) $(xadd_model_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ##
 ## xadd_model_tiso_cg
@@ -165,7 +165,7 @@ xadd_model_tiso_cg_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xadd_model_tiso_cg: $(xadd_model_tiso_cg_OBJECTS) $(xadd_model_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -177,7 +177,7 @@ xadd_model_tiso_iso_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xadd_model_tiso_iso: $(xadd_model_tiso_iso_OBJECTS) $(xadd_model_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -201,7 +201,7 @@ xsum_kernels_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xsum_kernels: $(xsum_kernels_OBJECTS) $(xsum_kernels_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 ##
 ## xsum_kernels_adios
@@ -225,7 +225,7 @@ xsum_kernels_adios_SHARED_OBJECTS = \
 $O/sum_kernels.tomo_adios.o: $O/adios_manager.shared_adios_module.o
 
 ${E}/xsum_kernels_adios: $(xsum_kernels_adios_OBJECTS) $(xsum_kernels_adios_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 
 ##
@@ -249,7 +249,7 @@ xsum_preconditioned_kernels_SHARED_OBJECTS = \
 	$(EMPTY_MACRO)
 
 ${E}/xsum_preconditioned_kernels: $(xsum_preconditioned_kernels_OBJECTS) $(xsum_preconditioned_kernels_SHARED_OBJECTS)
-	${MPIFCCOMPILE_CHECK} -o $@ $+ $(LDFLAGS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o $@ $+ $(MPILIBS)
 
 #######################################
 
diff --git a/tests/meshfem3D/test_models.makefile b/tests/meshfem3D/test_models.makefile
index e91367797..561f92f15 100644
--- a/tests/meshfem3D/test_models.makefile
+++ b/tests/meshfem3D/test_models.makefile
@@ -32,5 +32,5 @@ OBJECTS = \
 	$(EMPTY_MACRO)
 
 test_models:
-	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_models test_models.f90 -I./obj $(OBJECTS) $(LDFLAGS) $(MPILIBS) $(LIBS)
+	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_models test_models.f90 -I./obj $(OBJECTS) $(MPILIBS)
 
diff --git a/tests/meshfem3D/test_save.makefile b/tests/meshfem3D/test_save.makefile
index d8d16193f..2ddb30c99 100644
--- a/tests/meshfem3D/test_save.makefile
+++ b/tests/meshfem3D/test_save.makefile
@@ -13,5 +13,5 @@ OBJECTS = \
 	$(EMPTY_MACRO)
 
 test_save:
-	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_save test_save.f90 -I./obj $(OBJECTS) $(LDFLAGS) $(MPILIBS) $(LIBS)
+	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_save test_save.f90 -I./obj $(OBJECTS) $(MPILIBS)
 
diff --git a/tests/specfem3D/test_locate.makefile b/tests/specfem3D/test_locate.makefile
index b2f7e95f3..81168f4b0 100644
--- a/tests/specfem3D/test_locate.makefile
+++ b/tests/specfem3D/test_locate.makefile
@@ -13,5 +13,5 @@ OBJECTS = \
 	$(EMPTY_MACRO)
 
 test_locate:
-	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_locate test_locate.f90 -I./obj $(OBJECTS) $(LDFLAGS) $(MPILIBS) $(LIBS)
+	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_locate test_locate.f90 -I./obj $(OBJECTS) $(MPILIBS)
 
diff --git a/tests/specfem3D/test_read.makefile b/tests/specfem3D/test_read.makefile
index 8e3818af7..43de9eeff 100644
--- a/tests/specfem3D/test_read.makefile
+++ b/tests/specfem3D/test_read.makefile
@@ -16,5 +16,5 @@ OBJECTS = \
 OBJECTS += $(specfem3D_SHARED_OBJECTS)
 
 test_read:
-	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_read test_read.f90 -I./obj $(OBJECTS) $(LDFLAGS) $(MPILIBS) $(LIBS)
+	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -o ./bin/test_read test_read.f90 -I./obj $(OBJECTS) $(MPILIBS)
 

From 833a4f178ba1404a411bad2cf7f44d49e493a458 Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 21:12:07 +0200
Subject: [PATCH 10/11] updates gindex3D array de/allocations

---
 src/gindex3D/create_gindex.f90        | 265 +++++++++++++++++++-------
 src/gindex3D/gindex3D_par.f90         |   2 +-
 src/gindex3D/initialize_gindex.f90    |  44 ++++-
 src/specfem3D/SIEM_prepare_solver.F90 |  16 ++
 src/specfem3D/SIEM_solver_mpi.F90     |  32 ++--
 src/specfem3D/SIEM_solver_petsc.F90   |  23 +--
 6 files changed, 279 insertions(+), 103 deletions(-)

diff --git a/src/gindex3D/create_gindex.f90 b/src/gindex3D/create_gindex.f90
index 49f3988ec..8528fd15d 100644
--- a/src/gindex3D/create_gindex.f90
+++ b/src/gindex3D/create_gindex.f90
@@ -39,12 +39,26 @@ subroutine create_gindex()
   gnf_end = 0    ! global gdof for NGLLX = 5
   gnf_end1 = 0   ! global gdof for NGLLX_INF = 3
 
+  !debug
+  print *,'Number of solver processes: ',NPROCTOT
+
+  ! folder for temporary files created by gindex3D
+   if (myrank == 0) then
+    write(IMAIN,*) 'creating temporary directory: tmp_gindex3D/'
+    write(IMAIN,*)
+    call flush_IMAIN()
+  endif
+  call execute_command_line('mkdir -p tmp_gindex3D/')
+
   ! loop through the processors
-  do i_proc = 0,nproc-1
+  do i_proc = 0,NPROCTOT-1
     ! creates global DOFs for this process
     call create_gindex_for_process(i_proc)
   enddo
 
+  !debug
+  print *,'all done'
+
   ! closes the main output file
   if (myrank == 0) then
     write(IMAIN,*)
@@ -112,7 +126,7 @@ subroutine create_gindex_for_process(i_proc)
   integer,allocatable :: gghost(:,:),ighost(:)
   character(len = 20) :: fhead
   character(len = 12) :: spm,spn
-  character(len = 60) :: fname
+  character(len = 128) :: fname
 
   integer,allocatable :: nmir(:)
   integer,allocatable :: gdf_ic1(:,:),gdf_oc1(:,:),gdf_cm1(:,:),gdf_trinf1(:,:), &
@@ -142,6 +156,9 @@ subroutine create_gindex_for_process(i_proc)
   ! set new process id
   myrank = i_proc
 
+  !debug
+  print *,'Process: ',i_proc
+
   ! starts reading the databases
   call read_mesh_databases()
 
@@ -149,44 +166,130 @@ subroutine create_gindex_for_process(i_proc)
   myrank = myrank_org
 
   !deallocate unnecessary arrays
+  ! inner core
   if (allocated(rmassz_inner_core)) then
     deallocate(rmassz_inner_core)
+    if (ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION_VAL) then
+      deallocate(rmassx_inner_core,rmassy_inner_core)
+    else
+      nullify(rmassx_inner_core,rmassy_inner_core)
+    endif
+    deallocate(xstore_inner_core,ystore_inner_core,zstore_inner_core)
+    deallocate(xix_inner_core,xiy_inner_core,xiz_inner_core, &
+               etax_inner_core,etay_inner_core,etaz_inner_core, &
+               gammax_inner_core,gammay_inner_core,gammaz_inner_core)
+    deallocate(rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core)
+    deallocate(c11store_inner_core,c33store_inner_core,c12store_inner_core, &
+               c13store_inner_core,c44store_inner_core)
+
     deallocate(phase_ispec_inner_inner_core)
     deallocate(num_elem_colors_inner_core)
     deallocate(buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
                request_send_vector_ic,request_recv_vector_ic)
   endif
 
+  ! outer core
   if (allocated(rmass_outer_core)) then
     deallocate(rmass_outer_core)
+    deallocate(xstore_outer_core,ystore_outer_core,zstore_outer_core)
+    deallocate(xix_outer_core,xiy_outer_core,xiz_outer_core, &
+               etax_outer_core,etay_outer_core,etaz_outer_core, &
+               gammax_outer_core,gammay_outer_core,gammaz_outer_core)
+    deallocate(rhostore_outer_core,kappavstore_outer_core)
+    deallocate(vp_outer_core)
+
     deallocate(phase_ispec_inner_outer_core)
     deallocate(num_elem_colors_outer_core)
     deallocate(buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
                request_send_scalar_oc,request_recv_scalar_oc)
   endif
 
+  ! crust/mantle
   if (allocated(rmassz_crust_mantle)) then
     deallocate(rmassz_crust_mantle)
+    if ((NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) .or. &
+        (ROTATION_VAL .and. EXACT_MASS_MATRIX_FOR_ROTATION_VAL)) then
+      deallocate(rmassx_crust_mantle,rmassy_crust_mantle)
+    else
+      nullify(rmassx_crust_mantle,rmassy_crust_mantle)
+    endif
+    deallocate(rmass_ocean_load)
+    deallocate(xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle)
+    deallocate(xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+               etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+               gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle)
+    deallocate(rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle)
+    deallocate(kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle)
+    deallocate(c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+               c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+               c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+               c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+               c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+               c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+               c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle)
+    if (allocated(mu0store_crust_mantle)) deallocate(mu0store_crust_mantle)
+    deallocate(ispec_is_tiso_crust_mantle)
+    deallocate(rho_vp_crust_mantle,rho_vs_crust_mantle)
+
     deallocate(phase_ispec_inner_crust_mantle)
     deallocate(num_elem_colors_crust_mantle)
     deallocate(buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
                request_send_vector_cm,request_recv_vector_cm)
   endif
 
-  if (allocated(phase_ispec_inner_trinfinite)) then
+  ! trinfinite
+  if (allocated(xstore_trinfinite)) then
+    deallocate(xstore_trinfinite,ystore_trinfinite,zstore_trinfinite)
+    deallocate(xix_trinfinite,xiy_trinfinite,xiz_trinfinite, &
+               etax_trinfinite,etay_trinfinite,etaz_trinfinite, &
+               gammax_trinfinite,gammay_trinfinite,gammaz_trinfinite)
+
     deallocate(phase_ispec_inner_trinfinite)
     deallocate(num_elem_colors_trinfinite)
     deallocate(buffer_send_scalar_trinfinite,buffer_recv_scalar_trinfinite, &
                request_send_scalar_trinfinite,request_recv_scalar_trinfinite)
   endif
 
-  if (allocated(phase_ispec_inner_infinite)) then
+  ! infinite
+  if (allocated(xstore_infinite)) then
+    deallocate(xstore_infinite,ystore_infinite,zstore_infinite)
+    deallocate(xix_infinite,xiy_infinite,xiz_infinite, &
+               etax_infinite,etay_infinite,etaz_infinite, &
+               gammax_infinite,gammay_infinite,gammaz_infinite)
+  
     deallocate(phase_ispec_inner_infinite)
     deallocate(num_elem_colors_infinite)
     deallocate(buffer_send_scalar_infinite,buffer_recv_scalar_infinite, &
                request_send_scalar_infinite,request_recv_scalar_infinite)
   endif
 
+  ! coupling
+  if (allocated(ibelm_moho_top)) then
+    deallocate(ibelm_moho_top,ibelm_moho_bot, &
+               ibelm_400_top,ibelm_400_bot, &
+               ibelm_670_top,ibelm_670_bot, &
+               normal_moho,normal_400,normal_670)
+    deallocate(ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
+               ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
+               normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
+               normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
+               normal_bottom_crust_mantle,normal_top_crust_mantle, &
+               jacobian2D_bottom_crust_mantle,jacobian2D_top_crust_mantle, &
+               jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
+               jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle)
+    deallocate(ibelm_xmin_outer_core,ibelm_xmax_outer_core, &
+               ibelm_ymin_outer_core,ibelm_ymax_outer_core, &
+               normal_xmin_outer_core,normal_xmax_outer_core, &
+               normal_ymin_outer_core,normal_ymax_outer_core, &
+               normal_bottom_outer_core,normal_top_outer_core, &
+               jacobian2D_bottom_outer_core,jacobian2D_top_outer_core, &
+               jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core, &
+               jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core)
+    deallocate(ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
+               ibelm_ymin_inner_core,ibelm_ymax_inner_core, &
+               ibelm_bottom_inner_core)
+  endif
+
   !debug
   !print *,'Process:',i_proc,' neighbors:',num_interfaces_inner_core
 
@@ -209,20 +312,30 @@ subroutine create_gindex_for_process(i_proc)
   ignode_inf(:) = -1;   isgnode_inf(:) = .false.
 
   ! allocate necessary arrays
+  if (.not. allocated(inode_elmt_cm)) then
+    allocate(inode_elmt_cm(NGLLCUBE,NSPEC_CRUST_MANTLE))
+    allocate(inode_elmt_cm1(NGLLCUBE_INF,NSPEC_CRUST_MANTLE))
+  endif
+  if (.not. allocated(inode_elmt_ic)) then
+    allocate(inode_elmt_ic(NGLLCUBE,NSPEC_INNER_CORE))
+    allocate(inode_elmt_ic1(NGLLCUBE_INF,NSPEC_INNER_CORE))
+  endif
+  if (.not. allocated(inode_elmt_oc)) then
+    allocate(inode_elmt_oc(NGLLCUBE,NSPEC_OUTER_CORE))
+    allocate(inode_elmt_oc1(NGLLCUBE_INF,NSPEC_OUTER_CORE))
+  endif
   ! trinfinite arrays
   if (ADD_TRINF) then
-    allocate(ibool_trinfinite(NGLLX,NGLLY,NGLLZ,NSPEC_TRINFINITE))
-    allocate(ibelm_bottom_trinfinite(NSPEC2D_BOTTOM_TRINF))
-    allocate(ibelm_top_trinfinite(NSPEC2D_TOP_TRINF))
-    allocate(inode_elmt_trinf(NGLLCUBE,NSPEC_TRINFINITE))
-    allocate(inode_elmt_trinf1(NGLLCUBE_INF,NSPEC_TRINFINITE))
+    if (.not. allocated(inode_elmt_trinf)) then
+      allocate(inode_elmt_trinf(NGLLCUBE,NSPEC_TRINFINITE))
+      allocate(inode_elmt_trinf1(NGLLCUBE_INF,NSPEC_TRINFINITE))
+    endif
   endif
   ! infinite arrays
-  allocate(ibool_infinite(NGLLX,NGLLY,NGLLZ,NSPEC_INFINITE))
-  allocate(ibelm_bottom_infinite(NSPEC2D_BOTTOM_INF))
-  allocate(ibelm_top_infinite(NSPEC2D_TOP_INF))
-  allocate(inode_elmt_inf(NGLLCUBE,NSPEC_INFINITE))
-  allocate(inode_elmt_inf1(NGLLCUBE_INF,NSPEC_INFINITE))
+  if (.not. allocated(inode_elmt_inf)) then
+    allocate(inode_elmt_inf(NGLLCUBE,NSPEC_INFINITE))
+    allocate(inode_elmt_inf1(NGLLCUBE_INF,NSPEC_INFINITE))
+  endif
 
   ! count global node numbers
   nnode = NGLOB_INNER_CORE + NGLOB_OUTER_CORE + NGLOB_CRUST_MANTLE + NGLOB_TRINFINITE + NGLOB_INFINITE
@@ -491,7 +604,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_inner_core(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -512,7 +625,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_outer_core(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -533,7 +646,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_crust_mantle(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -555,7 +668,7 @@ subroutine create_gindex_for_process(i_proc)
       j_proc = my_neighbors_trinfinite(i)
       if (j_proc < i_proc) then
         write(spn,*) j_proc
-        fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+        fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
         !print *,fname
         open(10,file=fname,action='read',status='old')
         read(10,*) nibool
@@ -577,7 +690,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_infinite(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -793,7 +906,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_inner_core(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname !,i_proc,j_proc
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_inner_core(i)
@@ -813,7 +926,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_outer_core(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_outer_core(i)
@@ -833,7 +946,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_crust_mantle(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_crust_mantle(i)
@@ -854,7 +967,7 @@ subroutine create_gindex_for_process(i_proc)
       j_proc = my_neighbors_trinfinite(i)
       if (j_proc > i_proc) then
         write(spn,*) j_proc
-        fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+        fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
         !print *,fname
         open(10,file=fname,action='write',status='replace')
         write(10,*) nibool_interfaces_trinfinite(i)
@@ -875,7 +988,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_infinite(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_infinite(i)
@@ -964,7 +1077,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_inner_core(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -985,7 +1098,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_outer_core(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -1006,7 +1119,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_crust_mantle(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -1028,7 +1141,7 @@ subroutine create_gindex_for_process(i_proc)
       j_proc = my_neighbors_trinfinite(i)
       if (j_proc < i_proc) then
         write(spn,*) j_proc
-        fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+        fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
         !print *,fname
         open(10,file=fname,action='read',status='old')
         read(10,*) nibool
@@ -1050,7 +1163,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_infinite(i)
     if (j_proc < i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       !print *,fname
       open(10,file=fname,action='read',status='old')
       read(10,*) nibool
@@ -1096,7 +1209,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_inner_core(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname !,i_proc,j_proc
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_inner_core(i)
@@ -1116,7 +1229,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_outer_core(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*)nibool_interfaces_outer_core(i)
@@ -1136,7 +1249,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_crust_mantle(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_crust_mantle(i)
@@ -1157,7 +1270,7 @@ subroutine create_gindex_for_process(i_proc)
       j_proc = my_neighbors_trinfinite(i)
       if (j_proc > i_proc) then
         write(spn,*) j_proc
-        fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+        fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
         !print *,fname
         open(10,file=fname,action='write',status='replace')
         write(10,*) nibool_interfaces_trinfinite(i)
@@ -1178,7 +1291,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_infinite(i)
     if (j_proc > i_proc) then
       write(spn,*) j_proc
-      fname = 'tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname = 'tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*) nibool_interfaces_infinite(i)
@@ -1453,9 +1566,9 @@ subroutine create_gindex_for_process(i_proc)
 
   ! transtion infinite
   if (ADD_TRINF) then
-  do i_elmt = 1,NSPEC_TRINFINITE
-    inode_elmt_trinf1(:,i_elmt) = nmir_trinf(inode_elmt_trinf(igll_on,i_elmt))
-  enddo
+    do i_elmt = 1,NSPEC_TRINFINITE
+      inode_elmt_trinf1(:,i_elmt) = nmir_trinf(inode_elmt_trinf(igll_on,i_elmt))
+    enddo
   endif
 
   ! infinite
@@ -1714,7 +1827,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_inner_core(i)
     if (j_proc < i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       open(10,file=fname,action='read',status='old')
       read(10,*)nibool
       allocate(gghost(NNDOF,nibool))
@@ -1736,7 +1849,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_outer_core(i)
     if (j_proc < i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       open(10,file=fname,action='read',status='old')
       read(10,*)nibool
       allocate(gghost(NNDOF,nibool))
@@ -1758,7 +1871,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_crust_mantle(i)
     if (j_proc < i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       open(10,file=fname,action='read',status='old')
       read(10,*)nibool
       allocate(gghost(NNDOF,nibool))
@@ -1781,7 +1894,7 @@ subroutine create_gindex_for_process(i_proc)
       j_proc = my_neighbors_trinfinite(i)
       if (j_proc < i_proc) then
         write(spn,*)j_proc
-        fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+        fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
         open(10,file=fname,action='read',status='old')
         read(10,*)nibool
         allocate(gghost(NNDOF,nibool))
@@ -1804,7 +1917,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_infinite(i)
     if (j_proc < i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spn))//'to'//trim(adjustl(spm))
       open(10,file=fname,action='read',status='old')
       read(10,*)nibool
       allocate(gghost(NNDOF,nibool))
@@ -1851,7 +1964,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_inner_core(i)
     if (j_proc > i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       open(10,file=fname,action='write',status='replace')
       write(10,*)nibool_interfaces_inner_core1(i)
       allocate(tmpmat(NNDOF,nibool_interfaces_inner_core1(i)))
@@ -1870,7 +1983,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_outer_core(i)
     if (j_proc > i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       open(10,file=fname,action='write',status='replace')
       write(10,*)nibool_interfaces_outer_core1(i)
       allocate(tmpmat(NNDOF,nibool_interfaces_outer_core1(i)))
@@ -1889,7 +2002,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_crust_mantle(i)
     if (j_proc > i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       open(10,file=fname,action='write',status='replace')
       write(10,*)nibool_interfaces_crust_mantle1(i)
       allocate(tmpmat(NNDOF,nibool_interfaces_crust_mantle1(i)))
@@ -1910,7 +2023,7 @@ subroutine create_gindex_for_process(i_proc)
       j_proc = my_neighbors_trinfinite(i)
       if (j_proc > i_proc) then
         write(spn,*)j_proc
-        fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+        fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
         !print *,fname
         open(10,file=fname,action='write',status='replace')
         write(10,*)nibool_interfaces_trinfinite1(i)
@@ -1932,7 +2045,7 @@ subroutine create_gindex_for_process(i_proc)
     j_proc = my_neighbors_infinite(i)
     if (j_proc > i_proc) then
       write(spn,*)j_proc
-      fname='tmp/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
+      fname='tmp_gindex3D/'//trim(fhead)//trim(adjustl(spm))//'to'//trim(adjustl(spn))
       !print *,fname
       open(10,file=fname,action='write',status='replace')
       write(10,*)nibool_interfaces_infinite1(i)
@@ -1947,6 +2060,7 @@ subroutine create_gindex_for_process(i_proc)
 
   gnf_end1 = maxval(gnf1)
   print *,'Largest gnf1 ID:',gnf_end1
+
   write(spm,*)i_proc
 
   fname='DATABASES_MPI/gdof1_proc'//trim(adjustl(spm))
@@ -1963,49 +2077,54 @@ subroutine create_gindex_for_process(i_proc)
   write(10,*)gdf_inf1
   close(10)
 
+  ! debug
+  print *,'done'
   print *,'*********************************************************'
 
   ! deallocate variables
   ! for NGLL=5
   deallocate(gnf,isgnf)
   deallocate(gdf_ic,gdf_oc,gdf_cm,gdf_trinf,gdf_inf)
-  !  deallocate(rmass_inner_core)
+
+  ! deallocate arrays before re-allocating in read_mesh_databases()
+  ! inner core
   deallocate(my_neighbors_inner_core,nibool_interfaces_inner_core)
   deallocate(ibool_interfaces_inner_core)
-  !  deallocate(phase_ispec_inner_inner_core)
-  !  deallocate(num_elem_colors_inner_core)
-  !  deallocate(buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
-  !  request_send_vector_inner_core,request_recv_vector_inner_core)
+  deallocate(ibool_inner_core)
+  deallocate(idoubling_inner_core)
 
-  !  deallocate(rmass_outer_core)
+  ! outer core
   deallocate(my_neighbors_outer_core,nibool_interfaces_outer_core)
   deallocate(ibool_interfaces_outer_core)
-  !  deallocate(phase_ispec_inner_outer_core)
-  !  deallocate(num_elem_colors_outer_core)
-  !  deallocate(buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
-  !  request_send_scalar_outer_core,request_recv_scalar_outer_core)
+  deallocate(ibool_outer_core)
 
-  !  deallocate(rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle)
+  ! crust/mantle
   deallocate(my_neighbors_crust_mantle,nibool_interfaces_crust_mantle)
   deallocate(ibool_interfaces_crust_mantle)
-  !  deallocate(phase_ispec_inner_crust_mantle)
-  !  deallocate(num_elem_colors_crust_mantle)
-  !  deallocate(buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
-  !  request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
-
-  deallocate(my_neighbors_trinfinite,nibool_interfaces_trinfinite)
-  deallocate(ibool_interfaces_trinfinite)
-  !  deallocate(phase_ispec_inner_trinfinite)
-  !  deallocate(num_elem_colors_trinfinite)
-  !  deallocate(buffer_send_scalar_trinfinite,buffer_recv_scalar_trinfinite, &
-  !  request_send_scalar_trinfinite,request_recv_scalar_trinfinite)
+  deallocate(ibool_crust_mantle)
+
+  ! trinfinite
+  if (allocated(ibool_trinfinite)) then
+    deallocate(my_neighbors_trinfinite,nibool_interfaces_trinfinite)
+    deallocate(ibool_interfaces_trinfinite)
+    deallocate(ibool_trinfinite)
+    deallocate(ibelm_bottom_trinfinite,ibelm_top_trinfinite)
+    deallocate(ibelm_xmin_trinfinite,ibelm_xmax_trinfinite)
+    deallocate(ibelm_ymin_trinfinite,ibelm_ymax_trinfinite)
+  endif
 
+  ! infinite
   deallocate(my_neighbors_infinite,nibool_interfaces_infinite)
   deallocate(ibool_interfaces_infinite)
-  !  deallocate(phase_ispec_inner_infinite)
-  !  deallocate(num_elem_colors_infinite)
-  !  deallocate(buffer_send_scalar_infinite,buffer_recv_scalar_infinite, &
-  !  request_send_scalar_infinite,request_recv_scalar_infinite)
+  deallocate(ibool_infinite)
+  deallocate(ibelm_bottom_infinite,ibelm_top_infinite)
+  deallocate(ibelm_xmin_infinite,ibelm_xmax_infinite)
+  deallocate(ibelm_ymin_infinite,ibelm_ymax_infinite)
+
+  ! coupling
+  deallocate(ibelm_top_inner_core)
+  deallocate(ibelm_bottom_outer_core,ibelm_top_outer_core)
+  deallocate(ibelm_bottom_crust_mantle,ibelm_top_crust_mantle)
 
   ! for NGLL=3
   deallocate(gnf1,isgnf1)
diff --git a/src/gindex3D/gindex3D_par.f90 b/src/gindex3D/gindex3D_par.f90
index e6bd31ba6..3c064e15c 100644
--- a/src/gindex3D/gindex3D_par.f90
+++ b/src/gindex3D/gindex3D_par.f90
@@ -29,7 +29,7 @@ module gindex_par
 
   use constants, only: myrank,NGLLX,NGLLY,NGLLZ
 
-  use shared_parameters, only: NPROC
+  use shared_parameters, only: NPROCTOT
 
   use specfem_par
 
diff --git a/src/gindex3D/initialize_gindex.f90 b/src/gindex3D/initialize_gindex.f90
index 21f6aa42f..1c3c9a5b9 100644
--- a/src/gindex3D/initialize_gindex.f90
+++ b/src/gindex3D/initialize_gindex.f90
@@ -35,6 +35,7 @@ subroutine initialize_gindex()
 
   ! local parameters
   character(len = 20) :: snproc
+  integer :: number_of_solver_processes
   ! mpi
   integer :: sizeprocs
   integer :: ier
@@ -70,7 +71,7 @@ subroutine initialize_gindex()
   endif
 
   call get_command_argument(1,snproc)
-  read(snproc,*) nproc
+  read(snproc,*) number_of_solver_processes
 
   ! open main output file, only written to by process 0
   if (myrank == 0) then
@@ -94,12 +95,51 @@ subroutine initialize_gindex()
   !allocate(ndof_p2p(nproc,nproc))
   !ndof_p2p=0
 
+  if (myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) 'input number of solver processes: ',number_of_solver_processes
+    write(IMAIN,*)
+    call flush_IMAIN()
+  endif
+
   ! initializes simulation parameters
-  if (myrank == 0) write(IMAIN,'(a)',advance='no') '   initialising...'
+  !if (myrank == 0) write(IMAIN,'(a)',advance='no') '   initialising...'
 
   ! reads in Par_file and sets compute parameters
   call read_compute_parameters()
 
+  ! check if anything to do, full gravity flag must be set
+  if (.not. FULL_GRAVITY) then
+    print *,'Error: FULL_GRAVITY flag must be set to .true. in Par_file for xgindex3D!'
+    print *
+    print *,'Please edit your DATA/Par_file accordingly and re-run xgindex3D.'
+    print *
+    print *,'nothing to do, exiting...'
+    call exit_MPI(myrank,'FULL_GRAVITY flag must be turned on for xgindex3D')
+  endif
+
+  ! user output
+  if (myrank == 0) then
+    write(IMAIN,*) 'Par_file setting:'
+    write(IMAIN,*) '  NPROC_ETA / NPROC_XI = ',NPROC_ETA,'/',NPROC_XI
+    write(IMAIN,*) '  NCHUNKS              = ',NCHUNKS
+    write(IMAIN,*) '  NPROCTOT             = ',NPROCTOT
+    write(IMAIN,*)
+    call flush_IMAIN()
+  endif
+
+  ! check if desired number of solver processes matches Par_file setting
+  if (number_of_solver_processes /= NPROCTOT) then
+    print *,'Error: Input number of solver processes =',number_of_solver_processes,', must match Par_file setting!'
+    print *,'       Par_file requires ',NPROCTOT,' solver processes.'
+    print *
+    print *,'Please edit either your DATA/Par_file accordingly and re-run the mesher,'
+    print *,'or update your input processes and re-run xgindex3D.'
+    print *
+    print *,'exiting...'
+    call exit_MPI(myrank,'Invalid input number of solver processes for DATA/Par_file')
+  endif
+
   ! read the mesh parameters for all array setup
   call read_mesh_parameters()
 
diff --git a/src/specfem3D/SIEM_prepare_solver.F90 b/src/specfem3D/SIEM_prepare_solver.F90
index 7d2e1a453..48832382b 100644
--- a/src/specfem3D/SIEM_prepare_solver.F90
+++ b/src/specfem3D/SIEM_prepare_solver.F90
@@ -422,6 +422,22 @@ subroutine prepare_solver_poisson()
   ! indexify regions
   call get_index_region()
 
+  ! allocate inode arrays
+  allocate(inode_elmt_cm(NGLLCUBE,NSPEC_CRUST_MANTLE))
+  allocate(inode_elmt_cm1(NGLLCUBE_INF,NSPEC_CRUST_MANTLE))
+  allocate(inode_elmt_ic(NGLLCUBE,NSPEC_INNER_CORE))
+  allocate(inode_elmt_ic1(NGLLCUBE_INF,NSPEC_INNER_CORE))
+  allocate(inode_elmt_oc(NGLLCUBE,NSPEC_OUTER_CORE))
+  allocate(inode_elmt_oc1(NGLLCUBE_INF,NSPEC_OUTER_CORE))
+  ! trinfinite arrays
+  if (ADD_TRINF) then
+    allocate(inode_elmt_trinf(NGLLCUBE,NSPEC_TRINFINITE))
+    allocate(inode_elmt_trinf1(NGLLCUBE_INF,NSPEC_TRINFINITE))
+  endif
+  ! infinite arrays
+  allocate(inode_elmt_inf(NGLLCUBE,NSPEC_INFINITE))
+  allocate(inode_elmt_inf1(NGLLCUBE_INF,NSPEC_INFINITE))
+
   ! Level-1 solver-------------------
   allocate(storekmat_crust_mantle1(NGLLCUBE_INF,NGLLCUBE_INF,NSPEC_CRUST_MANTLE), &
            dprecon_crust_mantle1(nnode_cm1))
diff --git a/src/specfem3D/SIEM_solver_mpi.F90 b/src/specfem3D/SIEM_solver_mpi.F90
index 98c9bacd5..df7218afe 100644
--- a/src/specfem3D/SIEM_solver_mpi.F90
+++ b/src/specfem3D/SIEM_solver_mpi.F90
@@ -481,20 +481,20 @@ subroutine product_stiffness_vector(neq,p_g,kp)
   if (ADD_TRINF) then
   kp_trinf = zero
   do i_elmt = 1,NSPEC_TRINFINITE
-    inode_trinf=inode_elmt_trinf(:,i_elmt)
-    igdof_trinf=gdof_trinf(inode_trinf)
-    km_trinf=storekmat_trinfinite(:,:,i_elmt)
-    kp_trinf(inode_trinf)=kp_trinf(inode_trinf)+matmul(km_trinf,p_g(igdof_trinf))
+    inode_trinf = inode_elmt_trinf(:,i_elmt)
+    igdof_trinf = gdof_trinf(inode_trinf)
+    km_trinf = storekmat_trinfinite(:,:,i_elmt)
+    kp_trinf(inode_trinf) = kp_trinf(inode_trinf)+matmul(km_trinf,p_g(igdof_trinf))
   enddo
   endif
 
   ! infinite
   kp_inf = zero
   do i_elmt = 1,NSPEC_INFINITE
-    inode_inf=inode_elmt_inf(:,i_elmt)
-    igdof_inf=gdof_inf(inode_inf)
-    km_inf=storekmat_infinite(:,:,i_elmt)
-    kp_inf(inode_inf)=kp_inf(inode_inf)+matmul(km_inf,p_g(igdof_inf))
+    inode_inf = inode_elmt_inf(:,i_elmt)
+    igdof_inf = gdof_inf(inode_inf)
+    km_inf = storekmat_infinite(:,:,i_elmt)
+    kp_inf(inode_inf) = kp_inf(inode_inf)+matmul(km_inf,p_g(igdof_inf))
   enddo
 
   ! assemble acroos the regions but not across the MPIs
@@ -682,20 +682,20 @@ subroutine product_stiffness_vector3(neq,p_g,kp)
   if (ADD_TRINF) then
   kp_trinf = zero
   do i_elmt = 1,NSPEC_TRINFINITE
-    inode_trinf=inode_elmt_trinf1(:,i_elmt)
-    igdof_trinf=gdof_trinf1(inode_trinf)
-    km_trinf=storekmat_trinfinite1(:,:,i_elmt)
-    kp_trinf(inode_trinf)=kp_trinf(inode_trinf)+matmul(km_trinf,p_g(igdof_trinf))
+    inode_trinf = inode_elmt_trinf1(:,i_elmt)
+    igdof_trinf = gdof_trinf1(inode_trinf)
+    km_trinf = storekmat_trinfinite1(:,:,i_elmt)
+    kp_trinf(inode_trinf) = kp_trinf(inode_trinf)+matmul(km_trinf,p_g(igdof_trinf))
   enddo
   endif
 
   ! infinite
   kp_inf = zero
   do i_elmt = 1,NSPEC_INFINITE
-    inode_inf=inode_elmt_inf1(:,i_elmt)
-    igdof_inf=gdof_inf1(inode_inf)
-    km_inf=storekmat_infinite1(:,:,i_elmt)
-    kp_inf(inode_inf)=kp_inf(inode_inf)+matmul(km_inf,p_g(igdof_inf))
+    inode_inf = inode_elmt_inf1(:,i_elmt)
+    igdof_inf = gdof_inf1(inode_inf)
+    km_inf = storekmat_infinite1(:,:,i_elmt)
+    kp_inf(inode_inf) = kp_inf(inode_inf)+matmul(km_inf,p_g(igdof_inf))
   enddo
 
   ! assemble acroos the regions but not across the MPIs
diff --git a/src/specfem3D/SIEM_solver_petsc.F90 b/src/specfem3D/SIEM_solver_petsc.F90
index 7f6093aab..e915a9460 100644
--- a/src/specfem3D/SIEM_solver_petsc.F90
+++ b/src/specfem3D/SIEM_solver_petsc.F90
@@ -956,16 +956,16 @@ subroutine petsc_set_matrix1()
 
   use math_library_mpi, only: maxscal,minscal
   use specfem_par, only: IFLAG_IN_FICTITIOUS_CUBE,NSPEC_INNER_CORE, &
-  NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
   use specfem_par, only: NEDOF
   use specfem_par, only: NGLLCUBE_INF,NEDOF1
   use specfem_par_innercore, only: ggdof_ic1,storekmat_inner_core1, &
-  idoubling_inner_core,inode_elmt_ic1
+    idoubling_inner_core,inode_elmt_ic1
   use specfem_par_outercore, only: ggdof_oc1,storekmat_outer_core1,inode_elmt_oc1
   use specfem_par_crustmantle, only: ggdof_cm1,storekmat_crust_mantle1, &
-  inode_elmt_cm1
+    inode_elmt_cm1
   use specfem_par_trinfinite, only: ggdof_trinf1,storekmat_trinfinite1, &
-  inode_elmt_trinf1
+    inode_elmt_trinf1
   use specfem_par_infinite, only: ggdof_inf1,storekmat_infinite1,inode_elmt_inf1
 
   implicit none
@@ -1250,19 +1250,20 @@ subroutine petsc_set_vector1(rload1)
 
   use specfem_par, only: l2gdof1
   use specfem_par, only: IFLAG_IN_FICTITIOUS_CUBE,NSPEC_INNER_CORE, &
-  NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
   use specfem_par, only: NEDOF
   use specfem_par, only: NGLLCUBE_INF,NEDOF1
   use specfem_par_innercore, only: ggdof_ic1,storekmat_inner_core1, &
-  idoubling_inner_core,inode_elmt_ic1
+    idoubling_inner_core,inode_elmt_ic1
   use specfem_par_outercore, only: ggdof_oc1,storekmat_outer_core1, &
-  inode_elmt_oc1
+    inode_elmt_oc1
   use specfem_par_crustmantle, only: ggdof_cm1,storekmat_crust_mantle1, &
-  inode_elmt_cm1
+    inode_elmt_cm1
   use specfem_par_trinfinite, only: ggdof_trinf1,storekmat_trinfinite1, &
-  inode_elmt_trinf1
+    inode_elmt_trinf1
   use specfem_par_infinite, only: ggdof_inf1,storekmat_infinite1,inode_elmt_inf1
   implicit none
+
   PetscScalar,intent(in) :: rload1(0:)
   PetscScalar      zero
 
@@ -1651,9 +1652,9 @@ end subroutine petsc_initialize
   subroutine petsc_set_matrix()
 
   use specfem_par, only: NEDOF,IFLAG_IN_FICTITIOUS_CUBE,NSPEC_INNER_CORE, &
-  NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
+    NSPEC_OUTER_CORE,NSPEC_CRUST_MANTLE,NSPEC_TRINFINITE,NSPEC_INFINITE
   use specfem_par_innercore, only: ggdof_ic,storekmat_inner_core, &
-  idoubling_inner_core,inode_elmt_ic
+    idoubling_inner_core,inode_elmt_ic
   use specfem_par_outercore, only: ggdof_oc,storekmat_outer_core,inode_elmt_oc
   use specfem_par_crustmantle, only: ggdof_cm,storekmat_crust_mantle,inode_elmt_cm
   use specfem_par_trinfinite, only: ggdof_trinf,storekmat_trinfinite,inode_elmt_trinf

From f604d7c992bc5f9048a383ebd84c465e6beffe8b Mon Sep 17 00:00:00 2001
From: Daniel Peter <daniel.b.peter@gmail.com>
Date: Fri, 17 May 2024 22:31:59 +0200
Subject: [PATCH 11/11] adding some more explicit type conversions

---
 src/meshfem3D/model_EMC.f90                   |  26 +--
 src/meshfem3D/save_model_meshfiles_adios.F90  |   4 +-
 .../write_AVS_DX_global_chunks_data.f90       | 111 +++++++-----
 .../write_AVS_DX_global_chunks_data_adios.f90 | 161 ++++++++----------
 4 files changed, 151 insertions(+), 151 deletions(-)

diff --git a/src/meshfem3D/model_EMC.f90 b/src/meshfem3D/model_EMC.f90
index b922b2a6b..11edd79ee 100644
--- a/src/meshfem3D/model_EMC.f90
+++ b/src/meshfem3D/model_EMC.f90
@@ -797,7 +797,7 @@ subroutine scale_Brocher_rho_from_vp()
         rho = fac1 * vp + fac2 * vp_p2 + fac3 * vp_p3 + fac4 * vp_p4 + fac5 * vp_p5
 
         ! Density
-        EMC_rho(ix,iy,iz) = rho
+        EMC_rho(ix,iy,iz) = real(rho,kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -861,7 +861,7 @@ subroutine scale_Brocher_vs_from_vp()
         vs = fac1 + fac2 * vp + fac3 * vp_p2 + fac4 * vp_p3 + fac5 * vp_p4
 
         ! Density
-        EMC_vs(ix,iy,iz) = vs
+        EMC_vs(ix,iy,iz) = real(vs,kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -872,7 +872,7 @@ subroutine scale_Brocher_vs_from_vp()
   ! unit scaling to convert to same unit as vp
   if (EMC_vp_unit == 3) then
     ! use same unit as vp km/s -> m/s
-    EMC_vs = EMC_vs * 1000.d0
+    EMC_vs = EMC_vs * 1000.0_CUSTOM_REAL
     EMC_vs_unit = 3
   endif
 
@@ -932,7 +932,7 @@ subroutine scale_Brocher_vp_from_vs()
         vp = fac1 + fac2 * vs + fac3 * vs_p2 + fac4 * vs_p3 + fac5 * vs_p4
 
         ! Vp
-        EMC_vp(ix,iy,iz) = vp
+        EMC_vp(ix,iy,iz) = real(vp,kind=CUSTOM_REAL)
       enddo
     enddo
   enddo
@@ -943,7 +943,7 @@ subroutine scale_Brocher_vp_from_vs()
   ! unit scaling to convert to same unit as vs
   if (EMC_vs_unit == 3) then
     ! use same unit as vp km/s -> m/s
-    EMC_vp = EMC_vp * 1000.d0
+    EMC_vp = EMC_vp * 1000.0_CUSTOM_REAL
     EMC_vp_unit = 3
   endif
 
@@ -1674,9 +1674,9 @@ subroutine fill_EMC_missing_values_interpolated()
           end select
 
           ! stores interpolated value
-          EMC_vp(ilon,ilat,idep) = vp_interp
-          EMC_vs(ilon,ilat,idep) = vs_interp
-          EMC_rho(ilon,ilat,idep) = rho_interp
+          EMC_vp(ilon,ilat,idep) = real(vp_interp,kind=CUSTOM_REAL)
+          EMC_vs(ilon,ilat,idep) = real(vs_interp,kind=CUSTOM_REAL)
+          EMC_rho(ilon,ilat,idep) = real(rho_interp,kind=CUSTOM_REAL)
 
           ! update mask flag
           tmp_mask(ilon,ilat,idep) = .false.
@@ -2553,7 +2553,7 @@ subroutine read_emc_model()
   ! units: 1==m, 2==km, 3==m/s, 4==km/s, 5==g/cm^3, 6==kg/cm^3, 7==kg/m^3
   if (EMC_dep_unit == 1) then
     ! converts to km
-    EMC_dep(:) = EMC_dep(:) / 1000.d0
+    EMC_dep(:) = EMC_dep(:) / 1000.0_CUSTOM_REAL
     EMC_dep_unit = 2    ! in km
   endif
   ! converts depth reference direction to positive being down (positive depth below sealevel, negative depth above)
@@ -3030,23 +3030,23 @@ subroutine read_emc_model()
   if (EMC_rho_unit == 5) then
     ! converts to kg/m^3
     ! rho [kg/m^3] = rho * 1000 [g/cm^3]
-    EMC_rho(:,:,:) = EMC_rho(:,:,:) * 1000.d0
+    EMC_rho(:,:,:) = EMC_rho(:,:,:) * 1000.0_CUSTOM_REAL
     EMC_rho_unit = 7 ! kg/m^3
   else if (EMC_rho_unit == 6) then
     ! converts to kg/m^3
     ! rho [kg/m^3] = rho * 1000000 [kg/cm^3]
-    EMC_rho(:,:,:) = EMC_rho(:,:,:) * 1.d6
+    EMC_rho(:,:,:) = EMC_rho(:,:,:) * 1.e6_CUSTOM_REAL
     EMC_rho_unit = 7 ! kg/m^3
   endif
   ! converts velocity to default m/s
   if (EMC_vp_unit == 4) then
     ! converts to m/s
-    EMC_vp(:,:,:) = EMC_vp(:,:,:) * 1000.d0
+    EMC_vp(:,:,:) = EMC_vp(:,:,:) * 1000.0_CUSTOM_REAL
     EMC_vp_unit = 3
   endif
   if (EMC_vs_unit == 4) then
     ! converts to m/s
-    EMC_vs(:,:,:) = EMC_vs(:,:,:) * 1000.d0
+    EMC_vs(:,:,:) = EMC_vs(:,:,:) * 1000.0_CUSTOM_REAL
     EMC_vs_unit = 3
   endif
 
diff --git a/src/meshfem3D/save_model_meshfiles_adios.F90 b/src/meshfem3D/save_model_meshfiles_adios.F90
index 48941ca44..258f2438c 100644
--- a/src/meshfem3D/save_model_meshfiles_adios.F90
+++ b/src/meshfem3D/save_model_meshfiles_adios.F90
@@ -299,8 +299,8 @@ subroutine save_model_meshfiles_adios()
   ! anisotropic values
   if (ANISOTROPIC_3D_MANTLE .and. iregion_code == IREGION_CRUST_MANTLE) then
     ! the scale of GPa--[g/cm^3][(km/s)^2]
-    scaleval = dsqrt(PI*GRAV*RHOAV)
-    scale_GPa = (RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2)
+    scaleval = real(sqrt(PI*GRAV*RHOAV),kind=CUSTOM_REAL)
+    scale_GPa = real((RHOAV/1000.d0)*((R_PLANET*scaleval/1000.d0)**2),kind=CUSTOM_REAL)
 
     allocate(temp_store_mu0(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
     if (ier /= 0) stop 'Error allocating temp mu0 array'
diff --git a/src/meshfem3D/write_AVS_DX_global_chunks_data.f90 b/src/meshfem3D/write_AVS_DX_global_chunks_data.f90
index 2df4f137d..ada7e7a6b 100644
--- a/src/meshfem3D/write_AVS_DX_global_chunks_data.f90
+++ b/src/meshfem3D/write_AVS_DX_global_chunks_data.f90
@@ -179,8 +179,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           numpoin = numpoin + 1
           num_ibool_AVS_DX(iglobval(1)) = numpoin
           write(10,*) numpoin,sngl(xstore(1,1,1,ispec)), &
-                  sngl(ystore(1,1,1,ispec)),sngl(zstore(1,1,1,ispec))
-          vmax = sqrt((kappavstore(1,1,1,ispec)+4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
+                      sngl(ystore(1,1,1,ispec)),sngl(zstore(1,1,1,ispec))
+          vmax = sqrt((kappavstore(1,1,1,ispec) &
+                       +4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
           vmin = sqrt(muvstore(1,1,1,ispec)/rhostore(1,1,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -189,7 +190,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
             vmax = real(vp,kind=CUSTOM_REAL)
             vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           write(11,*) numpoin,vmin,vmax
         endif
 
@@ -197,8 +198,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           numpoin = numpoin + 1
           num_ibool_AVS_DX(iglobval(4)) = numpoin
           write(10,*) numpoin,sngl(xstore(1,NGLLY,1,ispec)), &
-                  sngl(ystore(1,NGLLY,1,ispec)),sngl(zstore(1,NGLLY,1,ispec))
-          vmax = sqrt((kappavstore(1,NGLLY,1,ispec)+4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
+                      sngl(ystore(1,NGLLY,1,ispec)),sngl(zstore(1,NGLLY,1,ispec))
+          vmax = sqrt((kappavstore(1,NGLLY,1,ispec) &
+                       +4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
           vmin = sqrt(muvstore(1,NGLLY,1,ispec)/rhostore(1,NGLLY,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -207,7 +209,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
             vmax = real(vp,kind=CUSTOM_REAL)
             vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           write(11,*) numpoin,vmin,vmax
         endif
 
@@ -215,8 +217,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           numpoin = numpoin + 1
           num_ibool_AVS_DX(iglobval(8)) = numpoin
           write(10,*) numpoin,sngl(xstore(1,NGLLY,NGLLZ,ispec)), &
-                  sngl(ystore(1,NGLLY,NGLLZ,ispec)),sngl(zstore(1,NGLLY,NGLLZ,ispec))
-          vmax = sqrt((kappavstore(1,NGLLY,NGLLZ,ispec)+4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.)/rhostore(1,NGLLY,NGLLZ,ispec))
+                      sngl(ystore(1,NGLLY,NGLLZ,ispec)),sngl(zstore(1,NGLLY,NGLLZ,ispec))
+          vmax = sqrt((kappavstore(1,NGLLY,NGLLZ,ispec) &
+                       +4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.)/rhostore(1,NGLLY,NGLLZ,ispec))
           vmin = sqrt(muvstore(1,NGLLY,NGLLZ,ispec)/rhostore(1,NGLLY,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
         if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -225,7 +228,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           vmax = real(vp,kind=CUSTOM_REAL)
           vmin = real(vp,kind=CUSTOM_REAL)
         endif
-        if (vmin == 0.0) vmin=vmax
+        if (vmin == 0.0) vmin = vmax
         write(11,*) numpoin,vmin,vmax
       endif
 
@@ -233,8 +236,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
         numpoin = numpoin + 1
         num_ibool_AVS_DX(iglobval(5)) = numpoin
         write(10,*) numpoin,sngl(xstore(1,1,NGLLZ,ispec)), &
-                sngl(ystore(1,1,NGLLZ,ispec)),sngl(zstore(1,1,NGLLZ,ispec))
-        vmax = sqrt((kappavstore(1,1,NGLLZ,ispec)+4.*muvstore(1,1,NGLLZ,ispec)/3.)/rhostore(1,1,NGLLZ,ispec))
+                    sngl(ystore(1,1,NGLLZ,ispec)),sngl(zstore(1,1,NGLLZ,ispec))
+        vmax = sqrt((kappavstore(1,1,NGLLZ,ispec) &
+                     +4.*muvstore(1,1,NGLLZ,ispec)/3.)/rhostore(1,1,NGLLZ,ispec))
         vmin = sqrt(muvstore(1,1,NGLLZ,ispec)/rhostore(1,1,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
         if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -243,7 +247,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
           vmax = real(vp,kind=CUSTOM_REAL)
           vmin = real(vp,kind=CUSTOM_REAL)
         endif
-        if (vmin == 0.0) vmin=vmax
+        if (vmin == 0.0) vmin = vmax
         write(11,*) numpoin,vmin,vmax
       endif
 
@@ -260,8 +264,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(2)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,1,1,ispec)), &
-              sngl(ystore(NGLLX,1,1,ispec)),sngl(zstore(NGLLX,1,1,ispec))
-      vmax = sqrt((kappavstore(NGLLX,1,1,ispec)+4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
+                  sngl(ystore(NGLLX,1,1,ispec)),sngl(zstore(NGLLX,1,1,ispec))
+      vmax = sqrt((kappavstore(NGLLX,1,1,ispec) &
+                   +4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
       vmin = sqrt(muvstore(NGLLX,1,1,ispec)/rhostore(NGLLX,1,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -270,7 +275,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -278,8 +283,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(3)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,NGLLY,1,ispec)), &
-              sngl(ystore(NGLLX,NGLLY,1,ispec)),sngl(zstore(NGLLX,NGLLY,1,ispec))
-      vmax = sqrt((kappavstore(NGLLX,NGLLY,1,ispec)+4.*muvstore(NGLLX,NGLLY,1,ispec)/3.)/rhostore(NGLLX,NGLLY,1,ispec))
+                  sngl(ystore(NGLLX,NGLLY,1,ispec)),sngl(zstore(NGLLX,NGLLY,1,ispec))
+      vmax = sqrt((kappavstore(NGLLX,NGLLY,1,ispec) &
+                   +4.*muvstore(NGLLX,NGLLY,1,ispec)/3.)/rhostore(NGLLX,NGLLY,1,ispec))
       vmin = sqrt(muvstore(NGLLX,NGLLY,1,ispec)/rhostore(NGLLX,NGLLY,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -288,7 +294,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -296,8 +302,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(7)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,NGLLY,NGLLZ,ispec)), &
-              sngl(ystore(NGLLX,NGLLY,NGLLZ,ispec)),sngl(zstore(NGLLX,NGLLY,NGLLZ,ispec))
-      vmax = sqrt((kappavstore(NGLLX,NGLLY,NGLLZ,ispec)+4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.)/rhostore(NGLLX,NGLLY,NGLLZ,ispec))
+                  sngl(ystore(NGLLX,NGLLY,NGLLZ,ispec)),sngl(zstore(NGLLX,NGLLY,NGLLZ,ispec))
+      vmax = sqrt((kappavstore(NGLLX,NGLLY,NGLLZ,ispec) &
+                   +4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.)/rhostore(NGLLX,NGLLY,NGLLZ,ispec))
       vmin = sqrt(muvstore(NGLLX,NGLLY,NGLLZ,ispec)/rhostore(NGLLX,NGLLY,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -306,7 +313,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -314,8 +321,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(6)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,1,NGLLZ,ispec)), &
-              sngl(ystore(NGLLX,1,NGLLZ,ispec)),sngl(zstore(NGLLX,1,NGLLZ,ispec))
-      vmax = sqrt((kappavstore(NGLLX,1,NGLLZ,ispec)+4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.)/rhostore(NGLLX,1,NGLLZ,ispec))
+                  sngl(ystore(NGLLX,1,NGLLZ,ispec)),sngl(zstore(NGLLX,1,NGLLZ,ispec))
+      vmax = sqrt((kappavstore(NGLLX,1,NGLLZ,ispec) &
+                   +4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.)/rhostore(NGLLX,1,NGLLZ,ispec))
       vmin = sqrt(muvstore(NGLLX,1,NGLLZ,ispec)/rhostore(NGLLX,1,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -324,7 +332,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -341,8 +349,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(1)) = numpoin
       write(10,*) numpoin,sngl(xstore(1,1,1,ispec)), &
-              sngl(ystore(1,1,1,ispec)),sngl(zstore(1,1,1,ispec))
-      vmax = sqrt((kappavstore(1,1,1,ispec)+4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
+                  sngl(ystore(1,1,1,ispec)),sngl(zstore(1,1,1,ispec))
+      vmax = sqrt((kappavstore(1,1,1,ispec) &
+                   +4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
       vmin = sqrt(muvstore(1,1,1,ispec)/rhostore(1,1,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -351,7 +360,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -359,8 +368,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(2)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,1,1,ispec)), &
-              sngl(ystore(NGLLX,1,1,ispec)),sngl(zstore(NGLLX,1,1,ispec))
-      vmax = sqrt((kappavstore(NGLLX,1,1,ispec)+4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
+                  sngl(ystore(NGLLX,1,1,ispec)),sngl(zstore(NGLLX,1,1,ispec))
+      vmax = sqrt((kappavstore(NGLLX,1,1,ispec) &
+                   +4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
       vmin = sqrt(muvstore(NGLLX,1,1,ispec)/rhostore(NGLLX,1,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -369,7 +379,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -377,8 +387,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(6)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,1,NGLLZ,ispec)), &
-              sngl(ystore(NGLLX,1,NGLLZ,ispec)),sngl(zstore(NGLLX,1,NGLLZ,ispec))
-      vmax = sqrt((kappavstore(NGLLX,1,NGLLZ,ispec)+4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.)/rhostore(NGLLX,1,NGLLZ,ispec))
+                  sngl(ystore(NGLLX,1,NGLLZ,ispec)),sngl(zstore(NGLLX,1,NGLLZ,ispec))
+      vmax = sqrt((kappavstore(NGLLX,1,NGLLZ,ispec) &
+                   +4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.)/rhostore(NGLLX,1,NGLLZ,ispec))
       vmin = sqrt(muvstore(NGLLX,1,NGLLZ,ispec)/rhostore(NGLLX,1,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -387,7 +398,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -395,8 +406,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(5)) = numpoin
       write(10,*) numpoin,sngl(xstore(1,1,NGLLZ,ispec)), &
-              sngl(ystore(1,1,NGLLZ,ispec)),sngl(zstore(1,1,NGLLZ,ispec))
-      vmax = sqrt((kappavstore(1,1,NGLLZ,ispec)+4.*muvstore(1,1,NGLLZ,ispec)/3.)/rhostore(1,1,NGLLZ,ispec))
+                  sngl(ystore(1,1,NGLLZ,ispec)),sngl(zstore(1,1,NGLLZ,ispec))
+      vmax = sqrt((kappavstore(1,1,NGLLZ,ispec) &
+                   +4.*muvstore(1,1,NGLLZ,ispec)/3.)/rhostore(1,1,NGLLZ,ispec))
       vmin = sqrt(muvstore(1,1,NGLLZ,ispec)/rhostore(1,1,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -405,7 +417,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -423,7 +435,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       num_ibool_AVS_DX(iglobval(4)) = numpoin
       write(10,*) numpoin,sngl(xstore(1,NGLLY,1,ispec)), &
               sngl(ystore(1,NGLLY,1,ispec)),sngl(zstore(1,NGLLY,1,ispec))
-      vmax = sqrt((kappavstore(1,NGLLY,1,ispec)+4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
+      vmax = sqrt((kappavstore(1,NGLLY,1,ispec) &
+                   +4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
       vmin = sqrt(muvstore(1,NGLLY,1,ispec)/rhostore(1,NGLLY,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -432,7 +445,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -440,8 +453,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(3)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,NGLLY,1,ispec)), &
-              sngl(ystore(NGLLX,NGLLY,1,ispec)),sngl(zstore(NGLLX,NGLLY,1,ispec))
-      vmax = sqrt((kappavstore(NGLLX,NGLLY,1,ispec)+4.*muvstore(NGLLX,NGLLY,1,ispec)/3.)/rhostore(NGLLX,NGLLY,1,ispec))
+                  sngl(ystore(NGLLX,NGLLY,1,ispec)),sngl(zstore(NGLLX,NGLLY,1,ispec))
+      vmax = sqrt((kappavstore(NGLLX,NGLLY,1,ispec) &
+                   +4.*muvstore(NGLLX,NGLLY,1,ispec)/3.)/rhostore(NGLLX,NGLLY,1,ispec))
       vmin = sqrt(muvstore(NGLLX,NGLLY,1,ispec)/rhostore(NGLLX,NGLLY,1,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -450,7 +464,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -458,8 +472,9 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       numpoin = numpoin + 1
       num_ibool_AVS_DX(iglobval(7)) = numpoin
       write(10,*) numpoin,sngl(xstore(NGLLX,NGLLY,NGLLZ,ispec)), &
-              sngl(ystore(NGLLX,NGLLY,NGLLZ,ispec)),sngl(zstore(NGLLX,NGLLY,NGLLZ,ispec))
-      vmax = sqrt((kappavstore(NGLLX,NGLLY,NGLLZ,ispec)+4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.)/rhostore(NGLLX,NGLLY,NGLLZ,ispec))
+                  sngl(ystore(NGLLX,NGLLY,NGLLZ,ispec)),sngl(zstore(NGLLX,NGLLY,NGLLZ,ispec))
+      vmax = sqrt((kappavstore(NGLLX,NGLLY,NGLLZ,ispec) &
+                   +4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.)/rhostore(NGLLX,NGLLY,NGLLZ,ispec))
       vmin = sqrt(muvstore(NGLLX,NGLLY,NGLLZ,ispec)/rhostore(NGLLX,NGLLY,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -468,7 +483,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -477,7 +492,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
       num_ibool_AVS_DX(iglobval(8)) = numpoin
       write(10,*) numpoin,sngl(xstore(1,NGLLY,NGLLZ,ispec)), &
               sngl(ystore(1,NGLLY,NGLLZ,ispec)),sngl(zstore(1,NGLLY,NGLLZ,ispec))
-      vmax = sqrt((kappavstore(1,NGLLY,NGLLZ,ispec)+4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.)/rhostore(1,NGLLY,NGLLZ,ispec))
+      vmax = sqrt((kappavstore(1,NGLLY,NGLLZ,ispec) &
+                   +4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.)/rhostore(1,NGLLY,NGLLZ,ispec))
       vmin = sqrt(muvstore(1,NGLLY,NGLLZ,ispec)/rhostore(1,NGLLY,NGLLZ,ispec))
 ! particular case of the outer core (muvstore contains 1/rho)
   if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
@@ -486,7 +502,7 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
     vmax = real(vp,kind=CUSTOM_REAL)
     vmin = real(vp,kind=CUSTOM_REAL)
   endif
-      if (vmin == 0.0) vmin=vmax
+      if (vmin == 0.0) vmin = vmax
       write(11,*) numpoin,vmin,vmax
     endif
 
@@ -582,7 +598,8 @@ subroutine write_AVS_DX_global_chunks_data(prname,nspec,iboun,ibool, &
               print *,' attention: vs close to zero',sngl(vs),r,i,j,k,ispec
               dvs = 0.0
             else
-              dvp = dvp + (sqrt((kappavstore(i,j,k,ispec)+4.*muvstore(i,j,k,ispec)/3.)/rhostore(i,j,k,ispec)) - sngl(vp))/sngl(vp)
+              dvp = dvp + (sqrt((kappavstore(i,j,k,ispec) &
+                                 +4.*muvstore(i,j,k,ispec)/3.)/rhostore(i,j,k,ispec)) - sngl(vp))/sngl(vp)
               dvs = dvs + (sqrt(muvstore(i,j,k,ispec)/rhostore(i,j,k,ispec)) - sngl(vs))/sngl(vs)
             endif
 
diff --git a/src/meshfem3D/write_AVS_DX_global_chunks_data_adios.f90 b/src/meshfem3D/write_AVS_DX_global_chunks_data_adios.f90
index 5805483d5..0a41495d6 100644
--- a/src/meshfem3D/write_AVS_DX_global_chunks_data_adios.f90
+++ b/src/meshfem3D/write_AVS_DX_global_chunks_data_adios.f90
@@ -395,17 +395,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,1,1,ispec))
 
           vmax = sqrt((kappavstore(1,1,1,ispec) &
-              + 4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
+                       + 4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
           vmin = sqrt(muvstore(1,1,1,ispec)/rhostore(1,1,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,1,1,ispec)**2 + ystore(1,1,1,ispec)**2 &
                 + zstore(1,1,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -418,17 +418,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,NGLLY,1,ispec))
 
           vmax = sqrt((kappavstore(1,NGLLY,1,ispec) &
-              +4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
+                       +4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
           vmin = sqrt(muvstore(1,NGLLY,1,ispec)/rhostore(1,NGLLY,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,NGLLY,1,ispec)**2 + ystore(1,NGLLY,1,ispec)**2 &
                 + zstore(1,NGLLY,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -441,20 +441,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,NGLLY,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(1,NGLLY,NGLLZ,ispec) &
-              +4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.) &
-              / rhostore(1,NGLLY,NGLLZ,ispec))
-          vmin = sqrt(muvstore(1,NGLLY,NGLLZ,ispec) &
-              / rhostore(1,NGLLY,NGLLZ,ispec))
+                       +4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.) / rhostore(1,NGLLY,NGLLZ,ispec))
+          vmin = sqrt(muvstore(1,NGLLY,NGLLZ,ispec) / rhostore(1,NGLLY,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,NGLLY,NGLLZ,ispec)**2 &
                 + ystore(1,NGLLY,NGLLZ,ispec)**2 &
                 + zstore(1,NGLLY,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
 
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
@@ -468,17 +466,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,1,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(1,1,NGLLZ,ispec) &
-              +4.*muvstore(1,1,NGLLZ,ispec)/3.)/rhostore(1,1,NGLLZ,ispec))
+                       +4.*muvstore(1,1,NGLLZ,ispec)/3.)/rhostore(1,1,NGLLZ,ispec))
           vmin = sqrt(muvstore(1,1,NGLLZ,ispec)/rhostore(1,1,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,1,NGLLZ,ispec)**2 + ystore(1,1,NGLLZ,ispec)**2 &
                 + zstore(1,1,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -500,17 +498,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,1,1,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,1,1,ispec) &
-              +4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
+                       +4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
           vmin = sqrt(muvstore(NGLLX,1,1,ispec)/rhostore(NGLLX,1,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,1,1,ispec)**2 + ystore(NGLLX,1,1,ispec)**2 &
                 + zstore(NGLLX,1,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -523,20 +521,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,NGLLY,1,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,NGLLY,1,ispec) &
-              + 4.*muvstore(NGLLX,NGLLY,1,ispec)/3.) &
-              / rhostore(NGLLX,NGLLY,1,ispec))
-          vmin = sqrt(muvstore(NGLLX,NGLLY,1,ispec) &
-              / rhostore(NGLLX,NGLLY,1,ispec))
+                       + 4.*muvstore(NGLLX,NGLLY,1,ispec)/3.) / rhostore(NGLLX,NGLLY,1,ispec))
+          vmin = sqrt(muvstore(NGLLX,NGLLY,1,ispec) / rhostore(NGLLX,NGLLY,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,NGLLY,1,ispec)**2 &
                 + ystore(NGLLX,NGLLY,1,ispec)**2 &
                 + zstore(NGLLX,NGLLY,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -549,20 +545,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,NGLLY,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,NGLLY,NGLLZ,ispec) &
-              + 4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.) &
-              / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
-          vmin = sqrt(muvstore(NGLLX,NGLLY,NGLLZ,ispec) &
-              / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
+                       + 4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.) / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
+          vmin = sqrt(muvstore(NGLLX,NGLLY,NGLLZ,ispec) / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,NGLLY,NGLLZ,ispec)**2 &
                 + ystore(NGLLX,NGLLY,NGLLZ,ispec)**2 &
                 + zstore(NGLLX,NGLLY,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -575,20 +569,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,1,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,1,NGLLZ,ispec) &
-              + 4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.) &
-              / rhostore(NGLLX,1,NGLLZ,ispec))
-          vmin = sqrt(muvstore(NGLLX,1,NGLLZ,ispec) &
-              / rhostore(NGLLX,1,NGLLZ,ispec))
+                       + 4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.) / rhostore(NGLLX,1,NGLLZ,ispec))
+          vmin = sqrt(muvstore(NGLLX,1,NGLLZ,ispec) / rhostore(NGLLX,1,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,1,NGLLZ,ispec)**2 &
                 + ystore(NGLLX,1,NGLLZ,ispec)**2 &
                 + zstore(NGLLX,1,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -610,17 +602,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,1,1,ispec))
 
           vmax = sqrt((kappavstore(1,1,1,ispec) &
-              + 4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
+                       + 4.*muvstore(1,1,1,ispec)/3.)/rhostore(1,1,1,ispec))
           vmin = sqrt(muvstore(1,1,1,ispec)/rhostore(1,1,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,1,1,ispec)**2 &
                 + ystore(1,1,1,ispec)**2 + zstore(1,1,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -633,17 +625,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,1,1,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,1,1,ispec) &
-              +4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
+                       +4.*muvstore(NGLLX,1,1,ispec)/3.)/rhostore(NGLLX,1,1,ispec))
           vmin = sqrt(muvstore(NGLLX,1,1,ispec)/rhostore(NGLLX,1,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,1,1,ispec)**2 &
                 + ystore(NGLLX,1,1,ispec)**2 + zstore(NGLLX,1,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin = vmin
           avs_dx_adios%vmax = vmax
         endif
@@ -656,20 +648,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,1,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,1,NGLLZ,ispec) &
-              + 4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.) &
-              / rhostore(NGLLX,1,NGLLZ,ispec))
-          vmin = sqrt(muvstore(NGLLX,1,NGLLZ,ispec) &
-              / rhostore(NGLLX,1,NGLLZ,ispec))
+                       + 4.*muvstore(NGLLX,1,NGLLZ,ispec)/3.) / rhostore(NGLLX,1,NGLLZ,ispec))
+          vmin = sqrt(muvstore(NGLLX,1,NGLLZ,ispec) / rhostore(NGLLX,1,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,1,NGLLZ,ispec)**2 &
                 + ystore(NGLLX,1,NGLLZ,ispec)**2 &
                 + zstore(NGLLX,1,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -682,18 +672,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,1,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(1,1,NGLLZ,ispec) &
-              + 4.*muvstore(1,1,NGLLZ,ispec)/3.) &
-              / rhostore(1,1,NGLLZ,ispec))
+                       + 4.*muvstore(1,1,NGLLZ,ispec)/3.) / rhostore(1,1,NGLLZ,ispec))
           vmin = sqrt(muvstore(1,1,NGLLZ,ispec)/rhostore(1,1,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,1,NGLLZ,ispec)**2 &
                 + ystore(1,1,NGLLZ,ispec)**2 + zstore(1,1,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -715,17 +704,17 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,NGLLY,1,ispec))
 
           vmax = sqrt((kappavstore(1,NGLLY,1,ispec) &
-              + 4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
+                       + 4.*muvstore(1,NGLLY,1,ispec)/3.)/rhostore(1,NGLLY,1,ispec))
           vmin = sqrt(muvstore(1,NGLLY,1,ispec)/rhostore(1,NGLLY,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,NGLLY,1,ispec)**2 &
                 + ystore(1,NGLLY,1,ispec)**2 + zstore(1,NGLLY,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -738,21 +727,19 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,NGLLY,1,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,NGLLY,1,ispec) &
-              + 4.*muvstore(NGLLX,NGLLY,1,ispec)/3.) &
-              / rhostore(NGLLX,NGLLY,1,ispec))
-          vmin = sqrt(muvstore(NGLLX,NGLLY,1,ispec) &
-              / rhostore(NGLLX,NGLLY,1,ispec))
+                       + 4.*muvstore(NGLLX,NGLLY,1,ispec)/3.) / rhostore(NGLLX,NGLLY,1,ispec))
+          vmin = sqrt(muvstore(NGLLX,NGLLY,1,ispec) / rhostore(NGLLX,NGLLY,1,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,NGLLY,1,ispec)**2 &
                 + ystore(NGLLX,NGLLY,1,ispec)**2 &
                 + zstore(NGLLX,NGLLY,1,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
 
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
 
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
@@ -766,20 +753,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(NGLLX,NGLLY,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(NGLLX,NGLLY,NGLLZ,ispec) &
-              + 4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.) &
-              / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
-          vmin = sqrt(muvstore(NGLLX,NGLLY,NGLLZ,ispec) &
-              / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
+                       + 4.*muvstore(NGLLX,NGLLY,NGLLZ,ispec)/3.) / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
+          vmin = sqrt(muvstore(NGLLX,NGLLY,NGLLZ,ispec) / rhostore(NGLLX,NGLLY,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(NGLLX,NGLLY,NGLLZ,ispec)**2 &
                 + ystore(NGLLX,NGLLY,NGLLZ,ispec)**2 &
                 + zstore(NGLLX,NGLLY,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-          vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax
         endif
@@ -792,20 +777,18 @@ subroutine prepare_AVS_DX_global_chunks_data_adios(prname,nspec, &
           avs_dx_adios%z_adios(numpoin) = sngl(zstore(1,NGLLY,NGLLZ,ispec))
 
           vmax = sqrt((kappavstore(1,NGLLY,NGLLZ,ispec) &
-              + 4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.) &
-              / rhostore(1,NGLLY,NGLLZ,ispec))
-          vmin = sqrt(muvstore(1,NGLLY,NGLLZ,ispec) &
-              / rhostore(1,NGLLY,NGLLZ,ispec))
+                       + 4.*muvstore(1,NGLLY,NGLLZ,ispec)/3.) / rhostore(1,NGLLY,NGLLZ,ispec))
+          vmin = sqrt(muvstore(1,NGLLY,NGLLZ,ispec) / rhostore(1,NGLLY,NGLLZ,ispec))
           ! particular case of the outer core (muvstore contains 1/rho)
           if (idoubling(ispec) == IFLAG_OUTER_CORE_NORMAL) then
             r = dsqrt(xstore(1,NGLLY,NGLLZ,ispec)**2 &
                 + ystore(1,NGLLY,NGLLZ,ispec)**2 &
                 + zstore(1,NGLLY,NGLLZ,ispec)**2)
             call prem_display_outer_core(r,rho,vp,vs,Qkappa,Qmu,idoubling(ispec))
-            vmax = vp
-            vmin = vp
+            vmax = real(vp,kind=CUSTOM_REAL)
+            vmin = real(vp,kind=CUSTOM_REAL)
           endif
-          if (vmin == 0.0) vmin=vmax
+          if (vmin == 0.0) vmin = vmax
 
           avs_dx_adios%vmin(numpoin) = vmin
           avs_dx_adios%vmax(numpoin) = vmax