Skip to content

Commit

Permalink
added full tracer porting
Browse files Browse the repository at this point in the history
  • Loading branch information
seshadri levante committed Dec 4, 2023
1 parent 623e143 commit 097d7f4
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/fesom_module.F90
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ subroutine fesom_runloop(current_nsteps)
!$ACC CREATE (f%tracers%work%adv_flux_hor, f%tracers%work%adv_flux_ver, f%tracers%work%fct_LO) &
!$ACC CREATE (f%tracers%work%del_ttf_advvert, f%tracers%work%del_ttf_advhoriz, f%tracers%work%edge_up_dn_grad) &
!$ACC CREATE (f%tracers%work%del_ttf)
!$ACC DATA CREATE(tr_xy, tr_z, relax2clim, Tclim, Sclim)
do n=nstart, ntotal
if (use_global_tides) then
call foreph(f%partit, f%mesh)
Expand Down Expand Up @@ -465,6 +466,7 @@ subroutine fesom_runloop(current_nsteps)
!$ACC EXIT DATA DELETE (f%tracers%work%adv_flux_hor, f%tracers%work%adv_flux_ver, f%tracers%work%fct_LO)
!$ACC EXIT DATA DELETE (f%tracers%work%del_ttf_advvert, f%tracers%work%del_ttf_advhoriz, f%tracers%work%edge_up_dn_grad)
!$ACC EXIT DATA DELETE (f%tracers%work%del_ttf)
!$ACC END DATA ! deleting tr_xy, tr_z, relax2clim, Sclim, Tclim
!$ACC EXIT DATA DELETE (f%tracers%data, f%tracers%work)
!$ACC EXIT DATA DELETE (f%dynamics%w, f%dynamics%w_e, f%dynamics%uv)

Expand Down
11 changes: 8 additions & 3 deletions src/oce_ale_tracer.F90
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,21 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh)
! do tracer AB (Adams-Bashforth) interpolation; it is needed only for the
! advective part
if (flag_debug .and. mype==0) print *, achar(27)//'[37m'//' --> call init_tracers_AB'//achar(27)//'[0m'
!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) &
!$ACC DEVICE(mesh%nlevels, mesh%ulevels, mesh%nlevels_nod2D, mesh%ulevels_nod2D, mesh%gradient_sca)
call init_tracers_AB(tr_num, tracers, partit, mesh)

! advect tracers
if (flag_debug .and. mype==0) print *, achar(27)//'[37m'//' --> call adv_tracers_ale'//achar(27)//'[0m'


!here update only those initialized in the init_tracers. (values, valuesAB, edge_up_dn_grad, ...)
!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) &
!$ACC DEVICE(tracers%work%edge_up_dn_grad) !!&
! it will update del_ttf with contributions from horizontal and vertical advection parts (del_ttf_advhoriz and del_ttf_advvert)
!!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) &
!!$ACC DEVICE(tracers%work%edge_up_dn_grad) !!&
! The updates for values and valuesAB were removed here: they are already updated before
! init_tracers_AB and continue to exist on the GPU. del_ttf, del_ttf_advvert and
! del_ttf_advhoriz are initialized directly on the GPU in init_tracers_AB.
!!! it will update del_ttf with contributions from horizontal and vertical advection parts (del_ttf_advhoriz and del_ttf_advvert)
!$ACC wait(1)
call do_oce_adv_tra(dt, UV, Wvel, Wvel_i, Wvel_e, tr_num, dynamics, tracers, partit, mesh)

Expand Down
60 changes: 59 additions & 1 deletion src/oce_tracer_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh)
type(t_tracer), intent(inout), target :: tracers
integer :: n,nz

!$ACC parallel loop collapse(2) default(present) async(1)

#ifdef ENABLE_OPENACC
!$ACC parallel loop collapse(2)
#endif
do n=1, partit%myDim_nod2D+partit%eDim_nod2D
do nz=1, mesh%nl-1
! del_ttf will contain all advection / diffusion contributions for this tracer. Set it to 0 at the beginning!
Expand All @@ -34,13 +37,24 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh)
tracers%work%del_ttf_advvert (nz, n) = 0.0_WP
end do
end do
#ifdef ENABLE_OPENACC
!$ACC end parallel loop
#endif

#ifdef ENABLE_OPENACC
!$ACC PARALLEL LOOP
#else
!$OMP PARALLEL DO
#endif
do n=1, partit%myDim_nod2D+partit%eDim_nod2D
! AB interpolation
tracers%data(tr_num)%valuesAB(:, n) =-(0.5_WP+epsilon)*tracers%data(tr_num)%valuesAB(:, n)+(1.5_WP+epsilon)*tracers%data(tr_num)%values(:, n)
end do
#ifdef ENABLE_OPENACC
!$ACC END PARALLEL LOOP
#else
!$OMP END PARALLEL DO
#endif

if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_elements'//achar(27)//'[0m'
call tracer_gradient_elements(tracers%data(tr_num)%valuesAB, partit, mesh)
Expand Down Expand Up @@ -85,7 +99,15 @@ SUBROUTINE tracer_gradient_elements(ttf, partit, mesh)
#include "associate_mesh_def.h"
#include "associate_part_ass.h"
#include "associate_mesh_ass.h"

!we have to remove this update in the final version, when we write these variables
!directly on gpu. For now, we update from host.
#ifdef ENABLE_OPENACC
!$ACC UPDATE DEVICE(tr_xy)
!$ACC PARALLEL LOOP
#else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(elem, elnodes, nz, nzmin, nzmax)
#endif
DO elem=1, myDim_elem2D
elnodes=elem2D_nodes(:,elem)
nzmin = ulevels(elem)
Expand All @@ -96,7 +118,12 @@ SUBROUTINE tracer_gradient_elements(ttf, partit, mesh)
tr_xy(2,nz, elem)=sum(gradient_sca(4:6,elem)*ttf(nz,elnodes))
END DO
END DO
#ifdef ENABLE_OPENACC
!$ACC END PARALLEL LOOP
#else
!$OMP END PARALLEL DO
#endif

END SUBROUTINE tracer_gradient_elements
!
!
Expand All @@ -121,7 +148,13 @@ SUBROUTINE tracer_gradient_z(ttf, partit, mesh)
#include "associate_mesh_def.h"
#include "associate_part_ass.h"
#include "associate_mesh_ass.h"

#ifdef ENABLE_OPENACC
!$ACC UPDATE DEVICE(tr_z)
!$ACC parallel loop
#else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nz, nzmin, nzmax, dz)
#endif
DO n=1, myDim_nod2D+eDim_nod2D
!!PS nlev=nlevels_nod2D(n)
nzmax=nlevels_nod2D(n)
Expand All @@ -136,7 +169,12 @@ SUBROUTINE tracer_gradient_z(ttf, partit, mesh)
tr_z(nzmin, n)=0.0_WP
tr_z(nzmax, n)=0.0_WP
END DO
#ifdef ENABLE_OPENACC
!$ACC END PARALLEL LOOP
#else
!$OMP END PARALLEL DO
#endif

END SUBROUTINE tracer_gradient_z
!
!
Expand All @@ -161,10 +199,16 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh)
#include "associate_mesh_def.h"
#include "associate_part_ass.h"
#include "associate_mesh_ass.h"

trarr=>tracers%data(tr_num)%values(:,:)

if ((clim_relax>1.0e-8_WP).and.(tracers%data(tr_num)%ID==1)) then
#ifdef ENABLE_OPENACC
!$ACC UPDATE DEVICE(relax2clim, Tclim, Sclim)
!$ACC parallel loop
#else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nzmin, nzmax)
#endif
DO n=1, myDim_nod2D
nzmin = ulevels_nod2D(n)
nzmax = nlevels_nod2D(n)
Expand All @@ -173,17 +217,31 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh)
trarr(nzmin:nzmax-1,n)=trarr(nzmin:nzmax-1,n)+&
relax2clim(n)*dt*(Tclim(nzmin:nzmax-1,n)-trarr(nzmin:nzmax-1,n))
END DO
#ifdef ENABLE_OPENACC
!$ACC end parallel loop
#else
!$OMP END PARALLEL DO
#endif

END if
if ((clim_relax>1.0e-8_WP).and.(tracers%data(tr_num)%ID==2)) then
#ifdef ENABLE_OPENACC
!$ACC parallel loop
#else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nzmin, nzmax)
#endif
DO n=1, myDim_nod2D
nzmin = ulevels_nod2D(n)
nzmax = nlevels_nod2D(n)
trarr(nzmin:nzmax-1,n)=trarr(nzmin:nzmax-1,n)+&
relax2clim(n)*dt*(Sclim(nzmin:nzmax-1,n)-trarr(nzmin:nzmax-1,n))
END DO
#ifdef ENABLE_OPENACC
!$ACC end parallel loop
#else
!$OMP END PARALLEL DO
#endif

END IF
END SUBROUTINE relax_to_clim
END MODULE o_tracers
72 changes: 72 additions & 0 deletions work/job_levante_running_tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# Slurm batch script: runs the FESOM GPU test build on the Levante "gpu"
# partition under the Nsight Systems profiler (nsys).
# Model output (stdout+stderr of srun) goes to fesom2.out; the script's own
# output goes to slurm.out / slurm.err as configured below.
#SBATCH --job-name=fesom_gpu_test
#SBATCH --partition=gpu
#SBATCH --nodes=2 # Specify number of nodes
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=4
#SBATCH --gpus=2 # 4 # 8 for 2 nodes
##SBATCH --gpus-per-task=1 # specific case when the number of tasks equals the number of GPUs
#SBATCH --exclusive
#SBATCH --mem=0 # Request all memory available on all nodes
#SBATCH --time=00:20:00 # Set a limit on the total run time
#SBATCH -o slurm.out
#SBATCH -e slurm.err
#SBATCH --account=ab0995

# Abort on the first failing command (the srun call below is handled explicitly).
set -e

source /sw/etc/profile.levante
#source ../env/levante.dkrz.de/shell
#read -r USED_SHELL < ../bin/current_shell_path
USED_SHELL="/work/ab0995/a270232/tracer_porting_fesom/env/levante.dkrz.de/shell.nvhpc"
source "$USED_SHELL"

#source /work/ab0995/a270232/refactoring/fesom2/env/levante.dkrz.de/shell.nvhpc
echo "using environment from" "$USED_SHELL"

ulimit -s 204800 # https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html

# Determine JOBID (the part of SLURM_JOB_ID before the first "."); this must
# happen before it is printed, otherwise the message shows an empty id.
JOBID="${SLURM_JOB_ID%%.*}"

echo "Submitted job: $JOBID"
squeue -u "$USER"

# Check GPUs available for the job
nvidia-smi

# Replace any stale executable link with a link to the current build.
rm -f fesom.x
#ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x
ln -s /work/ab0995/a270232/tracer_porting_fesom/build/src/fesom fesom.x
#ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x

# Keep in sync with --cpus-per-task above.
export OMP_NUM_THREADS=4
#cp -n ../config/namelist.config .
#cp -n ../config/namelist.forcing .
#cp -n ../config/namelist.oce .
#cp -n ../config/namelist.ice .
#cp -n ../config/namelist.icepack .

## levante specific gpu env used for ICON otherwise segfault
export OMPI_MCA_pml=ucx # Use UCX to support InfiniBand devices and CUDA

export OMPI_MCA_btl="self" # Only use self transport to reduce overhead

export UCX_RNDV_SCHEME=put_zcopy # Preferred communication scheme with Rendezvous protocol
export UCX_RNDV_THRESH=16384 # Message size (bytes) from which the Rendezvous protocol is used

export UCX_IB_GPU_DIRECT_RDMA=yes # Allow remote direct memory access from/to GPU

export UCX_TLS=cma,rc,mm,cuda_ipc,cuda_copy,gdr_copy # Include cuda and gdr based transport layers for communication

export UCX_MEMTYPE_CACHE=n

date
# Capture the exit status instead of letting `set -e` abort here, so that the
# end timestamp below is always printed; the status is propagated at the end.
EXITSTATUS=0
#srun -l fesom.x > fesom2.out 2>&1 #> "fesom2.0.out" 2>&1
srun -l nsys profile fesom.x > fesom2.out 2>&1 || EXITSTATUS=$? #> "fesom2.0.out" 2>&1
date

# Disabled automatic resubmission logic (kept for reference):
# qstat -f $PBS_JOBID
#export EXITSTATUS=$?
#if [ ${EXITSTATUS} -eq 0 ] || [ ${EXITSTATUS} -eq 127 ] ; then
#sbatch job_mistral
#fi

exit "$EXITSTATUS"

0 comments on commit 097d7f4

Please sign in to comment.